# In [None]:
import pandas as pd


# Path of the input CSV holding the raw ride records.
file_path = '/content/data.csv'

try:
    # on_bad_lines='warn' skips malformed rows and emits a warning for each
    # one. It replaces the error_bad_lines/warn_bad_lines pair, which was
    # deprecated in pandas 1.3 and removed in pandas 2.0.
    data = pd.read_csv(
        file_path,
        on_bad_lines='warn',
    )
except Exception as e:
    print(f"An error occurred: {e}")
    # Every later step needs `data`; re-raise so the real cause is reported
    # instead of a confusing NameError on the next statement.
    raise


# Columns that identify the two endpoints of a ride.
station_columns = ['start_station_name', 'end_station_name', 'start_station_id', 'end_station_id']

# Rides missing any endpoint name or id cannot be paired, so drop them,
# then store the endpoint columns uniformly as strings.
data = data.dropna(subset=station_columns)
data = data.astype({name: str for name in station_columns})

# Exclude electric bikes from the analysis.
is_electric = data['rideable_type'].eq('electric_bike')
data = data.loc[~is_electric]

# Ride length in seconds, from the parsed end and start timestamps.
ride_end = pd.to_datetime(data['ended_at'])
ride_start = pd.to_datetime(data['started_at'])
data['ride_duration'] = (ride_end - ride_start).dt.total_seconds()

# Discard rides that start and end at the same station.
same_station = data['start_station_name'].eq(data['end_station_name'])
data = data.loc[~same_station]


# Direction-agnostic key for each ride: the two station names as a sorted
# tuple, so A -> B and B -> A fall into the same group.
# Built by zipping the two columns rather than DataFrame.apply(axis=1):
# row-wise apply constructs a Series per row (very slow on large ride logs)
# and returns a DataFrame instead of a Series when no rows remain, which
# cannot be assigned to a single column.
data['station_pair'] = [
    tuple(sorted(pair))
    for pair in zip(data['start_station_name'], data['end_station_name'])
]


# Group rides by their direction-agnostic station pair.
grouped = data.groupby('station_pair')

# Keep only rides whose station pair occurs more than once. A vectorized
# group-size mask selects the same rows, in the same order, as
# grouped.filter(lambda x: len(x) > 1) without calling Python once per group.
pair_sizes = grouped['ride_duration'].transform('size')
repeated_rides = data[pair_sizes > 1]

# One row per station pair: the median duration plus representative station
# details. 'first' takes the first non-null value of each column within the
# group, so the reported start/end follow the direction of the earliest
# listed ride for that pair.
results = repeated_rides.groupby('station_pair').agg({
    'ride_duration': 'median',
    'start_station_name': 'first',
    'end_station_name': 'first',
    'start_station_id': 'first',
    'end_station_id': 'first',
    'start_lat': 'first',
    'start_lng': 'first',
    'end_lat': 'first',
    'end_lng': 'first'
}).rename(columns={'ride_duration': 'median_ride_duration (seconds)'})

# The tuple key is redundant with the station name columns; drop it and
# fall back to a plain positional index.
results = results.reset_index(drop=True)


# In [None]:

# File name used for both the export and the browser download.
output_csv = 'final_ride_durations.csv'

# Write the per-pair summary without the positional index column.
results.to_csv(output_csv, index=False)

# Colab helper that sends the written file to the user's browser; imported
# only after the CSV exists so the export does not depend on Colab.
from google.colab import files
files.download(output_csv)
