In [1]:
# Dependencies

import pandas as pd
import numpy as np
import glob
import os
import timeit                                # To check performance
from datetime import datetime, date, time, timedelta
import matplotlib.pyplot as plt

In [2]:
# Import CSV: load the ride export into a DataFrame and report how long the read took.
# NOTE(review): the file name suggests a 1-in-100 sample of the full ride data — confirm.

tic = timeit.default_timer()                            # Start of the timed region

ecobici_rides_df = pd.read_csv("dataset_export_files/ecobici_ride_data-oneInHundredv2.csv", index_col=None, header=0)

toc = timeit.default_timer()                            # End of the timed region
print(f'Time (in seconds) to import CSV file: {round(toc - tic, 2)}')

Time (in seconds) to import CSV file: 0.53


In [3]:
# Work on a copy so the frame read from the CSV stays untouched, then list its columns and dtypes.
ecobici_rides_df_test = ecobici_rides_df.copy()
ecobici_rides_df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 707039 entries, 0 to 707038
Data columns (total 6 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   Genero_Usuario         707039 non-null  object 
 1   Edad_Usuario           707039 non-null  int64  
 2   Ciclo_Estacion_Retiro  707039 non-null  int64  
 3   Ciclo_Estacion_Arribo  707039 non-null  int64  
 4   Usage_Timestamp        707039 non-null  object 
 5   Duration(Min)          707039 non-null  float64
dtypes: float64(1), int64(3), object(2)
memory usage: 32.4+ MB


In [4]:
# Parse the timestamp strings into datetimes so the `.dt` accessor can drive the per-year filters below.
ecobici_rides_df_test['Usage_Timestamp'] = pd.to_datetime(ecobici_rides_df_test['Usage_Timestamp'])


In [5]:
# Preview the rides frame after the timestamp conversion.
ecobici_rides_df_test

Unnamed: 0,Genero_Usuario,Edad_Usuario,Ciclo_Estacion_Retiro,Ciclo_Estacion_Arribo,Usage_Timestamp,Duration(Min)
0,M,28,85,85,2010-02-16 12:45:37.427,3.1
1,M,35,78,63,2010-02-19 13:52:30.700,12.1
2,M,34,85,23,2010-02-21 14:30:46.390,26.8
3,M,46,20,17,2010-02-22 16:17:10.470,6.4
4,F,28,74,46,2010-02-23 18:51:57.057,14.9
...,...,...,...,...,...,...
707034,M,27,316,54,2021-12-05 09:29:40.000,13.3
707035,M,49,158,83,2021-12-05 09:24:30.000,10.8
707036,M,33,417,313,2021-12-05 08:55:46.000,12.1
707037,F,23,467,260,2021-12-05 08:54:12.000,33.7


## Routes

### This next section imports and organizes the list of stations to be used in merges below

In [14]:
# Import the list of stations, indexed by station ID. It is merged into the route tables below.

stations = pd.read_csv("../Estaciones.csv", index_col='ID', header=0)
stations.head()

Unnamed: 0_level_0,Name,Lat,Lon,districtName
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
124,124 CLAUDIO BERNARD-DR. LICEAGA,19.422392,-99.150358,Ampliacion Granada
159,159 HUATABAMPO-EJE 1 PTE. AV. CUAUHTÉMOC,19.407517,-99.155373,Ampliacion Granada
241,E241 EJERCITO NAL-JUAN VAZQUEZ DE LA MELLA,19.43862,-99.20758,Ampliacion Granada
243,243 MIGUEL DE CERVANTES SAAVEDRA-LAGO FILT,19.440839,-99.196712,Ampliacion Granada
350,350 JOSE CLEMENTE OROZCO-CORREGGIO,19.384062,-99.181482,Ampliacion Granada


In [15]:
# Keep the loaded station list intact by working on a copy.
stations_test = stations.copy()
stations_test.head()

Unnamed: 0_level_0,Name,Lat,Lon,districtName
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
124,124 CLAUDIO BERNARD-DR. LICEAGA,19.422392,-99.150358,Ampliacion Granada
159,159 HUATABAMPO-EJE 1 PTE. AV. CUAUHTÉMOC,19.407517,-99.155373,Ampliacion Granada
241,E241 EJERCITO NAL-JUAN VAZQUEZ DE LA MELLA,19.43862,-99.20758,Ampliacion Granada
243,243 MIGUEL DE CERVANTES SAAVEDRA-LAGO FILT,19.440839,-99.196712,Ampliacion Granada
350,350 JOSE CLEMENTE OROZCO-CORREGGIO,19.384062,-99.181482,Ampliacion Granada


In [21]:
# Order the stations by ID ('ID' is the index label set when the CSV was read).
stations_sorted = stations_test.sort_values(by='ID')
stations_sorted.head(5)

Unnamed: 0_level_0,Name,Lat,Lon,districtName
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1 RIO SENA-RIO BALSAS,19.433571,-99.167809,Cuauhtemoc
2,2 RIO GUADALQUIVIR-RIO BALSAS,19.431386,-99.171695,Cuauhtemoc
3,3 REFORMA-INSURGENTES,19.431655,-99.158668,Juarez
4,4 RIO NILO-RIO PANUCO,19.428309,-99.171713,Cuauhtemoc
5,5 RIO PANUCO-RIO TIBER,19.42972,-99.169363,Cuauhtemoc


### Top routes by year

This is how I created a JSON file for each year. I repeated the same steps manually for each year, though ideally this would be automated by looping over the years.

In [19]:
def uniqueRoutes(yearRides):
    """Count the rides taken on each (start station, end station) route.

    Parameters
    ----------
    yearRides : pandas.DataFrame
        Rides to tally. Must contain the columns 'Ciclo_Estacion_Retiro'
        (station the bike was taken from) and 'Ciclo_Estacion_Arribo'
        (station the bike was returned to).

    Returns
    -------
    dict
        Maps each ``(start_station, end_station)`` tuple to its number of
        occurrences. Keys are inserted in order of first appearance in
        ``yearRides``, so a stable sort by count keeps ties in that order.
        An empty frame yields an empty dict.

    Raises
    ------
    KeyError
        If either station column is missing, instead of silently returning
        an empty dict.
    """
    # Local import keeps this helper self-contained within the notebook.
    from collections import Counter

    # Pull each column out once; zipping plain lists avoids the per-row
    # Series construction that makes DataFrame.iterrows() slow.
    starts = yearRides['Ciclo_Estacion_Retiro'].tolist()
    ends = yearRides['Ciclo_Estacion_Arribo'].tolist()

    # Counter is a dict subclass that remembers insertion order; convert back
    # to a plain dict so callers get the same type as before.
    return dict(Counter(zip(starts, ends)))

In [24]:
# Build the 2016 route table: count rides per (start, end) station pair, attach the
#   station details for both ends, and export the result to JSON for plotting in Leaflet.

# Keep only the rides whose timestamp falls in 2016.
routes_2016 = ecobici_rides_df_test.loc[ecobici_rides_df_test.Usage_Timestamp.dt.year == 2016]

# Count rides per unique route with the helper defined above.
unique_routes_2016 = uniqueRoutes(routes_2016)

# Order the routes from most to least used. sorted() is stable, so routes with the
#   same count keep the order in which the helper returned them.
sorted_unique_routes_2016 = dict(sorted(unique_routes_2016.items(), key=lambda item: item[1], reverse=True))

# Flatten {(start, end): count} into [count, start, end] rows for the DataFrame constructor.
lst = [[total, start, end] for (start, end), total in sorted_unique_routes_2016.items()]

# NOTE(review): the other year cells in this notebook name these columns
#   'Start_Station' / 'End_Station' — confirm which schema the Leaflet code expects.
sorted_unique_routes_2016_df = pd.DataFrame(lst, columns=['Total_Rides',
                                                          'Start_Station_ID',
                                                          'End_Station_ID'])

# Left-merge the station details for the start station first...
sorted_unique_routes_2016_df = pd.merge(sorted_unique_routes_2016_df, stations_sorted,
                                        left_on=["Start_Station_ID"],
                                        right_on=["ID"],
                                        how='left')

# ...then for the end station. The station columns now collide, so pandas suffixes the
#   ones already present (start station) with _x and the new ones (end station) with _y.
sorted_unique_routes_2016_df = pd.merge(sorted_unique_routes_2016_df, stations_sorted,
                                        left_on=["End_Station_ID"],
                                        right_on=["ID"],
                                        how='left')

# Drop the redundant ID columns. Whether the merges produce them depends on 'ID' being a
#   column or the index of stations_sorted, so tolerate their absence instead of raising.
sorted_unique_routes_2016_df = sorted_unique_routes_2016_df.drop(columns=['ID_x', 'ID_y'], errors='ignore')

# Replace the merge suffixes with descriptive start/end column names.
sorted_unique_routes_2016_df = sorted_unique_routes_2016_df.rename(columns={
    "Name_x": "Start_Station_Name",
    'Lat_x': 'Start_Station_Lat',
    'Lon_x': 'Start_Station_Lon',
    'districtName_x': 'Start_Colonia',
    "Name_y": "End_Station_Name",
    'Lat_y': 'End_Station_Lat',
    'Lon_y': 'End_Station_Lon',
    'districtName_y': 'End_Colonia',
})

# Send it to JSON. Create the output folder first so a fresh checkout does not fail here.
os.makedirs("Routes", exist_ok=True)
sorted_unique_routes_2016_df.to_json("Routes/sorted_unique_routes_2016.json", orient='index')
sorted_unique_routes_2016_df.head()

Unnamed: 0,Total_Rides,Start_Station_ID,End_Station_ID,Start_Station_Name,Start_Station_Lat,Start_Station_Lon,Start_Colonia,End_Station_Name,End_Station_Lat,End_Station_Lon,End_Colonia
0,72,211,217,211 NEWTON-HORACIO,19.433607,-99.188528,Polanco,217 EULER-AV. HORACIO,19.432888,-99.183605,Polanco
1,58,174,183,174 JOAQUÍN GARCIA-IGNACIO MANUEL ALTAMIRANO,19.44111,-99.16164,San Rafael,183 GABINO BARRERA-GUILLERMO PRIETO,19.43767,-99.16402,San Rafael
2,51,174,257,174 JOAQUÍN GARCIA-IGNACIO MANUEL ALTAMIRANO,19.44111,-99.16164,San Rafael,E257 MANUEL MARIA CONTRERAS-VILLALONGIN,19.4338,-99.16622,Cuauhtemoc
3,48,183,174,183 GABINO BARRERA-GUILLERMO PRIETO,19.43767,-99.16402,San Rafael,174 JOAQUÍN GARCIA-IGNACIO MANUEL ALTAMIRANO,19.44111,-99.16164,San Rafael
4,47,217,211,217 EULER-AV. HORACIO,19.432888,-99.183605,Polanco,211 NEWTON-HORACIO,19.433607,-99.188528,Polanco


In [37]:
# Attach the end-station details to the 2010 route table (left merge on the end-station ID).
# NOTE(review): `sorted_unique_routes_2010_df` is not created in any visible cell — confirm the
#   cell that builds it (and performs the start-station merge) still exists in the notebook.
# NOTE(review): the frame is reassigned from itself, so re-running this cell merges the
#   station columns in again.
sorted_unique_routes_2010_df = pd.merge(sorted_unique_routes_2010_df, stations_sorted, left_on=["End_Station"],
                          right_on=["ID"],
                          how = 'left')

sorted_unique_routes_2010_df.head()


Unnamed: 0,Total_Rides,Start_Station,End_Station,ID_x,Name_x,Lat_x,Lon_x,districtName_x,ID_y,Name_y,Lat_y,Lon_y,districtName_y
0,25,61,61,61,61 AVENIDA MEXICO-SONORA,19.413742,-99.169525,Hipodromo,61,61 AVENIDA MEXICO-SONORA,19.413742,-99.169525,Hipodromo
1,24,85,85,85,85 ROSAS MORENO-SULLIVAN,19.43425,-99.162508,San Rafael,85,85 ROSAS MORENO-SULLIVAN,19.43425,-99.162508,San Rafael
2,22,70,70,70,70 PARQUE MEXICO-MICHOACAN,19.411272,-99.169718,Hipodromo,70,70 PARQUE MEXICO-MICHOACAN,19.411272,-99.169718,Hipodromo
3,18,13,20,13,13 REFORMA-RIO MISSISSIPPI,19.42502,-99.17231,Cuauhtemoc,20,20 REFORMA-RIO TAMESIS,19.430964,-99.160206,Cuauhtemoc
4,17,20,17,20,20 REFORMA-RIO TAMESIS,19.430964,-99.160206,Cuauhtemoc,17,17 REFORMA-RIO TIBER,19.427501,-99.167125,Cuauhtemoc


In [39]:
# Remove the redundant station-ID columns left behind by the two merges.
# errors='ignore' keeps the cell re-runnable: a second run (or a merge that produced no
#   ID columns) no longer raises KeyError.
sorted_unique_routes_2010_df = sorted_unique_routes_2010_df.drop(columns=['ID_x', 'ID_y'], errors='ignore')
sorted_unique_routes_2010_df

Unnamed: 0,Total_Rides,Start_Station,End_Station,Name_x,Lat_x,Lon_x,districtName_x,Name_y,Lat_y,Lon_y,districtName_y
0,25,61,61,61 AVENIDA MEXICO-SONORA,19.413742,-99.169525,Hipodromo,61 AVENIDA MEXICO-SONORA,19.413742,-99.169525,Hipodromo
1,24,85,85,85 ROSAS MORENO-SULLIVAN,19.434250,-99.162508,San Rafael,85 ROSAS MORENO-SULLIVAN,19.434250,-99.162508,San Rafael
2,22,70,70,70 PARQUE MEXICO-MICHOACAN,19.411272,-99.169718,Hipodromo,70 PARQUE MEXICO-MICHOACAN,19.411272,-99.169718,Hipodromo
3,18,13,20,13 REFORMA-RIO MISSISSIPPI,19.425020,-99.172310,Cuauhtemoc,20 REFORMA-RIO TAMESIS,19.430964,-99.160206,Cuauhtemoc
4,17,20,17,20 REFORMA-RIO TAMESIS,19.430964,-99.160206,Cuauhtemoc,17 REFORMA-RIO TIBER,19.427501,-99.167125,Cuauhtemoc
...,...,...,...,...,...,...,...,...,...,...,...
3717,1,28,41,28 TOLEDO-TOKIO,19.423542,-99.172494,Juarez,41 REFORMA-AV. DE LA REPUBLICA,19.435637,-99.150137,Tabacalera
3718,1,53,49,53 FERNANDO MONTES DE OCA-TULA,19.414773,-99.178433,Condesa,49 MAZATLAN-JUAN DE LA BARRERA,19.416712,-99.175704,Condesa
3719,1,46,51,46 DURANGO-SALAMANCA,19.419214,-99.169732,Roma Norte,51 COLIMA-OAXACA,19.417995,-99.168248,Roma Norte
3720,1,51,28,51 COLIMA-OAXACA,19.417995,-99.168248,Roma Norte,28 TOLEDO-TOKIO,19.423542,-99.172494,Juarez


In [40]:
# Swap the merge suffixes for descriptive names: _x columns describe the start
#   station, _y columns describe the end station.
sorted_unique_routes_2010_df = sorted_unique_routes_2010_df.rename(
    {
        "Name_x": "Start_Station_Name",
        'Lat_x': 'Start_Station_Lat',
        'Lon_x': 'Start_Station_Lon',
        'districtName_x': 'Start_Colonia',
        "Name_y": "End_Station_Name",
        'Lat_y': 'End_Station_Lat',
        'Lon_y': 'End_Station_Lon',
        'districtName_y': 'End_Colonia',
    },
    axis='columns',
)
sorted_unique_routes_2010_df.head()

Unnamed: 0,Total_Rides,Start_Station,End_Station,Start_Station_Name,Start_Station_Lat,Start_Station_Lon,Start_Colonia,End_Station_Name,End_Station_Lat,End_Station_Lon,End_Colonia
0,25,61,61,61 AVENIDA MEXICO-SONORA,19.413742,-99.169525,Hipodromo,61 AVENIDA MEXICO-SONORA,19.413742,-99.169525,Hipodromo
1,24,85,85,85 ROSAS MORENO-SULLIVAN,19.43425,-99.162508,San Rafael,85 ROSAS MORENO-SULLIVAN,19.43425,-99.162508,San Rafael
2,22,70,70,70 PARQUE MEXICO-MICHOACAN,19.411272,-99.169718,Hipodromo,70 PARQUE MEXICO-MICHOACAN,19.411272,-99.169718,Hipodromo
3,18,13,20,13 REFORMA-RIO MISSISSIPPI,19.42502,-99.17231,Cuauhtemoc,20 REFORMA-RIO TAMESIS,19.430964,-99.160206,Cuauhtemoc
4,17,20,17,20 REFORMA-RIO TAMESIS,19.430964,-99.160206,Cuauhtemoc,17 REFORMA-RIO TIBER,19.427501,-99.167125,Cuauhtemoc


In [41]:
# Export the 2010 route table to JSON (orient='index': one entry per row, keyed by the row index).
sorted_unique_routes_2010_df.to_json("Routes/sorted_unique_routes_2010.json", orient='index') 

In [125]:
# Write the routes selected for plotting to CSV, without the index column.
# NOTE(review): `top_routes_df3` is not defined in any visible cell and this cell's execution
#   count is out of sequence — confirm it is still needed, or restore the cell that builds it.
top_routes_df3.to_csv("routes_to_plot.csv", index=False) 

In [42]:
# Build the 2018 route table: count rides per (start, end) station pair and attach the
#   station details for both ends.

# Keep only the rides whose timestamp falls in 2018.
routes_2018 = ecobici_rides_df_test.loc[ecobici_rides_df_test.Usage_Timestamp.dt.year == 2018]

# Count rides per unique route, then order the routes from most to least used.
unique_routes_2018 = uniqueRoutes(routes_2018)
sorted_unique_routes_2018 = dict(sorted(unique_routes_2018.items(), key=lambda item: item[1], reverse=True))

# Flatten {(start, end): count} into [count, start, end] rows for the DataFrame constructor.
lst = [[total, start, end] for (start, end), total in sorted_unique_routes_2018.items()]

sorted_unique_routes_2018_df = pd.DataFrame(lst, columns=['Total_Rides',
                                                          'Start_Station',
                                                          'End_Station'])

# Left-merge the station details for the start station, then for the end station. On the
#   second merge pandas suffixes the colliding columns: _x = start station, _y = end station.
sorted_unique_routes_2018_df = pd.merge(sorted_unique_routes_2018_df, stations_sorted,
                                        left_on=["Start_Station"],
                                        right_on=["ID"],
                                        how='left')

sorted_unique_routes_2018_df = pd.merge(sorted_unique_routes_2018_df, stations_sorted,
                                        left_on=["End_Station"],
                                        right_on=["ID"],
                                        how='left')

# Drop the redundant ID columns. Whether the merges produce them depends on 'ID' being a
#   column or the index of stations_sorted, so tolerate their absence instead of raising.
sorted_unique_routes_2018_df = sorted_unique_routes_2018_df.drop(columns=['ID_x', 'ID_y'], errors='ignore')

# Replace the merge suffixes with descriptive start/end column names.
sorted_unique_routes_2018_df = sorted_unique_routes_2018_df.rename(columns={
    "Name_x": "Start_Station_Name",
    'Lat_x': 'Start_Station_Lat',
    'Lon_x': 'Start_Station_Lon',
    'districtName_x': 'Start_Colonia',
    "Name_y": "End_Station_Name",
    'Lat_y': 'End_Station_Lat',
    'Lon_y': 'End_Station_Lon',
    'districtName_y': 'End_Colonia',
})
sorted_unique_routes_2018_df.head()

Unnamed: 0,Total_Rides,Start_Station,End_Station,Start_Station_Name,Start_Station_Lat,Start_Station_Lon,Start_Colonia,End_Station_Name,End_Station_Lat,End_Station_Lon,End_Colonia
0,53,211,217,211 NEWTON-HORACIO,19.433607,-99.188528,Polanco,217 EULER-AV. HORACIO,19.432888,-99.183605,Polanco
1,51,1,18,1 RIO SENA-RIO BALSAS,19.433571,-99.167809,Cuauhtemoc,18 REFORMA-RIO RHIN,19.42888,-99.164176,Cuauhtemoc
2,48,18,1,18 REFORMA-RIO RHIN,19.42888,-99.164176,Cuauhtemoc,1 RIO SENA-RIO BALSAS,19.433571,-99.167809,Cuauhtemoc
3,47,107,113,107 TOLSA-BALDERAS,19.427329,-99.149971,Centro,113 GENERAL PRIM-VERSALLES,19.430369,-99.154419,Juarez
4,42,111,174,111 GUILLERMO PRIETO-JOAQUÍN VELÁZQUEZ DE LEÓN,19.438237,-99.165946,San Rafael,174 JOAQUÍN GARCIA-IGNACIO MANUEL ALTAMIRANO,19.44111,-99.16164,San Rafael


In [43]:
# Display the 2018 route table.
sorted_unique_routes_2018_df

Unnamed: 0,Total_Rides,Start_Station,End_Station,Start_Station_Name,Start_Station_Lat,Start_Station_Lon,Start_Colonia,End_Station_Name,End_Station_Lat,End_Station_Lon,End_Colonia
0,53,211,217,211 NEWTON-HORACIO,19.433607,-99.188528,Polanco,217 EULER-AV. HORACIO,19.432888,-99.183605,Polanco
1,51,1,18,1 RIO SENA-RIO BALSAS,19.433571,-99.167809,Cuauhtemoc,18 REFORMA-RIO RHIN,19.428880,-99.164176,Cuauhtemoc
2,48,18,1,18 REFORMA-RIO RHIN,19.428880,-99.164176,Cuauhtemoc,1 RIO SENA-RIO BALSAS,19.433571,-99.167809,Cuauhtemoc
3,47,107,113,107 TOLSA-BALDERAS,19.427329,-99.149971,Centro,113 GENERAL PRIM-VERSALLES,19.430369,-99.154419,Juarez
4,42,111,174,111 GUILLERMO PRIETO-JOAQUÍN VELÁZQUEZ DE LEÓN,19.438237,-99.165946,San Rafael,174 JOAQUÍN GARCIA-IGNACIO MANUEL ALTAMIRANO,19.441110,-99.161640,San Rafael
...,...,...,...,...,...,...,...,...,...,...,...
40694,1,311,288,311 CALLE 5-AVENIDA REVOLUCIÓN,19.393665,-99.185391,San Pedro de los Pinos,288 11 DE ABRIL-AVENIDA 2,19.396333,-99.183575,San Pedro de los Pinos
40695,1,201,475,201 LOPE DE VEGA-EJERCITO NACIONAL,19.437431,-99.187215,Polanco,475 LAGO MURITZ-LAGO ALBERTO,19.439249,-99.181065,Anahuac I
40696,1,461,194,461 PRESA DON MARTÍN-BOULEVARD MIGUEL DE CERVA...,19.440106,-99.210031,Irrigacion,194 CTO MAHATMA GANDHI-AV. PASEO DE LA REFORMA,19.424282,-99.180334,Bosque de Chapultepec
40697,1,355,373,355 CALIFORNIA-SACRAMENTO,19.382050,-99.173291,Insurgentes San Borja,373 PILARES-GABRIEL MANCERA,19.377968,-99.166663,Del Valle Centro


In [44]:
# Export the 2018 route table to JSON (orient='index': one entry per row, keyed by the row index).
sorted_unique_routes_2018_df.to_json("Routes/sorted_unique_routes_2018.json", orient='index') 

In [45]:
# Build the 2020 route table: count rides per (start, end) station pair, attach the
#   station details for both ends, and export the result to JSON.

# Keep only the rides whose timestamp falls in 2020.
routes_2020 = ecobici_rides_df_test.loc[ecobici_rides_df_test.Usage_Timestamp.dt.year == 2020]

# Count rides per unique route, then order the routes from most to least used.
unique_routes_2020 = uniqueRoutes(routes_2020)
sorted_unique_routes_2020 = dict(sorted(unique_routes_2020.items(), key=lambda item: item[1], reverse=True))

# Flatten {(start, end): count} into [count, start, end] rows for the DataFrame constructor.
lst = [[total, start, end] for (start, end), total in sorted_unique_routes_2020.items()]

sorted_unique_routes_2020_df = pd.DataFrame(lst, columns=['Total_Rides',
                                                          'Start_Station',
                                                          'End_Station'])

# Left-merge the station details for the start station, then for the end station. On the
#   second merge pandas suffixes the colliding columns: _x = start station, _y = end station.
sorted_unique_routes_2020_df = pd.merge(sorted_unique_routes_2020_df, stations_sorted,
                                        left_on=["Start_Station"],
                                        right_on=["ID"],
                                        how='left')

sorted_unique_routes_2020_df = pd.merge(sorted_unique_routes_2020_df, stations_sorted,
                                        left_on=["End_Station"],
                                        right_on=["ID"],
                                        how='left')

# Drop the redundant ID columns. Whether the merges produce them depends on 'ID' being a
#   column or the index of stations_sorted, so tolerate their absence instead of raising.
sorted_unique_routes_2020_df = sorted_unique_routes_2020_df.drop(columns=['ID_x', 'ID_y'], errors='ignore')

# Replace the merge suffixes with descriptive start/end column names.
sorted_unique_routes_2020_df = sorted_unique_routes_2020_df.rename(columns={
    "Name_x": "Start_Station_Name",
    'Lat_x': 'Start_Station_Lat',
    'Lon_x': 'Start_Station_Lon',
    'districtName_x': 'Start_Colonia',
    "Name_y": "End_Station_Name",
    'Lat_y': 'End_Station_Lat',
    'Lon_y': 'End_Station_Lon',
    'districtName_y': 'End_Colonia',
})

# Send it to JSON. Create the output folder first so a fresh checkout does not fail here.
os.makedirs("Routes", exist_ok=True)
sorted_unique_routes_2020_df.to_json("Routes/sorted_unique_routes_2020.json", orient='index')
sorted_unique_routes_2020_df.head()

Unnamed: 0,Total_Rides,Start_Station,End_Station,Start_Station_Name,Start_Station_Lat,Start_Station_Lon,Start_Colonia,End_Station_Name,End_Station_Lat,End_Station_Lon,End_Colonia
0,28,27,27,27 REFORMA-HAVRE,19.429115,-99.162614,Juarez,27 REFORMA-HAVRE,19.429115,-99.162614,Juarez
1,26,34,34,34 LIVERPOOL-GENOVA,19.424725,-99.163331,Juarez,34 LIVERPOOL-GENOVA,19.424725,-99.163331,Juarez
2,25,18,1,18 REFORMA-RIO RHIN,19.42888,-99.164176,Cuauhtemoc,1 RIO SENA-RIO BALSAS,19.433571,-99.167809,Cuauhtemoc
3,24,33,33,E33 LONDRES-FLORENCIA,19.424234,-99.166431,Juarez,E33 LONDRES-FLORENCIA,19.424234,-99.166431,Juarez
4,23,139,139,E139 TONALÁ-GUANJUATO,19.416128,-99.162097,Roma Norte,E139 TONALÁ-GUANJUATO,19.416128,-99.162097,Roma Norte


In [46]:
# Build the 2012 route table: count rides per (start, end) station pair, attach the
#   station details for both ends, and export the result to JSON.

# Keep only the rides whose timestamp falls in 2012.
routes_2012 = ecobici_rides_df_test.loc[ecobici_rides_df_test.Usage_Timestamp.dt.year == 2012]

# Count rides per unique route, then order the routes from most to least used.
unique_routes_2012 = uniqueRoutes(routes_2012)
sorted_unique_routes_2012 = dict(sorted(unique_routes_2012.items(), key=lambda item: item[1], reverse=True))

# Flatten {(start, end): count} into [count, start, end] rows for the DataFrame constructor.
lst = [[total, start, end] for (start, end), total in sorted_unique_routes_2012.items()]

sorted_unique_routes_2012_df = pd.DataFrame(lst, columns=['Total_Rides',
                                                          'Start_Station',
                                                          'End_Station'])

# Left-merge the station details for the start station, then for the end station. On the
#   second merge pandas suffixes the colliding columns: _x = start station, _y = end station.
sorted_unique_routes_2012_df = pd.merge(sorted_unique_routes_2012_df, stations_sorted,
                                        left_on=["Start_Station"],
                                        right_on=["ID"],
                                        how='left')

sorted_unique_routes_2012_df = pd.merge(sorted_unique_routes_2012_df, stations_sorted,
                                        left_on=["End_Station"],
                                        right_on=["ID"],
                                        how='left')

# Drop the redundant ID columns. Whether the merges produce them depends on 'ID' being a
#   column or the index of stations_sorted, so tolerate their absence instead of raising.
sorted_unique_routes_2012_df = sorted_unique_routes_2012_df.drop(columns=['ID_x', 'ID_y'], errors='ignore')

# Replace the merge suffixes with descriptive start/end column names.
sorted_unique_routes_2012_df = sorted_unique_routes_2012_df.rename(columns={
    "Name_x": "Start_Station_Name",
    'Lat_x': 'Start_Station_Lat',
    'Lon_x': 'Start_Station_Lon',
    'districtName_x': 'Start_Colonia',
    "Name_y": "End_Station_Name",
    'Lat_y': 'End_Station_Lat',
    'Lon_y': 'End_Station_Lon',
    'districtName_y': 'End_Colonia',
})

# Send it to JSON. Create the output folder first so a fresh checkout does not fail here.
os.makedirs("Routes", exist_ok=True)
sorted_unique_routes_2012_df.to_json("Routes/sorted_unique_routes_2012.json", orient='index')
sorted_unique_routes_2012_df.head()

Unnamed: 0,Total_Rides,Start_Station,End_Station,Start_Station_Name,Start_Station_Lat,Start_Station_Lon,Start_Colonia,End_Station_Name,End_Station_Lat,End_Station_Lon,End_Colonia
0,65,1,18,1 RIO SENA-RIO BALSAS,19.433571,-99.167809,Cuauhtemoc,18 REFORMA-RIO RHIN,19.42888,-99.164176,Cuauhtemoc
1,43,18,1,18 REFORMA-RIO RHIN,19.42888,-99.164176,Cuauhtemoc,1 RIO SENA-RIO BALSAS,19.433571,-99.167809,Cuauhtemoc
2,41,1,27,1 RIO SENA-RIO BALSAS,19.433571,-99.167809,Cuauhtemoc,27 REFORMA-HAVRE,19.429115,-99.162614,Juarez
3,41,85,85,85 ROSAS MORENO-SULLIVAN,19.43425,-99.162508,San Rafael,85 ROSAS MORENO-SULLIVAN,19.43425,-99.162508,San Rafael
4,41,41,3,41 REFORMA-AV. DE LA REPUBLICA,19.435637,-99.150137,Tabacalera,3 REFORMA-INSURGENTES,19.431655,-99.158668,Juarez


In [47]:
# Build the 2014 route table: count rides per (start, end) station pair, attach the
#   station details for both ends, and export the result to JSON.

# Keep only the rides whose timestamp falls in 2014.
routes_2014 = ecobici_rides_df_test.loc[ecobici_rides_df_test.Usage_Timestamp.dt.year == 2014]

# Count rides per unique route, then order the routes from most to least used.
unique_routes_2014 = uniqueRoutes(routes_2014)
sorted_unique_routes_2014 = dict(sorted(unique_routes_2014.items(), key=lambda item: item[1], reverse=True))

# Flatten {(start, end): count} into [count, start, end] rows for the DataFrame constructor.
lst = [[total, start, end] for (start, end), total in sorted_unique_routes_2014.items()]

sorted_unique_routes_2014_df = pd.DataFrame(lst, columns=['Total_Rides',
                                                          'Start_Station',
                                                          'End_Station'])

# Left-merge the station details for the start station, then for the end station. On the
#   second merge pandas suffixes the colliding columns: _x = start station, _y = end station.
sorted_unique_routes_2014_df = pd.merge(sorted_unique_routes_2014_df, stations_sorted,
                                        left_on=["Start_Station"],
                                        right_on=["ID"],
                                        how='left')

sorted_unique_routes_2014_df = pd.merge(sorted_unique_routes_2014_df, stations_sorted,
                                        left_on=["End_Station"],
                                        right_on=["ID"],
                                        how='left')

# Drop the redundant ID columns. Whether the merges produce them depends on 'ID' being a
#   column or the index of stations_sorted, so tolerate their absence instead of raising.
sorted_unique_routes_2014_df = sorted_unique_routes_2014_df.drop(columns=['ID_x', 'ID_y'], errors='ignore')

# Replace the merge suffixes with descriptive start/end column names.
sorted_unique_routes_2014_df = sorted_unique_routes_2014_df.rename(columns={
    "Name_x": "Start_Station_Name",
    'Lat_x': 'Start_Station_Lat',
    'Lon_x': 'Start_Station_Lon',
    'districtName_x': 'Start_Colonia',
    "Name_y": "End_Station_Name",
    'Lat_y': 'End_Station_Lat',
    'Lon_y': 'End_Station_Lon',
    'districtName_y': 'End_Colonia',
})

# Send it to JSON. Create the output folder first so a fresh checkout does not fail here.
os.makedirs("Routes", exist_ok=True)
sorted_unique_routes_2014_df.to_json("Routes/sorted_unique_routes_2014.json", orient='index')
sorted_unique_routes_2014_df.head()

Unnamed: 0,Total_Rides,Start_Station,End_Station,Start_Station_Name,Start_Station_Lat,Start_Station_Lon,Start_Colonia,End_Station_Name,End_Station_Lat,End_Station_Lon,End_Colonia
0,63,41,3,41 REFORMA-AV. DE LA REPUBLICA,19.435637,-99.150137,Tabacalera,3 REFORMA-INSURGENTES,19.431655,-99.158668,Juarez
1,63,183,174,183 GABINO BARRERA-GUILLERMO PRIETO,19.43767,-99.16402,San Rafael,174 JOAQUÍN GARCIA-IGNACIO MANUEL ALTAMIRANO,19.44111,-99.16164,San Rafael
2,60,211,217,211 NEWTON-HORACIO,19.433607,-99.188528,Polanco,217 EULER-AV. HORACIO,19.432888,-99.183605,Polanco
3,53,21,27,21 REFORMA-DUBLIN,19.42472,-99.17206,Juarez,27 REFORMA-HAVRE,19.429115,-99.162614,Juarez
4,49,36,53,36 PUEBLA-VERACRUZ,19.419725,-99.175778,Piedad Narvarte,53 FERNANDO MONTES DE OCA-TULA,19.414773,-99.178433,Condesa
