In [586]:
import pandas as pd
import math
from geopy import distance
import networkx as nx
import itertools

# WARNING: no loops! The code breaks if there are loops.
# NOTE(review): "loops" presumably means circular metro lines in the input data -- confirm.

# Directory holding the input files read by this notebook.
ROOT_DIR = '/Users/alexanderguo/Google Drive/MIT/junior/sem2/projects/metroworld/'
pd.options.display.max_rows = 999

# Tunable constants -- edit these to model a different train / timetable.
MAX_SPEED_KMH = 1200 # km/h
ACCELERATION = 3 # m/s^2. Constant acceleration only; deceleration must be the same (otherwise the math gets too complicated)
TIME_AT_STATION_M = 5 # minutes
TRANSFER_TIME_1_M = 30 # minutes; transfer cost used for the "prioritize speed" graph
TRANSFER_TIME_2_M = 240 # minutes; transfer cost used for the "prioritize less transfers" graph
EXPRESS_POP_CUTOFF = 2000000 # population needed for an express station



# Derived values in SI units.
MAX_SPEED = MAX_SPEED_KMH/3.6 # m/s
ACC_TIME = MAX_SPEED/ACCELERATION # s needed to reach MAX_SPEED from rest
ACC_DISTANCE = ACC_TIME * MAX_SPEED / 2 # m covered while accelerating: d = 1/2 a t^2 = 1/2 v t
TIME_AT_STATION = TIME_AT_STATION_M * 60 # s
TRANSFER_TIME_1 = TRANSFER_TIME_1_M * 60 # s
TRANSFER_TIME_2 = TRANSFER_TIME_2_M * 60 # s

# Read the raw metro description, one stripped text line per entry, with all colons removed.
with open(ROOT_DIR + 'metro_data.txt', 'r') as f:
    lines_raw = [a.strip().replace(':', '') for a in f.readlines()]
    


In [587]:
line_dict = {}
station_set = set()
next_is_stations_mainline = False
next_is_stations_branch = False
current_line_name = ''
current_branch_name = ''

# Walk the raw text as a small state machine: a "<NAME> ... LINE" header is
# followed by its comma-separated station list, and a "BRANCH <dir> ... <station>"
# header is followed by the branch's station list. Fills line_dict and station_set.
for text_line in lines_raw:
    tokens = text_line.split(' ')
    if tokens[-1] == 'LINE':
        current_line_name = tokens[0]
        line_dict[current_line_name] = None
        next_is_stations_mainline = True
    elif next_is_stations_mainline:
        stops_on_line = text_line.split(',')
        line_dict[current_line_name] = stops_on_line
        next_is_stations_mainline = False
        station_set.update(stops_on_line)
    elif tokens[0] == 'BRANCH':
        # branch key format: "<split station>_<main line>_<direction>"
        current_branch_name = ' '.join(tokens[3:]) + '_' + current_line_name + '_' + tokens[1]
        line_dict[current_branch_name] = None
        next_is_stations_branch = True
    elif next_is_stations_branch:
        stops_on_line = text_line.split(',')
        line_dict[current_branch_name] = stops_on_line
        next_is_stations_branch = False
        station_set.update(stops_on_line)

# Number of listed stations per main line, with branch stations counted
# towards the main line they belong to.
line_totals = {}
for line in line_dict:
    if '_' in line: # branch line
        actual_line = line.split('_')[1]
        line_totals[actual_line] = line_totals.get(actual_line, 0) + len(line_dict[line])
    else: # main line
        line_totals[line] = line_totals.get(line, 0) + len(line_dict[line])

# line_totals
station_set


{'Abidjan',
 'Abu Dhabi',
 'Abuja',
 'Accra',
 'Addis Ababa',
 'Ahmadabad',
 'Alexandria',
 'Algiers',
 'Almaty',
 'Amman',
 'Amsterdam',
 'Anchorage',
 'Ankara',
 'Antananarivo',
 'Ashgabat',
 'Asmara',
 'Asuncion',
 'Athens',
 'Atlanta',
 'Auckland',
 'Baghdad',
 'Baku',
 'Bali',
 'Bamako',
 'Bandar Seri Begawan',
 'Bangalore',
 'Bangkok',
 'Bangui',
 'Banjul',
 'Barcelona',
 'Basse-Terre',
 'Bata',
 'Beijing',
 'Beirut',
 'Belgrade',
 'Belize City',
 'Belo Horizonte',
 'Berlin',
 'Bishkek',
 'Bissau',
 'Bogota',
 'Boston',
 'Brasilia',
 'Bratislava',
 'Brazzaville',
 'Bridgetown',
 'Brisbane',
 'Brussels',
 'Bucharest',
 'Budapest',
 'Buenos Aires',
 'Bujumbura',
 'Busan',
 'Cairo',
 'Calgary',
 'Cape Town',
 'Caracas',
 'Casablanca',
 'Cayenne',
 'Chengdu',
 'Chennai',
 'Chicago',
 'Chisinau',
 'Chittagong',
 'Chongqing',
 'Christchurch',
 'Colombo',
 'Conakry',
 'Copenhagen',
 'Cotonou',
 'Dakar',
 'Dalian',
 'Dallas',
 'Damascus',
 'Dar es Salaam',
 'Davao',
 'Delhi',
 'Denver',


In [588]:
station_df = pd.DataFrame(sorted(station_set), columns=['station'])

country_map = pd.read_csv(ROOT_DIR + 'iso_3166_country_mapping.csv')[['alpha-3', 'name', 'region', 'sub-region']]
city_map = pd.read_csv(ROOT_DIR + 'worldcities.csv')[['city_ascii', 'lat', 'lng', 'iso3', 'population']]

# Keep only cities above 60k inhabitants (too many duplicate names otherwise),
# plus a handful of named stations that are kept regardless of population.
is_large_city = city_map['population'] > 60000
is_listed_exception = city_map['city_ascii'].isin(['Basse-Terre', 'Banjul', 'Monaco', 'Santa Cruz de la Sierra'])
city_map_small = city_map[is_large_city | is_listed_exception]

# Attach city data (by city name) and then country data (by ISO alpha-3 code).
station_df = (
    station_df
    .merge(city_map_small, left_on='station', right_on='city_ascii', how='left')
    .drop(columns=['city_ascii'])
    .merge(country_map, left_on='iso3', right_on='alpha-3', how='left')
    .drop(columns=['alpha-3'])
    .rename(columns={'iso3': 'country_code', 'name': 'country_territory', 'lng': 'long', 'sub-region': 'sub_region'})
)

print('Total stations:', station_df.shape[0])
for label, cutoff in [
    ('Stations in urban areas with pop >5m:', 5000000),
    ('Stations in urban areas with pop >3m:', 3000000),
    ('Stations in urban areas with pop >2m:', 2000000),
    ('Stations in urban areas with pop >1m:', 1000000),
    ('Stations in urban areas with pop >500k:', 500000),
    ('Stations in urban areas with pop >100k:', 100000),
    ('Stations in urban areas with pop >50k:', 50000),
]:
    print(label, len(station_df[station_df['population'] > cutoff]))
print('Total population in urban area of stations:', station_df['population'].sum())

station_df



Total stations: 277
Stations in urban areas with pop >5m: 55
Stations in urban areas with pop >3m: 101
Stations in urban areas with pop >2m: 131
Stations in urban areas with pop >1m: 194
Stations in urban areas with pop >500k: 234
Stations in urban areas with pop >100k: 268
Stations in urban areas with pop >50k: 274
Total population in urban area of stations: 910208188.0


Unnamed: 0,station,lat,long,country_code,population,country_territory,region,sub_region
0,Abidjan,5.32,-4.04,CIV,3802000.0,Côte d'Ivoire,Africa,Sub-Saharan Africa
1,Abu Dhabi,24.4667,54.3666,ARE,603492.0,United Arab Emirates,Asia,Western Asia
2,Abuja,9.0833,7.5333,NGA,1576000.0,Nigeria,Africa,Sub-Saharan Africa
3,Accra,5.55,-0.2167,GHA,2121000.0,Ghana,Africa,Sub-Saharan Africa
4,Addis Ababa,9.0333,38.7,ETH,3100000.0,Ethiopia,Africa,Sub-Saharan Africa
5,Ahmadabad,23.0301,72.58,IND,5375000.0,India,Asia,Southern Asia
6,Alexandria,31.2,29.95,EGY,4165000.0,Egypt,Africa,Northern Africa
7,Algiers,36.7631,3.0506,DZA,3354000.0,Algeria,Africa,Northern Africa
8,Almaty,43.325,76.915,KAZ,1209000.0,Kazakhstan,Asia,Central Asia
9,Amman,31.95,35.9333,JOR,1060000.0,Jordan,Asia,Western Asia


In [603]:

def get_next_stations_from_line_dict(line_dict, station_dict, field):
    """Append each station's adjacent stations to station_dict[station][field].

    line_dict maps a line key to its ordered station list. A key without '_'
    is a main line; a key of the form '<split station>_<main line>_<direction>'
    is a branch. Neighbours are stored as '<station>_<main line>' strings.
    The first station of a branch is additionally linked, in both directions,
    to the branch's split station. Stations on a branch also get the main
    line added to their 'lines' set.

    station_dict is mutated in place and also returned.
    """
    for line_name, stops in line_dict.items():
        final_idx = len(stops) - 1
        for idx, stop in enumerate(stops):
            if '_' in line_name:  # branch line
                junction, parent_line, _direction = line_name.split('_')
                station_dict[stop]['lines'].add(parent_line)
                suffix = '_' + parent_line
                if idx == 0:
                    # stitch the branch onto the station it splits from
                    station_dict[stop][field].append(junction + suffix)
                    station_dict[junction][field].append(stop + suffix)
            else:  # main line
                suffix = '_' + line_name

            neighbours = station_dict[stop][field]
            if idx > 0:  # station before
                neighbours.append(stops[idx - 1] + suffix)
            if idx < final_idx:  # station after
                neighbours.append(stops[idx + 1] + suffix)
    return station_dict

def dict_to_df(row):
    """Row-wise helper for DataFrame.apply: add line/transfer/type columns.

    Looks the row's station up in the module-level station_dict and sets:
    'lines' (comma-joined line names), 'line_num' (number of lines),
    'transfer' (True when more than one line serves the station) and
    'type' ('EXPRESS' for transfer stations or populations above
    EXPRESS_POP_CUTOFF, otherwise 'LOCAL'). Returns the modified row.
    """
    served_lines = station_dict[row['station']]['lines']
    is_transfer = len(served_lines) > 1

    row['lines'] = ','.join(served_lines)
    row['line_num'] = len(served_lines)
    row['transfer'] = is_transfer
    if is_transfer or row['population'] > EXPRESS_POP_CUTOFF:
        row['type'] = 'EXPRESS'
    else:
        row['type'] = 'LOCAL'
    return row


def dict_to_df_station_list(row):
    """Row-wise helper for DataFrame.apply: add the neighbour-list columns.

    Copies the station's 'next_local_stations' and 'next_express_stations'
    lists from the module-level station_dict into the row as comma-joined
    strings (an empty list becomes the empty string). Returns the modified row.
    """
    entry = station_dict[row['station']]
    for column in ('next_local_stations', 'next_express_stations'):
        row[column] = ','.join(entry[column])
    return row



# One record per station: the set of lines serving it plus (initially empty)
# lists of adjacent local and express stations.
station_dict = {
    station: {'lines': set(), 'next_local_stations': [], 'next_express_stations': []}
    for station in station_set
}


# add lines to station dict (a branch counts as its main line)
for line in line_dict:
    for station in line_dict[line]:
        if '_' in line: # branch line, key is "<split station>_<main line>_<direction>"
            split_station, main_line, direction = line.split('_')
            station_dict[station]['lines'].add(main_line)
        else: # main line
            station_dict[station]['lines'].add(line)


# add lines, line_num, transfer, and type (express/local) to dataframe
station_df = station_df.apply(dict_to_df, axis=1)



# Build the express network:
#   express_line_dict -- like line_dict, but each list keeps only EXPRESS stations
#   express_inters    -- maps a pair of consecutive express stops
#                        ('<station>_<line>', '<station>_<line>') to the full list
#                        of '<station>_<line>' entries travelled between them
#                        (both end points included), used later to sum distances.

# Index the station table once; the original rebuilt this index for every
# single station lookup inside the loops below.
stations_by_name = station_df.set_index('station')

express_line_dict = {}
express_inters = {}
for line in line_dict:
    new_line_list = []
    temp_inters = []

    if '_' not in line: # main line
        previous_express = None
        for station in line_dict[line]:
            if stations_by_name.at[station, 'type'] == 'EXPRESS':
                new_line_list.append(station)

                # intermediate stations: close the stretch that started at the
                # previous express stop (nothing to close at the first one)
                if previous_express is not None: # end and start
                    temp_inters.append(station+'_'+line)
                    express_inters[(previous_express+'_'+line, station+'_'+line)] = temp_inters
                temp_inters = [station+'_'+line] # start
                previous_express = station
            else:
                temp_inters.append(station+'_'+line)

    else: # branch line
        split_station, main_line, direction = line.split('_')

        # split station is an express station = normal; otherwise backtrack
        # along the main line and re-anchor the branch at the nearest express
        # station in the direction the branch comes from
        if stations_by_name.at[split_station, 'type'] != 'EXPRESS':
            split_station_i = line_dict[main_line].index(split_station)
            earlier_stations = line_dict[main_line][split_station_i:] if direction == 'L' else line_dict[main_line][0:split_station_i][::-1]
            for sstation in earlier_stations:
                if stations_by_name.at[sstation, 'type'] == 'EXPRESS':
                    split_station = sstation
                    break # last express station found
            # NOTE(review): when the anchor was moved by the backtrack above, the
            # main-line stations between the new anchor and the real split station
            # are not added to temp_inters, so the summed express distance for that
            # first stretch skips them -- confirm whether that is intended.

        previous_express = split_station
        temp_inters = [previous_express+'_'+main_line]
        for station in line_dict[line]:
            if stations_by_name.at[station, 'type'] == 'EXPRESS':
                new_line_list.append(station)

                # intermediate stations
                temp_inters.append(station+'_'+main_line)
                express_inters[(previous_express+'_'+main_line, station+'_'+main_line)] = temp_inters
                temp_inters = [station+'_'+main_line] # start
                previous_express = station
            else:
                temp_inters.append(station+'_'+main_line)

        # the express branch is keyed by its (possibly re-anchored) split station
        line = split_station + '_' + main_line + '_' + direction
    express_line_dict[line] = new_line_list


# Fill the neighbour lists in station_dict: local neighbours come from the full
# line definitions, express neighbours from the express-only line definitions.
for lines_by_name, target_field in (
    (line_dict, 'next_local_stations'),
    (express_line_dict, 'next_express_stations'),
):
    get_next_stations_from_line_dict(lines_by_name, station_dict, target_field)


# copy the neighbour lists into the dataframe as comma-joined strings
station_df = station_df.apply(dict_to_df_station_list, axis=1)

is_express_station = station_df['type'] == 'EXPRESS'
print('Total transfer stations:', len(station_df[station_df['transfer']]))
print('Hub stations (3 lines):', len(station_df[station_df['line_num'] == 3]))
print('Superhub stations (4+ lines):', len(station_df[station_df['line_num'] >= 4]))
print('Express stations:', len(station_df[is_express_station]), 'out of', len(station_df))

station_df



Total transfer stations: 106
Hub stations (3 lines): 13
Superhub stations (4+ lines): 12
Express stations: 161 out of 277


Unnamed: 0,station,lat,long,country_code,population,country_territory,region,sub_region,lines,line_num,transfer,type,next_local_stations,next_express_stations
0,Abidjan,5.32,-4.04,CIV,3802000.0,Côte d'Ivoire,Africa,Sub-Saharan Africa,BRONZE,1,False,EXPRESS,"Monrovia_BRONZE,Accra_BRONZE","Dakar_BRONZE,Accra_BRONZE"
1,Abu Dhabi,24.4667,54.3666,ARE,603492.0,United Arab Emirates,Asia,Western Asia,"LILAC,SILVER",2,True,EXPRESS,"Riyadh_SILVER,Dubai_SILVER,Dubai_LILAC,Riyadh_...","Riyadh_SILVER,Dubai_SILVER,Dubai_LILAC,Riyadh_..."
2,Abuja,9.0833,7.5333,NGA,1576000.0,Nigeria,Africa,Sub-Saharan Africa,RED,1,False,LOCAL,"Lagos_RED,Kano_RED",
3,Accra,5.55,-0.2167,GHA,2121000.0,Ghana,Africa,Sub-Saharan Africa,"NAVY,BRONZE",2,True,EXPRESS,"Abidjan_BRONZE,Lome_BRONZE,Ouagadougou_NAVY,La...","Abidjan_BRONZE,Lagos_BRONZE,Ouagadougou_NAVY,L..."
4,Addis Ababa,9.0333,38.7,ETH,3100000.0,Ethiopia,Africa,Sub-Saharan Africa,"BLUE,LIGHTGREEN",2,True,EXPRESS,"Mogadishu_LIGHTGREEN,Asmara_LIGHTGREEN,Nairobi...","Mogadishu_LIGHTGREEN,Khartoum_LIGHTGREEN,Nairo..."
5,Ahmadabad,23.0301,72.58,IND,5375000.0,India,Asia,Southern Asia,"LILAC,SILVER",2,True,EXPRESS,"Surat_SILVER,Mumbai_SILVER,Surat_LILAC,Karachi...","Surat_SILVER,Mumbai_SILVER,Surat_LILAC,Karachi..."
6,Alexandria,31.2,29.95,EGY,4165000.0,Egypt,Africa,Northern Africa,SILVER,1,False,EXPRESS,"Tripoli_SILVER,Cairo_SILVER","Tripoli_SILVER,Cairo_SILVER"
7,Algiers,36.7631,3.0506,DZA,3354000.0,Algeria,Africa,Northern Africa,SILVER,1,False,EXPRESS,"Casablanca_SILVER,Tunis_SILVER","Casablanca_SILVER,Tunis_SILVER"
8,Almaty,43.325,76.915,KAZ,1209000.0,Kazakhstan,Asia,Central Asia,AQUA,1,False,LOCAL,"Bishkek_AQUA,Urumqi_AQUA",
9,Amman,31.95,35.9333,JOR,1060000.0,Jordan,Asia,Western Asia,"SILVER,BRONZE",2,True,EXPRESS,"Beirut_BRONZE,Doha_BRONZE,Jerusalem_SILVER,Riy...","Beirut_BRONZE,Doha_BRONZE,Jerusalem_SILVER,Riy..."


In [590]:
# get distances + times between each pair of adjacent stations - edges of the graph!

def get_time_from_dist(dist_km): # in km
    '''
    Returns tuple (dist, time, max_speed, avg_speed): the distance in km,
    the travel time in seconds, the max speed reached in km/h, and the
    average speed for the trip in km/h. All four values are truncated to int.

    The train accelerates at ACCELERATION, cruises at MAX_SPEED if the trip is
    long enough to reach it, and brakes at the same rate.

    A distance of exactly 0 returns (0, 0, 0, 0) instead of dividing by a
    zero travel time when computing the average speed.
    '''
    if dist_km == 0: # zero-length trip: no travel time, so no average speed to compute
        return 0, 0, 0, 0

    dist = dist_km*1000 # metres
    if dist > 2 * ACC_DISTANCE: # can fully accelerate to maximum speed
        max_speed_dist = dist - 2*ACC_DISTANCE # metres travelled at cruise speed
        time = 2*ACC_TIME + max_speed_dist/MAX_SPEED
        return int(dist_km), int(time), int(MAX_SPEED_KMH), int(dist_km/(time/3600))
    else: # distance too short, cannot accelerate fully
        acc_dist = dist/2 # accelerate over the first half, brake over the second
        acc_time = math.sqrt(2 * acc_dist/ACCELERATION) # from d = 1/2 at^2
        time = acc_time * 2
        return int(dist_km), int(time), int(acc_time * ACCELERATION * 3.6), int(dist_km/(time/3600))
    
    
# station_durs holds one directed entry per adjacent pair of stations:
#   ('<station>_<line>_<LOCAL|EXPRESS>', '<next station>_<line>_<LOCAL|EXPRESS>',
#    (dist_km, time_s, max_speed_kmh, avg_speed_kmh))
station_durs = []

# Index by station name once; the original rebuilt this index on every row.
station_df_set = station_df.set_index('station')

for i, row in station_df.iterrows():
    station_strip = row['station']
    all_station_lines = [station_strip + '_' + line for line in station_df_set.at[station_strip, 'lines'].split(',')]
    station_coord = (station_df_set.at[station_strip, 'lat'], station_df_set.at[station_strip, 'long'])
    next_local = row['next_local_stations'].split(',')
    next_express = row['next_express_stations'].split(',')

    # local hops: straight-line (geodesic) distance to the adjacent station
    for next_st in next_local:
        next_st_strip, next_line = next_st.split('_')
        next_coord = (station_df_set.at[next_st_strip, 'lat'], station_df_set.at[next_st_strip, 'long'])
        for station in all_station_lines:
            station_line = station.split('_')[1]
            if station_line == next_line:
                station_durs.append((station+'_'+'LOCAL', next_st+'_'+'LOCAL', get_time_from_dist(distance.distance(station_coord, next_coord).km)))

    # ''.split(',') == [''] means the station has no express neighbours
    if next_express == ['']:
        continue

    # express hops: distance is summed along every intermediate station passed
    for next_st in next_express:
        next_st_strip, next_line = next_st.split('_')
        for station in all_station_lines:
            station_line = station.split('_')[1]
            if station_line == next_line:
                # express_inters is stored for one direction only; fall back to the reverse key
                next_inters = express_inters[(station, next_st)] if (station, next_st) in express_inters else express_inters[(next_st, station)]
                total_exp_dist = 0
                for j in range(len(next_inters)-1): # add up distances through intermediate stations
                    station1, station2 = next_inters[j], next_inters[j+1]
                    station1_strip, station1_line = station1.split('_')
                    station2_strip, station2_line = station2.split('_')
                    station1_coord = (station_df_set.at[station1_strip, 'lat'], station_df_set.at[station1_strip, 'long'])
                    station2_coord = (station_df_set.at[station2_strip, 'lat'], station_df_set.at[station2_strip, 'long'])
                    total_exp_dist += distance.distance(station1_coord, station2_coord).km
                station_durs.append((station+'_'+'EXPRESS', next_st+'_'+'EXPRESS', get_time_from_dist(total_exp_dist)))


print(len(station_durs))
print('Longest streteches between stations')
sorted(station_durs, key=lambda x: x[2][0], reverse=True)[:20]

1404
Longest streteches between stations


[('Los Angeles_DARKGREEN_EXPRESS',
  'Tokyo_DARKGREEN_EXPRESS',
  (10318, 31066, 1200, 1195)),
 ('Tokyo_DARKGREEN_EXPRESS',
  'Los Angeles_DARKGREEN_EXPRESS',
  (10318, 31066, 1200, 1195)),
 ('Khabarovsk_ORANGE_EXPRESS',
  'Vancouver_ORANGE_EXPRESS',
  (6863, 20700, 1200, 1193)),
 ('Vancouver_ORANGE_EXPRESS',
  'Khabarovsk_ORANGE_EXPRESS',
  (6863, 20700, 1200, 1193)),
 ('Honolulu_DARKGREEN_LOCAL',
  'Tokyo_DARKGREEN_LOCAL',
  (6209, 18740, 1200, 1192)),
 ('Tokyo_DARKGREEN_LOCAL',
  'Honolulu_DARKGREEN_LOCAL',
  (6209, 18740, 1200, 1192)),
 ('Khabarovsk_RED_EXPRESS', 'Moscow_RED_EXPRESS', (6163, 18601, 1200, 1192)),
 ('Moscow_RED_EXPRESS', 'Khabarovsk_RED_EXPRESS', (6163, 18601, 1200, 1192)),
 ('Beijing_RED_EXPRESS', 'Moscow_RED_EXPRESS', (6022, 18178, 1200, 1192)),
 ('Moscow_RED_EXPRESS', 'Beijing_RED_EXPRESS', (6022, 18178, 1200, 1192)),
 ('Lisbon_DARKGREEN_LOCAL',
  'New York City_DARKGREEN_LOCAL',
  (5429, 16398, 1200, 1191)),
 ('Lisbon_DARKGREEN_EXPRESS',
  'New York City_DARKGREE

In [591]:
# create the graph!!

def create_graph(transfer_time):
    """Build the weighted, directed travel graph from station_durs and station_df.

    Nodes are named '<station>_<line>_<LOCAL|EXPRESS>_<ARR|DEP>'. Edge weights
    are seconds:
      * DEP -> ARR between adjacent stations: travel time from station_durs
      * ARR -> DEP on the same station/line/service: TIME_AT_STATION
      * ARR -> DEP across lines or between local and express: transfer_time

    Prints the node and edge counts and returns the networkx DiGraph.
    """
    G = nx.DiGraph()

    # travel between adjacent stations
    G.add_weighted_edges_from(
        [(dep + '_DEP', arr + '_ARR', stats[1]) for dep, arr, stats in station_durs]
    )

    # time at station: every platform that appears in station_durs gets a dwell edge
    detail_station_set = {platform for dep, arr, _ in station_durs for platform in (dep, arr)}
    G.add_weighted_edges_from(
        [(platform + '_ARR', platform + '_DEP', TIME_AT_STATION) for platform in detail_station_set]
    )

    # (arriving service, departing service) combinations, in insertion order
    express_combos = [('LOCAL', 'LOCAL'), ('EXPRESS', 'EXPRESS'), ('LOCAL', 'EXPRESS'), ('EXPRESS', 'LOCAL')]
    local_combos = [('LOCAL', 'LOCAL')]

    transfer_edges = []
    for _, row in station_df.iterrows():
        station = row['station']
        lines = row['lines'].split(',')
        is_express = row['type'] == 'EXPRESS'

        # transfer between lines
        if row['line_num'] > 1:
            combos = express_combos if is_express else local_combos
            for line1, line2 in itertools.permutations(lines, 2):
                for arr_service, dep_service in combos:
                    transfer_edges.append((
                        station + '_' + line1 + '_' + arr_service + '_ARR',
                        station + '_' + line2 + '_' + dep_service + '_DEP',
                        transfer_time,
                    ))

        # express-local transfer on the same line
        if is_express:
            for line in lines:
                platform = station + '_' + line
                transfer_edges.append((platform + '_LOCAL_ARR', platform + '_EXPRESS_DEP', transfer_time))
                transfer_edges.append((platform + '_EXPRESS_ARR', platform + '_LOCAL_DEP', transfer_time))
    G.add_weighted_edges_from(transfer_edges)

    print('Num nodes', G.number_of_nodes())
    print('Num edges', G.number_of_edges())
    return G

# Two graphs over the same network, differing only in the cost of a transfer:
# a short transfer time favours the fastest route, a long one favours routes
# with fewer transfers.
G_speed = create_graph(TRANSFER_TIME_1)
G_transfer = create_graph(TRANSFER_TIME_2)

# Save the speed graph's adjacency list next to the input data.
# NOTE(review): the original call omitted the required `path` argument and
# raised TypeError; the file name below was chosen here -- adjust as needed.
# A tab delimiter is used because node names contain spaces. The adjacency-list
# format does not store edge weights.
nx.write_adjlist(G_speed, ROOT_DIR + 'metro_graph_speed.adjlist', delimiter='\t')


Num nodes 1448
Num edges 4272
Num nodes 1448
Num edges 4272


In [593]:
def s_to_hm(s):
    """Format a duration in seconds as '<H>h <M>m', or '<M>min' when under an hour."""
    hours = s // 3600
    if hours > 0:
        minutes = (s - 3600 * hours) // 60
        return str(hours) + 'h ' + str(minutes) + 'm'
    return str(s // 60) + 'min'



def run_analysis(G, origin_st, dest_st, transfer_time):
    """Find and print the quickest itinerary from origin_st to dest_st in G.

    Parameters:
        G: graph built by create_graph (nodes named
           '<station>_<line>_<LOCAL|EXPRESS>_<ARR|DEP>', weights in seconds).
        origin_st, dest_st: plain station names.
        transfer_time: the transfer weight (seconds) G was built with. The
           printed total charges every transfer at TRANSFER_TIME_1 instead, so
           totals from differently weighted graphs are comparable.

    Prints each travel/transfer segment, the number of transfers and the total
    time. Returns None.

    Raises:
        KeyError: if origin_st or dest_st has no platform in station_durs.
        networkx.NetworkXNoPath: if no destination platform is reachable.
    """
    station_durs_dict = {
        (station1+'_DEP', station2+'_ARR'): {'distance': stats[0], 'duration': stats[1], 'avg_speed': stats[3]}
        for station1, station2, stats in station_durs
    }

    # Every '<station>_<line>_<LOCAL|EXPRESS>' platform that appears in
    # station_durs. Derived here because the set of the same name inside
    # create_graph is local to that function and not visible from this one.
    detail_station_set = {platform for station1, station2, _ in station_durs for platform in (station1, station2)}

    # group the platforms by plain station name
    detailed_st_per_st = {}
    for detailed_st in detail_station_set:
        station, line, exploc = detailed_st.split('_')
        detailed_st_per_st.setdefault(station, []).append(detailed_st)

    origin_set = set([st + '_DEP' for st in detailed_st_per_st[origin_st]])
    dest_set = set([st + '_ARR' for st in detailed_st_per_st[dest_st]])

    best_segments = None
    best_dur = float('inf')
    best_transfers = None

    # try every arrival platform of the destination and keep the quickest
    for dest_node in dest_set:
        try:
            dur, path = nx.multi_source_dijkstra(G, origin_set, dest_node)
        except nx.NetworkXNoPath:
            # this platform is unreachable; another one at the same station may not be
            continue
        if dur >= best_dur:
            continue

        segments, transfers = {}, []
        beg_station = origin_st
        cum_duration = 0
        cum_distance = 0
        stops = []
        for i in range(len(path)-1):
            station1, line1, type1, deparr1 = path[i].split('_')
            station2, line2, type2, deparr2 = path[i+1].split('_')
            if deparr1 == 'DEP' and deparr2 == 'ARR': # travel dist
                leg = station_durs_dict[(path[i], path[i+1])]
                cum_duration += leg['duration']
                cum_distance += leg['distance']
                stops.append((station2, leg['duration'], leg['distance']))
            elif deparr1 == 'ARR' and deparr2 == 'DEP':
                if line1 != line2 or type1 != type2:
                    # a transfer closes the current travel segment ...
                    end_station = station1
                    segments[(beg_station, end_station)] = {'type': 'travel', 'stops': stops, 'line': line1 + ' ' + type1, 'distance': cum_distance, 'duration': cum_duration}
                    beg_station = end_station
                    cum_distance, cum_duration, stops = 0, 0, []
                    transfers.append(station1)
                    # ... and is recorded as a segment of its own:
                    # 'reg_transfer' between two lines, 'le_transfer' local<->express on one line
                    transfer_kind = 'reg_transfer' if line1 != line2 else 'le_transfer'
                    segments[(station1, station2)] = {'type': transfer_kind, 'lines': (line1 + ' ' + type1, line2 + ' ' + type2), 'distance': 0, 'duration': transfer_time}
                else:
                    # staying on the same train: only the dwell time is added
                    cum_duration += TIME_AT_STATION
            else:
                print('whats going on')

        # add the last leg
        end_station = station2
        segments[(beg_station, end_station)] = {'type': 'travel', 'stops': stops, 'line': line1 + ' ' + type1, 'distance': cum_distance, 'duration': cum_duration}

        best_segments = segments
        best_dur = dur
        best_transfers = transfers

    if best_segments is None:
        raise nx.NetworkXNoPath('No path from ' + str(origin_st) + ' to ' + str(dest_st) + '.')

    for counter, (segment, details) in enumerate(best_segments.items(), start=1):
        station1, station2 = segment
        if details['type'] == 'travel':
            print(str(counter), 'Take the', details['line'], 'line from', station1, 'to', station2, '(' + str(details['distance']) + 'km' + ' '  + s_to_hm(details['duration']) + ')')
            for stop, dur, dist in details['stops']:
                print('    ', stop, str(dist)+'km', s_to_hm(dur))
        else:
            print(str(counter), 'Transfer at', station1, 'from the', details['lines'][0], 'line to the', details['lines'][1], 'line')
    print()
    print('Transfers:', len(best_transfers))
    # replace the graph's transfer weight with TRANSFER_TIME_1 for every transfer taken
    print('Total time:', s_to_hm(best_dur - (transfer_time - TRANSFER_TIME_1)*len(best_transfers)))





In [602]:

origin_st = 'New York City'
dest_st = 'Nairobi'

# look both stations up in one name-indexed view of the station table
stations_indexed = station_df.set_index('station')
origin_coords = (stations_indexed.at[origin_st, 'lat'], stations_indexed.at[origin_st, 'long'])
dest_coords = (stations_indexed.at[dest_st, 'lat'], stations_indexed.at[dest_st, 'long'])
straight_line_km = distance.distance(origin_coords, dest_coords).km


print(origin_st, 'to', dest_st)
print('Straight line distance:', int(straight_line_km), 'km')
print('By plane at 800kmh:', s_to_hm(int(3600*straight_line_km/800)))
print()
print('Transfer time:', TRANSFER_TIME_1_M, 'minutes')
print('Time at station:', TIME_AT_STATION_M, 'minutes')
print('Train speed:', MAX_SPEED_KMH, 'kmh')

# same query against both graphs: cheap transfers first, expensive transfers second
print('\n\n')
print('Prioritize speed')
run_analysis(G_speed, origin_st, dest_st, TRANSFER_TIME_1)

print('\n\n')
print('Prioritize less transfers')
run_analysis(G_transfer, origin_st, dest_st, TRANSFER_TIME_2)




New York City to Nairobi
Straight line distance: 11843 km
By plane at 800kmh: 14h 48m

Transfer time: 30 minutes
Time at station: 5 minutes
Train speed: 1200 kmh



Prioritize speed
1 Take the DARKGREEN LOCAL line from New York City to Cairo (9815km 8h 53m)
     Lisbon 5429km 4h 33m
     Madrid 504km 27min
     Barcelona 506km 27min
     Milan 727km 38min
     Rome 476km 25min
     Athens 1054km 54min
     Cairo 1119km 57min
2 Transfer at Cairo from the DARKGREEN LOCAL line to the LIGHTGREEN EXPRESS line
3 Take the LIGHTGREEN EXPRESS line from Cairo to Addis Ababa (2990km 2h 38m)
     Khartoum 1606km 1h 22m
     Addis Ababa 1384km 1h 11m
4 Transfer at Addis Ababa from the LIGHTGREEN EXPRESS line to the BLUE LOCAL line
5 Take the BLUE LOCAL line from Addis Ababa to Nairobi (1159km 59min)
     Nairobi 1159km 59min

Transfers: 2
Total time: 13h 31m



Prioritize less transfers
1 Take the BRONZE EXPRESS line from New York City to Nairobi (16247km 14h 42m)
     Boston 297km 16min
     Londo

In [595]:
# number of stations per region, largest first
by_region = (
    station_df
    .groupby('region')['station']
    .count()
    .reset_index()
    .sort_values('station', ascending=False)
)
by_region


Unnamed: 0,region,station
2,Asia,93
1,Americas,62
0,Africa,61
3,Europe,51
4,Oceania,10


In [596]:
# number of stations per (region, sub-region), largest first
by_subregion = (
    station_df
    .groupby(['region', 'sub_region'])['station']
    .count()
    .reset_index()
    .sort_values('station', ascending=False)
)
by_subregion


Unnamed: 0,region,sub_region,station
1,Africa,Sub-Saharan Africa,53
2,Americas,Latin America and the Caribbean,40
5,Asia,Eastern Asia,28
7,Asia,Southern Asia,23
3,Americas,Northern America,22
8,Asia,Western Asia,19
9,Europe,Eastern Europe,19
6,Asia,South-eastern Asia,18
11,Europe,Southern Europe,12
10,Europe,Northern Europe,11


In [597]:
# number of stations per country/territory, largest first
by_country = (
    station_df
    .groupby('country_territory')['station']
    .count()
    .reset_index()
    .sort_values('station', ascending=False)
)
print('Total countries/territories:', len(by_country))
# show only the countries/territories with more than one station
by_country[by_country['station'] > 1]



Total countries/territories: 178


Unnamed: 0,country_territory,station
32,China,17
169,United States of America,16
73,India,11
134,Russian Federation,9
21,Brazil,6
28,Canada,6
74,Indonesia,4
6,Australia,4
81,Japan,4
123,Pakistan,3


In [598]:

    
# Sanity check: (dist_km, time_s, max_speed_kmh, avg_speed_kmh) for a 1 km hop,
# which is too short for the train to reach MAX_SPEED.
get_time_from_dist(1)


(1, 36, 197, 98)

In [599]:
# Inspect the per-station records: lines served plus local and express neighbours.
station_dict

{'Denver': {'lines': {'DARKGREEN'},
  'next_local_stations': ['Las Vegas_DARKGREEN', 'Chicago_DARKGREEN'],
  'next_express_stations': ['Las Vegas_DARKGREEN', 'Chicago_DARKGREEN']},
 'Skopje': {'lines': {'AQUA'},
  'next_local_stations': ['Tirana_AQUA', 'Istanbul_AQUA'],
  'next_express_stations': []},
 'Berlin': {'lines': {'DARKGREEN', 'LILAC'},
  'next_local_stations': ['Prague_LILAC',
   'Copenhagen_LILAC',
   'Amsterdam_DARKGREEN',
   'Warsaw_DARKGREEN'],
  'next_express_stations': ['Prague_LILAC',
   'Copenhagen_LILAC',
   'Amsterdam_DARKGREEN',
   'Kyiv_DARKGREEN']},
 'Panama City': {'lines': {'ORANGE'},
  'next_local_stations': ['Medellin_ORANGE', 'San Jose_ORANGE'],
  'next_express_stations': []},
 'Lisbon': {'lines': {'DARKGREEN'},
  'next_local_stations': ['New York City_DARKGREEN', 'Madrid_DARKGREEN'],
  'next_express_stations': ['New York City_DARKGREEN', 'Madrid_DARKGREEN']},
 'Harbin': {'lines': {'ORANGE'},
  'next_local_stations': ['Khabarovsk_ORANGE', 'Shenyang_ORANGE'],

In [600]:
# Inspect the parsed routes: main lines are keyed by line name, branches by
# '<split station>_<main line>_<direction>'.
line_dict

{'ORANGE': ['Rio de Janeiro',
  'Sao Paulo',
  'Montevideo',
  'Buenos Aires',
  'Santiago',
  'La Paz',
  'Lima',
  'Quito',
  'Bogota',
  'Medellin',
  'Panama City',
  'San Jose',
  'Managua',
  'Tegucigalpa',
  'San Salvador',
  'Guatemala City',
  'Mexico City',
  'Guadalajara',
  'Los Angeles',
  'San Francisco',
  'Seattle',
  'Vancouver',
  'Anchorage',
  'Khabarovsk',
  'Harbin',
  'Shenyang',
  'Beijing',
  'Wuhan',
  'Guangzhou',
  'Shenzhen',
  'Hong Kong',
  'Manila',
  'Davao',
  'Dili',
  'Port Moresby',
  'Brisbane',
  'Sydney',
  'Melbourne'],
 'Los Angeles_ORANGE_L': ['Phoenix', 'Dallas', 'Houston', 'Miami', 'Nassau'],
 'Seattle_ORANGE_L': ['Chicago', 'Washington D.C.', 'New York City', 'Boston'],
 'Port Moresby_ORANGE_R': ['Honiara', 'Noumea', 'Suva'],
 'RED': ['Caracas',
  'Bogota',
  'Quito',
  'Lima',
  'La Paz',
  'Santa Cruz de la Sierra',
  'Asuncion',
  'Sao Paulo',
  'Rio de Janeiro',
  'Salvador',
  'Fortaleza',
  'Lagos',
  'Abuja',
  'Kano',
  'Tunis',
  '