# Around the World in 80 days 
Le problematiche che si incontrano nello svolgere il compito assegnato si posso dividere in due parti principali:
## La creazione di un grafo seguendo le regole assegnate
## L'implenentazione dell' algoritmo di Dijkstra per la ricerca del cammino di peso minimo 




In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt 
import folium 
import math
from sklearn.metrics.pairwise import euclidean_distances
from typing import List
from collections import defaultdict 
from tqdm.notebook import tqdm

import networkx as nx
import haversine as hs

In [2]:
#Read the dataset 

fields = ["city","lat","lng","country","population"]
df = pd.read_excel('worldcities.xlsx', usecols=fields,engine="openpyxl")

number_city = len(df)
C = defaultdict(lambda: [])


la struttura dati del grafo sarà un dizionario con chiave l'index di ogni citta e come elementi del dizionario una tupla formata dall'index della città che si puo ragginungere e la durata del traggitto. 

Le regole per la crazione del grafo sono:

1- ci si puo spostare solo verso est

2- le citta che si possono raggiungere sono solo le 3 piu vicine alla citta di partenza con la segente durata:

    - 2 ore per la piu vicina 
    - 4 per la seconda 
    - 8 per la terza
3- se si cambia nazione si impiega 2 ore in piu 

4- se si arriva in una citta con piu 200000 abitanti ci si impiega 2 ore in piu



In [3]:
# Create two new variables:  "nlat" takes value 0 at the North Pole and 180 at the South Pole
# and "nlong" takes value 0 in Greenwich and 360 after the World tour 


df["nlat"] = 90 - df["lat"]
def fun_long(x):
    if x<0:
        x = 360 + x
    else:
        pass
    return x
df["nlong"] = [fun_long(x) for x in df["lng"]]

#Build an empty grid using the 'grid' dictionary. 
# 
#Each cell can be accessed via two keys, relating to a measure of latitude and longitude. 
#Each city in the dataset belongs to only one cell.
#Each cell have the some lenght in latitud and longitud, it is a square cell. 


df["grid_lat"] = [int(x//0.5) for x in df.nlat.values]
df["grid_lng"] = [int(x//1) for x in df.nlong.values]

# 'grid' dictionary contains the lists of cities that belong to the same cell

grid= {}


for i, grid_cord in enumerate(df[['grid_lat','grid_lng']].values):
    try:
        grid[grid_cord[0], grid_cord[1]].append(i)
    except KeyError:
        grid[grid_cord[0], grid_cord[1]]=[i]
        

max_grid_lng=df["grid_lng"].max()

In [4]:
##given a city return a list index with city in the some cell

def get_cities(city):
    list_of_cities=grid[df["grid_lat"].iloc[city],df["grid_lng"].iloc[city]]
    return list_of_cities
    

## return a list of index of city more est:
def more_est(indice_citta, inicici_citta_esaminare):
    listaona=[]
    citta=df["nlong"].iloc[indice_citta]
    for indice in inicici_citta_esaminare:
        if df["nlong"].iloc[indice]>citta:
            listaona.append(indice) 
    return listaona

## returns to the cities further east by the meridians near Greenwich 
def more_est_green(indice_citta, inicici_citta_esaminare):
    listaona=[]
    citta=df["nlong"].iloc[indice_citta]
    
    for indice in inicici_citta_esaminare:
        if df["nlong"].iloc[indice]>180 and df["nlong"].iloc[indice] > citta:
            listaona.append(indice)
        elif df["nlong"].iloc[indice] < 180:
            listaona.append(indice)
    return listaona




## expand search cells. From a city index returns the cities in neighboring cells with a set width 

def get_more_cities2(city,ampiezza):
    lat=df["grid_lat"].iloc[city]
    long=df["grid_lng"].iloc[city]
    ind_lat= range(lat-ampiezza, lat+ampiezza+1,)
    ind_long = range(long, long+ampiezza+1)
    list_of_cities=[]
    for i in range(len(ind_lat)):
        for j in range(len(ind_long)):
            try:
                list_of_cities.append(grid[ind_lat[i],ind_long[j]])
            except KeyError:
                pass
                
    flattened_list = [y for x in list_of_cities for y in x]
    return flattened_list


## For cities near Greenwich get a list of city index that are in the cell or close cell with a factor "ampiezza"
# city: index of a city in dataframe
# ampiezza:int how many cell shouold be included in the search 
def get_green(city, ampiezza):
    lat=df["grid_lat"].iloc[city]
    long=df["grid_lng"].iloc[city]
    ind_lat= range(lat-ampiezza, lat+ampiezza+1,)
    ind_long = [x for x in range(long, long+ampiezza+1)]
    x = [x for x in range(ampiezza)]
    ind_long.extend(x)
    list_of_cities=[]
    for i in range(len(ind_lat)):
        for j in range(len(ind_long)):
            try:
                list_of_cities.append(grid[ind_lat[i],ind_long[j]])
            except KeyError:
                pass
                
    flattened_list = [y for x in list_of_cities for y in x]
    return flattened_list
    




In [5]:
C={}

# For each city in the dataframe we measure the distance from Haverstine to the 20 closest cities.
# To find the nearest cities we start from the cell to which the city belongs. If the cities in the frame are less than 20, 
# the neighboring cells are also considered 
# If the index city is less than 4 cells from (espresso dal if df["grid_lng"].iloc[city] > max_grid_lng-4 )  
# the meridian of Greenwich we also include in the search the cities that are found 
# after the meridian 0 at the same latitude. Eventually the earth is round!!

for city in tqdm(range(len(df))):
    vettore_query=(df["lat"].iloc[city],df["lng"].iloc[city])
    amp=0
    citta_est=[]
    citta_cella  = get_more_cities2(city,amp)
    citta_est = more_est(city,citta_cella)
    
    if df["grid_lng"].iloc[city] > max_grid_lng-4:
        citta_cella = get_green(city,amp)
        citta_est = more_est_green(city,citta_cella)
        while len(citta_est)<20:
            citta_cella = get_green(city,amp)
            citta_est = more_est_green(city,citta_cella)
            amp = amp + 1
            
    else:
        while len(citta_est)<20:
            amp = amp + 1
            citta_cella  = get_more_cities2(city,amp)
            citta_est = more_est(city,citta_cella)
            
            
        
    # citta_est is a list of at least 20 cities that are located east and close to the city in question
    dati=df.iloc[citta_est]
     
    lista_dist=[]
    #For each city in eastern city we calculate the Haversine distance 
    for j in range(len(dati)):
            vettore_target=(dati["lat"].iloc[j],dati["lng"].iloc[j])
            #distanza=euclidean_distances(vettore_query.reshape(1, -1), vettore_target.reshape(1, -1))
            distanza=hs.haversine(vettore_query, vettore_target)
            lista_dist.append(distanza)
        
    
    distances=np.array( lista_dist)
    
    ind = np.argsort(distances)[0:3]
    ind = ind.tolist()    
    index =dati.index[ind]
    
    # We build the dictionary with key the city in question and with values the index tuples destination and duration
    for it,j in enumerate(index):
        time= pow(2,(it+1))
        if df['population'].iloc[j] > 200000:
            time +=2
        elif df['country'].iloc[j] != df['country'].iloc[city]:
            time += 2
        try:    
            C[city].append((j,time))
        except KeyError:
            C[city]= [(j,time)]
        

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=26569.0), HTML(value='')))




In [6]:
disegno={}

for city in range(len(df)):
    try:
        disegno[city].append((df["lng"].iloc[city] , df["lat"].iloc[city]))
    except KeyError:
        disegno[city]=(df["lng"].iloc[city] , df["lat"].iloc[city])
        

In [7]:
# creation of the graph with Networkx library from the dictionary C 
G = nx.DiGraph()
for city in C:
    G.add_node(city)
    
    
for city in C:
    for near_city in C[city]:
        G.add_edge(city,near_city[0],weight = near_city[1])
       

In [8]:
# With the assigned rules there is no path in the one-way graph created:
# this shows that there is no way to go around the World from London to London with bound movement towards the East 
nx.has_path(G,6681,34)


False

In [23]:
# However, there is a more reasonable and linear route, without latitude jumps, to go around the world from Moscow to Moscow
print(df.iloc[14])
print("Queste sono le citta che si posso raggiungere da Mosca, quindi piu ad est ", C[14])

my_path=nx.shortest_path(G,4026,14, weight="weight")

city              Moscow
lat              55.7558
lng              37.6178
country           Russia
population    1.7125e+07
nlat             34.2442
nlong            37.6178
grid_lat              68
grid_lng              37
Name: 14, dtype: object
These are the cities that can be reached from Moscow, therefore further east [(4026, 2), (2681, 4), (2649, 10)]


In [31]:
ore = nx.path_weight(G,my_path, "weight")
giorni=ore//24
ore_resto= ore%24+2

print ("Per fare il giro del mondo e tornare a Mosca ci si impiega Giorni {} e {} ore".format(giorni, ore_resto))

To travel around the world and return to Moscow it takes 24 and 8 hours


In [24]:
# Build the map with the cities of the tour pointed at flags 

#import requests



resolution, width, height = 75, 12, 9

map = folium.Map(location=[51,0],zoom_start=1)


for i in my_path[1:-2]:
    folium.Marker([df['lat'].iloc[i],df['lng'].iloc[i]], popup=df['city'].iloc[i],icon=folium.Icon(color='blue',icon='flag')).add_to(map)
map