# Il giro del mondo in 80 giorni

Questo progetto utilizza un dataset contenente le principali città del mondo, con la relativa posizione geografica e altre informazioni, per calcolare il tempo minimo necessario per viaggiare tra due città, il percorso migliore, il percorso più turistico e per offrire altre funzionalità.

In [1]:
import csv
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dijkstar import Graph, NoPathError, find_path
from geopy import distance as geopy

In [2]:
# Read the raw world-cities CSV into a list of rows; the first row is the header.
data_file = './data/worldcities_ascii.csv'
# newline='' is what the csv module expects: it lets the reader do its own
# newline handling, so a quoted field containing a line break stays one field.
with open(data_file, 'r', newline='') as f:
    reader = csv.reader(f)
    all_lines = list(reader)

## Preprocessing

Preprocesso i dati pulendo le virgole in eccesso, correggendo qualche nome e aggiungendo i campi che mi servono.

>TODO: Aggiungere dei campi tipo cose da visitare, prezzo medio ecc...

In [3]:
# Substring replacements applied, in this order, to every field of every row
# (header included). The last entry strips embedded line breaks.
FIELD_REPLACEMENTS = {
    "Korea, South": "South Korea",
    "Korea, North": "North Korea",
    "Gambia, The": "The Gambia",
    "Micronesia, Federated States Of": "Federated States Of Micronesia",
    "Bahamas, The": "The Bahamas",
    "Saint Helena, Ascension, And Tristan Da Cunha": "Saint Helena, Ascension and Tristan da Cunha",
    "Islamorada, Village of Islands": "Village of Islands Islamorada",
    "\n": "",
}


def _clean_field(field: str) -> str:
    """Return `field` with every replacement in FIELD_REPLACEMENTS applied."""
    for old, new in FIELD_REPLACEMENTS.items():
        field = field.replace(old, new)
    return field


newLines = [[_clean_field(field) for field in line] for line in all_lines]

# Export the cleaned table in case it is useful in other projects.
df = pd.DataFrame(newLines[1:], columns=newLines[0])
# Assign the result back instead of calling replace(..., inplace=True) on the
# selected column: the chained in-place form is deprecated and does not update
# `df` when pandas Copy-on-Write is active.
df["population"] = df["population"].replace({"": 0})
df.to_csv("./data/worldcities_preprocessed.csv", index=False)

In [4]:
# The full dataset is too large to work with, so start from a random sample
# of SAMPLE_SIZE cities.
SAMPLE_SIZE = 1000
# Row 0 of newLines is the header, so candidate indexes start at 1. The size is
# capped at the number of data rows because sampling without replacement
# raises if more items are requested than are available.
lineIndexes = np.random.choice(
    range(1, len(newLines)),
    size=min(SAMPLE_SIZE, len(newLines) - 1),
    replace=False,
)
df = pd.DataFrame([newLines[i] for i in lineIndexes], columns=newLines[0])
# Assign back rather than using the chained inplace=True form, which is
# deprecated and is a no-op on `df` under pandas Copy-on-Write.
df["population"] = df["population"].replace({"": 0})
df

Unnamed: 0,city,lat,lng,country,iso3,population,id
0,Cuito,-12.38,16.94,Angola,AGO,114286,1024939858
1,Guiseley,53.875,-1.706,United Kingdom,GBR,22347,1826210408
2,San Francisco,13.7,-88.1,El Salvador,SLV,16152,1222399599
3,Vihari,30.0419,72.3528,Pakistan,PAK,128034,1586888846
4,Zhashkiv,49.25,30.1,Ukraine,UKR,13853,1804691013
...,...,...,...,...,...,...,...
995,Bedford,40.0456,-78.4998,United States,USA,5220,1840001396
996,Salto,-31.3883,-57.9606,Uruguay,URY,124878,1858575950
997,Lakewood,47.1628,-122.5299,United States,USA,61037,1840019860
998,Kitagata,35.4369,136.6861,Japan,JPN,18271,1392622247


In [5]:
class CityNode:
    """A city record with three neighbour slots, all initially empty (None)."""

    def __init__(self, cityID: int, cityName: str, lat: float, lng: float, population: int, countryISO3: str):
        """Store the city's identity, position, population and country code."""
        self.cityID, self.cityName = cityID, cityName
        self.lat, self.lng = lat, lng
        # Latitude and longitude packed together as a 2-element array.
        self.coordinates = np.array([lat, lng])
        self.population, self.countryISO3 = population, countryISO3
        self.neighbour1 = self.neighbour2 = self.neighbour3 = None

    def updateNeighbour(self, cityIndex: int, position: int):
        """Store `cityIndex` in neighbour slot `position` (1, 2 or 3).

        Any other `position` leaves the node unchanged.
        """
        slot = {1: "neighbour1", 2: "neighbour2", 3: "neighbour3"}.get(position)
        if slot is not None:
            setattr(self, slot, cityIndex)

    def __repr__(self):
        """Return '<name> at <lat>, <lng>'."""
        return f"{self.cityName} at {self.lat}, {self.lng}"

In [6]:
# Build one CityNode per sampled row, converting the CSV text fields to the
# numeric types the constructor is annotated with.
nodes = []
for _, row in df.iterrows():
    nodes.append(
        CityNode(
            int(row["id"]),
            row["city"],
            float(row["lat"]),
            float(row["lng"]),
            int(row["population"]),
            row["iso3"],
        )
    )
edges = []

In [7]:
def updateMin(min1, min2, min3, dist: float, index: int):
    """Insert a candidate into the running list of the three smallest distances.

    Args:
        min1, min2, min3: The current best entries in ascending distance order.
            Each is a dict ``{"index": ..., "dist": ...}`` or ``None`` when
            that slot has not been filled yet.
        dist: Distance of the new candidate.
        index: Identifier of the new candidate.

    Returns:
        A tuple ``(min1, min2, min3)`` with the candidate inserted at its
        sorted position. Slots stay ``None`` until enough candidates have
        been seen, so all three are filled after three calls.
    """
    candidate = {"index": index, "dist": dist}
    # Only the filled slots take part in the ranking; the first candidate must
    # not be copied into every slot, otherwise later, farther candidates could
    # never displace those copies.
    ranked = [entry for entry in (min1, min2, min3) if entry is not None]

    # `<=` means a tie is placed ahead of the existing entry.
    insert_at = len(ranked)
    for slot, entry in enumerate(ranked):
        if dist <= entry["dist"]:
            insert_at = slot
            break
    ranked.insert(insert_at, candidate)

    # Keep the best three and pad the still-empty slots with None.
    ranked = ranked[:3]
    ranked.extend([None] * (3 - len(ranked)))
    return (ranked[0], ranked[1], ranked[2])

def calculateTime(start: "CityNode", end: "CityNode", position: int):
    """Compute the travel-time cost of the edge from `start` to `end`.

    Args:
        start: Origin node; only its ``countryISO3`` is read.
        end: Destination node; its ``countryISO3`` and ``population`` are read.
        position: Rank of `end` among the neighbours of `start` (1, 2 or 3).

    Returns:
        The base cost for the rank (2, 4 or 8), plus 2 when the two nodes have
        different country codes, plus 2 when the destination population
        exceeds 200000.

    Raises:
        ValueError: If `position` is not 1, 2 or 3.
    """
    base_by_position = {1: 2, 2: 4, 3: 8}
    if position not in base_by_position:
        # Fail with a clear message instead of an UnboundLocalError later on.
        raise ValueError(f"position must be 1, 2 or 3, got {position!r}")

    # Named travel_time so the imported `time` module is not shadowed.
    travel_time = base_by_position[position]
    if start.countryISO3 != end.countryISO3:
        travel_time += 2
    if end.population > 200000:
        travel_time += 2
    return travel_time

In [8]:
# Build the travel graph: for every city, find its three nearest cities by
# distance and add an edge from the city to each of them.
graph = Graph()
start = time.time()
for i in range(len(nodes)):
    min1 = min2 = min3 = None
    for j in range(len(nodes)):
        if i == j:
            continue
        dist = geopy.distance(nodes[i].coordinates, nodes[j].coordinates).km
        min1, min2, min3 = updateMin(min1, min2, min3, dist, j)

    linked = set()
    # Positions are 1-based because both CityNode.updateNeighbour and
    # calculateTime expect 1, 2 or 3.
    for position, nearest in enumerate((min1, min2, min3), start=1):
        # Skip unfilled slots and repeated neighbours, so a neighbour that
        # appears more than once keeps the edge of its best rank.
        if nearest is None or nearest["index"] in linked:
            continue
        neighbour = nearest["index"]
        linked.add(neighbour)
        nodes[i].updateNeighbour(neighbour, position)
        # The edge must end at the neighbour itself, not at `j`, which after
        # the inner loop is just the last index that was visited.
        graph.add_edge(i, neighbour, calculateTime(nodes[i], nodes[neighbour], position))

    if (i+1) % 100 == 0:
        print(f"Execution: {i+1}\t\tTime elapsed (s): {time.time() - start}")

Execution: 0		Time elapsed (s): 0.23922157287597656
Execution: 100		Time elapsed (s): 18.349867343902588
Execution: 200		Time elapsed (s): 35.924164056777954
Execution: 300		Time elapsed (s): 53.671019554138184
Execution: 400		Time elapsed (s): 71.62119483947754
Execution: 500		Time elapsed (s): 89.67220067977905
Execution: 600		Time elapsed (s): 108.09967160224915
Execution: 700		Time elapsed (s): 126.79116749763489
Execution: 800		Time elapsed (s): 145.30104994773865
Execution: 900		Time elapsed (s): 163.96000742912292


NoPathError: Could not find a path from 5 to 34

In [20]:
# Look for ordered pairs of cities whose best route uses more than one edge.
for i in range(len(nodes)):
    for j in range(len(nodes)):
        try:
            if len(find_path(graph, i, j).edges) > 1:
                print("CIAO")
        except NoPathError:
            # Unreachable pairs are expected and skipped; any other error
            # (including an interrupt of this long loop) is allowed to surface.
            pass