# Il giro del mondo in 80 giorni

Questo progetto utilizza un dataset contenente le principali città del mondo, con la relativa posizione geografica e altre informazioni, per calcolare il tempo minimo necessario per viaggiare tra due città, il percorso migliore, il percorso più turistico e altro ancora.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from geopy import distance as geopy
import csv

In [2]:
# Read the whole CSV into memory as a list of rows; each row is a list of
# string fields exactly as produced by csv.reader.
data_file = './data/worldcities_ascii.csv'
with open(data_file, 'r') as f:
    reader = csv.reader(f)
    all_lines = list(reader)

## Preprocessing

Preprocesso i dati eliminando le virgole in eccesso, correggendo alcuni nomi e aggiungendo i campi che mi servono.

>TODO: Aggiungere dei campi tipo cose da visitare, prezzo medio ecc...

In [3]:
# Ordered (old, new) substitutions applied to every field of every row:
# name rewrites first, then removal of any newline character left in a field.
_FIELD_REPLACEMENTS = (
    ("Korea, South", "South Korea"),
    ("Korea, North", "North Korea"),
    ("Gambia, The", "The Gambia"),
    ("Micronesia, Federated States Of", "Federated States Of Micronesia"),
    ("Bahamas, The", "The Bahamas"),
    ("Saint Helena, Ascension, And Tristan Da Cunha", "Saint Helena, Ascension and Tristan da Cunha"),
    ("Islamorada, Village of Islands", "Village of Islands Islamorada"),
    ("\n", ""),
)


def _cleanField(rawField):
    """Return rawField with every substitution in _FIELD_REPLACEMENTS applied in order."""
    cleaned = rawField
    for oldText, newText in _FIELD_REPLACEMENTS:
        cleaned = cleaned.replace(oldText, newText)
    return cleaned


# Same shape as all_lines (row 0 included), with every field cleaned.
newLines = [[_cleanField(rawField) for rawField in rawLine] for rawLine in all_lines]

# Export the cleaned table in case it is useful in other projects;
# row 0 supplies the column names, the remaining rows are the data.
(
    pd.DataFrame(newLines[1:], columns=newLines[0])
    .to_csv("./data/worldcities_preprocessed.csv", index=False)
)

In [4]:
# The full dataset is too large to work with comfortably, so start from a
# random sample of cities. The generator is seeded so that a fresh
# Restart & Run All reproduces the same sample (and therefore the same
# graph and distance matrix further down).
SAMPLE_SIZE = 1000
SAMPLE_SEED = 42

# Row 0 of newLines is the header, so only indexes 1..len-1 are candidates.
candidateIndexes = np.arange(1, len(newLines))
# Cap the request at the number of available rows: sampling without
# replacement raises ValueError when asked for more items than exist.
lineIndexes = np.random.default_rng(SAMPLE_SEED).choice(
    candidateIndexes,
    size=min(SAMPLE_SIZE, len(candidateIndexes)),
    replace=False,
)
# NOTE(review): the rows come from csv.reader, so every column (lat, lng,
# population, id included) is still a string here - convert if numbers are needed.
df = pd.DataFrame([newLines[i] for i in lineIndexes], columns=newLines[0])
df

Unnamed: 0,city,lat,lng,country,iso3,population,id
0,Bajina Basta,43.9731,19.5597,Serbia,SRB,26022,1688643521
1,Montclair,34.0714,-117.698,United States,USA,40083,1840020408
2,Chaparral,32.0442,-106.4061,United States,USA,14793,1840018050
3,Warren,41.8433,-79.1445,United States,USA,14370,1840003334
4,Mahtomedi,45.0619,-92.966,United States,USA,8294,1840008913
...,...,...,...,...,...,...,...
995,Kota Kinabalu,5.975,116.0725,Malaysia,MYS,452058,1458441859
996,Visby,57.629,18.3071,Sweden,SWE,24693,1752027172
997,Neves,0.3586,6.5525,Sao Tome And Principe,STP,,1678563060
998,Villa O'Higgins,-48.4679,-72.56,Chile,CHL,250,1152709181


In [5]:
class CityNode:
    """A city in the travel graph: identity, position, and up to three neighbour links."""

    def __init__(self, cityID: int, cityName: str, lat: float, lng: float, population: int, countryISO3: str):
        """Store the city's attributes and start with all three neighbour slots empty.

        Args:
            cityID: identifier of the city.
            cityName: name of the city (used by __repr__).
            lat: latitude of the city.
            lng: longitude of the city.
            population: population of the city.
            countryISO3: ISO3 code of the city's country.
        """
        self.cityID, self.cityName = cityID, cityName
        self.lat, self.lng = lat, lng
        # Latitude and longitude packed together as a two-element array.
        self.coordinates = np.array([lat, lng])
        self.population, self.countryISO3 = population, countryISO3
        self.neighbour0 = self.neighbour1 = self.neighbour2 = None

    def insertNeighbour(self, city, position: int):
        """Put `city` into neighbour slot `position` (0, 1 or 2).

        Any other position leaves the node unchanged.
        """
        for slot, attribute in enumerate(("neighbour0", "neighbour1", "neighbour2")):
            if position == slot:
                setattr(self, attribute, city)
                return

    def __repr__(self):
        """Return '<cityName> at <lat>, <lng>'."""
        return "{} at {}, {}".format(self.cityName, self.lat, self.lng)

In [6]:
# One CityNode per row of df, in row order; column values are passed through
# exactly as stored in the frame.
graph = [
    CityNode(row["id"], row["city"], row["lat"], row["lng"], row["population"], row["iso3"])
    for _label, row in df.iterrows()
]

In [7]:
# Pairwise distances in km between the cities in graph, as reported by
# geopy.distance(...).km. Only the strict upper triangle (column > row) is
# filled; the diagonal and the lower triangle keep their initial zeros.
adjacencyMatrix = np.zeros((len(graph), len(graph)))
for row, origin in enumerate(graph):
    for col in range(row + 1, len(graph)):
        adjacencyMatrix[row, col] = geopy.distance(origin.coordinates, graph[col].coordinates).km
adjacencyMatrix

array([[    0.        , 10342.06683601,  9964.94872456, ...,
         5000.58715433, 13603.71740335,  8078.60177766],
       [    0.        ,     0.        ,  1077.65492696, ...,
        13083.43600004, 10182.07318431,  2993.76773924],
       [    0.        ,     0.        ,     0.        , ...,
        12142.46205186,  9532.62463116,  2116.99184325],
       ...,
       [    0.        ,     0.        ,     0.        , ...,
            0.        ,  9236.71858901, 10089.96994997],
       [    0.        ,     0.        ,     0.        , ...,
            0.        ,     0.        ,  9776.11723272],
       [    0.        ,     0.        ,     0.        , ...,
            0.        ,     0.        ,     0.        ]])