# Il giro del mondo in 80 giorni

Questo progetto utilizza un dataset contenente le principali città del mondo, con la relativa posizione geografica e altre informazioni, per calcolare il tempo minimo necessario per viaggiare tra due città, il percorso migliore, il percorso più turistico e altre funzionalità.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from geopy import distance as geopy
import csv

In [2]:
# Read the raw world-cities CSV into a list of rows (each row is a list of
# string fields); the first row is whatever the file has on its first line.
data_file = './data/worldcities_ascii.csv'
with open(data_file, 'r') as source:
    all_lines = [record for record in csv.reader(source)]

## Preprocessing

Preprocesso i dati pulendo le virgole in eccesso, correggendo qualche nome e aggiungendo i campi che mi servono.

>TODO: Aggiungere dei campi tipo cose da visitare, prezzo medio ecc...

In [3]:
# Substring replacements applied, in this exact order, to every field of every
# row. The last entry strips newline characters from the field.
_FIELD_SUBSTITUTIONS = (
    ("Korea, South", "South Korea"),
    ("Korea, North", "North Korea"),
    ("Gambia, The", "The Gambia"),
    ("Micronesia, Federated States Of", "Federated States Of Micronesia"),
    ("Bahamas, The", "The Bahamas"),
    ("Saint Helena, Ascension, And Tristan Da Cunha", "Saint Helena, Ascension and Tristan da Cunha"),
    ("Islamorada, Village of Islands", "Village of Islands Islamorada"),
    ("\n", ""),
)


def _clean_field(text):
    """Return `text` with every substitution in _FIELD_SUBSTITUTIONS applied in order."""
    for old, new in _FIELD_SUBSTITUTIONS:
        text = text.replace(old, new)
    return text


newLines = [[_clean_field(cell) for cell in record] for record in all_lines]

# Export the cleaned table in case it is useful in other projects.
# The first cleaned row is used as the header, the remaining rows as data.
header, *body = newLines
pd.DataFrame(body, columns=header).to_csv("./data/worldcities_preprocessed.csv", index=False)

In [4]:
#Siccome sono troppe per lavorarci inizio a prenderne solo 1000 a caso
lineIndexes = np.random.choice(range(1, len(newLines)), size=1000, replace=False)
df = pd.DataFrame([newLines[i] for i in lineIndexes], columns=newLines[0])
df

Unnamed: 0,city,lat,lng,country,iso3,population,id
0,Market Harborough,52.4775,-0.9206,United Kingdom,GBR,22911,1826292544
1,Woodfin,35.6458,-82.5914,United States,USA,6717,1840016285
2,Prokuplje,43.2339,21.5861,Serbia,SRB,,1688111468
3,Oundle,52.48,-0.472,United Kingdom,GBR,5735,1826530571
4,Pompano Beach,26.2428,-80.1312,United States,USA,112118,1840015143
...,...,...,...,...,...,...,...
95,Ueda,36.4019,138.2492,Japan,JPN,154201,1392438902
96,Novoselytsya,48.2167,26.2667,Ukraine,UKR,7588,1804491559
97,Lilongwe,-13.9833,33.7833,Malawi,MWI,781538,1454688499
98,Mocimboa da Praia,-11.35,40.3333,Mozambique,MOZ,30950,1508896362


In [5]:
class CityNode:
    """A city used as a node of the travel graph.

    Attributes:
        cityID: identifier of the city, stored as given.
        cityName: name of the city, stored as given.
        lat, lng: latitude and longitude, stored as floats.
        coordinates: numpy float array ``[lat, lng]``.
        population: population value, stored as given (not converted).
        countryISO3: country code, stored as given.
        neighbour0, neighbour1, neighbour2: three neighbour slots, all
            initialised to ``None`` and filled through ``insertNeighbour``.
    """

    def __init__(self, cityID: int, cityName: str, lat: float, lng: float, population: int, countryISO3: str):
        """Create a node with no neighbours.

        ``lat`` and ``lng`` are passed through ``float()``, so numeric strings
        (e.g. values read with ``csv.reader``) are accepted and ``coordinates``
        always ends up as a numeric array rather than an array of strings.

        Raises:
            ValueError: if ``lat`` or ``lng`` is a string that ``float()`` cannot parse.
            TypeError: if ``lat`` or ``lng`` is of a type ``float()`` does not accept.
        """
        self.cityID = cityID
        self.cityName = cityName
        self.lat = float(lat)
        self.lng = float(lng)
        self.coordinates = np.array([self.lat, self.lng])
        self.population = population
        self.countryISO3 = countryISO3
        self.neighbour0 = None
        self.neighbour1 = None
        self.neighbour2 = None

    def insertNeighbour(self, city, position: int):
        """Store ``city`` in neighbour slot ``position`` (0, 1 or 2).

        Any other ``position`` leaves the node unchanged; no error is raised.
        """
        if position == 0:
            self.neighbour0 = city
        elif position == 1:
            self.neighbour1 = city
        elif position == 2:
            self.neighbour2 = city

    def __repr__(self):
        """Return ``"<cityName> at <lat>, <lng>"``."""
        return f"{self.cityName} at {self.lat}, {self.lng}"

In [6]:
# Build one CityNode per row of the sampled DataFrame, in row order.
graph = [
    CityNode(
        cityID=row["id"],
        cityName=row["city"],
        lat=row["lat"],
        lng=row["lng"],
        population=row["population"],
        countryISO3=row["iso3"],
    )
    for _, row in df.iterrows()
]

In [7]:
#Non va bene questo metodo, cercare altro
adjacencyMatrix = np.zeros([len(graph), len(graph)])
for i in range(len(graph)):
    for j in range(1+i, len(graph)):
        adjacencyMatrix[i][j] = geopy.distance(graph[i].coordinates, graph[j].coordinates).km
adjacencyMatrix

array([[    0.        ,  6434.80783437,  1960.64994371, ...,
         8082.34034605,  8094.77618938,  2266.95641286],
       [    0.        ,     0.        ,  8390.16210394, ...,
        13282.42639904, 13706.86202364,  8669.77910807],
       [    0.        ,     0.        ,     0.        , ...,
         6456.28418873,  6337.5603018 ,   621.78127408],
       ...,
       [    0.        ,     0.        ,     0.        , ...,
            0.        ,   768.75562593,  6628.11194802],
       [    0.        ,     0.        ,     0.        , ...,
            0.        ,     0.        ,  6424.92415663],
       [    0.        ,     0.        ,     0.        , ...,
            0.        ,     0.        ,     0.        ]])