In [5]:
import requests
from bs4 import BeautifulSoup

def get_soup_from_url(url, timeout=10):
    """Download a web page and parse it into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Number of seconds to wait for the server before giving up.

    Returns:
        A BeautifulSoup object built from the raw response body with the
        'html.parser' backend.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within `timeout`.
    """
    res = requests.get(url, timeout=timeout)
    # Fail loudly on an error response instead of parsing an error page
    # as if it were the expected document.
    res.raise_for_status()

    return BeautifulSoup(res.content, 'html.parser')


def get_biggest_cities(limit=10):
    """Scrape the names of the most populated French cities from Wikipedia.

    The names are read from the first <tbody> element of the page: the first
    row is skipped, and for each following row the text of the first link
    found in the second <td> cell is taken as the city name.
    NOTE(review): assumes the first <tbody> is the ranking table and that its
    first row is a header - confirm against the live page.

    Args:
        limit: Maximum number of city names to return. Values above 10 are
            honoured (the previous implementation never read more than ten
            rows). `None` means "every usable row of the table".

    Returns:
        A list of at most `limit` city names, in table order.
    """
    url = 'https://fr.wikipedia.org/wiki/Liste_des_communes_de_France_les_plus_peuplées'

    soup = get_soup_from_url(url)
    cities = []

    table = soup.find('tbody')
    for row in table.find_all('tr')[1:]:
        # Stop as soon as enough names were collected, so the number of rows
        # read follows `limit` instead of a hard-coded slice.
        if limit is not None and len(cities) >= limit:
            break

        cells = row.find_all('td')
        link = cells[1].find('a') if len(cells) > 1 else None
        if link is None:
            # Rows without a second cell or without a link carry no city name.
            continue

        cities.append(link.text.strip())

    return cities

In [3]:
def get_distance_between(city_to, city_from, timeout=10):
    """Ask the distance24.org route service for the distance between two cities.

    Args:
        city_to: Name of the first stop.
        city_from: Name of the second stop.
        timeout: Number of seconds to wait for the server before giving up.

    Returns:
        The value stored under the 'distance' key of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within `timeout`.
        KeyError: If the JSON response has no 'distance' key.
    """
    res = requests.get(
        'https://fr.distance24.org/route.json',
        # Passing the stops through `params` lets requests percent-encode the
        # city names, so spaces, accents, '&' or '#' cannot corrupt the query.
        params={'stops': '{}|{}'.format(city_to, city_from)},
        timeout=timeout,
    )
    # Surface HTTP errors here rather than as an obscure JSON decoding failure.
    res.raise_for_status()

    return res.json()['distance']

In [8]:
cities = get_biggest_cities()

# Visit every unordered pair exactly once: each city is matched only with the
# cities that come after it in the list.
for next_position, origin in enumerate(cities, start=1):
    for destination in cities[next_position:]:
        distance = get_distance_between(origin, destination)
        print("{} => {} = {}".format(origin, destination, distance))

Paris => Marseille = 661
Paris => Lyon = 392
Paris => Toulouse = 589
Paris => Nice = 688
Paris => Nantes = 343
Paris => Montpellier = 596
Paris => Strasbourg = 397
Paris => Bordeaux = 500
Paris => Lille = 204
Marseille => Lyon = 278
Marseille => Toulouse = 319
Marseille => Nice = 159
Marseille => Nantes = 696
Marseille => Montpellier = 126
Marseille => Strasbourg = 617


From cffi callback <function _verify_callback at 0x00000232CA7E7158>:
Traceback (most recent call last):
  File "C:\Users\Thomas\Anaconda3\lib\site-packages\OpenSSL\SSL.py", line 306, in wrapper
    @wraps(callback)
KeyboardInterrupt


SSLError: HTTPSConnectionPool(host='fr.distance24.org', port=443): Max retries exceeded with url: /route.json?stops=Marseille%7CBordeaux (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')],)",),))

In [10]:
import itertools

# Every unordered pair of cities, materialised as a list so it can be reused
# when the results are assembled into a table.
combinations = list(itertools.combinations(cities, 2))

# One lookup per pair, kept in the same order as `combinations`.
distances = [get_distance_between(origin, dest) for origin, dest in combinations]

In [13]:
import pandas as pd
# One row per city pair, with the looked-up distance attached as a third column.
df = (
    pd.DataFrame(combinations, columns=['origin', 'dest'])
    .assign(distance=distances)
)

df.head()

Unnamed: 0,origin,dest,distance
0,Paris,Marseille,661
1,Paris,Lyon,392
2,Paris,Toulouse,589
3,Paris,Nice,688
4,Paris,Nantes,343


### Avec Google Maps

```python
import googlemaps

gmaps = googlemaps.Client(key=open('api-key.txt').read())

matrix = gmaps.distance_matrix(cities, cities)

rows = [r for r in matrix['rows']]

elements = [r['elements'] for r in matrix['rows']]

elements = list(itertools.chain.from_iterable(elements))
```

In [17]:
# Order the pairs by increasing distance.
df = df.sort_values(by='distance', ascending=True)

# Index the sorted table by the (origin, dest) pair.
df_with_index = df.set_index(keys=['origin', 'dest'])

df_with_index.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,distance
origin,dest,Unnamed: 2_level_1
Marseille,Montpellier,126
Marseille,Nice,159
Toulouse,Montpellier,196
Paris,Lille,204
Toulouse,Bordeaux,212
