In [9]:
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd

## CITIES

In [10]:
# City names to scrape pairwise distances for; each name is inserted
# verbatim into the request URL by get_distance.
cities = [
    'helsinki', 'espoo', 'tampere', 'vantaa', 'oulu',                 # 1-5
    'turku', 'kotka', 'lahti', 'kuopio', 'pori',                      # 6-10
    'kouvola', 'joensuu', 'lappeenranta', 'mikkeli', 'vaasa',         # 11-15
    'rovaniemi', 'kemi', 'tornio', 'savonlinna', 'nokia',             # 16-20
]

# Smaller five-city list, referenced by the commented-out trial calls.
short_list = [
    'nokia',
    'rovaniemi',
    'kemi',
    'savonlinna',
    'tornio',
]

## CODE

In [11]:
def get_distance(from_city, to_city, timeout=30):
    """Scrape the distance between two cities from distancecalculator.net.

    Args:
        from_city: Origin city name, inserted verbatim into the URL path.
        to_city: Destination city name, inserted verbatim into the URL path.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        The integer found in the page's ``<span id="distance-km">`` element,
        or ``None`` when the page contains no such element.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        ValueError: If the span text is not an integer once the unit is removed.
    """
    url = 'https://www.distancecalculator.net/from-{}-to-{}'.format(from_city, to_city)

    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would freeze a long scraping run.
    query = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(query.text, 'html.parser')

    # Look the element up directly instead of scanning every <span>.
    span = soup.find('span', id='distance-km')
    if span is None:
        # Best-effort contract: callers receive None for pages without a distance.
        return None

    # Drop the trailing unit by content rather than by a fixed slice length,
    # and tolerate thousands separators such as "1,234 km".
    text = span.get_text(strip=True)
    if text.lower().endswith('km'):
        text = text[:-2]
    return int(text.replace(',', '').strip())
        
def create_matrix(cities, csv_name, cooldown_time):
    """Build a square city-to-city distance matrix and save it as a CSV file.

    Args:
        cities: City names; used as both the row and the column order.
        csv_name: Path of the CSV file to write.
        cooldown_time: Seconds to sleep after every scraped distance.

    Returns:
        A list of rows (one per origin city), each a list holding the value
        returned by ``get_distance`` for every destination, with 0 on the
        diagonal.
    """
    def _scrape_then_pause(origin, destination):
        # Fetch one distance, then wait so requests are spaced out.
        km = get_distance(origin, destination)
        time.sleep(cooldown_time)
        return km

    result = []
    for from_city in cities:
        print(f"getting distances for city: {from_city}...")

        # A city is at distance 0 from itself; every other pair is scraped.
        row = [
            0 if from_city == to_city else _scrape_then_pause(from_city, to_city)
            for to_city in cities
        ]
        result.append(row)

    # NOTE(review): index=False drops the row labels from the file; rows follow
    # the same order as the column header. Confirm this is what consumers expect.
    pd.DataFrame(result, columns=cities, index=cities).to_csv(csv_name, index=False)
    print('done!')

    return result

## FETCHING DATA

In [12]:
# Scrape the full city list, sleeping 1 second after each request,
# and write the matrix to dt_20.csv.
data = create_matrix(cities, csv_name='dt_20.csv', cooldown_time=1)
# Quick trial over the short list instead:
#data = create_matrix(short_list, 'test.csv', 1)

getting distances for city: helsinki...
getting distances for city: espoo...
getting distances for city: tampere...
getting distances for city: vantaa...
getting distances for city: oulu...
getting distances for city: turku...
getting distances for city: kotka...
getting distances for city: lahti...
getting distances for city: kuopio...
getting distances for city: pori...
getting distances for city: kouvola...
getting distances for city: joensuu...
getting distances for city: lappeenranta...
getting distances for city: mikkeli...
getting distances for city: vaasa...
getting distances for city: rovaniemi...
getting distances for city: kemi...
getting distances for city: tornio...
getting distances for city: savonlinna...
getting distances for city: nokia...
done!


## VALIDATING DATA

In [13]:
# Rebuild a labelled table from the nested list returned by create_matrix
# so the scraped values can be inspected by eye.
dataframe1 = pd.DataFrame(data, index=cities, columns=cities)
# When the trial run over short_list was used, label with that list instead:
#dataframe1 = pd.DataFrame(data, columns=short_list,index=short_list)
dataframe1

Unnamed: 0,helsinki,espoo,tampere,vantaa,oulu,turku,kotka,lahti,kuopio,pori,kouvola,joensuu,lappeenranta,mikkeli,vaasa,rovaniemi,kemi,tornio,savonlinna,nokia
helsinki,0,16,160,15,539,150,114,99,336,225,124,373,203,211,370,705,619,633,284,164
espoo,16,0,151,24,536,134,128,103,339,211,134,381,215,217,359,702,615,628,293,154
tampere,160,151,0,150,400,142,204,116,254,106,171,335,240,186,210,564,473,484,272,16
vantaa,15,24,150,0,525,153,105,84,321,220,111,359,191,196,360,691,606,619,271,155
oulu,539,536,400,525,0,533,511,448,259,433,465,341,460,380,284,166,91,111,388,406
turku,150,134,142,153,533,0,255,194,394,118,246,463,328,302,296,694,599,607,388,131
kotka,114,128,204,105,511,255,0,89,273,299,46,281,95,137,404,673,598,614,188,215
lahti,99,103,116,84,448,194,89,0,237,215,58,281,136,116,316,613,531,546,198,128
kuopio,336,339,254,321,259,394,273,237,0,343,231,111,206,135,307,412,350,370,129,268
pori,225,211,106,220,433,118,299,215,343,0,272,434,345,291,180,590,492,499,377,91
