# Driving Distance Calculation

## Imports

In [2]:
import pandas as pd
import numpy as np
import pgeocode
import osrm
import requests
import json
from tqdm import tqdm
import glob
import os 

## Data Preparation

### Create DataFrames

In [3]:
# Compounds together with their addresses
compounds_df = pd.read_csv('compounds_addresses.csv')

# Customer zipcodes; only the 'zipcode' column is loaded, and as text
# (a zipcode such as 01067 would lose its leading zero as a number)
zipcodes_df = pd.read_csv(
    'zipcodes.csv',
    usecols=['zipcode'],
    dtype='str',
)

### Convert Zipcode to longitude and latitude

#### For zipcodes_df

In [6]:
# pgeocode's Nominatim resolves postal codes to coordinates and region details.
# Set the country to the two-letter code of the country the zipcodes belong to.
nomi = pgeocode.Nominatim(country='de')

postal_code                                                   01067
country_code                                                     DE
place_name        Dresden Innere Altstadt, Dresden, Dresden Frie...
state_name                                                  Sachsen
state_code                                                       SN
county_name                                                     NaN
county_code                                                     0.0
community_name                             Kreisfreie Stadt Dresden
community_code                                              14612.0
latitude                                                    51.0547
longitude                                                   13.7269
accuracy                                                        4.0
Name: 0, dtype: object

In [7]:
# Query pgeocode for lat, long, state_name, state_code, community_name, community_code.
# All zipcodes are looked up in ONE call: given a list of codes, pgeocode returns
# a DataFrame with one row per requested code, in the same order (unknown codes
# come back as NaN rows). This replaces a per-row query loop.
geo_info = nomi.query_postal_code(zipcodes_df['zipcode'].tolist())

# Map pgeocode's column names onto the column names used in this notebook.
# .to_numpy() assigns by position, so the result does not depend on the index
# of zipcodes_df; a length mismatch would raise instead of silently misaligning.
for target_column, source_column in [
    ('lat', 'latitude'),
    ('long', 'longitude'),
    ('state', 'state_name'),
    ('state_code', 'state_code'),
    ('community_name', 'community_name'),
    ('community_code', 'community_code'),
]:
    zipcodes_df[target_column] = geo_info[source_column].to_numpy()

#### For compounds_df

In [8]:
# Extract zipcode from address: collect every run of two or more digits and
# keep the last one. Addresses without such a run, and missing addresses
# (where findall yields NaN instead of a list), get an empty zipcode.
digit_runs = compounds_df['compound_address'].str.findall(r'([0-9]\d+)')
compounds_df['zipcode'] = digit_runs.apply(
    lambda runs: runs[-1] if isinstance(runs, list) and runs else ''
)

# Query pgeocode for lat and long in ONE call (more information is available
# in the result, e.g. state_name or community_name). The zipcode column is
# addressed by name, so extra columns in the compounds file cannot shift it.
geo_info = nomi.query_postal_code(compounds_df['zipcode'].tolist())

# .to_numpy() assigns by position, independent of the index of compounds_df
compounds_df['lat'] = geo_info['latitude'].to_numpy()
compounds_df['long'] = geo_info['longitude'].to_numpy()

### Add coordinate column

In [None]:
# Both frames get a 'coor' column holding (lat, long) tuples
for frame in (zipcodes_df, compounds_df):
    frame['coor'] = list(zip(frame['lat'], frame['long']))

## Driving Distance Calculation

In [None]:
def request_driving_distance_in_meters_from_api(loc1,loc2):
    """Return the driving distance in meters between two coordinates.

    Sends one request to the route service of the public OSRM demo server
    (router.project-osrm.org) using the ``car`` profile.

    Args:
        loc1: ``(latitude, longitude)`` pair of the first location.
        loc2: ``(latitude, longitude)`` pair of the second location.

    Returns:
        The ``distance`` value of the first route in the response, or ``0.0``
        when the response contains no route.

    Raises:
        requests.RequestException: If the request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    # The URL expects "longitude,latitude" pairs, i.e. the reverse of the
    # (lat, long) tuples passed in.
    url = (
        "http://router.project-osrm.org/route/v1/car/"
        f"{loc1[1]},{loc1[0]};{loc2[1]},{loc2[0]}?overview=false"
    )
    # Without a timeout a single stalled connection would block the whole
    # batch run indefinitely.
    response = requests.get(url, timeout=30)
    content = response.json()

    # Error responses carry no 'routes' key; an empty list is guarded as well
    # so that indexing the first route cannot raise IndexError.
    routes = content.get('routes')
    if not routes:
        return 0.0
    return routes[0]['distance']

In [None]:
# Checkpointing by dividing the dataframe into 100 small chunks and only calculating on one chunk at a time.
# The row positions are split (not the DataFrame itself): np.array_split on a
# DataFrame relies on the deprecated DataFrame.swapaxes. The chunk boundaries
# are the same as np.array_split(zipcodes_df, 100) produced, and each chunk is
# an independent copy that keeps its original index labels.
zipcodes_df_split = [
    zipcodes_df.iloc[positions]
    for positions in np.array_split(np.arange(len(zipcodes_df)), 100)
]

In [None]:
# Calculations
tqdm.pandas()  # makes progress_apply available on pandas objects

# Index of the first chunk to process. NOTE(review): 55 presumably resumes an
# earlier, interrupted run whose chunks 0-54 are already on disk; set it to 0
# to compute every chunk.
first_chunk = 55

for i in range(first_chunk, len(zipcodes_df_split)):
    # Work on a copy so the new distance columns are not written into a slice
    # of zipcodes_df (avoids SettingWithCopyWarning and ambiguous writes).
    driving_distances_df = zipcodes_df_split[i].copy()

    # One column per compound, holding the distance from that compound to each zipcode
    for compound_name, compound_coor in zip(compounds_df['compound_name'], compounds_df['coor']):
        driving_distances_df[compound_name] = driving_distances_df['coor'].progress_apply(
            lambda zipcode_coor: request_driving_distance_in_meters_from_api(compound_coor, zipcode_coor)
        )

    # Checkpoint: each finished chunk is written to its own file
    driving_distances_df.to_csv(f'csv_{i}')

In [None]:
# Merging all resulting csv checkpoint files into one
def _chunk_sort_key(path):
    """Order checkpoint files by their numeric suffix, so csv_2 comes before csv_10."""
    suffix = path.rpartition('_')[2]
    if suffix.isdecimal():
        return (0, int(suffix), path)
    # Files without a numeric suffix are still merged, after the numbered ones
    return (1, 0, path)


files = sorted(glob.glob("csv_*"), key=_chunk_sort_key)
print(len(files))

# Read the zipcode column as text: with dtype inference '01067' would be
# parsed as the integer 1067 and lose its leading zero.
driving_distances_complete_df = pd.concat(
    [pd.read_csv(path, dtype={'zipcode': str}) for path in files],
    ignore_index=True,
)

# Drop the 'Unnamed...' column: it is the index that to_csv wrote into each checkpoint file
is_index_column = driving_distances_complete_df.columns.str.startswith('Unnamed')
driving_distances_complete_df = driving_distances_complete_df.loc[:, ~is_index_column]

# Save raw results as csv
driving_distances_complete_df.to_csv('driving_distances_complete_in_meters.csv')

In [None]:
def convert_meters_to_kilometers(x):
    """Return *x*, a distance given in meters, expressed in kilometers."""
    meters_per_kilometer = 1000
    return x / meters_per_kilometer


In [None]:
# Convert to km and save as csv.
# The conversion runs on a copy, so the frame holding the meter values is left untouched.
distances_km_df = driving_distances_complete_df.copy()

# Everything from column position 8 onwards is converted row by row
# (presumably the per-compound distance columns; verify if the leading columns change)
meter_values = distances_km_df.iloc[:, 8:]
distances_km_df.iloc[:, 8:] = meter_values.apply(convert_meters_to_kilometers, axis=1)

distances_km_df.to_csv('distances_complete_in_kilometers.csv')