# Geographic Optimization

## 1. Distance Calculation

This section computes the distance between each of the 16 focus compounds and each of the approx. 8000 zip codes in Germany.

### Imports

In [1]:
import pandas as pd
import numpy as np
import pgeocode
import haversine as hs

### Data Preparation

#### Create Dataframes

In [None]:
# Load only the `zipcode` column and read it as strings (dtype='str'), so the
# codes are kept as text rather than parsed as numbers.
zipcodes_df = pd.read_csv('zipcodes.csv',usecols=['zipcode'],dtype='str')
# Display the table for a quick visual check.
zipcodes_df

In [None]:
# Load the compound table; later cells read its `compound_name` and
# `compound_address` columns.
compounds_df = pd.read_csv('compounds_addresses.csv')
# Display the table for a quick visual check.
compounds_df

#### Convert zip code to longitude and latitude

In [None]:
# Postal-code lookup object for country code 'de'; the cells below call
# nomi.query_postal_code(...) to obtain latitude/longitude per zip code.
nomi = pgeocode.Nominatim('de')

In [None]:
# Resolve all zip codes with one batched lookup instead of one query per row.
# query_postal_code accepts a list of codes and returns a DataFrame with one
# row per input code, in input order; codes it cannot resolve get NaN
# coordinates.
zip_geo = nomi.query_postal_code(zipcodes_df['zipcode'].tolist())
# .to_numpy() makes the assignment positional, so it does not depend on the
# index labels of zipcodes_df (the previous loop used the index label as a
# row position, which is only correct for a default 0..n-1 index).
zipcodes_df['lat'] = zip_geo['latitude'].to_numpy()
zipcodes_df['long'] = zip_geo['longitude'].to_numpy()

In [None]:
# Take the last run of two or more digits in each address as its zip code;
# addresses without such a run get an empty string.
compounds_df['zipcode'] = (
    compounds_df['compound_address']
    .str.findall(r'([0-9]\d+)')
    .apply(lambda digit_runs: digit_runs[-1] if digit_runs else '')
)


In [None]:
# Look the zip codes up by column name rather than by the hard-coded column
# position 2, so the cell keeps working if the CSV gains or loses a column.
# query_postal_code accepts a list of codes and returns a DataFrame with one
# row per input code, in input order; codes it cannot resolve (including the
# '' placeholder for addresses without a zip code) get NaN coordinates.
compound_geo = nomi.query_postal_code(compounds_df['zipcode'].tolist())
# .to_numpy() makes the assignment positional, independent of the index
# labels of compounds_df.
compounds_df['lat'] = compound_geo['latitude'].to_numpy()
compounds_df['long'] = compound_geo['longitude'].to_numpy()

#### Add coordinate column (the `haversine` package expects `(lat, long)` tuples)

In [None]:
# Pair latitude and longitude into one (lat, long) tuple per row; these
# tuples are what the distance functions below receive.
zipcodes_df['coor'] = list(zip(zipcodes_df['lat'], zipcodes_df['long']))
compounds_df['coor'] = list(zip(compounds_df['lat'], compounds_df['long']))

In [None]:
# Display the zip code table, now including the lat, long and coor columns.
zipcodes_df

### Calculate Distances

In [None]:
def distance_from(loc1,loc2):
    """Return the haversine distance between two coordinate pairs.

    Both arguments are passed unchanged to ``hs.haversine``; the callers in
    this notebook pass ``(lat, long)`` tuples taken from the ``coor``
    columns (compound first, zip code second). The result is rounded to two
    decimal places.
    """
    return round(hs.haversine(loc1, loc2), 2)

In [None]:
# Work on a copy so the per-compound distance columns added below do not
# end up in zipcodes_df itself.
full_distances_df = zipcodes_df.copy()

In [None]:
# Add one column per compound, named after the compound, holding the
# haversine distance from that compound to every zip code.
for compound_name, compound_coor in zip(compounds_df['compound_name'], compounds_df['coor']):
    full_distances_df[compound_name] = full_distances_df['coor'].apply(
        lambda zip_coor: distance_from(compound_coor, zip_coor)
    )

In [None]:
# Keep only the zip code and the per-compound distance columns.
distances = full_distances_df.drop(columns=['lat', 'long', 'coor'])

In [None]:
# Use the zip code as the row label of the distance table.
distances = distances.set_index('zipcode')

In [None]:
# Display the zip code x compound distance table.
distances

Result:
For every zip code, the distance (in km) to every compound is given. 
Because the result is stored in a pandas DataFrame, further analysis is straightforward (e.g. finding the minimum per row to identify the nearest compound).

### Calculate Driving Distance

In [None]:
import requests
import json
from tqdm import tqdm

In [None]:
def request_driving_distance_in_meters_from_api(loc1,loc2,timeout=10):
    """Request the driving distance between two points from the OSRM routing API.

    Args:
        loc1: First point as a ``(lat, long)`` pair.
        loc2: Second point as a ``(lat, long)`` pair.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the whole batch
            of requests indefinitely.

    Returns:
        The ``distance`` value of the first route in the response, or ``0.0``
        when the response contains no route (for example because the
        coordinates could not be routed).

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    # The URL takes each coordinate as "longitude,latitude", hence the
    # swapped indices relative to the (lat, long) input pairs.
    url = f"""http://router.project-osrm.org/route/v1/car/{loc1[1]},{loc1[0]};{loc2[1]},{loc2[0]}?overview=false"""
    r = requests.get(url, timeout=timeout)
    content = json.loads(r.content)
    # Treat a missing *or empty* route list as "no route"; indexing an empty
    # list would otherwise raise IndexError.
    routes = content.get('routes') if isinstance(content, dict) else None
    if not routes:
        return 0.0
    return routes[0]['distance']

In [None]:
# Register progress_apply on pandas objects so each compound's batch of
# API requests shows a progress bar.
tqdm.pandas()
driving_distances_df = zipcodes_df.copy()
# Add one column per compound, named after the compound, holding the API's
# driving distance from that compound to every zip code (one request each).
for compound_name, compound_coor in zip(compounds_df['compound_name'], compounds_df['coor']):
    driving_distances_df[compound_name] = driving_distances_df['coor'].progress_apply(
        lambda zip_coor: request_driving_distance_in_meters_from_api(compound_coor, zip_coor)
    )

In [None]:
# Display the table with one driving-distance column per compound.
driving_distances_df