# Geographic Optimization

## 1. Distance Calculation

This section computes the haversine (great-circle) distance in km between each of the 16 focus compounds and each of the roughly 8,000 zip codes in Germany.

### Imports

In [1]:
import pandas as pd
import numpy as np
import pgeocode
import haversine as hs

### Data Preparation

#### Create Dataframes

In [30]:
# Read the zip codes as strings so leading zeros (e.g. '01067') are preserved.
zipcodes_df = pd.read_csv(
    'zipcodes.csv',
    usecols=['zipcode'],
    dtype='str',
)
zipcodes_df

Unnamed: 0,zipcode
0,01067
1,01069
2,01097
3,01099
4,01108
...,...
8169,99988
8170,99991
8171,99994
8172,99996


In [4]:
# Load the compounds: one row per compound with its name and full address string.
compounds_df = pd.read_csv('compounds_addresses.csv')
compounds_df

Unnamed: 0,compound_name,compound_address
0,AKB Kitzingen,"AKB Compound Kitzingen, Larson Barracks 53, 97..."
1,AKB Dortmund,"AKB Compound Dortmund, Dammstraße 25, 44145 Do..."
2,AKB Zörbig,"AKB Compound Zörbig, Jeßnitzer Str. 26, 06780 ..."
3,AKB Schöneck,"AKB Compound Schöneck, Windecker Str. 2, 61137..."
4,AKB Buch,"AKB Compound Buch, An der Lehmgrube 1, 89290 Buch"
5,Mosolf Etzin,ACM Auto-Service und Umschlag-Center Mosolf Et...
6,Mosolf Kippenheim,"Mosolf Compound, Freimatte 25, 77971 Kippenheim"
7,BLG Kelheim,"Hafenstraße 33, 93342 Saal an der Donau"
8,BLG Duisburg,"BLG AutoTerminal Duisburg GmbH & Co. KG, Rotte..."
9,BLG Neuss,"ATN Autoterminal Neuss, Floßhafenstr. 30, 4146..."


#### Convert zip code to longitude and latitude

In [5]:
# pgeocode lookup object for country code 'de'; reused for all postal-code queries below.
nomi = pgeocode.Nominatim('de')

In [6]:
# Resolve all zip codes in a single pgeocode call instead of one call per row.
# Given a list of codes, query_postal_code returns a DataFrame with one row per
# requested code, in request order; unknown codes yield NaN coordinates.
zip_geo = nomi.query_postal_code(zipcodes_df['zipcode'].tolist())
# Assign positionally (.to_numpy()) so the result does not depend on the
# frame's index labels being 0..n-1.
zipcodes_df['lat'] = zip_geo['latitude'].to_numpy()
zipcodes_df['long'] = zip_geo['longitude'].to_numpy()

In [7]:
# German postal codes are exactly five digits, so only standalone five-digit
# numbers are considered (house numbers and similar shorter/longer numbers are
# ignored). The last such number in the address is taken, because the postal
# code sits next to the city name at the end of the address.
# Rows with no match -- or with a missing address, where findall does not
# return a list -- get an empty string.
compounds_df['zipcode'] = (
    compounds_df['compound_address']
    .str.findall(r'\b\d{5}\b')
    .apply(lambda codes: codes[-1] if isinstance(codes, list) and codes else '')
)


In [8]:
# Resolve every compound's zip code in one pgeocode call. The 'zipcode' column
# is addressed by name rather than by its position in the frame, so the lookup
# keeps working if columns are added or reordered.
# query_postal_code returns one row per requested code, in request order;
# codes it cannot resolve (including the '' placeholder) yield NaN coordinates.
compound_geo = nomi.query_postal_code(compounds_df['zipcode'].tolist())
# Assign positionally (.to_numpy()) so index labels of compounds_df are irrelevant.
compounds_df['lat'] = compound_geo['latitude'].to_numpy()
compounds_df['long'] = compound_geo['longitude'].to_numpy()

#### Add coordinate column (required as input for the haversine function)

In [21]:
# Pair latitude and longitude row by row into (lat, long) tuples, the point
# format handed to the haversine distance function later on.
zipcodes_df['coor'] = [
    (lat, lon) for lat, lon in zip(zipcodes_df['lat'], zipcodes_df['long'])
]
compounds_df['coor'] = [
    (lat, lon) for lat, lon in zip(compounds_df['lat'], compounds_df['long'])
]

In [22]:
# Inspect the zip-code frame with its lat, long and coor columns.
zipcodes_df

Unnamed: 0,zipcode,lat,long,coor
0,01067,51.054700,13.726900,"(51.0547, 13.7269)"
1,01069,51.043000,13.737300,"(51.043, 13.7373)"
2,01097,51.071400,13.739900,"(51.0714, 13.7399)"
3,01099,51.078300,13.805100,"(51.0783, 13.8051)"
4,01108,51.155733,13.782467,"(51.15573333333333, 13.782466666666666)"
...,...,...,...,...
8169,99988,51.172900,10.290450,"(51.1729, 10.29045)"
8170,99991,51.148467,10.553300,"(51.14846666666667, 10.5533)"
8171,99994,51.239850,10.670850,"(51.23985, 10.67085)"
8172,99996,51.288800,10.580350,"(51.2888, 10.58035)"


### Calculate Distances

In [23]:
def distance_from(loc1,loc2):
    '''Return the haversine distance between two points, rounded to two decimals.

    loc1, loc2: coordinate pairs passed unchanged to hs.haversine; this
    notebook supplies them as (latitude, longitude) tuples.
    The result is in hs.haversine's default unit (km, per the notebook's
    result note).
    '''
    return round(hs.haversine(loc1, loc2), 2)

In [24]:
# Add one column per compound to zipcodes_df, named after the compound and
# holding the distance from that compound to every zip code's coordinates.
for compound_name, compound_coor in zip(compounds_df['compound_name'], compounds_df['coor']):
    zipcodes_df[compound_name] = zipcodes_df['coor'].apply(
        lambda zip_coor: distance_from(compound_coor, zip_coor)
    )

In [28]:
# Keep only the zip code and the per-compound distance columns; the helper
# coordinate columns are not part of the result table.
helper_columns = ['lat', 'long', 'coor']
distances = zipcodes_df.drop(columns=helper_columns)

In [29]:
# Show the final table: one row per zip code, one distance column per compound.
distances

Unnamed: 0,zipcode,AKB Kitzingen,AKB Dortmund,AKB Zörbig,AKB Schöneck,AKB Buch,Mosolf Etzin,Mosolf Kippenheim,BLG Kelheim,BLG Duisburg,BLG Neuss,Carservice Erkens,ARS Altmann Wolnzach,CAT Zülpich,Autohaus Siebrecht,Neu: FleetParQ Essen,Neu: FleetParQ Kassel
0,01067,293.89,437.43,127.20,357.77,405.16,170.55,523.64,271.74,490.10,490.84,518.92,311.40,497.90,291.88,465.29,293.80
1,01069,293.92,438.36,128.48,358.18,404.64,172.02,523.53,270.96,490.99,491.67,519.80,310.63,498.59,292.92,466.20,294.69
2,01097,295.57,438.02,127.07,359.08,407.16,169.15,525.43,273.81,490.78,491.60,519.59,313.46,498.87,292.29,465.92,294.49
3,01099,299.95,442.40,130.70,363.69,410.71,170.19,529.65,276.70,495.21,496.08,524.02,316.41,503.45,296.51,470.32,298.93
4,01108,302.71,439.58,125.45,364.28,416.25,161.66,533.13,283.46,492.70,493.93,521.50,323.08,502.25,292.98,467.63,296.52
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8169,99988,158.34,199.13,137.14,148.67,327.91,226.36,365.68,278.63,250.38,250.62,279.17,301.25,260.43,70.45,226.25,54.78
8170,99991,158.19,217.61,121.49,160.24,326.20,214.74,372.89,268.74,268.90,269.05,297.69,293.20,278.06,85.17,244.77,73.22
8171,99994,169.89,223.72,109.81,173.07,337.10,201.78,385.88,275.33,275.96,276.94,304.78,300.97,287.84,85.46,251.32,79.66
8172,99996,173.86,216.66,113.71,172.08,341.91,202.70,387.09,282.60,269.26,270.63,298.07,307.78,282.71,77.22,244.41,73.06


Result:
For every zip code, the distance (in km) to every compound is given.
Because the result is stored in a pandas DataFrame, further analysis is straightforward (e.g. finding the minimum per row to identify the nearest compound).