In [None]:
import pandas as pd
import requests
from urllib.parse import quote

# Data processing

In [None]:
## Load the Salesforce and C360 extracts from CSV.
## `Path` is a placeholder: the real file paths were removed to make the code shareable.
df_sf = pd.read_csv(filepath_or_buffer=Path)
df_c360 = pd.read_csv(filepath_or_buffer=Path)

# display(df_sf)
# display(df_c360)

In [None]:
## Salesforce data: discard the redundant columns, then relabel the location
## columns with a `Dest_` prefix.

# df_sf.columns.values.tolist()
df_sf = (
    df_sf
    .drop(columns=['redundant_cols'])
    .rename(columns={
        'Longitude': 'Dest_lng',
        'Latitude': 'Dest_lat',
        'Town_City': 'Dest_Town_City',
        'County': 'Dest_county',
        'Country': 'Dest_country',
    })
)

# display(df_sf)
# df_sf.County.unique()

In [None]:
## C360 data: drop rows with missing values in the key column, discard the
## redundant columns, and rename the shipping address to `Origin_address`.

df_c360 = (
    df_c360
    .dropna(subset=['drop na values for this col'])
    .drop(columns=['redundant cols'])
    .rename(columns={'order_shipping_address': 'Origin_address'})
)
# display(df_c360)

In [None]:
## Inner-join Salesforce and C360 DTC data on the coupon code, then drop the
## join keys, remove duplicate rows and renumber the index from 0.

abt = (
    df_sf
    .merge(df_c360, how='inner', left_on='Code', right_on='key')
    .drop(columns=['Code', 'key'])
    .drop_duplicates()
    .reset_index(drop=True)
)

# display(abt)
# abt.head(0)

# URL encoding - geocoding

In [None]:
## Build one Geocoding API request URL per row (https://developers.google.com/maps/url-encoding)
## and create empty result columns to be populated from the API responses later.

geocoding_p1 = 'https://maps.googleapis.com/maps/api/geocode/json?address='
geocoding_p2 = '&region=gb&key='
geocoding_api_key = '##API KEY##'
abt['geocode'] = geocoding_p1 + abt['Origin_address'] + geocoding_p2 + geocoding_api_key

## Use real missing values (not '') as the "not geocoded yet" marker: the geocoding
## loop selects pending rows with isna(), which is False for an empty string.
abt['Origin_lat'] = float('nan')
abt['Origin_lng'] = float('nan')
## None keeps these two columns as object dtype so they can hold strings later.
abt['Origin_admin_area'] = None
abt['Origin_country'] = None

# display(abt)

In [None]:
## View encoding results

# print(abt['geocode'][0])
# print(abt['dist_matrix'][0])
# abt.loc[0]['Dest_lat']

# Geocoding API - address to coords

In [None]:
## Geocode shipping addresses (continued): reuse the results from a previous run and resume where it
## left off by only geocoding rows whose latitude value is still blank.

## For tag country = GB then administrative_area_level_2
## For tag country = IE then administrative_area_level_1
## For tag country = GG JE IM then country

def get_proper_name(response, country):
    """Return the administrative-area name for a geocoding response.

    The address component to read depends on the country tag:
    GB -> administrative_area_level_2, IE -> administrative_area_level_1,
    GG / JE / IM -> country.

    Parameters
    ----------
    response : dict
        Parsed Geocoding API JSON; only the first entry of ``results`` is read.
    country : str or None
        Country short name, as returned by ``get_country_tag``.

    Returns
    -------
    str or None
        ``long_name`` of the first matching address component. None when the
        country is not one of the supported tags (previously this raised
        UnboundLocalError) or when no component has the required type.
    """
    component_type = {
        'GB': "administrative_area_level_2",
        'IE': "administrative_area_level_1",
        'GG': "country",
        'JE': "country",
        'IM': "country",
    }.get(country)

    # Unsupported or missing country tag: there is no component type to look for.
    if component_type is None:
        return None

    for item in response['results'][0]['address_components']:
        if component_type in item['types']:
            return item['long_name']

    return None

def get_country_tag(response):
    """Return the ``short_name`` of the first address component typed 'country'.

    Only the first entry of ``response['results']`` is inspected. Returns None
    when none of its address components carries the 'country' type.
    """
    components = response['results'][0]['address_components']
    return next(
        (part['short_name'] for part in components if 'country' in part['types']),
        None,
    )

## Columns populated from each Geocoding API response.
origin_cols = ['Origin_lat', 'Origin_lng', 'Origin_admin_area', 'Origin_country']

## The two text columns must be able to hold strings even if they are currently
## all-missing (e.g. numeric NaN), so keep them as object dtype.
for text_col in ['Origin_admin_area', 'Origin_country']:
    abt[text_col] = abt[text_col].astype(object)

for row in abt.index:
    ## Only geocode rows that are still pending. Both NaN and the '' placeholder
    ## count as blank, so the loop works on a fresh run as well as on a resumed one.
    current_lat = abt.at[row, 'Origin_lat']
    if not (pd.isna(current_lat) or current_lat == ''):
        continue

    ## '&', '=', '?', '/' and ':' are left unescaped because they form the URL structure.
    url = quote(abt.at[row, 'geocode'], safe='/:=?&')
    ## The timeout stops one stalled request from hanging the whole run.
    response = requests.get(url, timeout=30).json()

    if response.get('status') == 'OK':
        location = response['results'][0]['geometry']['location']
        country_tag = get_country_tag(response)
        abt.at[row, 'Origin_lat'] = location['lat']
        abt.at[row, 'Origin_lng'] = location['lng']
        abt.at[row, 'Origin_admin_area'] = get_proper_name(response, country_tag)
        abt.at[row, 'Origin_country'] = country_tag
    else:
        ## Mark the row as missing so it can be imputed manually or retried later.
        for col in origin_cols:
            abt.at[row, col] = float('NaN')

display(abt)

In [None]:
## Save file to impute errors manually.
## `Path` is a placeholder: the real output path was removed to make the code shareable.

path_to_save = Path
abt.to_csv(path_to_save, index=False)

In [None]:
## Check how many nulls returned from API response.

# print(abt['Origin_lat'].isnull().sum())
# print(abt['Origin_lng'].isnull().sum())
# print(abt['Origin_admin_area'].isnull().sum())
# print(abt['Origin_country'].isnull().sum())

# print(abt.Origin_country.unique())

In [None]:
## Reload the table after manual imputation: a couple of entries could not be
## geocoded because of the consumer's shipping address.
## `Path` is a placeholder for the imputed CSV file.

abt = pd.read_csv(filepath_or_buffer=Path)

# URL encoding - distance matrix

In [None]:
## Build one Distance Matrix API request URL per row from the geocoded origin and
## the Salesforce destination coordinates, and create blank result columns.

dist_matrix_p1 = 'https://maps.googleapis.com/maps/api/distancematrix/json?origins='
dist_matrix_p2 = '&destinations='
dist_matrix_p3 = '&units=metric&key='
dist_matrix_api_key = '##API KEY##'

## The API expects coordinates as "lat,lng" with no space between the two values;
## a space-separated pair is not the documented coordinate format.
abt['dist_matrix'] = (
    dist_matrix_p1
    + abt['Origin_lat'].astype(str) + ',' + abt['Origin_lng'].astype(str)
    + dist_matrix_p2
    + abt['Dest_lat'].astype(str) + ',' + abt['Dest_lng'].astype(str)
    + dist_matrix_p3
    + dist_matrix_api_key
)

abt['Distance_metres'] = ''
abt['Distance_secs'] = ''

# display(abt)

# Distance matrix API - distance between a to b

In [None]:
## Calculate driving time and distance between origin and destination.
## Rows whose request or route lookup fails are filled with NaN.

for row in abt.index:
    ## '&', '=', '?', '/' and ':' are left unescaped because they form the URL structure.
    url = quote(abt.at[row, 'dist_matrix'], safe='/:=?&')
    ## The timeout stops one stalled request from hanging the whole run.
    response = requests.get(url, timeout=30).json()

    distance_value = float('NaN')
    duration_value = float('NaN')

    if response.get('status') == 'OK':
        ## A request-level 'OK' does not guarantee a route: each element carries its
        ## own status, and 'distance'/'duration' are only read when that is 'OK' too.
        element = response['rows'][0]['elements'][0]
        if element.get('status') == 'OK':
            distance_value = element['distance']['value']
            duration_value = element['duration']['value']

    ## Write with .at (single label-based assignment) instead of chained indexing,
    ## which may assign to a temporary copy rather than to `abt`.
    abt.at[row, 'Distance_metres'] = distance_value
    abt.at[row, 'Distance_secs'] = duration_value

# display(abt)

In [None]:
## Convert metres -> kilometres and seconds -> minutes.
## The source columns start out as '' placeholders (object dtype), so coerce them to
## numbers first; anything non-numeric (e.g. a leftover '') becomes NaN.
abt['Distance_km'] = pd.to_numeric(abt['Distance_metres'], errors='coerce') * (1/1000)
abt['Distance_min'] = pd.to_numeric(abt['Distance_secs'], errors='coerce') * (1/60)

In [None]:
## Show the table including the derived Distance_km / Distance_min columns.
display(abt)

In [None]:
## Write the final table to CSV without the index column.
## `Path` is a placeholder for the output file path.
path_to_save = Path
abt.to_csv(path_or_buf=path_to_save, index=False)