# Nearest neighbor analysis

## Libraries and settings

In [None]:
# Install specific version of scipy

# Libraries
import folium
import platform
import pandas as pd
import seaborn as sns
import geopandas as gdp
import matplotlib.pyplot as plt

# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

# Import functions to calculate nearest-neighbors
import nn_functions as nn

## Prepare geodataframe of apartments data

In [None]:
# Read apartments data, keeping only the columns needed for the analysis
df_app = pd.read_csv('apartments_data_enriched.csv', 
                      sep=',', 
                      encoding='utf-8')[['web-scraper-order', 
                                         'lat', 
                                         'lon',
                                         'address_raw',
                                         'bfs_number',
                                         'bfs_name']]

# Convert data frame of apartments data to geodataframe
# (points are built as x=lon, y=lat)
df_app_geo = gdp.GeoDataFrame(df_app, 
                        geometry=gdp.points_from_xy(df_app['lon'], 
                                                    df_app['lat']))

# Set Coordinate Reference System (CRS).
# set_crs() returns a new geodataframe unless inplace=True is passed, so the
# result has to be assigned back; otherwise the CRS would silently stay unset.
df_app_geo = df_app_geo.set_crs(4326, allow_override=True)
df_app_geo.head()

## Prepare geodataframe of supermarkets data

In [None]:
# Read supermarket data and keep only rows without missing values in the
# selected columns (i.e. supermarkets with a known brand)
df_sup = pd.read_csv('supermarkets_data_enriched.csv', 
                     sep=',', 
                     encoding='utf-8')[['id', 
                                        'lat', 
                                        'lon',
                                        'brand',
                                        'bfs_number',
                                        'bfs_name']].dropna()
print(df_sup.shape)

# Convert data frame of supermarkets data to geodataframe
# (points are built as x=lon, y=lat)
df_sup_geo = gdp.GeoDataFrame(df_sup, 
                       geometry=gdp.points_from_xy(df_sup['lon'], 
                                                   df_sup['lat']))

# Set Coordinate Reference System (CRS).
# set_crs() returns a new geodataframe unless inplace=True is passed, so the
# result has to be assigned back; otherwise the CRS would silently stay unset.
df_sup_geo = df_sup_geo.set_crs(4326, allow_override=True)

# Subset (example)
# df_sup_geo = df_sup_geo[df_sup_geo['brand'] == 'Migros']

# Alternatively, subset of two brands (example)
# df_sup_geo = df_sup_geo[df_sup_geo['brand'].isin(['Migros', 'Coop'])]

df_sup_geo.head()

## Identify the closest supermarket per apartment and calculate its distance

In [None]:
# Look up the closest supermarket for each apartment, including the distance
closest_supermarkets = nn.nearest_neighbor(
    df_app_geo, df_sup_geo, return_dist=True
)

# Print both row counts side by side so they can be compared
print(f"{len(closest_supermarkets)} == {len(df_app_geo)}")

# Give the supermarket geometry its own column name so that it can be told
# apart from the apartment geometry later on
geometry_rename = {'geometry': 'closest_sup_geom'}
closest_supermarkets = closest_supermarkets.rename(columns=geometry_rename)
closest_supermarkets.head()

## Merge closest supermarkets to apartments

In [None]:
# Merge supermarkets to apartments on the shared row index.
# Columns that exist in both frames (e.g. 'lat', 'lon') receive the default
# pandas suffixes: '_x' for the left frame (closest_supermarkets) and '_y'
# for the right frame (df_app_geo), so 'lat_y'/'lon_y' are the apartment
# coordinates.
result = pd.merge(closest_supermarkets, 
                  df_app_geo, 
                  left_index=True, 
                  right_index=True)[['web-scraper-order',
                                     'address_raw',
                                     'lat_y',
                                     'lon_y',
                                     'id',
                                     'brand',
                                     'geometry',
                                     'closest_sup_geom',
                                     'distance']]

# Rename columns.
# rename(..., inplace=True) returns None, so capturing its return value would
# only create a misleading variable; reassign the renamed frame instead.
result = result.rename(columns={'lat_y': 'lat',
                                'lon_y': 'lon'})
result.head()

## Summary statistics of distance to closest supermarkets

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the
# 'distance' column; as the last expression of the cell it is displayed
result['distance'].describe()

## Boxplot of distance to closest supermarkets

In [None]:
# Wide, flat figure for a single horizontal boxplot
plt.figure(figsize=(8, 1.2))

# Show axis tick labels as plain numbers instead of scientific notation
plt.ticklabel_format(style='plain')

# Boxplot of the distance to the closest supermarket
sns.boxplot(x=result['distance'], color="greenyellow")

## Plotting map with apartments and nearest supermarkets

In [None]:
# Polygon map of municipalities as .json file (WGS84)
polys = gdp.read_file("GEN_A4_GEMEINDEN_2019_epsg4326.json")

# Marker symbols (blue: apartments, gold: supermarkets)
url_01 = 'https://raw.githubusercontent.com/pointhi/leaflet-color-markers/master/img/marker-icon-blue.png'
url_02 = 'https://raw.githubusercontent.com/pointhi/leaflet-color-markers/master/img/marker-icon-gold.png'

# Initialize the map
m = folium.Map(location=[47.44, 8.65], 
               zoom_start=11)

# Plot polygon map of municipalities
folium.Choropleth(
    geo_data=polys,
    name='polys',
    fill_color='transparent',
    line_color='darkred').add_to(m)

# Add a marker per apartment (popup shows the raw address).
# Iterating the columns in lockstep avoids building a row object for every
# single value lookup.
for lat, lon, address in zip(result['lat'],
                             result['lon'],
                             result['address_raw']):
    folium.Marker(location=(lat, lon),
                  popup=address,
                  icon=folium.features.CustomIcon(url_01, icon_size=(14, 23))).add_to(m)

# Add a marker per supermarket (popup shows the brand).
# closest_supermarkets is built per apartment, so the same supermarket can
# occur in many rows; duplicates are dropped so that identical markers are
# not stacked on top of each other.
unique_supermarkets = closest_supermarkets[['lat', 'lon', 'brand']].drop_duplicates()
for lat, lon, brand in zip(unique_supermarkets['lat'],
                           unique_supermarkets['lon'],
                           unique_supermarkets['brand']):
    folium.Marker(location=(lat, lon),
                  popup=brand,
                  icon=folium.features.CustomIcon(url_02, icon_size=(14, 23))).add_to(m)

# Layer control
folium.LayerControl().add_to(m)

# Plot map
m

## Save data to file

In [None]:
# Write the merged apartments/supermarkets table to a comma-separated,
# UTF-8 encoded file without the row index
result.to_csv(
    'apartments_data_with_supermarkets.csv', sep=",", encoding='utf-8', index=False
)

### Jupyter notebook --footer info-- (please always provide this at the end of each notebook)

In [None]:
import os
import platform
import socket
from platform import python_version
from datetime import datetime

# Footer: OS name, platform/release, current timestamp and Python version,
# framed by two separator lines (one item per output line)
print(
    '-----------------------------------',
    os.name.upper(),
    f"{platform.system()} | {platform.release()}",
    f"Datetime: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
    f"Python Version: {python_version()}",
    '-----------------------------------',
    sep='\n',
)