# Nearest Neighbor Analysis

### Libraries and settings

In [23]:
# Libraries
import folium
import platform
import pandas as pd
import seaborn as sns
import geopandas as gdp
import matplotlib.pyplot as plt
import sqlite3
import fnmatch

# Function to check whether a sqlite db-connection is still open
def check_conn(conn):
    """Report whether `conn` can still hand out a cursor.

    Args:
        conn: The object to probe; the notebook passes sqlite3 connections.

    Returns:
        True if `conn.cursor()` succeeds, False if it raises any exception
        (for example because the connection was already closed, or because
        `conn` is not a connection object at all).
    """
    try:
        # Asking for a cursor is a cheap probe: it fails on a closed connection.
        conn.cursor()
        return True
    except Exception:
        return False

# Ignore warnings
# NOTE(review): with no category/module arguments this filter silences every
# warning for the rest of the session, including deprecation notices raised by
# the libraries imported above — consider narrowing it once the notebook is stable.
import warnings
warnings.filterwarnings('ignore')

# Import functions to calculate nearest-neighbors
import nn_functions as nn

### Import data from SQLite dbs

In [25]:
# Import listings data from listings.db; only the columns id, lat, lon, room_type and price are read
conn = sqlite3.connect('listings.db')
try:
    df_list = pd.read_sql_query("SELECT id, lat, lon, room_type, price FROM listings", conn)
finally:
    # Close before `conn` is rebound below, otherwise this connection is leaked
    conn.close()
print(df_list.head())

# Import tourist attractions data from tourist.db
conn = sqlite3.connect('tourist.db')
try:
    df_ta = pd.read_sql_query("SELECT * FROM tourist", conn)
finally:
    conn.close()
print(df_ta.head())

# Import public transport data from oev.db
conn = sqlite3.connect('oev.db')
try:
    df_pb = pd.read_sql_query("SELECT * FROM oev", conn)
finally:
    # `conn` stays bound (now closed), so later status checks on it still work
    conn.close()
print(df_pb.head())

       id       lat      lon        room_type  price
0   42515  46.20140  6.15507     Private room   72.0
1   44959  46.19704  6.16076     Private room   68.0
2  194110  46.20420  6.16067  Entire home/apt  135.0
3  276025  46.27944  6.16724  Entire home/apt   65.0
4  325986  46.21428  6.14610  Entire home/apt  150.0
           id        lat       lon                                    name  \
0   701851300  46.233972  6.055721  Globe de la Science et de l'Innovation   
1   981690477  46.184291  6.139151                        Musée de Carouge   
2   983366077  46.198282  6.108982                           Musée des EAN   
3  1768216261  46.220829  6.152126           Musée d'histoire des sciences   
4  2021513198  46.198828  6.137523                                   MAMCO   

  category  
0   museum  
1   museum  
2   museum  
3   museum  
4   museum  
         lat       lon         tags.name tags.railway
0  46.231334  6.110271   Genève-Aéroport        train
1  46.220426  6.094808     

In [19]:
# Close db connection (if open)
if check_conn(conn):
    try:
        conn.close()
    except sqlite3.Error as err:
        # Report the failure instead of silently swallowing it; the status
        # printed below then shows that the connection is still open.
        print(f"Could not close db connection: {err}")

# Status (True = open, False = closed)
print(check_conn(conn))

False


### Prepare geodataframes

In [33]:
# Convert data frame of listings data to geodataframe
# (point geometries are built with 'lon' as x and 'lat' as y)
df_list_geo = gdp.GeoDataFrame(
    df_list,
    geometry=gdp.points_from_xy(df_list['lon'], df_list['lat']),
)

# Set Coordinate Reference System (CRS)
# set_crs() returns a new GeoDataFrame unless inplace=True is passed, so the
# result has to be assigned back; otherwise the CRS is never stored.
df_list_geo = df_list_geo.set_crs(4326, allow_override=True)
print(df_list_geo.shape)
df_list_geo.head()

(1827, 6)


Unnamed: 0,id,lat,lon,room_type,price,geometry
0,42515,46.2014,6.15507,Private room,72.0,POINT (6.15507 46.2014)
1,44959,46.19704,6.16076,Private room,68.0,POINT (6.16076 46.19704)
2,194110,46.2042,6.16067,Entire home/apt,135.0,POINT (6.16067 46.2042)
3,276025,46.27944,6.16724,Entire home/apt,65.0,POINT (6.16724 46.27944)
4,325986,46.21428,6.1461,Entire home/apt,150.0,POINT (6.1461 46.21428)


In [32]:
# Convert data frame of tourist attraction data to geodataframe
# (point geometries are built with 'lon' as x and 'lat' as y)
df_ta_geo = gdp.GeoDataFrame(
    df_ta,
    geometry=gdp.points_from_xy(df_ta['lon'], df_ta['lat']),
)

# Set Coordinate Reference System (CRS)
# set_crs() returns a new GeoDataFrame unless inplace=True is passed, so the
# result has to be assigned back; otherwise the CRS is never stored.
df_ta_geo = df_ta_geo.set_crs(4326, allow_override=True)
print(df_ta_geo.shape)
df_ta_geo.head()

(51, 6)


Unnamed: 0,id,lat,lon,name,category,geometry
0,701851300,46.233972,6.055721,Globe de la Science et de l'Innovation,museum,POINT (6.05572 46.23397)
1,981690477,46.184291,6.139151,Musée de Carouge,museum,POINT (6.13915 46.18429)
2,983366077,46.198282,6.108982,Musée des EAN,museum,POINT (6.10898 46.19828)
3,1768216261,46.220829,6.152126,Musée d'histoire des sciences,museum,POINT (6.15213 46.22083)
4,2021513198,46.198828,6.137523,MAMCO,museum,POINT (6.13752 46.19883)


In [31]:
# Convert data frame of public transport data to geodataframe
# (point geometries are built with 'lon' as x and 'lat' as y)
df_pb_geo = gdp.GeoDataFrame(
    df_pb,
    geometry=gdp.points_from_xy(df_pb['lon'], df_pb['lat']),
)

# Set Coordinate Reference System (CRS)
# set_crs() returns a new GeoDataFrame unless inplace=True is passed, so the
# result has to be assigned back; otherwise the CRS is never stored.
df_pb_geo = df_pb_geo.set_crs(4326, allow_override=True)
print(df_pb_geo.shape)
df_pb_geo.head()


(193, 5)


Unnamed: 0,lat,lon,tags.name,tags.railway,geometry
0,46.231334,6.110271,Genève-Aéroport,train,POINT (6.11027 46.23133)
1,46.220426,6.094808,Vernier,train,POINT (6.09481 46.22043)
2,46.263684,6.161146,Creux-de-Genthod,train,POINT (6.16115 46.26368)
3,46.27974,6.165805,Versoix,train,POINT (6.16581 46.27974)
4,46.222273,6.076288,Meyrin,train,POINT (6.07629 46.22227)
