# Capstone project report

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs:
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.21.0               |             py_0          58 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.2 MB

The following NEW packages will be INSTALLED:

  geographiclib      conda-forge/noarch::geographiclib-1.50-py_0
  geopy              conda-forge/noarch::geopy-1.21.0-py_0

The following packages will be UPDATED:

  openssl                                 1.1.1f-h516909a_0 --> 1.1.1g-h51

In [2]:
neighborhoods= pd.read_excel('ns.xlsx')
neighborhoods['popis']= neighborhoods['popis'].astype(float)
neighborhoods.head()

Unnamed: 0,Neighborhoods,Latitude,Longitude,popis,culture,buisness,schools,faculty,bigstreets,venues
0,Stari grad,45.250215,19.847105,4119.0,28,12,4,6,8,22
1,Grbavica,45.245257,19.833275,11802.0,0,9,6,1,9,1
2,Liman 1,45.243913,19.852188,4232.0,4,3,0,9,4,2
3,Liman 2,45.24146,19.845423,14300.0,4,2,1,0,3,0
4,Liman 3,45.238301,19.836687,11284.0,4,3,1,0,2,0


In [3]:
venues= pd.read_excel('data.csv.xlsx')
venues.head()

Unnamed: 0,name,categories,lat,lng
0,Savoca,Italian Restaurant,45.259939,19.832616
1,Petrus,Restaurant,45.254588,19.846535
2,VELIKI,Restaurant,45.258449,19.846238
3,Trg slobode,Plaza,45.255084,19.845115
4,Project 72,Deli / Bodega,45.259825,19.849454


In [4]:
address = 'Novi Sad, NS'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of NS are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of NS are 45.2551338, 19.8451756.


In [5]:
map_ns = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lng, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Neighborhoods']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=20,
        popup=label,
        color='yellowgreen',
        fill=True,
        fill_color='yellowgreen',
        fill_opacity=0.2,
        line_opacity= 0.3,
        parse_html=False).add_to(map_ns)  
# add markers to map
for lat1, lng1, label in zip(venues.lat, venues.lng, venues.categories):
    folium.features.CircleMarker(
        [lat1, lng1],
        radius=5,
        popup=label,
        fill=True,
        color='coral',
        fill_color='coral',
        fill_opacity=0.6
        ).add_to(map_ns)
map_ns

# Multiple Regression

In [7]:
import matplotlib.pyplot as plt
import pandas as pd
import pylab as pl
import numpy as np
%matplotlib inline
from sklearn import linear_model
reg= linear_model.LinearRegression()
x= np.asanyarray(neighborhoods[['popis','culture','buisness','schools','faculty','bigstreets']])
y=np.asanyarray(neighborhoods[['venues']])
reg.fit (x, y)
# The coefficients
print ('Coefficients: ', reg.coef_)
print ('Intercept:', reg.intercept_)


Coefficients:  [[6.80837199e-06 6.02752977e-01 2.97126323e-01 3.13093064e-01
  2.19347090e-01 8.55232972e-02]]
Intercept: [-2.83340219]


In [None]:
yhat= reg.predict(neighborhoods[['popis','culture','buisness','schools','faculty','bigstreets']])
from sklearn.metrics import r2_score
print ('R2:', r2_score(yhat,y))

# Clustering

In [9]:
from sklearn.preprocessing import StandardScaler
xk = neighborhoods[['popis','culture','buisness','schools','faculty','bigstreets','venues']]
xk = np.nan_to_num(xk)
Clus_dataSet = StandardScaler().fit_transform(xk)
clusterNum = 3
k_means = KMeans(init = "k-means++", n_clusters = clusterNum, n_init = 12)
k_means.fit(xk)
labels = k_means.labels_
print(labels)

[0 1 0 1 1 1 1 2 2 1 1 0 1 2]


In [10]:
neighborhoods["Cluster"] = labels
neighborhoods["Cluster"] = neighborhoods["Cluster"].replace(0,3)
neighborhoods

Unnamed: 0,Neighborhoods,Latitude,Longitude,popis,culture,buisness,schools,faculty,bigstreets,venues,Cluster
0,Stari grad,45.250215,19.847105,4119.0,28,12,4,6,8,22,3
1,Grbavica,45.245257,19.833275,11802.0,0,9,6,1,9,1,1
2,Liman 1,45.243913,19.852188,4232.0,4,3,0,9,4,2,3
3,Liman 2,45.24146,19.845423,14300.0,4,2,1,0,3,0,1
4,Liman 3,45.238301,19.836687,11284.0,4,3,1,0,2,0,1
5,Adamovic,45.2475,19.819711,11229.0,7,3,2,0,2,0,1
6,Sajmiste,45.253366,19.822818,14154.0,0,4,2,2,3,1,1
7,Detelinara,45.25988,19.812252,23086.0,1,1,3,0,3,0,2
8,Bistrica,45.25478,19.794825,21562.0,2,6,2,0,2,0,2
9,Banatic,45.2618,19.82447,15953.0,0,3,1,0,4,1,1


Thanks for watching this notebook! <3

# Results

In [11]:
neighborhoods["Cluster"] = neighborhoods["Cluster"].replace(1,"coral")
neighborhoods["Cluster"] = neighborhoods["Cluster"].replace(3,"green")
neighborhoods["Cluster"] = neighborhoods["Cluster"].replace(2,"darkblue")
map_ns = folium.Map(location=[latitude, longitude], zoom_start=10)
for lat, lng, neighborhood, cluster in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Neighborhoods'], neighborhoods["Cluster"]):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=20,
        popup=label,
        color= cluster,
        fill=True,
        fill_color= cluster,
        fill_opacity=0.2,
        line_opacity= 0.3,
        parse_html=False).add_to(map_ns) 
map_ns