In [1]:
!conda install -c conda-forge beautifulsoup4 --yes
!conda install -c conda-forge geopy --yes
!conda install -c conda-forge folium=0.5.0 --yes

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following packages will be UPDATED:

    beautifulsoup4: 4.6.0-py35h442a8c9_1 --> 4.6.3-py35_0 conda-forge

beautifulsoup4 100% |################################| Time: 0:00:00  40.77 MB/s
Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    geographiclib: 1.49-py_0   conda-forge
    geopy:         1.19.0-py_0 conda-forge

geographiclib- 100% |################################| Time: 0:00:00  23.41 MB/s
geopy-1.19.0-p 100% |################################| Time: 0:00:00  35.76 MB/s
Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:

In [2]:
print('Libraries installed!')
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import requests
import json

from bs4 import BeautifulSoup

from geopy.geocoders import Nominatim

import folium
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.preprocessing import StandardScaler, normalize, scale
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, r2_score

print('Libraries imported!')

Libraries installed!
Libraries imported!


In [3]:
# Load Boston neighborhood rent info
boston_Data = pd.read_csv('https://raw.githubusercontent.com/littlefish77/capstone/master/Boston_Neighborhoods.csv')
boston_Data

Unnamed: 0,Neighborhood,City,State,2019w
0,Roslindale,Boston,MA,1700
1,Jamaica Plain,Boston,MA,1850
2,Mission Hill,Boston,MA,2100
3,Longwood,Boston,MA,2300
4,Bay Village,Boston,MA,2700
5,Chinatown,Boston,MA,3000
6,North End,Boston,MA,2300
7,Roxbury,Boston,MA,1780
8,South End,Boston,MA,2700
9,Back Bay,Boston,MA,2900


In [4]:
#creating a dataframe for storing co-ordinates details.
coordinates = pd.DataFrame(columns=['Latitude','Longitude'])

# Using 'for loop' to get pass each Neighborhood name and get co-ordinates details through geocoding.
for row,neighborhood in boston_Data.iterrows():
    address = neighborhood['Neighborhood'] + ',' + neighborhood['City'] + ',' + neighborhood['State'] 
    try:
        geolocator = Nominatim(user_agent="my-application")
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        # appending latitude and longitude values on coordiantes dataframe.
        coordinates = coordinates.append({'Latitude':latitude,'Longitude':longitude},ignore_index=True)
    except:
        print(address)
    

coordinates

Unnamed: 0,Latitude,Longitude
0,42.291209,-71.124497
1,42.30982,-71.12033
2,42.332926,-71.103214
3,42.336168,-71.099527
4,42.350011,-71.066948
5,42.352217,-71.062607
6,42.365097,-71.054495
7,42.324843,-71.095016
8,42.34131,-71.07723
9,42.350707,-71.07973


In [5]:
#Take only Neighorhood and rent data for further testing.
boston_Data= boston_Data[['Neighborhood','2019w']]

# Adding the boston_Data and coordinates in one new dataframe.
boston_Neighborhood = boston_Data.join(coordinates, how='outer')
boston_Neighborhood

Unnamed: 0,Neighborhood,2019w,Latitude,Longitude
0,Roslindale,1700,42.291209,-71.124497
1,Jamaica Plain,1850,42.30982,-71.12033
2,Mission Hill,2100,42.332926,-71.103214
3,Longwood,2300,42.336168,-71.099527
4,Bay Village,2700,42.350011,-71.066948
5,Chinatown,3000,42.352217,-71.062607
6,North End,2300,42.365097,-71.054495
7,Roxbury,1780,42.324843,-71.095016
8,South End,2700,42.34131,-71.07723
9,Back Bay,2900,42.350707,-71.07973


In [6]:
# Max value in '2019w'
maxrentofN = max(boston_Neighborhood['2019w'])
print('Max rent value of Boston Neighborhood is $',maxrentofN)
minrentofN = min(boston_Neighborhood['2019w'])
print('Min rent value Boston Neighborhood is $',minrentofN)

Max rent value of Boston Neighborhood is $ 3050
Min rent value Boston Neighborhood is $ 1500


In [7]:
#create a new dataframe to store rent score.
rent_Score = pd.DataFrame(columns=['RentScore'])
for index, Neighborhood in boston_Neighborhood.iterrows():
    currentrentofN = Neighborhood['2019w']
    rentscore =  (maxrentofN-currentrentofN)/(maxrentofN-minrentofN)
    rent_Score = rent_Score.append({'RentScore' : rentscore},ignore_index = True)
    
rent_Score

Unnamed: 0,RentScore
0,0.870968
1,0.774194
2,0.612903
3,0.483871
4,0.225806
5,0.032258
6,0.483871
7,0.819355
8,0.225806
9,0.096774


In [8]:
#adding rent score information in boston_Neighborhood dataframe. 
boston_Neighborhood = boston_Neighborhood.join(rent_Score, how='outer')
boston_Neighborhood

Unnamed: 0,Neighborhood,2019w,Latitude,Longitude,RentScore
0,Roslindale,1700,42.291209,-71.124497,0.870968
1,Jamaica Plain,1850,42.30982,-71.12033,0.774194
2,Mission Hill,2100,42.332926,-71.103214,0.612903
3,Longwood,2300,42.336168,-71.099527,0.483871
4,Bay Village,2700,42.350011,-71.066948,0.225806
5,Chinatown,3000,42.352217,-71.062607,0.032258
6,North End,2300,42.365097,-71.054495,0.483871
7,Roxbury,1780,42.324843,-71.095016,0.819355
8,South End,2700,42.34131,-71.07723,0.225806
9,Back Bay,2900,42.350707,-71.07973,0.096774


In [12]:
CLIENT_ID = '5U2IT4EVFR3AHE42MXPA4G23KTKMVUUKBS3NJFQSYSLZ1JZD' # your Foursquare ID
CLIENT_SECRET = '0HHJWAAX1CYOYJX4ZSFD1QAMXED1PKKAADKCZJIEDZRQWNYR' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 5U2IT4EVFR3AHE42MXPA4G23KTKMVUUKBS3NJFQSYSLZ1JZD
CLIENT_SECRET:0HHJWAAX1CYOYJX4ZSFD1QAMXED1PKKAADKCZJIEDZRQWNYR


In [14]:
# Indian restaurant categoryid of foursquare
categoryId= '4bf58dd8d48988d10f941735'
# url willl search in 500 meters radius of latitude and longitude.
radius=500
api_endpoint = 'https://api.foursquare.com/'
#used below url to cache the request made to foursquare api
#api_endpoint = 'http://cladiusfernando-eval-test.apigee.net/foursquare/'

#dataframe to save venue information.
venue_Details = pd.DataFrame(columns=['VenueNeighborhoodName','VenueName','VenueRating'])
#dataframe to save count of good rating information.
goodRating =pd.DataFrame(columns=['Neighborhood','GoodRatingRestaurant'])

# 'for loop' to get venue information search endpoint
for row,neighborhood in boston_Neighborhood.iterrows():
    venue_NeighborhoodName = neighborhood['Neighborhood']
    lat = neighborhood['Latitude']
    long = neighborhood['Longitude']
    url = '{}v2/venues/search?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&categoryId={}'.format(
    api_endpoint,
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    lat, 
    long, 
    radius,
    categoryId)
    
    venues = requests.get(url).json()['response']['venues']
    goodrating = 0
    for venue in venues:
        venuen = venue['name']         #retrieving venue name neighborhood-wise
        VENUE_ID = venue['id']         #retrieving venue id neighborhood-wise  
        url2 = '{}v2/venues/{}&client_id={}&client_secret={}&v={}'.format(
        api_endpoint,
        VENUE_ID,
        CLIENT_ID, 
        CLIENT_SECRET, 
        VERSION
        )
        ven = requests.get(url2).json()['response']['venue']
        
         #if statement to get rating values for each venue.
        rating = 0
        if 'rating' in ven:
            rating = ven['rating']
            venue_Details = venue_Details.append({'VenueNeighborhoodName': venue_NeighborhoodName,'VenueName' : venuen, 'VenueRating': rating}, ignore_index = True)
       
            #if statement to get good restaurant count neighborhood-wise.
            if rating >= 7.0:
                goodrating = goodrating+1
    #appending values in goodrating dataframe          
    goodRating =goodRating.append({'Neighborhood': venue_NeighborhoodName , 'GoodRatingRestaurant' : goodrating}, ignore_index = True)

goodRating

KeyError: 'venue'

In [61]:
venue_Details

Unnamed: 0,VenueNeighborhoodName,VenueName,VenueRating
0,Bay Village,P.F. Chang's,7.3
1,Bay Village,Hot Eastern,7.3
2,Bay Village,Spicy World,7.4
3,Bay Village,Wing's Kitchen,7.2
4,Bay Village,Dumpling King,7.0
5,Bay Village,Potluck Cafe,5.9
6,Bay Village,Dumpling Cafe,8.1
7,Bay Village,Asian Garden,6.3
8,Bay Village,Gourmet Dumpling House,8.7
9,Bay Village,Quic Pic BBQ,6.9


In [62]:
# Add good rating count restaurants on boston_Neighborhood dataframe.
boston_Neighborhood = boston_Neighborhood.merge(goodRating, how='outer')
boston_Neighborhood

Unnamed: 0,Neighborhood,2019w,Latitude,Longitude,RentScore,GoodRatingRestaurant
0,Roslindale,1700,42.291209,-71.124497,0.870968,0
1,Jamaica Plain,1850,42.30982,-71.12033,0.774194,0
2,Mission Hill,2100,42.332926,-71.103214,0.612903,0
3,Longwood,2300,42.336168,-71.099527,0.483871,0
4,Bay Village,2700,42.350011,-71.066948,0.225806,16
5,Chinatown,3000,42.352217,-71.062607,0.032258,18
6,North End,2300,42.365097,-71.054495,0.483871,0
7,Roxbury,1780,42.324843,-71.095016,0.819355,0
8,South End,2700,42.34131,-71.07723,0.225806,0
9,Back Bay,2900,42.350707,-71.07973,0.096774,0


In [64]:
maxgoodrest = max(boston_Neighborhood['GoodRatingRestaurant'])
print('Maximum good restaurant count',maxgoodrest)
mingoodrest = min(boston_Neighborhood['GoodRatingRestaurant'])
print('Minimum good restaurant count',mingoodrest)

rating_Score = pd.DataFrame(columns=['RatingScore'])
for index, Neighborhood in boston_Neighborhood.iterrows():
    currentrentofN = Neighborhood['GoodRatingRestaurant']
    ratingScore =   (maxgoodrest-currentrentofN)/(maxgoodrest-mingoodrest)
    rating_Score = rating_Score.append({'RatingScore' : ratingScore},ignore_index = True)
    
boston_Neighborhood = boston_Neighborhood.join(rating_Score, how='outer')
boston_Neighborhood

Maximum good restaurant count 18
Minimum good restaurant count 0


Unnamed: 0,Neighborhood,2019w,Latitude,Longitude,RentScore,GoodRatingRestaurant,RatingScore
0,Roslindale,1700,42.291209,-71.124497,0.870968,0,1.0
1,Jamaica Plain,1850,42.30982,-71.12033,0.774194,0,1.0
2,Mission Hill,2100,42.332926,-71.103214,0.612903,0,1.0
3,Longwood,2300,42.336168,-71.099527,0.483871,0,1.0
4,Bay Village,2700,42.350011,-71.066948,0.225806,16,0.111111
5,Chinatown,3000,42.352217,-71.062607,0.032258,18,0.0
6,North End,2300,42.365097,-71.054495,0.483871,0,1.0
7,Roxbury,1780,42.324843,-71.095016,0.819355,0,1.0
8,South End,2700,42.34131,-71.07723,0.225806,0,1.0
9,Back Bay,2900,42.350707,-71.07973,0.096774,0,1.0
