In [19]:

# installing dependencies
!pip install beautifulsoup4
!pip install geopy
!pip install folium

# importing dependencies
import requests
from bs4 import BeautifulSoup
import pandas as pd
import folium

Collecting geopy
  Downloading https://files.pythonhosted.org/packages/53/fc/3d1b47e8e82ea12c25203929efb1b964918a77067a874b2c7631e2ec35ec/geopy-1.21.0-py2.py3-none-any.whl (104kB)
Collecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/8b/62/26ec95a98ba64299163199e95ad1b0e34ad3f4e176e221c40245f211e425/geographiclib-1.50-py3-none-any.whl
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-1.21.0


<h1>Choosing the most convenient Neighbourhood of Bengaluru</h1>
<h2>Introduction/Business Problem</h2>
One of the major problems people face when moving to a new city is choosing a neighbourhood to live in. A few major factors drive that choice, such as comfort, convenience, and safety.

In this notebook we analyze the neighbourhoods of Bengaluru, Karnataka, India, to identify the best neighbourhood in the city in terms of convenience for its residents.



<h1>Data Description</h1>
<h4>For this report we will use neighbourhood data fetched from the Foursquare location API. To get the boroughs, pincodes, and neighbourhoods, we will scrape a webpage (https://www.indiatvnews.com/pincode/karnataka/bangalore) and build our own dataset.</h4>

In [21]:
# Download the pincode listing page. A timeout keeps the cell from hanging on a
# stalled connection, and raise_for_status() stops us from parsing an error page.
page = requests.get("https://www.indiatvnews.com/pincode/karnataka/bangalore", timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

# The rows we need live in the <table> carrying the CSS class "alt".
table = soup.find('table', class_='alt')
if table is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError("Could not find a <table class='alt'> on the page; the site layout may have changed")
table_rows = table.find_all('tr')

In [22]:
# Lazily turn every <tr> into a list holding the stripped text of its <td> cells.
parsed_rows = ([cell.text.strip() for cell in tr.find_all('td')] for tr in table_rows)

# `temp` holds the table data: rows without any <td> cells are skipped, as are
# rows whose second column is the "NA" placeholder.
temp = [cells for cells in parsed_rows if cells and cells[1] != "NA"]

In [23]:

# Build the frame from the scraped rows in one chain: name the five scraped
# columns, discard the two that are not used, and skip the first scraped row.
scraped_columns = ['Neighbourhood', 'Borough', 'District', 'State', 'Pincode']
df = (
    pd.DataFrame(data=temp, columns=scraped_columns)
    .drop(columns=['District', 'State'])
    .iloc[1:]
)
print(df.shape)
print(df)

(259, 3)
                   Neighbourhood          Borough Pincode
1                          Agram  Bangalore South  560007
2             Air Force Hospital  Bangalore North  560007
3                   Amruthahalli  Bangalore North  560092
4           Anandnagar Bangalore  Bangalore North  560024
5                 Arabic College  Bangalore North  560045
6                          Attur  Bangalore North  560064
7                    Austin Town  Bangalore North  560047
8                      Banaswadi  Bangalore North  560043
9               Bangalore Bazaar  Bangalore North  560001
10                     Bellandur  Bangalore South  560103
11                   Benson Town  Bangalore North  560046
12                 Bhattarahalli  Bangalore North  560049
13                    Bidrahalli  Bangalore South  560049
14          BSF Campus Yelahanka  Bangalore North  560063
15               Byatarayanapura  Bangalore North  560092
16               C.V.Raman Nagar  Bangalore North  560093
17   

In [24]:
# Map every spelling variant of a borough onto one canonical name.
borough_aliases = {
    'Bangalore North': ['Bangalore north', 'Banglorenorth', 'Bg North', 'Bgnorth'],
    'Bangalore South': ['Bangaloresouth', 'Bg South', 'Bgsouth', 'Nla & Bgsouth'],
    'Bangalore': ['Banglore'],
}
borough_lookup = {
    alias: canonical
    for canonical, aliases in borough_aliases.items()
    for alias in aliases
}

# assign() returns a new frame, so this does not write into a slice of the
# previously built frame (which can raise SettingWithCopyWarning).
df = df.assign(Borough=df['Borough'].replace(borough_lookup))

# Only rows that resolve to one of the two main boroughs are kept.
df = df[df['Borough'].isin(["Bangalore South", "Bangalore North"])]

print("let's drop the row with duplicate pincode and keep only the first one")

print("Shape of dataframe before removing duplicates")
print(df.shape)

# drop_duplicates keeps the first row seen for each pincode.
df = df.drop_duplicates(subset="Pincode")
print("Shape of dataframe after removing duplicates")
print(df.shape)

# setting pincode as index
df = df.set_index('Pincode')

print("Below show dataframe will be used for further research")
print(df.head())

# Placeholder coordinate columns; the string "null" marks rows that have not
# been geocoded yet.
df = df.assign(Latitude="null", Longitude="null")

# saving this dataframe to a tab-separated file
df.to_csv("without_lat_lng_bangalore_neighbourhood.csv", sep='\t', encoding='utf-8')

let's drop the row with duplicate pincode and keep only the first one
Shape of dataframe before removing duplicates
(220, 3)
Shape of dataframe before removing duplicates
(100, 3)
Below show dataframe will be used for further research
                Neighbourhood          Borough
Pincode                                       
560007                  Agram  Bangalore South
560092           Amruthahalli  Bangalore North
560024   Anandnagar Bangalore  Bangalore North
560045         Arabic College  Bangalore North
560064                  Attur  Bangalore North


In [30]:

import requests

def fetchLatLng(postal_code, Neighbourhood, api_key=None, max_attempts=3):
    """Geocode one neighbourhood via the Google Geocoding API and store it in ``df``.

    The address sent to the service is
    ``"<postal_code>, <Neighbourhood>, Bangalore, Karnataka, India"``. On
    success the ``Latitude`` and ``Longitude`` cells of the row labelled
    ``postal_code`` in the module-level ``df`` are overwritten.

    Parameters
    ----------
    postal_code
        Index label of the row in ``df`` to update; also the first component
        of the geocoded address.
    Neighbourhood
        Neighbourhood name, the second component of the geocoded address.
    api_key
        Google Maps API key. When omitted, the ``GOOGLE_MAPS_API_KEY``
        environment variable is used so no key has to live in the notebook.
    max_attempts
        Maximum number of requests when the service reports a transient
        failure (``OVER_QUERY_LIMIT`` or ``UNKNOWN_ERROR``).

    Returns
    -------
    tuple or None
        ``(latitude, longitude)`` on success; ``None`` when the service
        reports ``ZERO_RESULTS`` (``df`` is left untouched in that case).

    Raises
    ------
    requests.HTTPError
        If the HTTP response status is not successful.
    RuntimeError
        If the service keeps failing after ``max_attempts`` tries or reports a
        non-retryable status such as ``REQUEST_DENIED``.
    """
    import os
    import time

    if api_key is None:
        api_key = os.environ.get("GOOGLE_MAPS_API_KEY")

    address = '{}, {}, Bangalore, Karnataka, India'.format(postal_code, Neighbourhood)
    print(address)

    # Let requests build the query string so spaces, commas and characters such
    # as "&" inside the address are URL-encoded correctly.
    query = {"address": address}
    if api_key is not None:
        query["key"] = api_key

    retryable_statuses = ("OVER_QUERY_LIMIT", "UNKNOWN_ERROR")
    status = None
    error_message = ""

    for attempt in range(1, max(1, max_attempts) + 1):
        response = requests.get(
            "https://maps.googleapis.com/maps/api/geocode/json",
            params=query,
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()
        status = payload.get("status")
        error_message = payload.get("error_message", "")
        matches = payload.get("results") or []

        if status == "OK" and matches:
            # The first result is used, as the service may return several.
            location = matches[0]["geometry"]["location"]
            latitude = location["lat"]
            longitude = location["lng"]
            df.loc[postal_code, 'Latitude'] = latitude
            df.loc[postal_code, 'Longitude'] = longitude
            print('Latitude: {} & longitude: {}'.format(latitude, longitude))
            return latitude, longitude

        if status == "ZERO_RESULTS":
            # A valid request with no match: retrying cannot help.
            print('No geocoding result for: {}'.format(address))
            return None

        if status not in retryable_statuses:
            # Permanent failure (bad key, invalid request, ...): stop retrying.
            break

        if attempt < max_attempts:
            # Simple linear back-off before the next attempt.
            time.sleep(attempt)

    raise RuntimeError(
        'Geocoding failed for "{}": status={} {}'.format(address, status, error_message)
    )

<h2>From here on we load our data from the "banglore_ng_lat_long.csv" file, so that repeated runs do not incur Google Maps API charges</h2>

In [34]:
# Reload the neighbourhood table, including its Latitude/Longitude columns,
# from the saved CSV. Comma is read_csv's default delimiter, so it is not
# spelled out. The frame is the cell's last expression so it is displayed.
df = pd.read_csv('banglore_ng_lat_long.csv')
df

Unnamed: 0,Pincode,Neighbourhood,Borough,Latitude,Longitude
0,560007,Agram,Bangalore South,12.957917,77.630912
1,560092,Amruthahalli,Bangalore North,13.065879,77.604206
2,560024,Anandnagar Bangalore,Bangalore North,13.031328,77.591313
3,560045,Arabic College,Bangalore North,13.030375,77.621131
4,560064,Attur,Bangalore North,13.106962,77.566299
5,560047,Austin Town,Bangalore North,12.958768,77.615995
6,560043,Banaswadi,Bangalore North,13.010376,77.648194
7,560001,Bangalore Bazaar,Bangalore North,12.928798,77.676381
8,560103,Bellandur,Bangalore South,12.929869,77.684837
9,560046,Benson Town,Bangalore North,13.001164,77.599548


In [35]:
# Coordinates used to centre the initial map view. They are kept as strings
# (folium converts them to floats) so any other cell that interpolates them
# into text keeps working.
bangalore_latitude = "12.9715"
bangalore_longitude = "77.5945"

map_bangalore = folium.Map(location=[bangalore_latitude, bangalore_longitude], zoom_start=12)

# folium rejects NaN coordinates, so rows without a location are skipped
# instead of aborting the whole map.
located = df.dropna(subset=['Latitude', 'Longitude'])

# One small circle marker per neighbourhood; clicking it shows the name.
for lat, lng, neighbourhood in zip(located['Latitude'], located['Longitude'], located['Neighbourhood']):
    folium.CircleMarker(
        [lat, lng],
        radius=2,
        # parse_html belongs to Popup (it escapes the label text); it is not a
        # CircleMarker option, so it is only passed here.
        popup=folium.Popup(str(neighbourhood), parse_html=True),
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_bangalore)

map_bangalore