# The Battle of Neighborhoods – Buildings Location Predictor

##### Installing geopy and folium library and Importing all the necessary Libraries

In [1]:
#!conda install -c conda-forge geopy
#!conda install -c conda-forge folium=0.5.0
import json # library to handle JSON files
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
# import k-means from clustering stage
from sklearn.cluster import KMeans
import folium # map rendering library

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

##### Reading Data

In [2]:
# Load the tab-separated postal-code file. It has no header row, so the
# twelve column names are supplied explicitly.
data=pd.read_csv('IN.txt',delimiter='\t',names=['Country','Postal Code','Place Name','State','No1','County','no2','Province','No3','Latitude','Longitude','Accuracy'])
# Preview the first rows (Jupyter renders the cell's last expression).
data.head()

Unnamed: 0,Country,Postal Code,Place Name,State,No1,County,no2,Province,No3,Latitude,Longitude,Accuracy
0,IN,744301,Lapathy,Andaman & Nicobar Islands,1,Nicobar,638.0,Carnicobar,,9.1833,92.7667,3
1,IN,744301,Kakana,Andaman & Nicobar Islands,1,Nicobar,638.0,Carnicobar,,9.1167,92.8,4
2,IN,744301,Sawai,Andaman & Nicobar Islands,1,Nicobar,638.0,Carnicobar,,7.5166,93.6031,4
3,IN,744301,Carnicobar,Andaman & Nicobar Islands,1,Nicobar,638.0,Carnicobar,,9.1833,92.7667,3
4,IN,744301,Mus,Andaman & Nicobar Islands,1,Nicobar,638.0,Carnicobar,,9.2333,92.7833,4


##### Deleting all the unnecessary Columns from the data

In [3]:
# Keep only the columns used later on; the country code, the three numeric
# code columns and the accuracy flag are discarded.
data = data.drop(columns=['Country', 'No1','no2','No3','Accuracy'])
data.head()

Unnamed: 0,Postal Code,Place Name,State,County,Province,Latitude,Longitude
0,744301,Lapathy,Andaman & Nicobar Islands,Nicobar,Carnicobar,9.1833,92.7667
1,744301,Kakana,Andaman & Nicobar Islands,Nicobar,Carnicobar,9.1167,92.8
2,744301,Sawai,Andaman & Nicobar Islands,Nicobar,Carnicobar,7.5166,93.6031
3,744301,Carnicobar,Andaman & Nicobar Islands,Nicobar,Carnicobar,9.1833,92.7667
4,744301,Mus,Andaman & Nicobar Islands,Nicobar,Carnicobar,9.2333,92.7833


##### Merging rows that share a Postal Code (their Place Names are joined into one string)

In [4]:
# Collapse the frame to one row per postal code: every place name sharing
# the code is joined into a single comma-separated string, while province,
# county and state take the value from the first row of each group.
data = (
    data
    .groupby('Postal Code')
    .agg({
        'Place Name': ', '.join,
        'Province': 'first',
        'County': 'first',
        'State': 'first',
    })
    .reset_index()
)
data.head()

Unnamed: 0,Postal Code,Place Name,Province,County,State
0,110001,"New Delhi G.P.O., Parliament House, Connaught ...",New Delhi,New Delhi,Delhi
1,110002,"Civic Centre, Darya Ganj, Minto Road, Indrapra...",New Delhi Central,New Delhi,Delhi
2,110003,"Delhi High Court, Pandara Road, Delhi High Cou...",New Delhi,Central Delhi,Delhi
3,110004,Rashtrapati Bhawan,New Delhi,Central Delhi,Delhi
4,110005,"Bank Street (Central Delhi), Karol Bagh, Anand...",New Delhi,Central Delhi,Delhi


##### Reading the Latitude and Longitude Data

In [5]:
# Load the comma-separated coordinates file; its first row holds the column
# names, so pandas infers the header.
df=pd.read_csv('Data.txt',delimiter=',')
# Preview the first rows (Jupyter renders the cell's last expression).
df.head()

Unnamed: 0,Postal Code,place_name,admin_name1,latitude,longitude,accuracy
0,110001,Connaught Place,New Delhi,28.6333,77.2167,4.0
1,110002,Darya Ganj,New Delhi,28.6333,77.25,4.0
2,110003,Aliganj,New Delhi,28.65,77.2167,
3,110004,Rashtrapati Bhawan,New Delhi,28.65,77.2167,
4,110005,Lower Camp Anand Parbat,New Delhi,28.65,77.2,


##### Merging the two datasets

In [6]:
# Attach the coordinates to each postal-code row. The default inner join
# keeps only postal codes present in both frames.
result = data.merge(df, on='Postal Code')
result.head()

Unnamed: 0,Postal Code,Place Name,Province,County,State,place_name,admin_name1,latitude,longitude,accuracy
0,110001,"New Delhi G.P.O., Parliament House, Connaught ...",New Delhi,New Delhi,Delhi,Connaught Place,New Delhi,28.6333,77.2167,4.0
1,110002,"Civic Centre, Darya Ganj, Minto Road, Indrapra...",New Delhi Central,New Delhi,Delhi,Darya Ganj,New Delhi,28.6333,77.25,4.0
2,110003,"Delhi High Court, Pandara Road, Delhi High Cou...",New Delhi,Central Delhi,Delhi,Aliganj,New Delhi,28.65,77.2167,
3,110004,Rashtrapati Bhawan,New Delhi,Central Delhi,Delhi,Rashtrapati Bhawan,New Delhi,28.65,77.2167,
4,110005,"Bank Street (Central Delhi), Karol Bagh, Anand...",New Delhi,Central Delhi,Delhi,Lower Camp Anand Parbat,New Delhi,28.65,77.2,


##### Dropping the columns that are not required

In [7]:
# The admin name and accuracy columns brought in by the merge are not used
# by the rest of the notebook.
result = result.drop(columns=['admin_name1','accuracy'])
result.head()

Unnamed: 0,Postal Code,Place Name,Province,County,State,place_name,latitude,longitude
0,110001,"New Delhi G.P.O., Parliament House, Connaught ...",New Delhi,New Delhi,Delhi,Connaught Place,28.6333,77.2167
1,110002,"Civic Centre, Darya Ganj, Minto Road, Indrapra...",New Delhi Central,New Delhi,Delhi,Darya Ganj,28.6333,77.25
2,110003,"Delhi High Court, Pandara Road, Delhi High Cou...",New Delhi,Central Delhi,Delhi,Aliganj,28.65,77.2167
3,110004,Rashtrapati Bhawan,New Delhi,Central Delhi,Delhi,Rashtrapati Bhawan,28.65,77.2167
4,110005,"Bank Street (Central Delhi), Karol Bagh, Anand...",New Delhi,Central Delhi,Delhi,Lower Camp Anand Parbat,28.65,77.2


In [8]:
# Exploratory cell: list the distinct states and count rows per state.
# NOTE(review): `i`, `t` and `v` are notebook-global names; `v` is rebound
# later by the main cell to the venues table.
i=result.sort_values("State")
# Distinct state names; the frame was sorted first, so they come out in sorted order.
i=i["State"].unique()
# Number of non-null 'Place Name' entries per state.
t=result[["Place Name","State"]].groupby(["State"]).count()
v=t["Place Name"]
print(type(v))
# transpose() of the 1-D array returned by unique() leaves it unchanged.
print([i.transpose()])
#pd.DataFrame(data=[i,v],columns=["State","Place Count"])
#import seaborn as sns
#sns.catplot(x=i,y=v,kind="box")

<class 'pandas.core.series.Series'>
[array(['Andaman & Nicobar Islands', 'Andhra Pradesh', 'Arunachal Pradesh',
       'Assam', 'Bihar', 'Chandigarh', 'Chattisgarh',
       'Dadra & Nagar Haveli', 'Daman & Diu', 'Delhi', 'Goa', 'Gujarat',
       'Haryana', 'Himachal Pradesh', 'Jammu & Kashmir', 'Jharkhand',
       'Karnataka', 'Kerala', 'Lakshadweep', 'Madhya Pradesh',
       'Maharashtra', 'Manipur', 'Meghalaya', 'Mizoram', 'Nagaland',
       'Odisha', 'Pondicherry', 'Punjab', 'Rajasthan', 'Sikkim',
       'Tamil Nadu', 'Telangana', 'Tripura', 'Uttar Pradesh',
       'Uttarakhand', 'West Bengal'], dtype=object)]


In [9]:
# Where 'Province' is missing, fall back to the same row's 'County'.
# fillna() with a Series aligns on the index, so every missing province gets
# the county of its own row in a single vectorised pass. Rows whose county
# is also missing simply stay missing.
result['Province'] = result['Province'].fillna(result['County'])

In [10]:
# (rows, columns) of the merged frame after the clean-up above.
result.shape

(8255, 8)

##### Function to ask the user for the city in which they want to set up.

In [11]:
def get_city():
    """Prompt on standard output and return the line the user types.

    Returns:
        str: the text read by ``input()``, unmodified.
    """
    print("Enter city : ")
    return input()

##### Function to Create a map of city passed with neighborhoods superimposed on top.

In [12]:
def city_map(city):
    """Map the provinces of one city and collect the venues around each.

    Reads the notebook-level ``result`` frame, keeps the rows whose 'County'
    equals ``city`` and collapses them to one row per 'Province'. Each
    province is drawn as a circle marker, then one Foursquare request per
    province is issued through ``foursquare``/``call`` and the responses are
    flattened with ``convert_jsontopandas``/``convert_pandas``.

    Args:
        city: value compared against the 'County' column of ``result``.

    Returns:
        tuple: ``(map1, results, data_city)`` - the folium map, the venues
        frame built by ``convert_pandas`` and the per-province frame.
    """
    #geolocator = Nominatim(user_agent="ny_explorer")
    #location = geolocator.geocode(city)
    #Here the latitude and longitude are hard-coded because the library doesn`t work in India, otherwise you can uncomment the above two mentioned lines
    latitude = 28.7041
    longitude = 77.1025
    map1 = folium.Map(location=[latitude, longitude], zoom_start=10)

    # One row per province: place names are concatenated, every other column
    # takes the first value seen in the group.
    rows_in_city = result[result['County'] == city]
    per_province_rules = {
        'Postal Code': 'first',
        'Place Name': ', '.join,
        'Province': 'first',
        'County': 'first',
        'State': 'first',
        'place_name': 'first',
        'latitude': 'first',
        'longitude': 'first',
    }
    data_city = rows_in_city.groupby('Province').agg(per_province_rules).reset_index(drop=True)

    # Adding markers to the map
    marker_columns = zip(
        data_city['latitude'],
        data_city['longitude'],
        data_city['place_name'],
        data_city['Province'],
    )
    for lat, lon, place, province in marker_columns:
        popup = folium.Popup('{}, {}'.format(place, province), parse_html=True)
        folium.CircleMarker(
            [lat, lon],
            radius=5,
            popup=popup,
            color='blue',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False).add_to(map1)

    # One request per province, issued in row order.
    urls = foursquare(data_city)
    venues_list = [
        convert_jsontopandas(call(url), province)
        for url, province in zip(urls, data_city['Province'])
    ]
    results = convert_pandas(venues_list)
    return (map1, results, data_city)

##### Functions to build the Foursquare API request URLs

In [13]:
def four_api(lat,lon,radius=500,limit=100):
    """Build the Foursquare ``venues/explore`` request URL for one coordinate.

    The API credentials are read from the ``FOURSQUARE_CLIENT_ID`` and
    ``FOURSQUARE_CLIENT_SECRET`` environment variables so that no secret is
    stored in the notebook.

    Args:
        lat: latitude placed in the ``ll`` query parameter.
        lon: longitude placed in the ``ll`` query parameter.
        radius: value of the ``radius`` query parameter (default 500).
        limit: value of the ``limit`` query parameter (default 100).

    Returns:
        str: the complete request URL.

    Raises:
        RuntimeError: if either credential environment variable is not set.
    """
    import os  # local import keeps this cell runnable on its own

    try:
        client_id = os.environ['FOURSQUARE_CLIENT_ID']
        client_secret = os.environ['FOURSQUARE_CLIENT_SECRET']
    except KeyError as missing:
        raise RuntimeError(
            'Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET '
            'environment variables before calling four_api().'
        ) from missing

    VERSION = '20180604'  # value sent as the `v` query parameter
    url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
        client_id,
        client_secret,
        VERSION,
        lat,
        lon,
        radius,
        limit)
    return url
def foursquare(data_city):
    """Return one Foursquare request URL per row of ``data_city``.

    Args:
        data_city: frame with "latitude" and "longitude" columns.

    Returns:
        list: URLs produced by ``four_api``, in row order.
    """
    coordinates = zip(data_city["latitude"], data_city["longitude"])
    return [four_api(lat, lon) for lat, lon in coordinates]

##### Function to call url and get .json data

In [14]:
def call(url, proxies=None, timeout=30):
    """Fetch ``url`` with an HTTP GET and return the decoded JSON body.

    No proxy credentials are stored in the notebook. When ``proxies`` is
    None, ``requests`` picks up the standard ``HTTP_PROXY``/``HTTPS_PROXY``
    environment variables on its own, so a corporate proxy is configured
    outside the code.

    Args:
        url: address to request.
        proxies: optional mapping passed to ``requests.get`` (for example
            ``{"https": "http://host:8080"}``); None defers to the
            environment.
        timeout: seconds to wait before giving up, so a stalled connection
            cannot hang the notebook indefinitely.

    Returns:
        The parsed JSON response. The HTTP status is deliberately not
        checked here: an error payload is returned as-is for the caller to
        handle.
    """
    response = requests.get(url, proxies=proxies, timeout=timeout)
    return response.json()

##### Function to get category

In [15]:
def get_category_type(row):
    """Return the name of the first category attached to a venue record.

    Args:
        row: mapping-like record (dict or pandas Series) that stores the
            category list under 'categories' or, failing that, under
            'venue.categories'.

    Returns:
        The 'name' of the first category, or None when the list is empty
        or missing a value.

    Raises:
        KeyError: if neither key is present in ``row``.
    """
    # Only a missing key should trigger the fallback; any other error is a
    # genuine problem and must surface instead of being swallowed.
    try:
        categories_list = row['categories']
    except KeyError:
        categories_list = row['venue.categories']

    if not categories_list:
        return None
    return categories_list[0]['name']

##### Function to convert the .json response into a list of venue rows

In [16]:
def convert_jsontopandas(results,county):
    """Flatten one Foursquare ``explore`` response into a list of row tuples.

    Each tuple is ``(county, lat, lon, venue name, venue lat, venue lng,
    category name)``, where ``lat``/``lon`` are the north-east corner of the
    response's ``suggestedBounds``.

    Args:
        results: decoded JSON response (nested dicts and lists).
        county: label stored as the first element of every tuple.

    Returns:
        list: one tuple per venue; a single placeholder tuple when the
        response reports zero results; an empty list when the response does
        not have the expected structure (the error is printed, not raised).
    """
    d=[]
    try:
        response = results['response']
        venues = response['groups'][0]['items']
        lat = response['suggestedBounds']['ne']['lat']
        lon = response['suggestedBounds']['ne']['lng']
        if response['totalResults'] == 0:
            d=[(county,lat,lon,"No venue Found",lat,lon,"No Category")]
        else:
            rows = []
            for item in venues:
                venue = item['venue']
                categories = venue['categories']
                # A venue without categories must not discard the whole response.
                category = categories[0]['name'] if categories else "No Category"
                rows.append((county, lat, lon, venue['name'],
                             venue['location']['lat'], venue['location']['lng'],
                             category))
            # Assign only after every venue parsed, so a malformed item still
            # yields an empty list rather than a partial one.
            d = rows
    except Exception as e:
        # Best effort: report the problem and return what we have (nothing).
        print("Error : ",e)
    return(d)

##### Function to combine the per-neighbourhood venue lists into a single pandas DataFrame

In [17]:
def convert_pandas(venues_list):
    """Concatenate per-neighbourhood venue tuples into one DataFrame.

    Args:
        venues_list: list of lists of 7-element tuples, as produced by
            ``convert_jsontopandas``.

    Returns:
        pandas.DataFrame with the columns 'County', 'Place Latitude',
        'Place Longitude', 'Venue Name', 'Venue Latitude',
        'Venue Longitude' and 'Venue Categories'. The columns are present
        even when no venue was collected at all.
    """
    column_names = ['County','Place Latitude','Place Longitude','Venue Name','Venue Latitude','Venue Longitude','Venue Categories']
    rows = [item for venue_list in venues_list for item in venue_list]
    # Naming the columns in the constructor also works for zero rows, where
    # assigning `.columns` afterwards raises a length-mismatch error.
    nearby_venues = pd.DataFrame(rows, columns=column_names)
    return nearby_venues

##### Analyze Each Neighborhood and printing the neighbourhood Analysis

In [18]:
def analyze(v):
    """Print per-area venue statistics and return category frequencies.

    Prints the row count of every column grouped by 'County', then, for each
    area, its five most frequent venue categories.

    Args:
        v: venues frame with at least 'County' and 'Venue Categories'.

    Returns:
        pandas.DataFrame: one row per area with a 'Province' column (holding
        the 'County' values of ``v``) followed by one column per venue
        category containing that category's mean one-hot value in the area.
    """
    # Counting the no of categories based on County
    print(v.groupby('County').count())

    # One-hot encode the categories, then put the area label in front.
    encoded = pd.get_dummies(v[['Venue Categories']], prefix="", prefix_sep="")
    encoded['Province'] = v['County']
    label_first = [encoded.columns[-1]] + list(encoded.columns[:-1])
    encoded = encoded[label_first]

    # Mean of the one-hot columns = share of each category within the area.
    ny_grouped = encoded.groupby('Province').mean().reset_index()

    num_top_venues = 5
    for hood in ny_grouped['Province']:
        print("----"+hood+"----")
        # Turn the single matching row into a (category, frequency) table.
        table = ny_grouped[ny_grouped['Province'] == hood].T.reset_index()
        table.columns = ['Venue categories','Frequency']
        # The first row carries the area label itself, not a category.
        table = table.iloc[1:].astype({'Frequency': float}).round({'Frequency': 2})
        ranked = table.sort_values('Frequency', ascending=False).reset_index(drop=True)
        print(ranked.head(num_top_venues))
        print('\n')
    return ny_grouped

##### Let's put that into a pandas dataframe
First, let's write a function to sort the venues in descending order.

In [19]:
def return_most_common_venues(row, num_top_venues):
    """Return the labels of the highest-valued entries of one grouped row.

    Args:
        row: Series whose first element is the area label and whose
            remaining elements are per-category frequencies.
        num_top_venues: how many category names to return.

    Returns:
        numpy array with the index labels of the ``num_top_venues`` largest
        values, in descending order of value.
    """
    row_categories = row.iloc[1:]  # skip the leading area label
    row_categories_sorted = row_categories.sort_values(ascending=False)

    return row_categories_sorted.index.values[0:num_top_venues]


def _ordinal(n):
    """Return ``n`` with its English ordinal suffix (1st, 2nd, 11th, 21st...)."""
    if 10 <= n % 100 <= 20:
        suffix = 'th'  # 11th, 12th, 13th: the teens never take st/nd/rd
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(n % 10, 'th')
    return '{}{}'.format(n, suffix)


def convert(ny_grouped, num_top_venues=None):
    """Rank the venue categories of every area from most to least common.

    Args:
        ny_grouped: frame whose first column is 'Province' and whose other
            columns hold per-category frequencies.
        num_top_venues: number of ranked columns to produce. None (the
            default) ranks every category column; larger values are capped
            at the number of category columns.

    Returns:
        pandas.DataFrame with a 'Province' column followed by
        '1st Most Common Venue', '2nd Most Common Venue', ... columns that
        hold category names. It shares the index of ``ny_grouped``.
    """
    total_categories = ny_grouped.shape[1] - 1
    if num_top_venues is None:
        num_top_venues = total_categories
    num_top_venues = max(0, min(num_top_venues, total_categories))

    # create columns according to number of top venues
    venue_columns = ['{} Most Common Venue'.format(_ordinal(rank))
                     for rank in range(1, num_top_venues + 1)]

    # Rank every row once and build the frame in a single step.
    ranked_rows = [
        list(return_most_common_venues(ny_grouped.iloc[position, :], num_top_venues))
        for position in range(ny_grouped.shape[0])
    ]
    neighborhoods_venues_sorted = pd.DataFrame(
        ranked_rows, columns=venue_columns, index=ny_grouped.index)
    neighborhoods_venues_sorted.insert(0, 'Province', ny_grouped['Province'])
    return neighborhoods_venues_sorted

#### Clustering Neighbourhood

In [20]:
def cluster(ny_grouped,neighborhoods_venues_sorted,data_city=None,city_name=None):
    """Cluster the areas with k-means and attach the labels to the city rows.

    Args:
        ny_grouped: frame with a 'Province' column plus one numeric column
            per venue category (the clustering features).
        neighborhoods_venues_sorted: ranked-venues frame with a 'Province'
            column. It is modified in place: a 'Cluster Labels' column is
            inserted at position 0, or overwritten when it already exists,
            so the cell can be re-run.
        data_city: per-province frame of the selected city. Defaults to the
            notebook-level ``data1`` variable.
        city_name: value matched against the 'County' column of
            ``data_city``. Defaults to the notebook-level ``city`` variable.

    Returns:
        tuple: ``(ny_merged, kclusters)`` - the city rows joined with the
        labelled ranked-venues frame on 'Province', and the number of
        clusters actually used.
    """
    # Fall back to the notebook globals so existing two-argument calls keep working.
    if data_city is None:
        data_city = data1
    if city_name is None:
        city_name = city

    # set number of clusters; k-means cannot form more clusters than rows
    kclusters = min(3, len(ny_grouped))

    # `columns=` keyword: the positional axis argument was removed in pandas 2
    ny_grouped_clustering = ny_grouped.drop(columns='Province')

    # run k-means clustering
    kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ny_grouped_clustering)

    # add clustering labels
    if 'Cluster Labels' in neighborhoods_venues_sorted.columns:
        neighborhoods_venues_sorted['Cluster Labels'] = kmeans.labels_
    else:
        neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

    ny_merged = data_city[data_city['County']==city_name]

    # join the ranked venues and cluster labels onto the rows that carry
    # latitude/longitude for each province
    ny_merged = ny_merged.join(neighborhoods_venues_sorted.set_index('Province'), on='Province')

    return (ny_merged,kclusters)  # check the last columns!

#### Creating Map to demonstrate different cluster

In [21]:
def maps(ny_merged,kclusters):
    """Draw one coloured circle marker per row of ``ny_merged``.

    Args:
        ny_merged: frame with 'latitude', 'longitude', 'Province' and
            'Cluster Labels' columns.
        kclusters: number of clusters; sets the size of the colour palette.

    Returns:
        folium.Map centred on the fixed coordinates below, with the markers added.
    """
    latitude = 28.7041
    longitude = 77.1025
    map_clusters = folium.Map(location=[latitude, longitude], zoom_start=8)

    # set color scheme for the clusters: kclusters evenly spaced rainbow colours
    rainbow = [colors.rgb2hex(rgba) for rgba in cm.rainbow(np.linspace(0, 1, kclusters))]

    # add markers to the map
    marker_columns = zip(
        ny_merged['latitude'],
        ny_merged['longitude'],
        ny_merged['Province'],
        ny_merged['Cluster Labels'],
    )
    for lat, lon, poi, label_id in marker_columns:
        popup = folium.Popup(str(poi) + ' Cluster ' + str(label_id), parse_html=True)
        # label 0 indexes rainbow[-1], i.e. the last palette entry
        shade = rainbow[label_id-1]
        folium.CircleMarker(
            [lat, lon],
            radius=5,
            popup=popup,
            color=shade,
            fill=True,
            fill_color=shade,
            fill_opacity=0.7).add_to(map_clusters)

    return map_clusters

##### Printing different clusters

In [22]:
def printcluster(ny_merged):
    """Print the rows of each cluster, one table per distinct label.

    Only the columns at positions 1 and 2 and from position 5 onwards are
    shown. Labels are visited in order of first appearance.

    Args:
        ny_merged: frame with a 'Cluster Labels' column.
    """
    shown_positions = [1,2] + list(range(5, ny_merged.shape[1]))
    for label in ny_merged['Cluster Labels'].unique():
        members = ny_merged['Cluster Labels'] == label
        print(ny_merged.loc[members, ny_merged.columns[shown_positions]])

### Main body to call the program

In [23]:
# Ask for the city name; city_map() compares it with the 'County' column.
city=get_city()
# map1: folium map of the city's provinces, v: venues frame,
# data1: per-province rows of the city.
# NOTE: cluster() reads the notebook-level names `data1` and `city` bound here.
map1,v,data1=city_map(city)
# Per-province frequency of every venue category (statistics are printed on the way).
ny_grouped=analyze(v)
# Categories ranked from most to least common for each province.
neighborhoods_venues_sorted=convert(ny_grouped)
# k-means labels joined onto the city rows; also adds 'Cluster Labels' to
# neighborhoods_venues_sorted in place.
ny_merged,kclusters=cluster(ny_grouped,neighborhoods_venues_sorted)
# Second map, coloured by cluster label.
map2=maps(ny_merged,kclusters)
printcluster(ny_merged)
#map1

Enter city : 
New Delhi
                   Place Latitude  Place Longitude  Venue Name  \
County                                                           
Delhi                           4                4           4   
New Delhi                      60               60          60   
New Delhi Central               2                2           2   

                   Venue Latitude  Venue Longitude  Venue Categories  
County                                                                
Delhi                           4                4                 4  
New Delhi                      60               60                60  
New Delhi Central               2                2                 2  
----Delhi----
        Venue categories  Frequency
0                    ATM       0.25
1         Clothing Store       0.25
2      Electronics Store       0.25
3      Food & Drink Shop       0.25
4  Portuguese Restaurant       0.00


----New Delhi----
     Venue categories  Frequency
0      

In [24]:
# Display the province map returned by city_map() in the main cell.
map1

In [25]:
# Display the cluster-coloured map returned by maps() in the main cell.
map2

In [26]:
# City rows joined with their cluster label and ranked venue categories.
ny_merged

Unnamed: 0,Postal Code,Place Name,Province,County,State,place_name,latitude,longitude,Cluster Labels,1st Most Common Venue,...,24th Most Common Venue,25th Most Common Venue,26th Most Common Venue,27th Most Common Venue,28th Most Common Venue,29th Most Common Venue,30th Most Common Venue,31th Most Common Venue,32th Most Common Venue,33th Most Common Venue
0,110043,"Gopal Nagar, Arjun Park, Shyam Vihar, Najafgar...",Delhi,New Delhi,Delhi,Najafgarh,28.6125,76.9847,0,ATM,...,Jazz Club,Lounge,Miscellaneous Shop,Molecular Gastronomy Restaurant,Multiplex,North Indian Restaurant,Plaza,Portuguese Restaurant,Pub,Fast Food Restaurant
1,110001,"New Delhi G.P.O., Parliament House, Connaught ...",New Delhi,New Delhi,Delhi,Connaught Place,28.6333,77.2167,2,Café,...,Miscellaneous Shop,Molecular Gastronomy Restaurant,Multiplex,North Indian Restaurant,Plaza,Portuguese Restaurant,Bakery,Stadium,Electronics Store,ATM
2,110002,"Civic Centre, Darya Ganj, Minto Road, Indrapra...",New Delhi Central,New Delhi,Delhi,Darya Ganj,28.6333,77.25,1,Stadium,...,Jazz Club,Lounge,Miscellaneous Shop,Molecular Gastronomy Restaurant,Multiplex,North Indian Restaurant,Plaza,Portuguese Restaurant,Pub,ATM


In [27]:
# Rows labelled cluster 0, showing the columns at positions 1, 2 and 5 onwards
# (the same selection printcluster() makes for every label).
ny_merged.loc[ny_merged['Cluster Labels'] == 0, ny_merged.columns[[1,2] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Place Name,Province,place_name,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,24th Most Common Venue,25th Most Common Venue,26th Most Common Venue,27th Most Common Venue,28th Most Common Venue,29th Most Common Venue,30th Most Common Venue,31th Most Common Venue,32th Most Common Venue,33th Most Common Venue
0,"Gopal Nagar, Arjun Park, Shyam Vihar, Najafgar...",Delhi,Najafgarh,28.6125,76.9847,0,ATM,Clothing Store,Food & Drink Shop,Electronics Store,...,Jazz Club,Lounge,Miscellaneous Shop,Molecular Gastronomy Restaurant,Multiplex,North Indian Restaurant,Plaza,Portuguese Restaurant,Pub,Fast Food Restaurant


In [28]:
# Rows labelled cluster 1, showing the columns at positions 1, 2 and 5 onwards
# (the same selection printcluster() makes for every label).
ny_merged.loc[ny_merged['Cluster Labels'] == 1, ny_merged.columns[[1,2] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Place Name,Province,place_name,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,24th Most Common Venue,25th Most Common Venue,26th Most Common Venue,27th Most Common Venue,28th Most Common Venue,29th Most Common Venue,30th Most Common Venue,31th Most Common Venue,32th Most Common Venue,33th Most Common Venue
2,"Civic Centre, Darya Ganj, Minto Road, Indrapra...",New Delhi Central,Darya Ganj,28.6333,77.25,1,Stadium,Falafel Restaurant,Asian Restaurant,BBQ Joint,...,Jazz Club,Lounge,Miscellaneous Shop,Molecular Gastronomy Restaurant,Multiplex,North Indian Restaurant,Plaza,Portuguese Restaurant,Pub,ATM


In [30]:
# Rows labelled cluster 2, showing the columns at positions 1, 2 and 5 onwards
# (the same selection printcluster() makes for every label).
ny_merged.loc[ny_merged['Cluster Labels'] == 2, ny_merged.columns[[1,2] + list(range(5, ny_merged.shape[1]))]]

Unnamed: 0,Place Name,Province,place_name,latitude,longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,...,24th Most Common Venue,25th Most Common Venue,26th Most Common Venue,27th Most Common Venue,28th Most Common Venue,29th Most Common Venue,30th Most Common Venue,31th Most Common Venue,32th Most Common Venue,33th Most Common Venue
1,"New Delhi G.P.O., Parliament House, Connaught ...",New Delhi,Connaught Place,28.6333,77.2167,2,Café,Chinese Restaurant,Indian Restaurant,Bar,...,Miscellaneous Shop,Molecular Gastronomy Restaurant,Multiplex,North Indian Restaurant,Plaza,Portuguese Restaurant,Bakery,Stadium,Electronics Store,ATM


In [33]:
# Ranked venue categories per province; the 'Cluster Labels' column was added
# in place by cluster().
neighborhoods_venues_sorted

Unnamed: 0,Cluster Labels,Province,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,...,24th Most Common Venue,25th Most Common Venue,26th Most Common Venue,27th Most Common Venue,28th Most Common Venue,29th Most Common Venue,30th Most Common Venue,31th Most Common Venue,32th Most Common Venue,33th Most Common Venue
0,0,Delhi,ATM,Clothing Store,Food & Drink Shop,Electronics Store,Bakery,Bar,Bistro,Café,...,Jazz Club,Lounge,Miscellaneous Shop,Molecular Gastronomy Restaurant,Multiplex,North Indian Restaurant,Plaza,Portuguese Restaurant,Pub,Fast Food Restaurant
1,2,New Delhi,Café,Chinese Restaurant,Indian Restaurant,Bar,Fast Food Restaurant,Pub,Coffee Shop,Lounge,...,Miscellaneous Shop,Molecular Gastronomy Restaurant,Multiplex,North Indian Restaurant,Plaza,Portuguese Restaurant,Bakery,Stadium,Electronics Store,ATM
2,1,New Delhi Central,Stadium,Falafel Restaurant,Asian Restaurant,BBQ Joint,Bakery,Bar,Bistro,Café,...,Jazz Club,Lounge,Miscellaneous Shop,Molecular Gastronomy Restaurant,Multiplex,North Indian Restaurant,Plaza,Portuguese Restaurant,Pub,ATM
