In [1]:
#Importing all the required libraries.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import requests
import folium 
import seaborn as sns
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

In [2]:
df = pd.read_csv('yangon.csv')

In [3]:
df.head(10)

Unnamed: 0.1,Unnamed: 0,Borough,Neighborhoods,Population,Longitude,Latitude
0,0,Western District (Downtown),Ahlon,41200,96.127778,16.783056
1,1,Western District (Downtown),Bahan,81000,96.152222,16.808056
2,2,Western District (Downtown),Dagon,25082,96.149722,16.780833
3,3,Western District (Downtown),Kyauktada,37000,96.158756,16.774422
4,4,Western District (Downtown),Kyimyindaing,73200,96.124167,16.803056
5,5,Western District (Downtown),Lanmadaw,47160,96.149722,16.780833
6,6,Western District (Downtown),Seikkan,1250,96.15,16.766667
7,7,Western District (Downtown),Latha,27500,96.149722,16.780833
8,8,Western District (Downtown),Pabedan,40000,96.155556,16.780278
9,9,Western District (Downtown),Sanchaung,65300,96.135,16.806389


In [4]:
df.drop('Unnamed: 0',axis=1,inplace=True)

Creating 'Population'dataframe from the main dataframe

In [5]:
yangon_population = pd.DataFrame(df[['Borough','Neighborhoods','Population']])

In [6]:
yangon_population.head()

Unnamed: 0,Borough,Neighborhoods,Population
0,Western District (Downtown),Ahlon,41200
1,Western District (Downtown),Bahan,81000
2,Western District (Downtown),Dagon,25082
3,Western District (Downtown),Kyauktada,37000
4,Western District (Downtown),Kyimyindaing,73200


In [7]:
yangon_population.to_csv('population_dataset.csv')

In [8]:
yangon_latitude = df['Latitude'].mean()
yangon_longitude = df['Longitude'].mean()
print("Latitude and Longitude of Yangon is : ",yangon_latitude,yangon_longitude)

Latitude and Longitude of Yangon is :  16.813652484848486 96.15452463636362


In [9]:
CLIENT_ID = 'DRNSKMKJ4BAH5R4P5FKPBWNQ1KN33TH5GMR30G0HTJKX5KVI'
CLIENT_SECRET = 'KJKHXUKCES5AAWNGE4VGH1NLGAYV4OWSLN02C2CJ4PIPQAO4'
VERSION = '20180604'
LIMIT = 150

In [10]:
unique_boroughs_of_yangon = df['Borough'].unique().tolist()

In [11]:
unique_boroughs_of_yangon

['Western District (Downtown)',
 'Eastern District',
 'Southern District',
 'Northern District']

In [12]:
borough_colors ={}
for i in unique_boroughs_of_yangon:
    borough_colors[i] = '#%02X%02X%02X' % tuple(np.random.choice(range(256),size=3))

In [13]:
yangon_map = folium.Map(location=[yangon_latitude,yangon_longitude],zoom_start=12,control_scale=True)

In [14]:
for lat,lng,boro,nei in zip(df['Latitude'],
                           df['Longitude'],
                           df['Borough'],
                           df['Neighborhoods']):
    label_text = boro + ' - ' + nei
    label = folium.Popup(label_text,parse_html=True)
    folium.CircleMarker(
    [lat,lng],
    tooltip = label_text,
    radius = 4,
    popup = label,
    color=borough_colors[boro],
    fill=True,
    fill_color = borough_colors[boro],
    fill_opacity=0.7).add_to(yangon_map)

In [15]:
yangon_map

# Exploring Yangon Neighborhoods using FourSquare API

In [16]:
def getNearbyVenues(names, boro, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, boro, lat, lng in zip(names, boro, latitudes, longitudes):
        print("Fetching venues for : ",name)            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            boro,
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood',
                             'Borough',
                             'Neighborhood Latitude', 
                             'Neighborhood Longitude', 
                             'Venue', 
                             'Venue Latitude', 
                             'Venue Longitude', 
                             'Venue Category']
    
    return(nearby_venues)

In [17]:
yangon_venues = getNearbyVenues(names=df['Neighborhoods'],latitudes=df['Latitude'],longitudes=df['Longitude'],boro=df['Borough'])

Fetching venues for :  Ahlon
Fetching venues for :  Bahan
Fetching venues for :  Dagon
Fetching venues for :  Kyauktada
Fetching venues for :  Kyimyindaing
Fetching venues for :  Lanmadaw
Fetching venues for :  Seikkan
Fetching venues for :  Latha
Fetching venues for :  Pabedan
Fetching venues for :  Sanchaung
Fetching venues for :  Dagon Seikkan
Fetching venues for :  East Dagon
Fetching venues for :  North Dagon
Fetching venues for :  North Okkalapa
Fetching venues for :  South Dagon
Fetching venues for :  South Okkalapa
Fetching venues for :  Thingangyun
Fetching venues for :  Dala
Fetching venues for :  Dawbon
Fetching venues for :  Botataung
Fetching venues for :  Mingala Taungnyunt
Fetching venues for :  Seikkyi Kanaungto
Fetching venues for :  Tamwe
Fetching venues for :  Pazundaung
Fetching venues for :  Thaketa
Fetching venues for :  Yankin
Fetching venues for :  Insein
Fetching venues for :  Hlaing
Fetching venues for :  Hlaingthaya
Fetching venues for :  Kamayut
Fetching ven

In [18]:
print("Total number of venues found in Yangon are : ",yangon_venues.shape[0])

Total number of venues found in Yangon are :  238


In [19]:
yangon_venues.head(5)

Unnamed: 0,Neighborhood,Borough,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Ahlon,Western District (Downtown),16.783056,96.127778,Urban 86 Food Court,16.782257,96.126869,Food Stand
1,Ahlon,Western District (Downtown),16.783056,96.127778,Sein Gay Har @ Pyay Road,16.781087,96.127681,Shopping Mall
2,Ahlon,Western District (Downtown),16.783056,96.127778,Meringue Café,16.78616,96.128021,Café
3,Ahlon,Western District (Downtown),16.783056,96.127778,Apple Hotpot & Dim Sum,16.786116,96.130737,Dim Sum Restaurant
4,Ahlon,Western District (Downtown),16.783056,96.127778,Western Park Ruby,16.786108,96.130772,Dumpling Restaurant


In [20]:
yangon_venues.groupby('Venue Category').count()['Neighborhood'].sort_values(ascending=False).head(10)

Venue Category
Café                   19
Hotel                  17
Noodle House           11
Restaurant             10
Shopping Mall          10
Japanese Restaurant     9
Bakery                  8
Burmese Restaurant      7
Hostel                  7
Supermarket             7
Name: Neighborhood, dtype: int64

In [21]:
print("Total number of unique categories in yangon are : ",len(yangon_venues['Venue Category'].unique()))

Total number of unique categories in yangon are :  73


### Getting number of venues per neighborhood

In [22]:
individual_yangon_venue_count = yangon_venues.groupby(['Borough','Neighborhood'])['Borough'].count().to_frame()

In [23]:
individual_yangon_venue_count

Unnamed: 0_level_0,Unnamed: 1_level_0,Borough
Borough,Neighborhood,Unnamed: 2_level_1
Eastern District,South Okkalapa,4
Eastern District,Thingangyun,5
Northern District,Hlaing,4
Northern District,Kamayut,31
Northern District,Mingaladon,5
Northern District,Shwepyitha,1
Southern District,Botataung,1
Southern District,Mingala Taungnyunt,5
Southern District,Pazundaung,5
Southern District,Tamwe,20


In [24]:
individual_yangon_venue_count.rename(columns={'Borough':'NumberOfVenues'},inplace=True)

In [25]:
individual_yangon_venue_count.reset_index(inplace=True)

In [26]:
individual_yangon_venue_count

Unnamed: 0,Borough,Neighborhood,NumberOfVenues
0,Eastern District,South Okkalapa,4
1,Eastern District,Thingangyun,5
2,Northern District,Hlaing,4
3,Northern District,Kamayut,31
4,Northern District,Mingaladon,5
5,Northern District,Shwepyitha,1
6,Southern District,Botataung,1
7,Southern District,Mingala Taungnyunt,5
8,Southern District,Pazundaung,5
9,Southern District,Tamwe,20


In [None]:
sns.set(rc={'figure.figsize':(11,16)})
plot = sns.barplot(x='Neighborhood',y='NumberOfVenues',data=individual_yangon_venue_count)
plot.set_xticklabels(individual_yangon_venue_count['Neighborhood'],rotation=90)

[Text(0, 0, 'South Okkalapa'),
 Text(0, 0, 'Thingangyun'),
 Text(0, 0, 'Hlaing'),
 Text(0, 0, 'Kamayut'),
 Text(0, 0, 'Mingaladon'),
 Text(0, 0, 'Shwepyitha'),
 Text(0, 0, 'Botataung'),
 Text(0, 0, 'Mingala Taungnyunt'),
 Text(0, 0, 'Pazundaung'),
 Text(0, 0, 'Tamwe'),
 Text(0, 0, 'Thaketa'),
 Text(0, 0, 'Yankin'),
 Text(0, 0, 'Ahlon'),
 Text(0, 0, 'Bahan'),
 Text(0, 0, 'Dagon'),
 Text(0, 0, 'Kyauktada'),
 Text(0, 0, 'Lanmadaw'),
 Text(0, 0, 'Latha'),
 Text(0, 0, 'Pabedan'),
 Text(0, 0, 'Sanchaung')]

From the above graph we can see that inderanagar has most number of venues and soo on...

## Exploring Kyauktada venues

In [None]:
yangon_venues[yangon_venues['Neighborhood']=='Kyauktada']

# One hot encoding for letting us to compare different venues based on some common scale

In [None]:
yangon_venues_onehot = pd.get_dummies(yangon_venues[['Venue Category']])

In [None]:
yangon_venues_onehot

In [None]:
yangon_venues_onehot['Neighborhood'] = yangon_venues['Neighborhood']
yangon_venues_grouped = yangon_venues_onehot.groupby('Neighborhood').mean().reset_index()
yangon_venues_grouped

In [None]:
number_of_top_venues = 5

In [None]:
for hood in yangon_venues_grouped['Neighborhood']:
    print('*********',hood,'**********')
    temp = yangon_venues_grouped[yangon_venues_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['Venue','Frequency']
    temp = temp.iloc[1:]
    temp['Frequency'] = temp['Frequency'].astype(float)
    temp = temp.round({'Frequency': 2})
    print(temp.sort_values('Frequency', ascending=False).reset_index(drop=True).head(number_of_top_venues))
    print('\n')

Frequency of each neighborhood and its top 5 venues can be known. This is very important is we can analyze top neighborhoods with most busy restaurants

In [None]:
def return_most_common_venues(row, number_of_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    return row_categories_sorted.index.values[0:number_of_top_venues]

In [None]:
number_of_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(number_of_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = yangon_venues_grouped['Neighborhood']

for ind in np.arange(yangon_venues_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(yangon_venues_grouped.iloc[ind, :], number_of_top_venues)

neighborhoods_venues_sorted.head()

In [None]:
neighborhoods_venues_sorted.shape

## Next challenge is to find the optimal k value for clustering and we do it using the elbow method

In [None]:
sse = {}
for k in range(1,15):
    kmeans = KMeans(n_clusters=k,random_state=0)
    kmeans.fit(yangon_venues_grouped.drop('Neighborhood',axis=1))
    yangon_venues_grouped['Cluster'] = kmeans.labels_
    sse[k] = kmeans.inertia_

plt.figure()
plt.plot(list(sse.keys()), list(sse.values()))
plt.xlabel("Number of cluster")
plt.ylabel("SSE")
plt.show()

### From the above graph, we can see the optimal value for cluster is 5.

In [None]:
kmeans = KMeans(n_clusters=5,random_state=0)

In [None]:
kmeans.fit(yangon_venues_grouped.drop('Neighborhood',axis=1))

In [None]:
yangon_venues_grouped['Cluster'] = kmeans.labels_

In [None]:
yangon_venues_grouped.groupby('Cluster')['Neighborhood'].count()

In [None]:
yangon_venues_grouped.columns

In [None]:
neighborhoods_venues_sorted = neighborhoods_venues_sorted.merge(yangon_venues_grouped,on='Neighborhood')

In [None]:
neighborhoods_venues_sorted.head(4)

In [None]:
neighborhoods_venues_sorted.columns
neighborhoods_venues_sorted = neighborhoods_venues_sorted.merge(yangon_venues,on='Neighborhood')

In [None]:
# create map
map_clusters = folium.Map(location=[yangon_latitude, yangon_longitude], zoom_start=11)

In [None]:
# set color scheme for the clusters
x = np.arange(6)
ys = [i + x + (i*x)**2 for i in range(6)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(neighborhoods_venues_sorted['Neighborhood Latitude'], neighborhoods_venues_sorted['Neighborhood Longitude'], neighborhoods_venues_sorted['Neighborhood'], neighborhoods_venues_sorted['Cluster']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       

In [None]:
map_clusters

## Finding similar locations based on user input (recommending location)

In [None]:
yangon_population.head()

In [None]:
yangon_population['Normalized_population'] = yangon_population['Population']/yangon_population['Population'].max(axis=0)
yangon_population.head()

In [None]:
yangon_venues_grouped.head()

#### recommending Veg restaurants

In [None]:
yangon_veg = yangon_venues_onehot.groupby(['Neighborhood']).sum().reset_index()
yangon_veg.head()

In [None]:
yangon_veg.rename(columns={'Neighborhood':'Neighborhoods'},inplace=True)

### Building a target neighborhood by providing a sample restaurant : say 'Kyauktada'

In [None]:
target_cluster_dataframe = neighborhoods_venues_sorted.loc[neighborhoods_venues_sorted['Neighborhood']=='Kyauktada']

In [None]:
target_cluster_dataframe.reset_index()

In [None]:
target_cluster = target_cluster_dataframe.iloc[0].at['Cluster']

In [None]:
target_cluster

In [None]:
print("The target cluster is : ",target_cluster)

In [None]:
possible_neighborhoods = neighborhoods_venues_sorted[neighborhoods_venues_sorted['Cluster']==target_cluster]
possible_neighborhoods.head()

In [None]:
print("There are {} neighborhoods which has similar characteristics to Kyauktada.".format(possible_neighborhoods.shape[0]))

Selected the crowded population in Kyauktada which is the center of Yangon that has similar characteristics 195 neighborhoods.

# Thanks for your review!!!!