# Capstone Project - The Battle of Neighborhoods: Istanbul

In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import numpy as np

## Data

Get Istanbul's district list from Wikipedia:

In [2]:
# Wikipedia page whose first HTML table lists Istanbul's districts.
districts_url = "https://en.wikipedia.org/wiki/List_of_districts_of_Istanbul"

# read_html returns one DataFrame per table found; keep the first one.
wiki_tables = pd.read_html(districts_url)
istanbul_df = wiki_tables[0]
istanbul_df

Unnamed: 0,District,Population (2019),Area (km²),Density (per km²),Mensual household income TL(USD),Annual household income TL(USD)
0,Adalar,15238,11.05,1379,6.652₺ (918$),"79.821₺ (10,978$)"
1,Arnavutköy,282488,450.35,627,2.030₺ (279$),"24.360₺ (3,350$)"
2,Ataşehir,425094,25.23,16849,6.577₺ (904$),"78.924₺ (10,854$)"
3,Avcılar,448882,42.01,10685,3.662₺ (503$),"43.938₺ (6,064$)"
4,Bağcılar,745125,22.36,33324,3.197₺ (441$),"38.367₺ (5,295$)"
5,Bahçelievler,611059,16.62,36766,4.674₺ (645$),"56.088₺ (7,741$)"
6,Bakırköy,229239,29.64,7734,"8.845₺ (1,220$)","106.140₺ (14,650$)"
7,Başakşehir,460259,104.30,4413,4.513₺ (622$),"54.152₺ (7,474$)"
8,Bayrampaşa,274735,9.61,28588,3.480₺ (480$),"41.762₺ (5,764$)"
9,Beşiktaş,182649,18.01,10142,"10.560₺ (1,457$)","126.720₺ (17,490$)"


Remove all non-district rows (the last four rows of the table):

In [3]:
# The last rows of the scraped table are not districts, so slice them off.
# NOTE: istanbul_df is rebound from itself, so re-running this cell removes
# another batch of rows; re-run the loading cell first.
non_district_row_count = 4
istanbul_df = istanbul_df.iloc[:-non_district_row_count]
istanbul_df

Unnamed: 0,District,Population (2019),Area (km²),Density (per km²),Mensual household income TL(USD),Annual household income TL(USD)
0,Adalar,15238,11.05,1379,6.652₺ (918$),"79.821₺ (10,978$)"
1,Arnavutköy,282488,450.35,627,2.030₺ (279$),"24.360₺ (3,350$)"
2,Ataşehir,425094,25.23,16849,6.577₺ (904$),"78.924₺ (10,854$)"
3,Avcılar,448882,42.01,10685,3.662₺ (503$),"43.938₺ (6,064$)"
4,Bağcılar,745125,22.36,33324,3.197₺ (441$),"38.367₺ (5,295$)"
5,Bahçelievler,611059,16.62,36766,4.674₺ (645$),"56.088₺ (7,741$)"
6,Bakırköy,229239,29.64,7734,"8.845₺ (1,220$)","106.140₺ (14,650$)"
7,Başakşehir,460259,104.3,4413,4.513₺ (622$),"54.152₺ (7,474$)"
8,Bayrampaşa,274735,9.61,28588,3.480₺ (480$),"41.762₺ (5,764$)"
9,Beşiktaş,182649,18.01,10142,"10.560₺ (1,457$)","126.720₺ (17,490$)"


In [4]:
# Sanity check: (rows, columns) of the frame after dropping the trailing rows.
istanbul_df.shape

(39, 6)

Now we have all 39 districts. For easier handling, we rename the columns as follows:

In [5]:
# Shorter column names that are easier to reference in later cells.
column_renames = {
    'Population (2019)': 'Population',
    'Area (km²)': 'Area',
    'Density (per km²)': 'Density',
    'Mensual household income TL(USD)': 'Mensual',
    'Annual household income TL(USD)': 'Annual',
}

# A single non-mutating rename: the result is a new frame bound to the same
# name, instead of five separate in-place mutations of the sliced frame.
istanbul_df = istanbul_df.rename(columns=column_renames)
istanbul_df.columns.values

array(['District', 'Population', 'Area', 'Density', 'Mensual', 'Annual'],
      dtype=object)

Append latitude, longitude and side columns to the dataframe

In [6]:
# Start from the current column labels ...
header_list = list(istanbul_df.columns)
print('Before:', header_list)

# ... and append the three columns that will be filled in by geocoding.
header_list += ['Latitude', 'Longitude', 'Side']
print('After adding:', header_list)

# reindex keeps the existing columns and creates the new ones filled with NaN.
istanbul_df = istanbul_df.reindex(columns=header_list)

istanbul_df.head()

Before: ['District', 'Population', 'Area', 'Density', 'Mensual', 'Annual']
After adding: ['District', 'Population', 'Area', 'Density', 'Mensual', 'Annual', 'Latitude', 'Longitude', 'Side']


Unnamed: 0,District,Population,Area,Density,Mensual,Annual,Latitude,Longitude,Side
0,Adalar,15238,11.05,1379,6.652₺ (918$),"79.821₺ (10,978$)",,,
1,Arnavutköy,282488,450.35,627,2.030₺ (279$),"24.360₺ (3,350$)",,,
2,Ataşehir,425094,25.23,16849,6.577₺ (904$),"78.924₺ (10,854$)",,,
3,Avcılar,448882,42.01,10685,3.662₺ (503$),"43.938₺ (6,064$)",,,
4,Bağcılar,745125,22.36,33324,3.197₺ (441$),"38.367₺ (5,295$)",,,


Fix the column data types. The current data types are:

In [7]:
# Inspect the dtype of every column before converting any of them.
istanbul_df.dtypes

District       object
Population      int64
Area           object
Density         int64
Mensual        object
Annual         object
Latitude      float64
Longitude     float64
Side          float64
dtype: object

In [8]:
# Target dtypes: 'Side' will hold text labels, 'Area' must be numeric.
column_dtypes = {
    "Side": str,
    "Area": np.float64,
}
istanbul_df = istanbul_df.astype(column_dtypes)
istanbul_df.dtypes

District       object
Population      int64
Area          float64
Density         int64
Mensual        object
Annual         object
Latitude      float64
Longitude     float64
Side           object
dtype: object

Set side, latitude and longitude values

In [9]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
from functools import partial

# A district whose geocoded longitude is greater than this value is labelled
# "Asian", otherwise "European". This is only a heuristic based on a single
# meridian, so some districts need a manual correction afterwards.
ASIAN_SIDE_MIN_LONGITUDE = 29.008

geolocator = Nominatim(user_agent="istanbul_explorer")

# Ask for Turkish place names in the returned addresses
geocode = partial(geolocator.geocode, language="tr")

# Throttled wrapper for the bulk lookup below: waits at least one second
# between requests. With RateLimiter's defaults a lookup that keeps failing
# yields None instead of raising, which is checked explicitly in the loop.
geocode_throttled = RateLimiter(geocode, min_delay_seconds=1)

for i, district_name in istanbul_df['District'].items():
    query = 'Turkey, Istanbul, ' + district_name
    location = geocode_throttled(query)
    if location is None:
        # Fail here with the offending query rather than with an
        # AttributeError on None a line later.
        raise ValueError('Could not geocode district: ' + query)
    print(location.address, location.latitude, location.longitude)

    # NOTE(review): the geocoder returns the best match for the name, which is
    # not necessarily the district itself (the recorded hit for Arnavutköy
    # looks like a neighbourhood on the Bosphorus) - verify the coordinates.
    side = "Asian" if location.longitude > ASIAN_SIDE_MIN_LONGITUDE else "European"

    istanbul_df.at[i, 'Latitude'] = location.latitude
    istanbul_df.at[i, 'Longitude'] = location.longitude
    istanbul_df.at[i, 'Side'] = side

istanbul_df.head()

Adalar, İstanbul, Marmara Bölgesi, Türkiye 40.87625945 29.091027262109563
Arnavutköy, İstanbul, Marmara Bölgesi, 33345, Türkiye 41.0683942 29.0411538
Ataşehir, İstanbul, Marmara Bölgesi, Türkiye 40.9847487 29.1067199
Avcılar, İstanbul, Marmara Bölgesi, Türkiye 40.9801353 28.7175465
Bağcılar, İstanbul, Marmara Bölgesi, Türkiye 41.0338992 28.8578982
Bahçelievler, İstanbul, Marmara Bölgesi, 34180, Türkiye 40.9991724 28.8612556
Bakırköy, İstanbul, Marmara Bölgesi, 34147, Türkiye 40.9835414 28.8679735
Başakşehir, İstanbul, Marmara Bölgesi, Türkiye 41.0976935 28.8061626
Bayrampaşa, İstanbul, Marmara Bölgesi, Türkiye 41.0357375 28.9122605
Beşiktaş, İstanbul, Marmara Bölgesi, 34022, Türkiye 41.0428465 29.0075283
Beykoz, İstanbul, Marmara Bölgesi, 34820, Türkiye 41.1239355 29.1083151
Beylikdüzü, İstanbul, Marmara Bölgesi, Türkiye 41.0010262 28.6419843
Beyoğlu, İstanbul, Marmara Bölgesi, 34421, Türkiye 41.0284233 28.9736808
Büyükçekmece, İstanbul, Marmara Bölgesi, 34500, Türkiye 41.0156913 28.59

Unnamed: 0,District,Population,Area,Density,Mensual,Annual,Latitude,Longitude,Side
0,Adalar,15238,11.05,1379,6.652₺ (918$),"79.821₺ (10,978$)",40.876259,29.091027,Asian
1,Arnavutköy,282488,450.35,627,2.030₺ (279$),"24.360₺ (3,350$)",41.068394,29.041154,Asian
2,Ataşehir,425094,25.23,16849,6.577₺ (904$),"78.924₺ (10,854$)",40.984749,29.10672,Asian
3,Avcılar,448882,42.01,10685,3.662₺ (503$),"43.938₺ (6,064$)",40.980135,28.717547,European
4,Bağcılar,745125,22.36,33324,3.197₺ (441$),"38.367₺ (5,295$)",41.033899,28.857898,European


In [10]:
# Display the frame to review the Latitude, Longitude and Side values set above.
istanbul_df

Unnamed: 0,District,Population,Area,Density,Mensual,Annual,Latitude,Longitude,Side
0,Adalar,15238,11.05,1379,6.652₺ (918$),"79.821₺ (10,978$)",40.876259,29.091027,Asian
1,Arnavutköy,282488,450.35,627,2.030₺ (279$),"24.360₺ (3,350$)",41.068394,29.041154,Asian
2,Ataşehir,425094,25.23,16849,6.577₺ (904$),"78.924₺ (10,854$)",40.984749,29.10672,Asian
3,Avcılar,448882,42.01,10685,3.662₺ (503$),"43.938₺ (6,064$)",40.980135,28.717547,European
4,Bağcılar,745125,22.36,33324,3.197₺ (441$),"38.367₺ (5,295$)",41.033899,28.857898,European
5,Bahçelievler,611059,16.62,36766,4.674₺ (645$),"56.088₺ (7,741$)",40.999172,28.861256,European
6,Bakırköy,229239,29.64,7734,"8.845₺ (1,220$)","106.140₺ (14,650$)",40.983541,28.867974,European
7,Başakşehir,460259,104.3,4413,4.513₺ (622$),"54.152₺ (7,474$)",41.097693,28.806163,European
8,Bayrampaşa,274735,9.61,28588,3.480₺ (480$),"41.762₺ (5,764$)",41.035738,28.91226,European
9,Beşiktaş,182649,18.01,10142,"10.560₺ (1,457$)","126.720₺ (17,490$)",41.042847,29.007528,European


Fix the wrong side values (the longitude threshold misclassifies these two districts):

In [11]:
# Manual corrections for districts that the longitude threshold put on the
# wrong side. A single .loc[row, column] assignment writes into the frame
# itself; chained indexing (df['Side'][29] = ...) may write to a temporary
# copy and is a no-op under pandas Copy-on-Write.
istanbul_df.loc[29, 'Side'] = 'European'  # Sarıyer
istanbul_df.loc[1, 'Side'] = 'European'  # Arnavutköy

Get the geographical coordinates of Istanbul

In [12]:
# Geocode the city itself; its coordinates are used to centre the map.
address = 'Turkey, Istanbul'
istanbul_location = geocode(address)

istanbul_latitude, istanbul_longitude = (
    istanbul_location.latitude,
    istanbul_location.longitude,
)

message = 'The geograpical coordinates of {} are {}, {}.'
print(message.format(address, istanbul_latitude, istanbul_longitude))

The geograpical coordinates of Turkey, Istanbul are 41.0096334, 28.9651646.


Now, let's visualize Istanbul's districts

In [16]:
import folium # to create maps
import unidecode # transliterate labels to plain ASCII

# create a map of Istanbul centred on the city's latitude and longitude
map_istanbul = folium.Map(location=[istanbul_latitude, istanbul_longitude], zoom_start=9)

# add one circle marker per district, coloured by side
district_columns = zip(
    istanbul_df['Latitude'],
    istanbul_df['Longitude'],
    istanbul_df['District'],
    istanbul_df['Side'],
)
for lat, lng, label, side in district_columns:
    popup = folium.Popup(unidecode.unidecode(label), parse_html=True)
    # blue = Asian side, red = European side
    color = 'blue' if side == 'Asian' else 'red'
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color=color,
        fill=True,
        # fill with the side colour too, so the two sides are distinguishable
        # by the whole marker and not only by its outline
        fill_color=color,
        fill_opacity=0.7,
    ).add_to(map_istanbul)

map_istanbul