# Welcome to the Notebook for my final submission

##### You will notice that I supressed the part of the code that contains my API access data, for security reasons.  
##### Enjoy the rest of the code! :D

In [1]:
#importing libraries
import pandas as pd
import numpy as np
import warnings
import urllib.request
import requests # library to handle requests
import folium
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors


from geopy.geocoders import Nominatim
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
from sklearn import preprocessing
# import k-means from clustering stage
from sklearn.cluster import KMeans

warnings.filterwarnings('ignore')

##### Crafting our database

In [2]:
#First, let's get a list of Florianópolis'neighborhoods! 
url = 'https://pt.wikipedia.org/wiki/Lista_de_distritos_e_bairros_de_Florian%C3%B3polis'
req = urllib.request.urlopen(url)
article = req.read().decode()

with open('neighb.html', 'w') as code:
    code.write(article)

In [3]:
article = open('neighb.html').read()
soup = BeautifulSoup(article, 'html.parser')
#get that table
tables = soup.find_all('table', class_='sortable')

In [4]:
# Search through the tables for the one with the headings we want.
# The wikipedia page has two tables: one for districts and one for neighborhood. 
# We are interested in the one that contains the neighborhoods
for table in tables:
    ths = table.find_all('th')
    headings = [th.text.strip() for th in ths]
    if headings[:5] == ['Posição', 'Bairro', 'População']:
        break

##### Please note that in the following section we will perform the table scraping while obtaining the geospatial data through an API!

In [57]:
key = 'YOUR_KEY_HERE'

In [8]:
#Lembrar de refazer essa parte e postar o metodo alternativo no medium
from opencage.geocoder import OpenCageGeocode
# Extract the columns we want and write to a colon-delimited text file.
with open('pop_city.csv', 'w') as pop:
    print('position,', 'neighborhood,', 'population,', 'latitude,', 'longitude', file = pop)
    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if (not tds) or (len(tds)<3) :
            continue
        position, neighborhood, population = [td.text.strip() for td in tds[:3]]
        population = population.replace('.', '')
        #The following bit is about the API query:
        geocoder = OpenCageGeocode(key)
        query = neighborhood + ', Florianopolis, Brazil'
        results = geocoder.geocode(query)

        latitude = str(results[0]['geometry']['lat'])
        longitude = str(results[0]['geometry']['lng']) 
        print(', '.join([position, neighborhood, population, latitude, longitude]), file=pop)

##### Let us open the .csv file and see if it turned out ok:

In [9]:
df_pop = pd.read_csv('pop_city.csv', sep=r'\s*,\s*')
df_pop.head()

Unnamed: 0,position,neighborhood,population,latitude,longitude
0,1,Centro,44074,-27.597203,-48.549481
1,2,Capoeiras,19323,-27.597333,-48.590008
2,3,Trindade,15031,-27.589383,-48.5224
3,4,Agronômica,14591,-27.578145,-48.535717
4,5,Saco dos Limões,13771,-27.608268,-48.534343


## When analyzing the data obtained, I realized something was off; the data for the index #49 does not match. Let's correct that one entry manually:

In [10]:
df_pop['latitude'][df_pop['neighborhood'] == 'Santo Antônio'] = -27.511599
df_pop['longitude'][df_pop['neighborhood'] == 'Santo Antônio'] = -48.514494
df_pop.tail(36)

Unnamed: 0,position,neighborhood,population,latitude,longitude
49,50,Santo Antônio,1352,-27.511599,-48.514494
50,51,Sambaqui,1345,-27.492403,-48.526831
51,52,Ingleses Sul,1323,-27.440523,-48.364711
52,53,Bom Abrigo,1262,-27.611804,-48.59494
53,54,Jurere Oeste,1221,-27.441564,-48.491754
54,55,Porto da Lagoa,1200,-27.630842,-48.472881
55,56,Cachoeira do Bom Jesus,1199,-27.426531,-48.423675
56,57,Rio Tavares do Norte,1082,-27.59667,-48.54917
57,58,Pedregal,1034,-27.572528,-48.643225
58,59,Ratones,1023,-27.504516,-48.490399


### Now, everything looks ok!

## This is Florianópolis, and its neighborhoods:

In [11]:
# create map of Florianópolis using latitude and longitude values
map_floripa = folium.Map(location=[-27.593500, -48.558540], zoom_start=10)

# add markers to map
for lat, lng, neighborhood in zip(df_pop['latitude'], df_pop['longitude'], df_pop['neighborhood']):
    label = neighborhood
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_floripa)  
    
map_floripa

### Now, let's get the coordinates for the universities in Florianópolis
As it happens, the top rated universities in the city (according to the Brazilian index RUF) are:

* UFSC - Federal University of Santa Catarina  
* IFSC - Federal Institude of Santa Catarina  
* UDESC - University of the State of Santa Cartarina   

So, let's obtain their coordinates, shall we?

In [12]:
# Because this dataframe will only have 3 rows and 4 columns, it is hardly worth the effort to automate all of it.
# Let's fill some columns manually
unis = pd.DataFrame(columns = ['name', 'id', 'latitude', 'longitude'])
unis['name'] = ['UFSC', 'IFSC', 'UDESC']
unis['id'] = ['4baa92c1f964a520ec753ae3', '4d496f119544a093946b28e7', '540d889a498efcfbb6c4b9a9']
unis.head()

Unnamed: 0,name,id,latitude,longitude
0,UFSC,4baa92c1f964a520ec753ae3,,
1,IFSC,4d496f119544a093946b28e7,,
2,UDESC,540d889a498efcfbb6c4b9a9,,


In [58]:
# Now the fun bit
# Firing up that Foursquare API
CLIENT_ID = 'YOUR_ID_HERE' # your Foursquare ID
CLIENT_SECRET = 'YOUR_SECRET_HERE' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 100
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: YOUR_ID_HERE
CLIENT_SECRET:YOUR_SECRET_HERE


In [14]:
i = 0
for var_id in unis['id']:
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(var_id, CLIENT_ID, CLIENT_SECRET, VERSION)
    result = requests.get(url).json()
    unis['latitude'][i] = result['response']['venue']['location']['lat']
    unis['longitude'][i] = result['response']['venue']['location']['lng']
    i = i + 1

unis.head()

Unnamed: 0,name,id,latitude,longitude
0,UFSC,4baa92c1f964a520ec753ae3,-27.5972,-48.522
1,IFSC,4d496f119544a093946b28e7,-27.5935,-48.5423
2,UDESC,540d889a498efcfbb6c4b9a9,-27.5861,-48.505


## So, where exactly are the top universities in the city located?

In [15]:
# create map of Florianópolis using latitude and longitude values
map_floripa = folium.Map(location=[-27.593500, -48.558540], zoom_start=10)

# add markers to map
for lat, lng, neighborhood in zip(unis['latitude'], unis['longitude'], unis['name']):
    label = neighborhood
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='yellow',
        fill_opacity=0.7,
        parse_html=False).add_to(map_floripa)  
    
map_floripa

##### Now that we know the locations of the top universities and neighborhoods, let's measure the distances between the neighborhoods and the universities:

In [16]:
from math import sin, cos, sqrt, atan2, radians
R = 6373.0 #Earth's approximate radius
dist = np.zeros(5)

df_pop['distance ufsc'] = float(0)
df_pop['distance ifsc'] = float(0)
df_pop['distance udesc'] = float(0)
for ind in df_pop.index:
    #getting the neighborhood's position in radians
    neigh_lat = np.radians(df_pop['latitude'][ind])
    neigh_lng = np.radians(df_pop['longitude'][ind])
    for ind2 in unis.index:
        #getting the universities' position in radians
        uni_lat = np.radians(unis['latitude'][ind2])
        uni_long = np.radians(unis['longitude'][ind2])
        #calculate the distance for each university
        dlat = uni_lat - neigh_lat
        dlon = uni_long - neigh_lng
        a = sin(dlat / 2)**2 + cos(neigh_lat) * cos(uni_lat) * sin(dlon / 2)**2
        c = 2 * atan2(sqrt(a), sqrt(1 - a))
        dist[ind2] = R * c
    df_pop['distance ufsc'][ind] = dist[0]
    df_pop['distance ifsc'][ind] = dist[1]
    df_pop['distance udesc'][ind] = dist[2]
    #falta calcular a média das distâncias e adicionar ao dataframe df_pop(dos bairros)
df_pop.head()

Unnamed: 0,position,neighborhood,population,latitude,longitude,distance ufsc,distance ifsc,distance udesc
0,1,Centro,44074,-27.597203,-48.549481,2.709233,0.819431,4.553951
1,2,Capoeiras,19323,-27.597333,-48.590008,6.70418,4.720727,8.470627
2,3,Trindade,15031,-27.589383,-48.5224,0.868572,2.015336,1.751967
3,4,Agronômica,14591,-27.578145,-48.535717,2.512783,1.824074,3.151455
4,5,Saco dos Limões,13771,-27.608268,-48.534343,1.732293,1.824608,3.801057


##### Getting the neighborhoods' areas to calculate the population density

In [17]:
area = pd.read_csv('areas.csv') # This database was scraped from the website of the state of Santa Caratina
area.columns = ['neighborhood', 'area']
print('Areas in km square:')
area.head()

Areas in km square:


Unnamed: 0,neighborhood,area
0,Centro,5.368
1,Capoeiras,2.816
2,Trindade,3.32
3,Agronômica,1.964
4,Saco dos Limões,3.106


##### Well, I have to confess that I personally crafted this database by ~~painstakingly adding the entries one by one from an unstructured governmental database~~ passionately dedicating my time to the quality of the data and, because of that, I know for sure that the dataframe for the areas is in the same order as the one we used before, so qe can go ahead and just add the data to a new column

In [18]:
df_pop['area'] = area['area']
df_pop.head()

Unnamed: 0,position,neighborhood,population,latitude,longitude,distance ufsc,distance ifsc,distance udesc,area
0,1,Centro,44074,-27.597203,-48.549481,2.709233,0.819431,4.553951,5.368
1,2,Capoeiras,19323,-27.597333,-48.590008,6.70418,4.720727,8.470627,2.816
2,3,Trindade,15031,-27.589383,-48.5224,0.868572,2.015336,1.751967,3.32
3,4,Agronômica,14591,-27.578145,-48.535717,2.512783,1.824074,3.151455,1.964
4,5,Saco dos Limões,13771,-27.608268,-48.534343,1.732293,1.824608,3.801057,3.106


##### Now, for the calculation of the population density:

In [19]:
df_pop['population density'] = df_pop['population'] / df_pop['area']
df_pop.head()

Unnamed: 0,position,neighborhood,population,latitude,longitude,distance ufsc,distance ifsc,distance udesc,area,population density
0,1,Centro,44074,-27.597203,-48.549481,2.709233,0.819431,4.553951,5.368,8210.506706
1,2,Capoeiras,19323,-27.597333,-48.590008,6.70418,4.720727,8.470627,2.816,6861.860795
2,3,Trindade,15031,-27.589383,-48.5224,0.868572,2.015336,1.751967,3.32,4527.409639
3,4,Agronômica,14591,-27.578145,-48.535717,2.512783,1.824074,3.151455,1.964,7429.226069
4,5,Saco dos Limões,13771,-27.608268,-48.534343,1.732293,1.824608,3.801057,3.106,4433.676755


##### With that, we found out that the population density is not one of the attrative factors of this city; because of that, we are gonna exclude it from the analysis

In [22]:
del(df_pop['area'])
del(df_pop['population density'])
del(df_pop['population'])
del(df_pop['position'])
del(df_pop['area'])
df_pop.head()

Unnamed: 0,neighborhood,latitude,longitude,distance ufsc,distance ifsc,distance udesc
0,Centro,-27.597203,-48.549481,2.709233,0.819431,4.553951
1,Capoeiras,-27.597333,-48.590008,6.70418,4.720727,8.470627
2,Trindade,-27.589383,-48.5224,0.868572,2.015336,1.751967
3,Agronômica,-27.578145,-48.535717,2.512783,1.824074,3.151455
4,Saco dos Limões,-27.608268,-48.534343,1.732293,1.824608,3.801057


In [23]:
df_pop.to_csv('clean_data.csv') # Considering that we did ok so far, let's record our progress!

##### Now, the really, REALLY fun bit :D we are gonna select some amenities to look for in those neighborhoods:
* Bars
* Restaurants
* Gyms
* Markets
* Bus stops

In [24]:
bkp = df_pop # good to have some insurance, right?

In [25]:
# For this bit, it is more practical if the columns we are gonna use for this search are in Portuguese, but
# don't worry, everything will be translated at the end.
df_pop['bar'] = 0 # number of bars in the neighborhood
df_pop['restaurante'] = 0 # number of restaurants in the neighborhood
df_pop['academia'] = 0 # number of gyms in the neighborhood
df_pop['mercado'] = 0 # number of markets in the neighborhood 
df_pop['ponto de ônibus'] = 0 # number of bus stops in the neighborhood
df_pop.head()

Unnamed: 0,neighborhood,latitude,longitude,distance ufsc,distance ifsc,distance udesc,bar,restaurante,academia,mercado,ponto de ônibus
0,Centro,-27.597203,-48.549481,2.709233,0.819431,4.553951,0,0,0,0,0
1,Capoeiras,-27.597333,-48.590008,6.70418,4.720727,8.470627,0,0,0,0,0
2,Trindade,-27.589383,-48.5224,0.868572,2.015336,1.751967,0,0,0,0,0
3,Agronômica,-27.578145,-48.535717,2.512783,1.824074,3.151455,0,0,0,0,0
4,Saco dos Limões,-27.608268,-48.534343,1.732293,1.824608,3.801057,0,0,0,0,0


In [30]:
radius = 1000 #Defining the search radius for 1km from the neighborhood center
for amenity in ['bar', 'restaurante', 'academia', 'mercado', 'ponto de ônibus']:
    search_query = amenity
    for ind in df_pop.index:
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, df_pop['latitude'][ind], df_pop['longitude'][ind], VERSION, search_query, radius, LIMIT)
        results = requests.get(url).json()# Performing API search
        print(results)
        venues = results['response']['venues']
        dataframe = json_normalize(venues)
        df_pop[amenity][ind] = dataframe.shape[0]
df_pop.head()

{'meta': {'code': 429, 'errorType': 'quota_exceeded', 'errorDetail': 'Quota exceeded', 'requestId': '5eb8565014a126001b951bc1'}, 'response': {}}


KeyError: 'venues'

### Ooops, looks like I did way too many queries for today! Good thing I had saved this query in a .csv file beforehand!

In [35]:
df_pop = pd.read_csv('df_pop_venues.csv')
del(df_pop['Unnamed: 0'])
del(df_pop['position'])
del(df_pop['population'])
df_pop.head()

Unnamed: 0,neighborhood,latitude,longitude,distance ufsc,distance ifsc,distance udesc,bar,restaurante,academia,mercado,ponto de ônibus
0,Centro,-27.597203,-48.549481,2.709233,0.819431,4.553951,50,50,47,25,9
1,Capoeiras,-27.597333,-48.590008,6.70418,4.720727,8.470627,50,21,26,6,9
2,Trindade,-27.589383,-48.5224,0.868572,2.015336,1.751967,50,34,25,8,5
3,Agronômica,-27.578145,-48.535717,2.512783,1.824074,3.151455,24,14,13,6,6
4,Saco dos Limões,-27.608268,-48.534343,1.732293,1.824608,3.801057,18,8,4,6,1


In [36]:
df_pop.to_csv('clean_data.csv') # Considering that we did ok so far, let's record our progress!
df_pop.head()

Unnamed: 0,neighborhood,latitude,longitude,distance ufsc,distance ifsc,distance udesc,bar,restaurante,academia,mercado,ponto de ônibus
0,Centro,-27.597203,-48.549481,2.709233,0.819431,4.553951,50,50,47,25,9
1,Capoeiras,-27.597333,-48.590008,6.70418,4.720727,8.470627,50,21,26,6,9
2,Trindade,-27.589383,-48.5224,0.868572,2.015336,1.751967,50,34,25,8,5
3,Agronômica,-27.578145,-48.535717,2.512783,1.824074,3.151455,24,14,13,6,6
4,Saco dos Limões,-27.608268,-48.534343,1.732293,1.824608,3.801057,18,8,4,6,1


### Translating those columns:

In [37]:
df_pop.columns = ['neighborhood', 'latitude', 'longitude', 'distance ufsc', 'distance ifsc', 'distance udesc','bars', 'restaurants', 'gyms', 'markets', 'bus stations']
df_pop.head()

Unnamed: 0,neighborhood,latitude,longitude,distance ufsc,distance ifsc,distance udesc,bars,restaurants,gyms,markets,bus stations
0,Centro,-27.597203,-48.549481,2.709233,0.819431,4.553951,50,50,47,25,9
1,Capoeiras,-27.597333,-48.590008,6.70418,4.720727,8.470627,50,21,26,6,9
2,Trindade,-27.589383,-48.5224,0.868572,2.015336,1.751967,50,34,25,8,5
3,Agronômica,-27.578145,-48.535717,2.512783,1.824074,3.151455,24,14,13,6,6
4,Saco dos Limões,-27.608268,-48.534343,1.732293,1.824608,3.801057,18,8,4,6,1


In [38]:
df_pop.to_csv('df_pop_venues.csv')

## Clustering the neighborhoods:

In [39]:
# set number of clusters
kclusters = 4

floripa_grouped_clustering = df_pop.drop(['neighborhood', 'latitude', 'longitude'], 1)

# run k-means clustering
kmeans = KMeans(n_clusters = kclusters, random_state=0).fit(floripa_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 3, 3, 2, 2, 2, 3, 3, 3, 2], dtype=int32)

In [40]:
df_pop['cluster'] = kmeans.labels_

## Visualizing the clusters:

In [41]:
# create map
map_clusters = folium.Map(location = [-27.593500, -48.558540], zoom_start = 10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(df_pop['latitude'], df_pop['longitude'], df_pop['neighborhood'], df_pop['cluster']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## Analyzing the clusters:

In [42]:
df_pop.loc[df_pop['cluster'] == 0, df_pop.columns[[0] + list(range(3, df_pop.shape[1]))]]

Unnamed: 0,neighborhood,distance ufsc,distance ifsc,distance udesc,bars,restaurants,gyms,markets,bus stations,cluster
10,Capivari,19.679754,20.585249,17.716058,6,8,5,7,1,0
15,São João do Rio Vermelho,16.788036,17.888995,14.76143,3,0,4,4,2,0
28,Ingleses Centro,21.582851,22.392932,19.655183,24,25,12,7,0,0
33,Santinho,21.288775,22.376551,19.259316,8,11,2,6,1,0
34,Ponta das Canas,24.200936,24.674262,22.440236,11,21,1,1,0,0
35,Vargem do Bom Jesus,19.060123,19.758953,17.193288,2,2,1,3,0,0
36,Armação,16.949528,17.637619,18.128988,9,5,2,1,0,0
37,Cachoeira do Bom Jesus Leste,21.316422,21.949212,19.476031,6,7,4,4,0,0
38,Pântano do Sul,20.373174,21.020945,21.558595,9,8,0,1,0,0
40,Jurere Leste,17.564782,17.61674,16.126519,18,19,10,0,0,0


In [43]:
df_pop.loc[df_pop['cluster'] == 1, df_pop.columns[[0] + list(range(3, df_pop.shape[1]))]]

Unnamed: 0,neighborhood,distance ufsc,distance ifsc,distance udesc,bars,restaurants,gyms,markets,bus stations,cluster
0,Centro,2.709233,0.819431,4.553951,50,50,47,25,9,1
11,Tapera da Base,2.679143,0.763719,4.508555,50,50,50,27,9,1
41,Campeche Norte,2.679143,0.763719,4.508555,50,50,50,27,9,1
48,Ribeirão da Ilha[1],2.679143,0.763719,4.508555,50,50,50,27,9,1
56,Rio Tavares do Norte,2.679143,0.763719,4.508555,50,50,50,27,9,1
60,Retiro,2.721909,0.835825,4.569069,50,50,47,26,9,1
70,Moenda,2.679143,0.763719,4.508555,50,50,50,27,9,1
76,Canto do Lamim,2.679143,0.763719,4.508555,50,50,50,27,9,1
77,Vargem de Fora,2.679143,0.763719,4.508555,50,50,50,27,9,1
79,Autódromo,2.679143,0.763719,4.508555,50,50,50,27,9,1


In [44]:
df_pop.loc[df_pop['cluster'] == 2, df_pop.columns[[0] + list(range(3, df_pop.shape[1]))]]

Unnamed: 0,neighborhood,distance ufsc,distance ifsc,distance udesc,bars,restaurants,gyms,markets,bus stations,cluster
3,Agronômica,2.512783,1.824074,3.151455,24,14,13,6,6,2
4,Saco dos Limões,1.732293,1.824608,3.801057,18,8,4,6,1,2
5,Coqueiros,6.006306,4.279352,7.979264,21,20,16,2,4,2
9,Costeira do Pirajubaé,4.115277,4.920061,5.630187,5,4,3,1,0,2
13,Monte Verde,4.907345,5.918496,3.082996,7,5,3,0,1,2
17,Abraão,7.123377,5.246284,9.00998,26,8,14,2,5,2
19,Lagoa,8.271092,9.52087,6.229953,2,15,0,0,1,2
20,Saco Grande,6.408835,6.98125,4.833196,6,6,2,5,0,2
21,Córrego Grande,2.03858,4.078602,1.483266,24,7,7,2,2,2
26,Carianos,7.615009,7.857676,9.284115,8,6,4,3,3,2


In [45]:
df_pop.loc[df_pop['cluster'] == 3, df_pop.columns[[0] + list(range(3, df_pop.shape[1]))]]

Unnamed: 0,neighborhood,distance ufsc,distance ifsc,distance udesc,bars,restaurants,gyms,markets,bus stations,cluster
1,Capoeiras,6.70418,4.720727,8.470627,50,21,26,6,9,3
2,Trindade,0.868572,2.015336,1.751967,50,34,25,8,5,3
6,Monte Cristo,7.884722,5.856112,9.537097,50,33,24,8,7,3
7,Jardim Atlântico,7.75898,5.730976,9.123975,44,19,16,7,2,3
8,Itacorubi,2.473906,3.98687,0.5137,30,25,31,10,10,3
12,Estreito,5.535307,3.499551,7.184731,50,36,22,7,7,3
14,Balneário,6.149678,4.126147,7.538082,35,27,17,5,6,3
16,Canto,6.43564,4.390978,8.004857,50,39,25,9,10,3
18,Santa Mônica,1.456151,3.239256,0.629822,46,27,22,5,7,3
22,Canasvieiras,19.614088,19.947491,17.959644,46,36,9,9,1,3


### As stated in the report, we are going to choose the cluster number 4 as most appropriate, since it has neighborhoods which are closest to the universities and an acceptable amount of amenities

In [47]:
df_final = df_pop.loc[df_pop['cluster'] == 3, df_pop.columns[[0] + list(range(0, df_pop.shape[1]))]]
df_final.head()

Unnamed: 0,neighborhood,neighborhood.1,latitude,longitude,distance ufsc,distance ifsc,distance udesc,bars,restaurants,gyms,markets,bus stations,cluster
1,Capoeiras,Capoeiras,-27.597333,-48.590008,6.70418,4.720727,8.470627,50,21,26,6,9,3
2,Trindade,Trindade,-27.589383,-48.5224,0.868572,2.015336,1.751967,50,34,25,8,5,3
6,Monte Cristo,Monte Cristo,-27.590583,-48.601635,7.884722,5.856112,9.537097,50,33,24,8,7,3
7,Jardim Atlântico,Jardim Atlântico,-27.575492,-48.596799,7.75898,5.730976,9.123975,44,19,16,7,2,3
8,Itacorubi,Itacorubi,-27.581511,-48.504193,2.473906,3.98687,0.5137,30,25,31,10,10,3


In [48]:
df_final = df_final[df_final['distance ufsc'] < 4.5]

In [49]:
df_final.head()

Unnamed: 0,neighborhood,neighborhood.1,latitude,longitude,distance ufsc,distance ifsc,distance udesc,bars,restaurants,gyms,markets,bus stations,cluster
2,Trindade,Trindade,-27.589383,-48.5224,0.868572,2.015336,1.751967,50,34,25,8,5,3
8,Itacorubi,Itacorubi,-27.581511,-48.504193,2.473906,3.98687,0.5137,30,25,31,10,10,3
18,Santa Mônica,Santa Mônica,-27.589928,-48.509703,1.456151,3.239256,0.629822,46,27,22,5,7,3
23,Pantanal,Pantanal,-27.608578,-48.521142,1.270104,2.680122,2.965065,30,20,5,5,6,3
82,Caieira,Caieira,-27.596691,-48.535275,1.309972,0.781621,3.207379,50,50,15,14,0,3


### As you can see, the neighborhood 'Caieira' does not have a good amount of bus stations. It will be excluded from the results

In [53]:
df_final.reset_index(drop = True, inplace = True)
df_final = df_final[df_final['bus stations'] > 0]
df_final.head(10)

Unnamed: 0,neighborhood,neighborhood.1,latitude,longitude,distance ufsc,distance ifsc,distance udesc,bars,restaurants,gyms,markets,bus stations,cluster
0,Trindade,Trindade,-27.589383,-48.5224,0.868572,2.015336,1.751967,50,34,25,8,5,3
1,Itacorubi,Itacorubi,-27.581511,-48.504193,2.473906,3.98687,0.5137,30,25,31,10,10,3
2,Santa Mônica,Santa Mônica,-27.589928,-48.509703,1.456151,3.239256,0.629822,46,27,22,5,7,3
3,Pantanal,Pantanal,-27.608578,-48.521142,1.270104,2.680122,2.965065,30,20,5,5,6,3


In [55]:
# create map of Florianópolis using latitude and longitude values
map_floripa = folium.Map(location=[-27.593500, -48.558540], zoom_start=12)

# add markers to map
for index, row in df_final.iterrows():
    lat = row['latitude']
    lng = row['longitude']
    label = row['neighborhood']
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius = 5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_floripa)  
    
map_floripa

In [56]:
df_final.to_csv('best_neighborhoods.csv')

# Thanks for your attention! :D