# Welcome to the Notebook for my final submission

##### You will notice that I supressed the part of the code that contains my API access data, for security reasons.  
##### Enjoy the rest of the code! :D

In [1]:
#importing libraries
import pandas as pd
import numpy as np
import warnings
import urllib.request
import requests # library to handle requests
import folium

from geopy.geocoders import Nominatim
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
from sklearn import preprocessing

warnings.filterwarnings('ignore')

##### So, for starters, let's craft our database

In [2]:
#First, let's get a list of Florianópolis'neighborhoods! 
url = 'https://pt.wikipedia.org/wiki/Lista_de_distritos_e_bairros_de_Florian%C3%B3polis'
req = urllib.request.urlopen(url)
article = req.read().decode()

with open('neighb.html', 'w') as code:
    code.write(article)

In [3]:
article = open('neighb.html').read()
soup = BeautifulSoup(article, 'html.parser')
#get that table
tables = soup.find_all('table', class_='sortable')

In [4]:
# Search through the tables for the one with the headings we want.
# The wikipedia page has two tables: one for districts and one for neighborhood. 
# We are interested in the one that contains the neighborhoods
for table in tables:
    ths = table.find_all('th')
    headings = [th.text.strip() for th in ths]
    if headings[:5] == ['Posição', 'Bairro', 'População']:
        break

##### Please note that in the following section we will perform the table scraping while obtaining the geospatial data through an API!

In [5]:
key = 'YOUR_KEY_HERE'

In [6]:
#Lembrar de refazer essa parte e postar o metodo alternativo no medium
from opencage.geocoder import OpenCageGeocode
# Extract the columns we want and write to a colon-delimited text file.
with open('pop_city.csv', 'w') as pop:
    print('position,', 'neighborhood,', 'population,', 'latitude,', 'longitude', file = pop)
    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if (not tds) or (len(tds)<3) :
            continue
        position, neighborhood, population = [td.text.strip() for td in tds[:3]]
        population = population.replace('.', '')
        #The following bit is about the API query:
        geocoder = OpenCageGeocode(key)
        query = neighborhood + ', Florianopolis, Brazil'
        results = geocoder.geocode(query)

        latitude = str(results[0]['geometry']['lat'])
        longitude = str(results[0]['geometry']['lng']) 
        print(', '.join([position, neighborhood, population, latitude, longitude]), file=pop)

##### Let us open the .csv file and see if it turned out ok:

In [7]:
df_pop = pd.read_csv('pop_city.csv', sep=r'\s*,\s*')
df_pop.head()

Unnamed: 0,position,neighborhood,population,latitude,longitude
0,1,Centro,44074,-27.597203,-48.549481
1,2,Capoeiras,19323,-27.597333,-48.590008
2,3,Trindade,15031,-27.589383,-48.5224
3,4,Agronômica,14591,-27.578145,-48.535717
4,5,Saco dos Limões,13771,-27.608268,-48.534343


In [8]:
df_pop.tail()

Unnamed: 0,position,neighborhood,population,latitude,longitude
80,81,Forte,266,-27.421111,-48.432848
81,82,Ingleses Norte,203,-27.418779,-48.408051
82,83,Caieira,170,-27.596691,-48.535275
83,84,Praia Brava,130,-27.397613,-48.415825
84,85,Praia Mole,108,-27.603017,-48.433248


##### Seems ok to me!

### Now, let's get the coordinates for the universities in Florianópolis
As it happens, the top rated universities in the city (according to the Brazilian index RUF) are:

* UFSC - Federal University of Santa Catarina  
* IFSC - Federal Institude of Santa Catarina  
* UDESC - University of the State of Santa Cartarina   

So, let's obtain their coordinates, shall we?

In [9]:
# Because this dataframe will only have 3 rows and 4 columns, it is hardly worth the effort to automate all of it.
# Let's fill some columns manually
unis = pd.DataFrame(columns = ['name', 'id', 'latitude', 'longitude'])
unis['name'] = ['UFSC', 'IFSC', 'UDESC']
unis['id'] = ['4baa92c1f964a520ec753ae3', '4d496f119544a093946b28e7', '540d889a498efcfbb6c4b9a9']
unis.head()

Unnamed: 0,name,id,latitude,longitude
0,UFSC,4baa92c1f964a520ec753ae3,,
1,IFSC,4d496f119544a093946b28e7,,
2,UDESC,540d889a498efcfbb6c4b9a9,,


In [47]:
# Now the fun bit
# Firing up that Foursquare API
CLIENT_ID = 'YOUR_ID_HERE' # your Foursquare ID
CLIENT_SECRET = 'YOUR_SECRET_HERE' # your Foursquare Secret
VERSION = '20180604'
LIMIT = 100
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

address = 'YOUR ADDRESS'
#geolocator = Nominatim(user_agent="foursquare_agent")
#ocation = geolocator.geocode(address)
#latitude = location.latitude
#longitude = location.longitude
#print(latitude, longitude)

Your credentails:
CLIENT_ID: YOUR_ID_HERE
CLIENT_SECRET:YOUR_SECRET_HERE


In [13]:
i = 0
for var_id in unis['id']:
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(var_id, CLIENT_ID, CLIENT_SECRET, VERSION)
    result = requests.get(url).json()
    unis['latitude'][i] = result['response']['venue']['location']['lat']
    unis['longitude'][i] = result['response']['venue']['location']['lng']
    i = i + 1

unis.head()

Unnamed: 0,name,id,latitude,longitude
0,UFSC,4baa92c1f964a520ec753ae3,-27.5972,-48.522
1,IFSC,4d496f119544a093946b28e7,-27.5935,-48.5423
2,UDESC,540d889a498efcfbb6c4b9a9,-27.5861,-48.505


##### Now that we know the locations of the top universities and neighborhoods, let's measure the distances between the neighborhoods and the universities and calculate the mean of the distances

# Notice: we are not going to calculate the distance here, but the distance score. We assume:  
* The closer to the top universities, the higher the score
* More weigh will be given to the distance between higher-ranked universities

In [15]:
from math import sin, cos, sqrt, atan2, radians
R = 6373.0 #Earth's approximate radius
dist = np.zeros(5)

df_pop['distance ufsc'] = float(0)
df_pop['distance ifsc'] = float(0)
df_pop['distance udesc'] = float(0)
for ind in df_pop.index:
    #getting the neighborhood's position in radians
    neigh_lat = np.radians(df_pop['latitude'][ind])
    neigh_lng = np.radians(df_pop['longitude'][ind])
    for ind2 in unis.index:
        #getting the universities' position in radians
        uni_lat = np.radians(unis['latitude'][ind2])
        uni_long = np.radians(unis['longitude'][ind2])
        #calculate the distance for each university
        dlat = uni_lat - neigh_lat
        dlon = uni_long - neigh_lng
        a = sin(dlat / 2)**2 + cos(neigh_lat) * cos(uni_lat) * sin(dlon / 2)**2
        c = 2 * atan2(sqrt(a), sqrt(1 - a))
        dist[ind2] = R * c
    df_pop['distance ufsc'][ind] = dist[0]
    df_pop['distance ifsc'][ind] = dist[1]
    df_pop['distance udesc'][ind] = dist[2]
    #falta calcular a média das distâncias e adicionar ao dataframe df_pop(dos bairros)


In [16]:
print('Please note that the distances are given in kilometers')
df_pop.head()

Please note that the distances are given in kilometers


Unnamed: 0,position,neighborhood,population,latitude,longitude,distance ufsc,distance ifsc,distance udesc
0,1,Centro,44074,-27.597203,-48.549481,2.709233,0.819431,4.553951
1,2,Capoeiras,19323,-27.597333,-48.590008,6.70418,4.720727,8.470627
2,3,Trindade,15031,-27.589383,-48.5224,0.868572,2.015336,1.751967
3,4,Agronômica,14591,-27.578145,-48.535717,2.512783,1.824074,3.151455
4,5,Saco dos Limões,13771,-27.608268,-48.534343,1.732293,1.824608,3.801057


##### Scoring and normalizing the distances:

In [17]:
df_pop_scored = df_pop # Making a copy of the DataFrame just in case
df_pop_scored.head()

Unnamed: 0,position,neighborhood,population,latitude,longitude,distance ufsc,distance ifsc,distance udesc
0,1,Centro,44074,-27.597203,-48.549481,2.709233,0.819431,4.553951
1,2,Capoeiras,19323,-27.597333,-48.590008,6.70418,4.720727,8.470627
2,3,Trindade,15031,-27.589383,-48.5224,0.868572,2.015336,1.751967
3,4,Agronômica,14591,-27.578145,-48.535717,2.512783,1.824074,3.151455
4,5,Saco dos Limões,13771,-27.608268,-48.534343,1.732293,1.824608,3.801057


In [18]:
for ind in ['distance ufsc', 'distance ifsc', 'distance udesc']:
    df_pop_scored[ind] = -1 * (df_pop_scored[ind] - max(df_pop_scored[ind]))
                               
df_pop_scored.head()

Unnamed: 0,position,neighborhood,population,latitude,longitude,distance ufsc,distance ifsc,distance udesc
0,1,Centro,44074,-27.597203,-48.549481,56.304964,57.115665,56.514663
1,2,Capoeiras,19323,-27.597333,-48.590008,52.310018,53.214369,52.597987
2,3,Trindade,15031,-27.589383,-48.5224,58.145626,55.919761,59.316647
3,4,Agronômica,14591,-27.578145,-48.535717,56.501414,56.111022,57.917158
4,5,Saco dos Limões,13771,-27.608268,-48.534343,57.281905,56.110488,57.267556


##### Now, for the normalization:

In [19]:
for ind in ['distance ufsc', 'distance ifsc', 'distance udesc']:
    df_pop_scored[ind] = ((df_pop_scored[ind] - min(df_pop_scored[ind]))/(max(df_pop_scored[ind])- min(df_pop_scored[ind])))

df_pop_scored.head()

Unnamed: 0,position,neighborhood,population,latitude,longitude,distance ufsc,distance ifsc,distance udesc
0,1,Centro,44074,-27.597203,-48.549481,0.96763,0.999026,0.93328
1,2,Capoeiras,19323,-27.597333,-48.590008,0.898975,0.930787,0.8686
2,3,Trindade,15031,-27.589383,-48.5224,0.999263,0.978108,0.979551
3,4,Agronômica,14591,-27.578145,-48.535717,0.971006,0.981453,0.95644
4,5,Saco dos Limões,13771,-27.608268,-48.534343,0.98442,0.981444,0.945713


##### Attributing weighs to the distances:

In [20]:
df_pop_scored['distance ufsc'] = 4 * df_pop_scored['distance ufsc']
df_pop_scored['distance ifsc'] = 2 * df_pop_scored['distance ifsc']
df_pop_scored.head()

Unnamed: 0,position,neighborhood,population,latitude,longitude,distance ufsc,distance ifsc,distance udesc
0,1,Centro,44074,-27.597203,-48.549481,3.870522,1.998051,0.93328
1,2,Capoeiras,19323,-27.597333,-48.590008,3.595901,1.861574,0.8686
2,3,Trindade,15031,-27.589383,-48.5224,3.997052,1.956215,0.979551
3,4,Agronômica,14591,-27.578145,-48.535717,3.884026,1.962906,0.95644
4,5,Saco dos Limões,13771,-27.608268,-48.534343,3.937678,1.962887,0.945713


##### Adding the score:

In [21]:
df_pop_scored['distance score'] = df_pop_scored['distance ufsc'] + df_pop_scored['distance ifsc'] + df_pop_scored['distance udesc']
# Removing the bits we are not gonna use anymore:
del(df_pop_scored['distance ufsc'])
del(df_pop_scored['distance ifsc'])
del(df_pop_scored['distance udesc'])

df_pop_scored.head()

Unnamed: 0,position,neighborhood,population,latitude,longitude,distance score
0,1,Centro,44074,-27.597203,-48.549481,6.801852
1,2,Capoeiras,19323,-27.597333,-48.590008,6.326074
2,3,Trindade,15031,-27.589383,-48.5224,6.932819
3,4,Agronômica,14591,-27.578145,-48.535717,6.803372
4,5,Saco dos Limões,13771,-27.608268,-48.534343,6.846279


In [22]:
df_pop = df_pop_scored

##### Getting the neighborhoods' areas to calculate the population density

In [23]:
area = pd.read_csv('areas.csv')
area.columns = ['neighborhood', 'area']
print('Areas in km square:')
area.head()

Areas in km square:


Unnamed: 0,neighborhood,area
0,Centro,5.368
1,Capoeiras,2.816
2,Trindade,3.32
3,Agronômica,1.964
4,Saco dos Limões,3.106


##### Well, I have to confess that I personally crafted this database by ~~painstakingly adding the entries one by one from an unstructured governmental database~~ passionately dedicating my time to the quality of the data and, because of that, I know for sure that the dataframe for the areas is in the same order as the one we used before, so qe can go ahead and just add the data to a new column

In [24]:
df_pop['area'] = area['area'] # I should have thouhgt the variable's names through
df_pop.head()

Unnamed: 0,position,neighborhood,population,latitude,longitude,distance score,area
0,1,Centro,44074,-27.597203,-48.549481,6.801852,5.368
1,2,Capoeiras,19323,-27.597333,-48.590008,6.326074,2.816
2,3,Trindade,15031,-27.589383,-48.5224,6.932819,3.32
3,4,Agronômica,14591,-27.578145,-48.535717,6.803372,1.964
4,5,Saco dos Limões,13771,-27.608268,-48.534343,6.846279,3.106


##### Now, for the calculation of the population density:

In [25]:
df_pop['population density'] = df_pop['population'] / df_pop['area']
df_pop.head(50)

Unnamed: 0,position,neighborhood,population,latitude,longitude,distance score,area,population density
0,1,Centro,44074,-27.597203,-48.549481,6.801852,5.368,8210.506706
1,2,Capoeiras,19323,-27.597333,-48.590008,6.326074,2.816,6861.860795
2,3,Trindade,15031,-27.589383,-48.5224,6.932819,3.32,4527.409639
3,4,Agronômica,14591,-27.578145,-48.535717,6.803372,1.964,7429.226069
4,5,Saco dos Limões,13771,-27.608268,-48.534343,6.846279,3.106,4433.676755
5,6,Coqueiros,13592,-27.611202,-48.580844,6.397602,1.751,7762.421473
6,7,Monte Cristo,12634,-27.590583,-48.601635,6.187591,0.588,21486.394558
7,8,Jardim Atlântico,12047,-27.575492,-48.596799,6.207435,1.786,6745.240761
8,9,Itacorubi,10307,-27.58151,-48.504193,6.773944,12.756,808.011916
9,10,Costeira do Pirajubaé,9301,-27.634175,-48.522797,6.543974,6.372,1459.667294


In [26]:
df_pop.tail(40)

Unnamed: 0,position,neighborhood,population,latitude,longitude,distance score,area,population density
45,46,Morro das Pedras,1527,-27.714329,-48.506549,5.468361,1.533,996.086106
46,47,Alto Ribeirão Leste,1493,-27.703926,-48.533895,5.619507,2.293,651.11208
47,48,Alto Ribeirão,1487,-27.703926,-48.533895,5.619507,5.396,275.5745
48,49,Ribeirão da Ilha[1],1376,-27.59667,-48.54917,6.806619,4.511,305.032144
49,50,Santo Antônio,1352,-27.975929,-48.941973,0.0,21.527,62.80485
50,51,Sambaqui,1345,-27.492403,-48.526831,5.717579,1.266,1062.401264
51,52,Ingleses Sul,1323,-27.440523,-48.364711,4.282039,1.963,673.968416
52,53,Bom Abrigo,1262,-27.611804,-48.59494,6.236446,0.179,7050.27933
53,54,Jurere Oeste,1221,-27.441564,-48.491754,5.001929,4.22,289.336493
54,55,Porto da Lagoa,1200,-27.630842,-48.472881,6.293681,2.373,505.689001


##### With that, we found out that the population density is not one of the attrative factors of this city; because of that, we are gonna exclude it from the analysis

In [27]:
del(df_pop['population density'])
del(df_pop['population'])
del(df_pop['position'])
del(df_pop['area'])
df_pop.head()

Unnamed: 0,neighborhood,latitude,longitude,distance score,area
0,Centro,-27.597203,-48.549481,6.801852,5.368
1,Capoeiras,-27.597333,-48.590008,6.326074,2.816
2,Trindade,-27.589383,-48.5224,6.932819,3.32
3,Agronômica,-27.578145,-48.535717,6.803372,1.964
4,Saco dos Limões,-27.608268,-48.534343,6.846279,3.106


##### Ok, so proximity to universities has gotta be an important factor right? I mean, a tech startup has to be as close as possible to innovative, intelligent people eager to make a difference and/or a name for themselves, so let's rank those neighborhoods by the distance we just calculated.

In [30]:
# Ranking:
df_pop = df_pop.sort_values(by = ['distance score'], ascending = False)
df_pop = df_pop.reset_index(drop = True)
df_pop.head()

Unnamed: 0,neighborhood,latitude,longitude,distance score
0,Trindade,-27.589383,-48.5224,6.932819
1,Caieira,-27.596691,-48.535275,6.9216
2,Lagoa,-27.600524,-48.514517,6.904751
3,Santa Mônica,-27.589928,-48.509703,6.868143
4,Pantanal,-27.608578,-48.521142,6.861928


##### Arbitrarily, we are going to select the 25 neighborhoods that are closest to the universities and exclude the rest

In [31]:
df_filtered = df_pop.loc[df_pop.index.isin(range(0,25))]
df_filtered.tail(10)

Unnamed: 0,neighborhood,latitude,longitude,distance score
15,Agronômica,-27.578145,-48.535717,6.803372
16,Centro,-27.597203,-48.549481,6.801852
17,Retiro,-27.5973,-48.54961,6.800158
18,Córrego Grande,-27.599016,-48.50142,6.784649
19,Itacorubi,-27.58151,-48.504193,6.773944
20,José Mendes,-27.612261,-48.546274,6.734096
21,João Paulo,-27.560007,-48.511119,6.580499
22,Canto da Lagoa,-27.61285,-48.485435,6.548004
23,Costeira do Pirajubaé,-27.634175,-48.522797,6.543974
24,Monte Verde,-27.559504,-48.496106,6.496662


##### Now, the really, REALLY fun bit :D we are gonna select some amenities to look for in those neighborhoods:
* Bars
* Restaurants
* Gyms
* Markets
* Bus stops

In [32]:
# For this bit, it is more practical if the columns we are gonna use for this search are in Portuguese, but
# don't worry, everything will be translated.
df_filtered['bar'] = 0 # number of bars in the neighborhood
df_filtered['restaurante'] = 0 # number of restaurants in the neighborhood
df_filtered['academia'] = 0 # number of gyms in the neighborhood
df_filtered['mercado'] = 0 # number of markets in the neighborhood 
df_filtered['ponto de ônibus'] = 0 # number of bus stops in the neighborhood
df_filtered.head()

Unnamed: 0,neighborhood,latitude,longitude,distance score,bar,restaurante,academia,mercado,ponto de ônibus
0,Trindade,-27.589383,-48.5224,6.932819,0,0,0,0,0
1,Caieira,-27.596691,-48.535275,6.9216,0,0,0,0,0
2,Lagoa,-27.600524,-48.514517,6.904751,0,0,0,0,0
3,Santa Mônica,-27.589928,-48.509703,6.868143,0,0,0,0,0
4,Pantanal,-27.608578,-48.521142,6.861928,0,0,0,0,0


In [33]:
radius = 500 #Defining the search radius for 1km from the neighborhood center
for amenity in ['bar', 'restaurante', 'academia', 'mercado', 'ponto de ônibus']:
    search_query = amenity
    for ind in df_filtered.index:
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, df_filtered['latitude'][ind], df_filtered['longitude'][ind], VERSION, search_query, radius, LIMIT)
        results = requests.get(url).json()# Performing API search
        venues = results['response']['venues']
        dataframe = json_normalize(venues)
        df_filtered[amenity][ind] = dataframe.shape[0]
df_filtered.head()

Unnamed: 0,neighborhood,latitude,longitude,distance score,bar,restaurante,academia,mercado,ponto de ônibus
0,Trindade,-27.589383,-48.5224,6.932819,22,11,8,5,3
1,Caieira,-27.596691,-48.535275,6.9216,0,1,0,0,0
2,Lagoa,-27.600524,-48.514517,6.904751,10,9,4,0,2
3,Santa Mônica,-27.589928,-48.509703,6.868143,22,8,5,1,3
4,Pantanal,-27.608578,-48.521142,6.861928,8,6,2,2,1


##### Now our dataset is ready. Next, we are going to delete the columns that we are not going to use anymore and then normalize the numerical data

In [34]:
min_max_scaler = preprocessing.MinMaxScaler()
df_normalized = pd.DataFrame()
df_normalized['neighborhood'] = df_filtered['neighborhood']
for ind in ['bar', 'restaurante', 'academia', 'mercado', 'ponto de ônibus']:
    A = pd.DataFrame(df_filtered[ind])#.values #returns an array
    x_scaled = min_max_scaler.fit_transform(A)
    df_normalized[ind] = pd.DataFrame(x_scaled)
    
df_normalized.head(25)

Unnamed: 0,neighborhood,bar,restaurante,academia,mercado,ponto de ônibus
0,Trindade,0.44,0.204082,0.421053,0.238095,0.75
1,Caieira,0.0,0.0,0.0,0.0,0.0
2,Lagoa,0.2,0.163265,0.210526,0.0,0.5
3,Santa Mônica,0.44,0.142857,0.263158,0.047619,0.75
4,Pantanal,0.16,0.102041,0.105263,0.095238,0.25
5,Saco dos Limões,0.12,0.102041,0.105263,0.095238,0.25
6,Autódromo,1.0,1.0,1.0,1.0,1.0
7,Ribeirão da Ilha[1],1.0,1.0,1.0,1.0,1.0
8,Vargem de Fora,1.0,1.0,1.0,1.0,1.0
9,Tapera da Base,1.0,1.0,1.0,1.0,1.0


In [36]:
df_normalized['distance score'] = df_pop['distance score']
A = pd.DataFrame(df_normalized['distance score'])
x_scaled = min_max_scaler.fit_transform(A)
df_normalized[['distance score']] = pd.DataFrame(x_scaled)
df_normalized.head(25)

Unnamed: 0,neighborhood,bar,restaurante,academia,mercado,ponto de ônibus,distance score
0,Trindade,0.44,0.204082,0.421053,0.238095,0.75,1.0
1,Caieira,0.0,0.0,0.0,0.0,0.0,0.974278
2,Lagoa,0.2,0.163265,0.210526,0.0,0.5,0.935647
3,Santa Mônica,0.44,0.142857,0.263158,0.047619,0.75,0.851713
4,Pantanal,0.16,0.102041,0.105263,0.095238,0.25,0.837464
5,Saco dos Limões,0.12,0.102041,0.105263,0.095238,0.25,0.801584
6,Autódromo,1.0,1.0,1.0,1.0,1.0,0.710655
7,Ribeirão da Ilha[1],1.0,1.0,1.0,1.0,1.0,0.710655
8,Vargem de Fora,1.0,1.0,1.0,1.0,1.0,0.710655
9,Tapera da Base,1.0,1.0,1.0,1.0,1.0,0.710655


##### What about translating those column names now?

In [37]:
df_normalized.columns = ['Neighborhood', 'Bars', 'Restaurants', 'Gyms', 'Markets', 'Bus Stops', 'Distance Score']
df_normalized.head()

Unnamed: 0,Neighborhood,Bars,Restaurants,Gyms,Markets,Bus Stops,Distance Score
0,Trindade,0.44,0.204082,0.421053,0.238095,0.75,1.0
1,Caieira,0.0,0.0,0.0,0.0,0.0,0.974278
2,Lagoa,0.2,0.163265,0.210526,0.0,0.5,0.935647
3,Santa Mônica,0.44,0.142857,0.263158,0.047619,0.75,0.851713
4,Pantanal,0.16,0.102041,0.105263,0.095238,0.25,0.837464


##### Scoring used:  
* Distance Score = 10
* Bus Stop = 5
* Markets = 5
* Gyms = 3
* Restaurants = 5
* Bars = 5

In [40]:
score_prox = int(input('Enter the distance to the universities importance(0-10):'))
score_bus = int(input('Enter bus stop importance (0-10):'))
score_mkt = int(input('Enter market importance (0-10):'))
score_gym = int(input('Enter gym importance (0-10):'))
score_rest = int(input('Enter restaurant importance (0-10):'))
score_bar = int(input('Enter bar importance (0-10):'))
print('Acknowledged!')

Enter the distance to the universities importance(0-10):10
Enter bar importance (0-10):5
Enter restaurant importance (0-10):5
Enter gym importance (0-10):3
Enter market importance (0-10):5
Enter bus stop importance (0-10):5


In [41]:
df_normalized['Final Score'] = (score_prox * df_normalized['Distance Score']) + (score_bus * df_normalized['Bus Stops'])
+ (score_mkt * df_normalized['Markets']) + (score_gym * df_normalized['Gyms']) 
+ (score_rest * df_normalized['Restaurants']) + (score_bar * df_normalized['Bars'])
df_normalized.head(25)

Unnamed: 0,Neighborhood,Bars,Restaurants,Gyms,Markets,Bus Stops,Distance Score,Final Score
0,Trindade,0.44,0.204082,0.421053,0.238095,0.75,1.0,13.75
1,Caieira,0.0,0.0,0.0,0.0,0.0,0.974278,9.742779
2,Lagoa,0.2,0.163265,0.210526,0.0,0.5,0.935647,11.856466
3,Santa Mônica,0.44,0.142857,0.263158,0.047619,0.75,0.851713,12.267132
4,Pantanal,0.16,0.102041,0.105263,0.095238,0.25,0.837464,9.62464
5,Saco dos Limões,0.12,0.102041,0.105263,0.095238,0.25,0.801584,9.265841
6,Autódromo,1.0,1.0,1.0,1.0,1.0,0.710655,12.106548
7,Ribeirão da Ilha[1],1.0,1.0,1.0,1.0,1.0,0.710655,12.106548
8,Vargem de Fora,1.0,1.0,1.0,1.0,1.0,0.710655,12.106548
9,Tapera da Base,1.0,1.0,1.0,1.0,1.0,0.710655,12.106548


# Aaaaand the best neighborhoods in Florianópolis (Brazil) to open a tech startup are:

In [46]:
print()
print()
print('Here is the ranking acording to your criteria:')
df_normalized = df_normalized.sort_values(by = ['Final Score'], ascending = False)
df_normalized = df_normalized.reset_index(drop = True)
df_presentation = pd.DataFrame()
df_presentation['Neighborhood'] = df_normalized['Neighborhood']
df_presentation['Final Score'] = df_normalized['Final Score']
df_presentation.head(10)



Here is the ranking acording to your criteria:


Unnamed: 0,Neighborhood,Final Score
0,Trindade,13.75
1,Santa Mônica,12.267132
2,Vargem de Fora,12.106548
3,Campeche Norte,12.106548
4,Rio Tavares do Norte,12.106548
5,Moenda,12.106548
6,Canto do Lamim,12.106548
7,Tapera da Base,12.106548
8,Lagoa Pequena,12.106548
9,Ribeirão da Ilha[1],12.106548


# Thanks for your attention! :D