# Second filter: select the final location of the company 📍📍

<img width=400 src=https://media.giphy.com/media/f5IqYTxd7OlY9XHBvw/giphy.gif>

### Importing libraries and dataframes and creating connection with Mongo DB

In [1]:
import pandas as pd
import requests
import json
from pymongo import MongoClient
import geopandas
conn = MongoClient("localhost:27017")
db = conn.get_database("ironhack")
from pymongo import GEOSPHERE
import src.geoqueries as g
import src.maps as m
import src.cleaning as c
import src.cities_coordinates as cc
import folium
from folium import Choropleth, Circle, Marker, Icon, Map
from folium.plugins import HeatMap, MarkerCluster
from keplergl import KeplerGl

In [2]:
df = pd.read_csv("data/df.csv")

#### Dataframe cleaning

In [3]:
df = c.preliminary_cleaning(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.drop_duplicates(inplace=True)


In [4]:
df.sample(3)

Unnamed: 0,company,city,latitude,longitude
46,Eastmedia,New York,40.752409,-73.988676
2,CastTV,San Francisco,37.780716,-122.393913
44,SecondMarket,New York,40.705439,-74.012822


In [5]:
coordinates = c.column_point(df)
df['geometry'] = coordinates

In [6]:
df.head()

Unnamed: 0,company,city,latitude,longitude,geometry
0,Digg,San Francisco,37.764726,-122.394523,"{'type': 'Point', 'coordinates': [-122.394523,..."
1,Powerset,San Francisco,37.778613,-122.395289,"{'type': 'Point', 'coordinates': [-122.395289,..."
2,CastTV,San Francisco,37.780716,-122.393913,"{'type': 'Point', 'coordinates': [-122.393913,..."
3,Grockit,San Francisco,37.775196,-122.419204,"{'type': 'Point', 'coordinates': [-122.419204,..."
4,Dropbox,San Francisco,37.790943,-122.408499,"{'type': 'Point', 'coordinates': [-122.4084994..."


#### Creating geosphere index in MongoDB

In [7]:
db.preferences.create_index([("geometry", GEOSPHERE)])

'geometry_2dsphere'

In [8]:
collection = db.get_collection("preferences")

In [9]:
collection.find_one({})

{'_id': ObjectId('607bfae4b913636b15a4fb88'),
 'name': 'Madison Square Garden',
 'location': {'type': 'Point', 'coordinates': [-73.9935421944, 40.7507519651]},
 'place': 'basket'}

### Adding to the dataframe, for each location, the number of sites that are near it

In [10]:
df = g.places_counts(coordinates,df)

In [11]:
df.head()

Unnamed: 0,company,city,latitude,longitude,geometry,party,starbucks,school,vegan,basket
0,Digg,San Francisco,37.764726,-122.394523,"{'type': 'Point', 'coordinates': [-122.394523,...",7,4,0,1,3
1,Powerset,San Francisco,37.778613,-122.395289,"{'type': 'Point', 'coordinates': [-122.395289,...",7,14,2,3,3
2,CastTV,San Francisco,37.780716,-122.393913,"{'type': 'Point', 'coordinates': [-122.393913,...",6,13,0,0,3
3,Grockit,San Francisco,37.775196,-122.419204,"{'type': 'Point', 'coordinates': [-122.419204,...",30,15,28,25,0
4,Dropbox,San Francisco,37.790943,-122.408499,"{'type': 'Point', 'coordinates': [-122.4084994...",8,14,7,5,0


### Giving a weight to each site 

In [12]:
weight_party = 0.25
weight_starbucks = 0.3
weight_basket = 0.05
weight_school = 0.25
weight_vegan = 0.15

### Rating the number of sites from 1 to 4 

**PARTY/STARBUCKS/SCHOOL/VEGAN GRADES**
    
    [0] : 0  
    [1-8] : 1  
    [8-20] : 2
    [20-30] : 3

**BASCKET GRADES**
    
    [0] : 0  
    [2] : 1  
    [3] : 2
    [4] : 3

### Evaluating the number of sites according to their score and weight

In [13]:
df['party_grades'] = df["party"].apply(g.grades)
df['starbucks_grades'] = df["starbucks"].apply(g.grades)
df['school_grades'] = df["school"].apply(g.grades)
df['vegan_grades'] = df["vegan"].apply(g.grades)
df['basket_grades'] = df["basket"]

In [14]:
df['weighted_party_grades'] = df.apply(lambda row: row['party_grades']*weight_party, axis = 1)
df['weighted_starbucks_grades'] = df.apply(lambda row: row['starbucks_grades']*weight_party, axis = 1)
df['weighted_school_grades'] = df.apply(lambda row: row['school_grades']*weight_party, axis = 1)
df['weighted_vegan_grades'] = df.apply(lambda row: row['vegan_grades']*weight_party, axis = 1)
df['weighted_basket_grades'] = df.apply(lambda row: row['basket_grades']*weight_party, axis = 1)

In [15]:
df['weighted_result'] =  df.apply(lambda row: row['weighted_party_grades'] 
                                          + row['weighted_basket_grades']
                                          + row['weighted_starbucks_grades']
                                          + row['weighted_school_grades'] 
                                          + row['weighted_vegan_grades'],
                                          axis = 1)

In [16]:
df.head()

Unnamed: 0,company,city,latitude,longitude,geometry,party,starbucks,school,vegan,basket,...,starbucks_grades,school_grades,vegan_grades,basket_grades,weighted_party_grades,weighted_starbucks_grades,weighted_school_grades,weighted_vegan_grades,weighted_basket_grades,weighted_result
0,Digg,San Francisco,37.764726,-122.394523,"{'type': 'Point', 'coordinates': [-122.394523,...",7,4,0,1,3,...,1,0,1,3,0.25,0.25,0.0,0.25,0.75,1.5
1,Powerset,San Francisco,37.778613,-122.395289,"{'type': 'Point', 'coordinates': [-122.395289,...",7,14,2,3,3,...,2,1,1,3,0.25,0.5,0.25,0.25,0.75,2.0
2,CastTV,San Francisco,37.780716,-122.393913,"{'type': 'Point', 'coordinates': [-122.393913,...",6,13,0,0,3,...,2,0,0,3,0.25,0.5,0.0,0.0,0.75,1.5
3,Grockit,San Francisco,37.775196,-122.419204,"{'type': 'Point', 'coordinates': [-122.419204,...",30,15,28,25,0,...,2,3,3,0,0.75,0.5,0.75,0.75,0.0,2.75
4,Dropbox,San Francisco,37.790943,-122.408499,"{'type': 'Point', 'coordinates': [-122.4084994...",8,14,7,5,0,...,2,1,1,0,0.25,0.5,0.25,0.25,0.0,1.25


### Selecting the best one based on that criteria

In [17]:
df_final = g.best_option(df)

In [18]:
df_final

Unnamed: 0,company,city,latitude,longitude,geometry,party,starbucks,school,vegan,basket,...,starbucks_grades,school_grades,vegan_grades,basket_grades,weighted_party_grades,weighted_starbucks_grades,weighted_school_grades,weighted_vegan_grades,weighted_basket_grades,weighted_result
38,Box UK,London,51.510774,-0.139245,"{'type': 'Point', 'coordinates': [-0.1392447, ...",27,23,24,26,0,...,3,3,3,0,0.75,0.75,0.75,0.75,0.0,3.0


In [19]:
coordinates_ = [list(df_final['latitude'])[0],list(df_final['longitude'])[0]]

In [20]:
coordinates_

[51.5107737, -0.1392447]

### Exporting dataframe

In [21]:
df_final.to_csv('data/df_final.csv')

## 🏆🏆 The coordinates of the selected location: `[51.5107737, -0.1392447]`🏆🏆

### Let's see on a map everything that is close to that location 👀

In [22]:
london = {'type': 'Point', 'coordinates': [51.509865, -0.118092]}
map_lnd = Map(location=london['coordinates'],zoom_start=15)

#### First we locate the gaming company

In [23]:
m.final_location(coordinates_,map_lnd)
#map_lnd

#### Now we create a new dataframe with the sites that are nearby 

In [24]:
df_places = g.near_option(df_final)

In [25]:
df_places.head()

Unnamed: 0,_id,name,location,place,coordinates
0,607bfb26b913636b15a4fc4c,Starbucks,"{'type': 'Point', 'coordinates': [-0.139154, 5...",starbucks,"[51.510288, -0.139154]"
1,607bfaf6b913636b15a4fba5,The Cuckoo Club,"{'type': 'Point', 'coordinates': [-0.137770716...",party,"[51.5099619985, -0.1377707164]"
2,607bfaf6b913636b15a4fb98,Tramp,"{'type': 'Point', 'coordinates': [-0.137835548...",party,"[51.5083318933, -0.1378355488]"
3,607bfb0db913636b15a4fc06,Malvern House Picadilly Centre,"{'type': 'Point', 'coordinates': [-0.134370783...",school,"[51.5101933333, -0.1343707833]"
4,607bfb26b913636b15a4fc4f,Starbucks,"{'type': 'Point', 'coordinates': [-0.139378, 5...",starbucks,"[51.513952, -0.139378]"


#### Now, we locate everything that is near the gaming company

In [26]:
m.nearby(df_places,map_lnd)
map_lnd

In [27]:
map_lnd.save('maps/final.html')

### Let's do a Kepler map! 

First, we have to clean it

In [28]:
df_places = c.kepler_cleaning(df_places, df_final)

In [29]:
df_places.head()

Unnamed: 0,name,location,place,coordinates,latitude,longitude
0,Starbucks,"{'type': 'Point', 'coordinates': [-0.139154, 5...",starbucks,"[51.510288, -0.139154]",51.510288,-0.139154
1,The Cuckoo Club,"{'type': 'Point', 'coordinates': [-0.137770716...",party,"[51.5099619985, -0.1377707164]",51.509962,-0.137771
2,Tramp,"{'type': 'Point', 'coordinates': [-0.137835548...",party,"[51.5083318933, -0.1378355488]",51.508332,-0.137836
3,Malvern House Picadilly Centre,"{'type': 'Point', 'coordinates': [-0.134370783...",school,"[51.5101933333, -0.1343707833]",51.510193,-0.134371
4,Starbucks,"{'type': 'Point', 'coordinates': [-0.139378, 5...",starbucks,"[51.513952, -0.139378]",51.513952,-0.139378


In [35]:
#with open('maps/config.py', 'w') as f:
#    f.write('config = {}'.format(map_kepler.config))

In [39]:
%run maps/config.py

In [41]:
map_kepler= KeplerGl(height=700, weight = 500, config=config)
map_kepler.add_data(data=df_places, name='companies')
map_kepler

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


KeplerGl(config={'version': 'v1', 'config': {'visState': {'filters': [], 'layers': [{'id': 'ns4p79v', 'type': …

In [31]:
map_kepler.save_to_html(file_name = "maps/final_kepler.html", read_only = True)

Map saved to maps/final_kepler.html!
