## Proyecto

In [21]:
import pandas as pd
from dotenv import load_dotenv
load_dotenv()
import src.limpieza as lp
import src.requests as req
from pymongo import MongoClient
import json
import numpy as np
import geopandas
from folium import Map, Marker, Icon, FeatureGroup, LayerControl, Choropleth
import matplotlib.pyplot as plt
import os
import requests

### Query a MongoDb - Crunchbase Dataset

In [5]:
#conecto con MongoDb y hago un query para sacar las empresas del país que nos interesa
mongodbURL = f"mongodb://localhost/datamad0620"
client = MongoClient(mongodbURL, connectTimeoutMS=2000, serverSelectionTimeoutMS=2000)
db = client.get_database("datamad0620")

In [6]:
result= lp.companiesMongo("SGP")
pd.DataFrame(result).sort_values(by="total_money_raised", ascending=False).head(5)


Unnamed: 0,_id,name,category_code,founded_year,total_money_raised,offices
40,52cdef7f4bab8bd67529bd7e,YOOSE,mobile,2008.0,€25k,"[{'description': 'Germany', 'address1': 'Fried..."
31,52cdef7e4bab8bd67529adab,Nexway,ecommerce,2002.0,€14M,"[{'description': 'Nexway HQ', 'address1': '1 b..."
4,52cdef7c4bab8bd67529815a,Skyscanner,web,2001.0,£2.5M,"[{'description': 'Skyscanner Ltd', 'address1':..."
5,52cdef7c4bab8bd67529869f,eYeka,software,2006.0,$9.34M,"[{'description': 'HQ', 'address1': '79 rue la ..."
32,52cdef7e4bab8bd67529b07c,Majitek,software,,$7.5M,"[{'description': '', 'address1': 'SGX Centre 2..."


### Limpieza de datos


In [7]:
#Sacamos toda la info de la columna "offices" que contiene listas con direcciones de todas las oficinas
#concatenamos ambos dataframes y renombramos la columna de "id"
#Volvemos a filtrar por país para eliminar las oficinas que no nos interesan


data = lp.limipiezaData(result, "SGP")
data.head(5)

Unnamed: 0,name,company_id,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude
0,Alstrasoft,52cdef7c4bab8bd675297e8f,,14 Robinson Road #13-00 Far East Finance Buil,Suite 100,48545,Singapore,,SGP,,
1,mig33,52cdef7c4bab8bd675297fda,mig33 Headquarters,111 North Bridge Road,Peninsula Plaza #26-01,179098,Singapore,,SGP,37.580304,-122.343679
2,tyntec,52cdef7c4bab8bd675298060,Singapore,51 Goldhill Plaza,#11-01/02,308900,Singapore,,SGP,,
3,OANDA,52cdef7c4bab8bd67529810e,OANDA Asia Pacific,50 Collyer Quay,#04-03 OUE Bayfront,49321,Singapore,,SGP,,
4,Skyscanner,52cdef7c4bab8bd67529815a,Skyscanner Ltd,OFC Level 40,10 Collyer Quay,49315,Singapore,,SGP,,


In [8]:
# Transformo la latitus y longitud a formato geopoint para poder después visualizarlo en el mapa
data["geopoint"] = data.apply(lp.transformToGeoPoint, axis=1)

In [11]:
gdf = geopandas.GeoDataFrame(data, geometry=geopandas.points_from_xy(data.longitude, data.latitude))


In [12]:
# Elimino las columnas que no vamos a utilizar y reseteamos el índice

drop_cols = ['company_id','state_code', 'country_code','description','address1','address2','zip_code','city']
data = gdf.drop(drop_cols, axis =1)
data=data.dropna(subset=['longitude'])
data=data.dropna(subset=['latitude'])
data.reset_index
data.head(5)

Unnamed: 0,name,latitude,longitude,geopoint,geometry
1,mig33,37.580304,-122.343679,"{'type': 'Point', 'coordinates': [-122.343679,...",POINT (-122.34368 37.58030)
6,Acme Mobile,1.278031,103.84435,"{'type': 'Point', 'coordinates': [103.84435, 1...",POINT (103.84435 1.27803)
7,InMobi,1.284082,103.851942,"{'type': 'Point', 'coordinates': [103.851942, ...",POINT (103.85194 1.28408)
9,Fonemesh,1.289407,103.849962,"{'type': 'Point', 'coordinates': [103.849962, ...",POINT (103.84996 1.28941)
10,Fotegrafik,1.352083,103.819836,"{'type': 'Point', 'coordinates': [103.819836, ...",POINT (103.81984 1.35208)


In [15]:
#hacemos una primera visualización en el mapa

m = Map(location=[1.290270,103.851959],zoom_start=12)
for i, row in data.iterrows():
    Marker(
        location=row[["latitude","longitude"]],
        tooltip=row["name"],
    ).add_to(m)
m

### Aeropuerto

In [16]:
# Comprobamos que hay un aeropuerto en Singapur y sacamos sus coordenadas

req.geocode("Singapur International Airport")

{'type': 'Point', 'coordinates': [122.54365, 10.71379]}

In [22]:
def gplaces(query,radio):
    apiKey = os.getenv("GPLACES_APIKEY")
    lat = 1.298370
    long = 103.891290
    radius = {radio}
    tipo = {query}

    url = url = f"https://maps.googleapis.com/maps/api/place/nearbysearch/json?location={lat},{long}&radius={radius}&type={tipo}&key=AIzaSyBr_5sdn9AKCx4Ptlwgbb7dIwojyiEOIdI"
    print(url)
    res = requests.get(url)
    data = res.json()
    return data

gplaces("school",2000)
result

https://maps.googleapis.com/maps/api/place/nearbysearch/json?location=1.29837,103.89129&radius={2000}&type={'school'}&key=AIzaSyBr_5sdn9AKCx4Ptlwgbb7dIwojyiEOIdI


[{'_id': ObjectId('52cdef7c4bab8bd675297e8f'),
  'name': 'Alstrasoft',
  'category_code': 'software',
  'founded_year': 1800,
  'total_money_raised': '$0',
  'offices': [{'description': None,
    'address1': '14 Robinson Road #13-00 Far East Finance Buil',
    'address2': 'Suite 100',
    'zip_code': '048545',
    'city': 'Singapore',
    'state_code': None,
    'country_code': 'SGP',
    'latitude': None,
    'longitude': None}]},
 {'_id': ObjectId('52cdef7c4bab8bd675297fda'),
  'name': 'mig33',
  'category_code': 'games_video',
  'founded_year': 2005,
  'total_money_raised': '$32.4M',
  'offices': [{'description': 'mig33 Headquarters',
    'address1': '111 North Bridge Road',
    'address2': 'Peninsula Plaza #26-01',
    'zip_code': '179098',
    'city': 'Singapore',
    'state_code': None,
    'country_code': 'SGP',
    'latitude': 37.580304,
    'longitude': -122.343679}]},
 {'_id': ObjectId('52cdef7c4bab8bd675298060'),
  'name': 'tyntec',
  'category_code': 'mobile',
  'founded_ye

### Conectamos con la API de Foursquare

In [31]:
def requestFoursquare(query,data,radio=1000):
    client_idd = os.getenv("4S_CLIENT_ID")
    client_secret=os.getenv("CLIENT_SECRET")
    
    long = data['longitude']
    lat = data['latitude']

    params = {"client_id": client_idd,
              "client_secret":client_secret,
              "v": "20180323",
              "ll":f'{lat},{long}',
              "radius":f'{radio}',
              "query":f'{query}', 
              "limit":20 }

    url = f"https://api.foursquare.com/v2/venues/explore"
    
    resp = requests.get(url=url, params=params)
    res = json.loads(resp.text)

    return res["response"]["totalResults"]
requestFoursquare("coffe",data,1000)

{'meta': {'code': 429,
  'errorType': 'quota_exceeded',
  'errorDetail': 'Quota exceeded',
  'requestId': '5efae0f130926a6e9788fbbb'},
 'response': {}}

In [24]:
total = 87
offices_w=87/total
school_w=26/total
party_w=50/total
starbucks_w=9/total
vegan_w=1/total
dog=1/total

vegan="4bf58dd8d48988d1d3941735"
nightlife="4d4b7105d754a06376d81259"
tech_startup="4bf58dd8d48988d125941735"

In [32]:
# Buscamos por cada empresa cuantos sitios de los preferidos hay cerca
# Ponderamos por el número de gente y la importancia

data["Offices near"] = data.apply(lambda row: (requestFoursquare(tech_startup,row,2000)*2*offices_w), axis=1)
data["Rest"] = data.apply(lambda row: (requestFoursquare("restaurant",row,1000)*2), axis=1)
data["School"] = data.apply(lambda row: (requestFoursquare("school",row,1200)*2*school_w), axis=1)

In [None]:
data["Starbucks"] = data.apply(lambda row: (requestFoursquare("starbucks",row,200)*starbucks_w), axis=1)
data["Night club"] = data.apply(lambda row: (requestFoursquare("nightlife",row,500)*party_w), axis=1)

In [None]:
data["Dog care"] = data.apply(lambda row: (requestFoursquare("dog care",row,2000)*0.5*dog), axis=1)
data["Vegan"] = data.apply(lambda row: (requestFoursquare(vegan,row,1000)*0.5*vegan_w), axis=1)

In [None]:
# Sumamos los puntos totales y ordenamos 

data["Total Points"] = data["Offices near"]+data['Rest']+data['School']+data['Night club']+data['Starbucks']+data['Dog care']+data["Vegan"]

#Renombramos el dataframe y nos quedamos unicamente con las puntuaciones
total_points=data
total_points["Total Points"].apply(np.floor)
total_points.sort_values(by="Total Points", ascending=False)
total_points

drop_cols = ['latitude','longitude', 'geopoint','geometry']
total_points = total_points.drop(drop_cols, axis =1)
total_points.round({"School":1, "Starbucks":1, "Night club":1, "Dog care":1,"Vegan":1,"Total Points":1 })


In [None]:
# Visualizamos la empresa con más puntos

total_points.sort_values(by="Total Points").head(1)

### Visualizción mapas

In [None]:
def mapa(coord):

    m = Map(location=coord,zoom_start=12)
    for i, row in data.iterrows():
        Marker(
            location=row[["latitude","longitude"]],
            tooltip=row["name"],
        ).add_to(m)
    return m

coordenates_sg=[1.290270,103.851959]
mapa(coordenates_sg)




In [None]:
apiKey = os.getenv('GPLACES_APIKEY')
x

In [None]:
#Restaurantes
places=req.gPlaces(1.298370,103.891290,"restaurant",1000)
results_rx=[(places[i]["name"], places[i]["geometry"]["location"]["lat"], places[i]["geometry"]["location"]["lng"]) for i in range(len(places))]
results_rx=pd.DataFrame(results_map, columns=['name', 'latitude', 'longitude'])
results_map.to_json("output/restaurants.json",orient="records")

In [None]:
#Colegios

places=req.gPlaces(1.298370,103.891290,"school",1000)
results_sch=[(places[i]["name"], places[i]["geometry"]["location"]["lat"], places[i]["geometry"]["location"]["lng"]) for i in range(len(places))]
results_sch=pd.DataFrame(results_map, columns=['name', 'latitude', 'longitude'])
results_map.to_json("output/schools.json",orient="records")

In [None]:
#Clubs

places=req.gPlaces(1.298370,103.891290,"night clubs",1000)
results_sch=[(places[i]["name"], places[i]["geometry"]["location"]["lat"], places[i]["geometry"]["location"]["lng"]) for i in range(len(places))]
results_sch=pd.DataFrame(results_map, columns=['name', 'latitude', 'longitude'])
results_map.to_json("output/night_clubs.json",orient="records")

In [None]:
#Starbucks

places=req.gPlaces(1.298370,103.891290,"starbucks",500)
results_sch=[(places[i]["name"], places[i]["geometry"]["location"]["lat"], places[i]["geometry"]["location"]["lng"]) for i in range(len(places))]
results_sch=pd.DataFrame(results_map, columns=['name', 'latitude', 'longitude'])
results_map.to_json("output/starbucks.json",orient="records")

In [None]:
data.sort_values(by="Total Points", ascending=False)


# Sacamos una tabla con los valores finales
drop_cols = ['latitude','longitude', 'geopoint','geometry']
data = gdf.drop(drop_cols, axis =1)
data

In [None]:
geocode("Singapur")