## Proyecto

In [10]:
import pandas as pd
from pymongo import MongoClient
import json
import numpy as np
import geopandas
import sys
import matplotlib.pyplot as plt
import os
import requests
from folium import Map, Marker, Icon, FeatureGroup, LayerControl, Choropleth
from folium.plugins import HeatMap
from dotenv import load_dotenv
load_dotenv()
import src.limpieza as lp


### Query a MongoDb - Crunchbase Dataset

In [11]:
#conecto con MongoDb y hago un query para sacar las empresas del país que nos interesa
mongodbURL = f"mongodb://localhost/datamad0620"
client = MongoClient(mongodbURL, connectTimeoutMS=2000, serverSelectionTimeoutMS=2000)
db = client.get_database("datamad0620")

In [12]:
def companiesMongo(country_code):
    query = {"offices.country_code":f"{country_code}",
        "category_code":{"$in": ["web", "games_video","ecommerce","mobile","social","software"]},
        }
    projection ={"offices":1, "name":1, "founded_year":1,"total_money_raised":1,"category_code":1}
    res = list(db.companies.find(query,projection))
    return res

result= companiesMongo("SGP")

pd.DataFrame(result).sort_values(by="total_money_raised", ascending=False).head(5)


Unnamed: 0,_id,name,category_code,founded_year,total_money_raised,offices
40,52cdef7f4bab8bd67529bd7e,YOOSE,mobile,2008.0,€25k,"[{'description': 'Germany', 'address1': 'Fried..."
31,52cdef7e4bab8bd67529adab,Nexway,ecommerce,2002.0,€14M,"[{'description': 'Nexway HQ', 'address1': '1 b..."
4,52cdef7c4bab8bd67529815a,Skyscanner,web,2001.0,£2.5M,"[{'description': 'Skyscanner Ltd', 'address1':..."
5,52cdef7c4bab8bd67529869f,eYeka,software,2006.0,$9.34M,"[{'description': 'HQ', 'address1': '79 rue la ..."
32,52cdef7e4bab8bd67529b07c,Majitek,software,,$7.5M,"[{'description': '', 'address1': 'SGX Centre 2..."


### Limpieza de datos


In [13]:
#Sacamos toda la info de la columna "offices" que contiene listas con direcciones de todas las oficinas
#concatenamos ambos dataframes y renombramos la columna de "id"
#Volvemos a filtrar por país para eliminar las oficinas que no nos interesan


data = lp.limipiezaData(result, "SGP")

data

AttributeError: module 'src.limpieza' has no attribute 'limipiezaData'

In [8]:
# Transformo la latitus y longitud a formato geopoint para poder después visualizarlo en el mapa

def transformToGeoPoint(s):
    if np.isnan(s.latitude) or np.isnan(s.longitude):
        return None
    return {
        "type":"Point",
        "coordinates":[s.longitude, s.latitude]
    }
    
data["geopoint"] = data.apply(transformToGeoPoint, axis=1)


NameError: name 'data' is not defined

In [None]:
gdf = geopandas.GeoDataFrame(data, geometry=geopandas.points_from_xy(data.longitude, data.latitude))


In [None]:
# Elimino las columnas que no vamos a utilizar y reseteamos el índice

drop_cols = ['company_id','state_code', 'country_code','description','address1','address2','zip_code','city']
data = gdf.drop(drop_cols, axis =1)
data=data.dropna(subset=['longitude'])
data=data.dropna(subset=['latitude'])
data.reset_index
data.head(5)

### Aeropuerto

In [None]:
# Comprobamos que hay un aeropuerto en Singapur y sacamos sus coordenadas

def geocode(address):
    """
    Saca las coordenadas de una dirección que le des.
    """
    data = requests.get(f"https://geocode.xyz/{address}?json=1").json()
    return {
        "type":"Point",
        "coordinates":[float(data["longt"]),float(data["latt"])]}

geocode("Singapur International Airport")

In [None]:
def gplaces(query,radio):
    apiKey = os.getenv("GPLACES_APIKEY")
    lat = 1.298370
    long = 103.891290
    radius = {radio}
    tipo = {query}

    url = url = f"https://maps.googleapis.com/maps/api/place/nearbysearch/json?location={lat},{long}&radius={radius}&type={tipo}&key=AIzaSyBr_5sdn9AKCx4Ptlwgbb7dIwojyiEOIdI"
    print(url)
    res = requests.get(url)
    data = res.json()
    return data

gplaces("school",2000)
result

### Conectamos con la API de Foursquare

In [None]:
def requestFoursquare(query,data,radio=1000):
    client_idd = os.getenv("4S_CLIENT_ID")
    client_secret=os.getenv("CLIENT_SECRET")
    
    long = data['longitude']
    lat = data['latitude']

    params = {"client_id": client_idd,
              "client_secret":client_secret,
              "v": "20180323",
              "ll":f'{lat},{long}',
              "radius":f'{radio}',
              "query":f'{query}', 
              "limit":20 }

    url = f"https://api.foursquare.com/v2/venues/explore"
    
    resp = requests.get(url=url, params=params)
    res = json.loads(resp.text)

    return res["response"]["totalResults"]

In [None]:
# Buscamos los sitios por cada empresa

vegan="4bf58dd8d48988d1d3941735"
nightlife="4d4b7105d754a06376d81259"
tech_startup="4bf58dd8d48988d125941735"

data["offices near"] = data.apply(lambda row: (requestFoursquare(tech_startup,row,2000)*2), axis=1)
data["res"] = data.apply(lambda row: (requestFoursquare("restaurant",row,1000)*2), axis=1)
data["school"] = data.apply(lambda row: (requestFoursquare("school",row,1200)*2), axis=1)
data["starbucks"] = data.apply(lambda row: requestFoursquare("starbucks",row,200), axis=1)
data["night club"] = data.apply(lambda row: requestFoursquare("starbucks",row,500), axis=1)
data["vegan"] = data.apply(lambda row: (requestFoursquare(vegan,row,1000)*0.5), axis=1)
data["dog care"] = data.apply(lambda row: (requestFoursquare("dog care",row,2000)*0.5), axis=1)




In [None]:
# Sumamos los puntos totales y ordenamos 
data["Total Points"] = data[['res','school','night club','starbucks','dog care' ]].sum(axis=1).where(data[['res','school','night club','starbucks','dog care']] > 0, 0)
data.sort_values(by="Total Points", ascending=False)

# Sacamos una tabla con los valores finales
drop_cols = ['latitude','longitude', 'geopoint','geometry']
total_points = gdf.drop(drop_cols, axis =1)

total_points.sort_values(by="Total Points").head(1)

### Visualizción mapas

In [None]:
def mapa(coord):

    m = Map(location=coord,zoom_start=14)
    for i, row in data.iterrows():
        Marker(
            location=row[["latitude","longitude"]],
            tooltip=row["name"],
        ).add_to(m)
    return m

coordenates_sg=[1.290270,103.851959]
mapa(coordenates_sg)




In [None]:
geocode("Singapur")