# MADRID vs. LONDON

In [57]:
from pymongo import MongoClient
import pandas as pd
import geopandas as gpd
from cartoframes.viz import Map, Layer, popup_element, size_continuous_style, color_continuous_style

#### Conectamos Mongo con Python

In [58]:
# Open a client connection to the MongoDB server at localhost:27017.
conn = MongoClient("localhost:27017")

In [59]:
# Check which databases are available on this connection.
conn.list_database_names()

['admin', 'config', 'ironhack', 'local']

In [60]:
# Pick the "ironhack" database because it holds the collection we want to use.
db = conn.get_database("ironhack")

In [61]:
# List the collections stored in the selected database.
db.list_collection_names()

['companies', 'countries_small', 'books', 'restaurants']

In [62]:
# Get a handle on the "companies" collection.
collection = db.get_collection("companies")

### Empezamos a investigar dentro de nuestra colección
#### Hipótesis:

##### Datos que puedo encontrar en mi colección de "Companies":
* Soy una gran emprendedora y quiero lanzar mi nueva empresa
* Quiero ver cuál sería la mejor ubicación para lanzar mi empresa
    - Estoy entre Madrid y Londres
* Además quiero estar rodeada tanto de empresas tecnológicas como de start-ups; me gusta mucho el ambiente joven e innovador y sin miedo a cambios, y quiero encontrar ese entorno.

##### Datos que tengo que encontrar en APIs:
* Que haya bares cercanos a mi empresa: además de que la gente que forme mi empresa trabaje y tenga sus obligaciones, quiero que estén rodeados de buen ambiente porque ¡a quién no le gusta tomarse su cañita al acabar un día duro!
* También quiero que haya business centers donde se organicen conferencias y eventos de todo tipo. Además de todo mi equipo de diseñadores UX/UI, que están muy interesados en charlas para captar nuevas ideas y mejorar, quiero que toda la empresa también tenga la oportunidad de asistir a conferencias y eventos de su campo porque ¿a quién no le gusta aprender y actualizarse?
* Me encantaría que hubiera colegios cerca de nuestra ubicación ya que sería una ventaja y una facilidad para todos los padres que estén trabajando con nosotros.


### 1. Busco aquellas empresas que estén ubicadas en Madrid y Londres

In [63]:
# Fetch one document (empty filter) to inspect what a company record looks like.
collection.find_one({})

{'_id': ObjectId('52cdef7c4bab8bd675297d8a'),
 'name': 'Wetpaint',
 'permalink': 'abc2',
 'crunchbase_url': 'http://www.crunchbase.com/company/wetpaint',
 'homepage_url': 'http://wetpaint-inc.com',
 'blog_url': 'http://digitalquarters.net/',
 'blog_feed_url': 'http://digitalquarters.net/feed/',
 'twitter_username': 'BachelrWetpaint',
 'category_code': 'web',
 'number_of_employees': 47,
 'founded_year': 2005,
 'founded_month': 10,
 'founded_day': 17,
 'deadpooled_year': 1,
 'tag_list': 'wiki, seattle, elowitz, media-industry, media-platform, social-distribution-system',
 'alias_list': '',
 'email_address': 'info@wetpaint.com',
 'phone_number': '206.859.6300',
 'description': 'Technology Platform Company',
 'created_at': datetime.datetime(2007, 5, 25, 6, 51, 27),
 'updated_at': 'Sun Dec 08 07:15:44 UTC 2013',
 'overview': '<p>Wetpaint is a technology platform company that uses its proprietary state-of-the-art technology and expertise in social media to build and monetize audiences for di

In [64]:
# Show only the top-level field names of a sample document.
collection.find_one({}).keys()

dict_keys(['_id', 'name', 'permalink', 'crunchbase_url', 'homepage_url', 'blog_url', 'blog_feed_url', 'twitter_username', 'category_code', 'number_of_employees', 'founded_year', 'founded_month', 'founded_day', 'deadpooled_year', 'tag_list', 'alias_list', 'email_address', 'phone_number', 'description', 'created_at', 'updated_at', 'overview', 'image', 'products', 'relationships', 'competitions', 'providerships', 'total_money_raised', 'funding_rounds', 'investments', 'acquisition', 'acquisitions', 'offices', 'milestones', 'video_embeds', 'screenshots', 'external_links', 'partners'])

* Selección del tipo de empresas que quieres que se encuentren alrededor de la tuya.
* Nosotros lo que buscamos es:
    - Empresas ubicadas en Madrid o Londres
    - Empresas cuya categoría empresarial esté relacionada con el "ecommerce"
    - Y empresas jóvenes cuyo año de fundación se encuentre entre 2005 y 2020.
       ¡Spoiler! He buscado empresas de nueva creación cuyo año de fundación haya sido 2015 (ubicadas en Madrid o Londres y de categoría "ecommerce") y no hay.

## Empresas en Madrid

In [68]:
# E-commerce companies founded after 2004 that list an office in Madrid.
# `find` returns a cursor; the documents are only read when it is iterated.
madrid_companies = collection.find(
    filter={
        "offices.city": "Madrid",
        "category_code": "ecommerce",
        "founded_year": {"$gt": 2004},
    },
    projection={
        "name": 1,
        "category_code": 1,
        "founded_year": 1,
        "offices": 1,
        "total_money_raised": 1,
        "_id": 0,
    },
)

In [69]:
# Consume the cursor into a list so the matching documents are displayed.
list(madrid_companies)

[{'name': 'Digital Assets Deployment',
  'category_code': 'ecommerce',
  'founded_year': 2006,
  'total_money_raised': '$0',
  'offices': [{'description': 'Corporate Headquarters',
    'address1': 'Doctor Castelo 10',
    'address2': 'Local 1',
    'zip_code': '28009',
    'city': 'Madrid',
    'state_code': None,
    'country_code': 'ESP',
    'latitude': 40.42046,
    'longitude': -3.678121}]},
 {'name': 'Daily Flat Rental',
  'category_code': 'ecommerce',
  'founded_year': 2008,
  'total_money_raised': '$0',
  'offices': [{'description': 'Central Office',
    'address1': 'Lavapies 26 1A',
    'address2': '',
    'zip_code': '28012',
    'city': 'Madrid',
    'state_code': None,
    'country_code': 'ESP',
    'latitude': 40.412323,
    'longitude': -3.703248}]},
 {'name': 'Daily Flat Rental',
  'category_code': 'ecommerce',
  'founded_year': 2008,
  'total_money_raised': '$0',
  'offices': [{'description': 'Central Office',
    'address1': 'Lavapies 26 1A',
    'address2': '',
    'z

In [70]:
# Same Madrid / e-commerce / founded-after-2004 selection, loaded into a DataFrame.
# Listing several fields in one filter document is an implicit AND of all conditions.
# Only city, latitude and longitude are projected from each office.
df = pd.DataFrame(
    collection.find(
        filter={
            "offices.city": "Madrid",
            "category_code": "ecommerce",
            "founded_year": {"$gt": 2004},
        },
        projection={
            "name": 1,
            "category_code": 1,
            "founded_year": 1,
            "number_of_employees": 1,
            "total_money_raised": 1,
            "offices.city": 1,
            "offices.latitude": 1,
            "offices.longitude": 1,
            "_id": 0,
        },
    )
)

df

Unnamed: 0,name,category_code,number_of_employees,founded_year,total_money_raised,offices
0,Digital Assets Deployment,ecommerce,10.0,2006,$0,"[{'city': 'Madrid', 'latitude': 40.42046, 'lon..."
1,Daily Flat Rental,ecommerce,2.0,2008,$0,"[{'city': 'Madrid', 'latitude': 40.412323, 'lo..."
2,Daily Flat Rental,ecommerce,2.0,2008,$0,"[{'city': 'Madrid', 'latitude': 40.412323, 'lo..."
3,ticketea,ecommerce,,2010,$5.73M,"[{'city': 'Madrid', 'latitude': 40.4455155, 'l..."
4,BuyVIP,ecommerce,,2006,$20M,"[{'city': 'Madrid', 'latitude': 40.4167413, 'l..."


* Vamos a limpiar la columna de offices y ponerlo todo mucho más visual para poder luego crear nuestros mapas de manera más sencilla y ordenada

In [71]:
# Inspect the raw "offices" column: each cell holds a list of office dictionaries.
df.offices

0    [{'city': 'Madrid', 'latitude': 40.42046, 'lon...
1    [{'city': 'Madrid', 'latitude': 40.412323, 'lo...
2    [{'city': 'Madrid', 'latitude': 40.412323, 'lo...
3    [{'city': 'Madrid', 'latitude': 40.4455155, 'l...
4    [{'city': 'Madrid', 'latitude': 40.4167413, 'l...
Name: offices, dtype: object

In [72]:
# City of the first office listed for each company (only element 0 of the list is read).
city = [company_offices[0]['city'] for company_offices in df.offices]

In [73]:
# One-column frame with the extracted cities, ready to be concatenated later.
df_city = pd.DataFrame(city, columns=['city'])
df_city

Unnamed: 0,city
0,Madrid
1,Madrid
2,Madrid
3,Madrid
4,Madrid


In [74]:
# Latitude of the first office listed for each company.
latitude = [company_offices[0]['latitude'] for company_offices in df.offices]

latitude

[40.42046, 40.412323, 40.412323, 40.4455155, 40.4167413]

In [75]:
# One-column frame with the extracted latitudes.
df_latitude = pd.DataFrame(latitude, columns=['latitude'])
df_latitude

Unnamed: 0,latitude
0,40.42046
1,40.412323
2,40.412323
3,40.445515
4,40.416741


In [76]:
# Longitude of the first office listed for each company.
longitude = [company_offices[0]['longitude'] for company_offices in df.offices]

longitude

[-3.678121, -3.703248, -3.703248, -3.7061764, -3.7032498]

In [77]:
# One-column frame with the extracted longitudes.
df_longitude = pd.DataFrame(longitude, columns=['longitude'])
df_longitude

Unnamed: 0,longitude
0,-3.678121
1,-3.703248
2,-3.703248
3,-3.706176
4,-3.70325


In [78]:
# Missing employee counts (NaN) are replaced by 0 in a separate "fill_0_employees"
# column; the original column is kept next to it for comparison.
df_Madrid_nulls = df[['number_of_employees']].assign(
    fill_0_employees=lambda frame: frame['number_of_employees'].fillna(0)
)
df_Madrid_nulls[['number_of_employees', 'fill_0_employees']]

Unnamed: 0,number_of_employees,fill_0_employees
0,10.0,10.0
1,2.0,2.0
2,2.0,2.0
3,,0.0
4,,0.0


In [79]:
# Place the extracted columns side by side with the original frame (aligned on the row index).
# "number_of_employees" appears twice in the result: once from `df` and once from `df_Madrid_nulls`.
df_Madrid = pd.concat(
    [df, df_city, df_latitude, df_longitude, df_Madrid_nulls],
    axis='columns',
)
df_Madrid

Unnamed: 0,name,category_code,number_of_employees,founded_year,total_money_raised,offices,city,latitude,longitude,number_of_employees.1,fill_0_employees
0,Digital Assets Deployment,ecommerce,10.0,2006,$0,"[{'city': 'Madrid', 'latitude': 40.42046, 'lon...",Madrid,40.42046,-3.678121,10.0,10.0
1,Daily Flat Rental,ecommerce,2.0,2008,$0,"[{'city': 'Madrid', 'latitude': 40.412323, 'lo...",Madrid,40.412323,-3.703248,2.0,2.0
2,Daily Flat Rental,ecommerce,2.0,2008,$0,"[{'city': 'Madrid', 'latitude': 40.412323, 'lo...",Madrid,40.412323,-3.703248,2.0,2.0
3,ticketea,ecommerce,,2010,$5.73M,"[{'city': 'Madrid', 'latitude': 40.4455155, 'l...",Madrid,40.445515,-3.706176,,0.0
4,BuyVIP,ecommerce,,2006,$20M,"[{'city': 'Madrid', 'latitude': 40.4167413, 'l...",Madrid,40.416741,-3.70325,,0.0


In [112]:
# Drop the raw "offices" column and the unfilled employee count: their contents were
# already extracted into city / latitude / longitude / fill_0_employees.
# Columns are dropped by name rather than by position so the cell does not depend on the
# field order returned by MongoDB. "number_of_employees" is present twice after the
# concat, and dropping by label removes both copies.
df_Madrid_clean = df_Madrid.drop(columns=['number_of_employees', 'offices']).copy()
df_Madrid_clean

Unnamed: 0,name,category_code,founded_year,total_money_raised,city,latitude,longitude,fill_0_employees
0,Digital Assets Deployment,ecommerce,2006,$0,Madrid,40.42046,-3.678121,10.0
1,Daily Flat Rental,ecommerce,2008,$0,Madrid,40.412323,-3.703248,2.0
2,Daily Flat Rental,ecommerce,2008,$0,Madrid,40.412323,-3.703248,2.0
3,ticketea,ecommerce,2010,$5.73M,Madrid,40.445515,-3.706176,0.0
4,BuyVIP,ecommerce,2006,$20M,Madrid,40.416741,-3.70325,0.0


In [113]:
# Strip the dollar sign and the "M" (millions) suffix, leaving only the numeric text.
# regex=False makes both replacements literal: "$" is a regex anchor, and the default
# of `regex` has changed between pandas versions.
# After this step the values are expressed in millions.
df_Madrid_clean["total_money_raised"] = (
    df_Madrid_clean["total_money_raised"]
    .str.replace('$', '', regex=False)
    .str.replace('M', '', regex=False)
)
df_Madrid_clean["total_money_raised"]

0       0
1       0
2       0
3    5.73
4      20
Name: total_money_raised, dtype: object

In [114]:
# Convert "total_money_raised" from text to float: the map styles need a numeric variable, not a categorical one.
df_Madrid_clean["total_money_raised"] = df_Madrid_clean["total_money_raised"].astype(float)
df_Madrid_clean["total_money_raised"]

0     0.00
1     0.00
2     0.00
3     5.73
4    20.00
Name: total_money_raised, dtype: float64

In [139]:
# Build a GeoDataFrame with one point per company. The geometry is taken from the same
# frame that provides the attributes (x = longitude, y = latitude).
gdf_Madrid = gpd.GeoDataFrame(
    df_Madrid_clean,
    geometry=gpd.points_from_xy(df_Madrid_clean.longitude, df_Madrid_clean.latitude),
)
# Report the type of the frame just created (the original referenced an undefined `gdf`).
print(f'Tipo: {type(gdf_Madrid)}')

# The same company appears twice in the query result; keep only its first occurrence.
# Duplicates are detected by name and coordinates instead of a hard-coded row label.
gdf_Madrid_clean = gdf_Madrid.drop_duplicates(subset=['name', 'latitude', 'longitude'])
gdf_Madrid_clean

Tipo: <class 'geopandas.geodataframe.GeoDataFrame'>


Unnamed: 0,name,category_code,founded_year,total_money_raised,city,latitude,longitude,fill_0_employees,geometry
0,Digital Assets Deployment,ecommerce,2006,0.0,Madrid,40.42046,-3.678121,10.0,POINT (-3.67812 40.42046)
1,Daily Flat Rental,ecommerce,2008,0.0,Madrid,40.412323,-3.703248,2.0,POINT (-3.70325 40.41232)
3,ticketea,ecommerce,2010,5.73,Madrid,40.445515,-3.706176,0.0,POINT (-3.70618 40.44552)
4,BuyVIP,ecommerce,2006,20.0,Madrid,40.416741,-3.70325,0.0,POINT (-3.70325 40.41674)


In [140]:
# Persist the cleaned Madrid companies to the shared output folder.
gdf_Madrid_clean.to_csv('../output/Madrid_clean.csv')

In [141]:
# Table created only so it can be exported and reused in the APIs notebook:
# it keeps the company name, its coordinates and the geometry.
# Fixes the `gtf_Madrid_clean` typo (NameError on a fresh kernel) and drops the
# unneeded columns by name instead of by position.
gdf_coodenadas_Madrid = gdf_Madrid_clean.drop(
    columns=['category_code', 'founded_year', 'total_money_raised', 'city', 'fill_0_employees']
)
gdf_coodenadas_Madrid

Unnamed: 0,name,latitude,longitude,geometry
0,Digital Assets Deployment,40.42046,-3.678121,POINT (-3.67812 40.42046)
1,Daily Flat Rental,40.412323,-3.703248,POINT (-3.70325 40.41232)
3,ticketea,40.445515,-3.706176,POINT (-3.70618 40.44552)
4,BuyVIP,40.416741,-3.70325,POINT (-3.70325 40.41674)


In [142]:
# Export the name/coordinates table for the APIs notebook.
gdf_coodenadas_Madrid.to_csv('../output/Madrid_coordenadas_clean.csv')

#### Ubicación de las cuatro empresas 

In [143]:
# Plot the Madrid companies; hovering over a point shows the company name.
Map(Layer(gdf_Madrid_clean, popup_hover=[popup_element("name")]))

#### Ubicación de las empresas y proporción del total money raised de cada una de ellas
* Así podemos ver de manera mucho más visual la diferencia del total money raised entre las cuatro

In [144]:
# Point size is proportional to the money raised by each company.
Map(
    Layer(
        gdf_Madrid_clean,
        style=size_continuous_style('total_money_raised', size_range=[10, 40]),
        title='Total money raised ($/Units of millon)',
    )
)

#### QUEREMOS DAR UN MAPA MÁS DETALLADO Y POR ELLO EN ESTE:
* vemos la ubicación de nuestras empresas
* vemos el total money raised de cada una y además podemos compararlas visualmente entre ellas
* y añadimos el nº de trabajadores por cada empresa para poder razonar un poco el total money raised de cada una de ellas

In [145]:
# Two layers over the same companies: colour encodes the number of employees,
# point size encodes the money raised.
Map(
    layers=[
        Layer(
            gdf_Madrid_clean,
            style=color_continuous_style('fill_0_employees'),
            title='Number of employees',
        ),
        Layer(
            gdf_Madrid_clean,
            style=size_continuous_style('total_money_raised', size_range=[20, 40]),
            title='Total money raised ($/units of million)',
        ),
    ]
)

## Empresas en Londres

In [88]:
# E-commerce companies founded after 2004 that list an office in London.
# `find` returns a cursor; the documents are only read when it is iterated.
london_companies = collection.find(
    filter={
        "offices.city": "London",
        "category_code": "ecommerce",
        "founded_year": {"$gt": 2004},
    },
    projection={
        "name": 1,
        "category_code": 1,
        "founded_year": 1,
        "_id": 0,
    },
)

In [89]:
# Consume the cursor into a list so the matching documents are displayed.
list(london_companies)

[{'name': 'Twenga', 'category_code': 'ecommerce', 'founded_year': 2006},
 {'name': 'Seatwave', 'category_code': 'ecommerce', 'founded_year': 2006},
 {'name': 'ProcServe', 'category_code': 'ecommerce', 'founded_year': 2006},
 {'name': 'Voices', 'category_code': 'ecommerce', 'founded_year': 2005},
 {'name': 'Veedow', 'category_code': 'ecommerce', 'founded_year': 2007},
 {'name': 'Spectackler', 'category_code': 'ecommerce', 'founded_year': 2008},
 {'name': 'Shutl', 'category_code': 'ecommerce', 'founded_year': 2009},
 {'name': 'Oaven', 'category_code': 'ecommerce', 'founded_year': 2006},
 {'name': 'Aroxo', 'category_code': 'ecommerce', 'founded_year': 2006},
 {'name': 'Autoquake', 'category_code': 'ecommerce', 'founded_year': 2005},
 {'name': 'Schway', 'category_code': 'ecommerce', 'founded_year': 2007},
 {'name': 'emarket', 'category_code': 'ecommerce', 'founded_year': 2008}]

In [90]:
# Same London / e-commerce / founded-after-2004 selection, loaded into a DataFrame.
# Listing several fields in one filter document is an implicit AND of all conditions.
# Only city, latitude and longitude are projected from each office.
df_London = pd.DataFrame(
    collection.find(
        filter={
            "offices.city": "London",
            "category_code": "ecommerce",
            "founded_year": {"$gt": 2004},
        },
        projection={
            "name": 1,
            "category_code": 1,
            "founded_year": 1,
            "number_of_employees": 1,
            "total_money_raised": 1,
            "offices.city": 1,
            "offices.latitude": 1,
            "offices.longitude": 1,
            "_id": 0,
        },
    )
)

df_London

Unnamed: 0,name,category_code,number_of_employees,founded_year,total_money_raised,offices
0,Twenga,ecommerce,,2006,€2.6M,"[{'city': 'Paris', 'latitude': 48.856667, 'lon..."
1,Seatwave,ecommerce,,2006,$53M,"[{'city': 'London', 'latitude': 51.500152, 'lo..."
2,ProcServe,ecommerce,55.0,2006,$0,"[{'city': 'London', 'latitude': 51.494067, 'lo..."
3,Voices,ecommerce,23.0,2005,$0,"[{'city': 'London', 'latitude': 43.014006, 'lo..."
4,Veedow,ecommerce,4.0,2007,$0,"[{'city': 'London', 'latitude': None, 'longitu..."
5,Spectackler,ecommerce,3.0,2008,$0,"[{'city': 'London', 'latitude': None, 'longitu..."
6,Shutl,ecommerce,10.0,2009,£7.66M,"[{'city': 'London', 'latitude': 51.5196135, 'l..."
7,Oaven,ecommerce,4.0,2006,$0,"[{'city': 'London', 'latitude': None, 'longitu..."
8,Aroxo,ecommerce,10.0,2006,$0,"[{'city': 'London', 'latitude': None, 'longitu..."
9,Autoquake,ecommerce,100.0,2005,$39.9M,"[{'city': 'London', 'latitude': 51.543337, 'lo..."


In [91]:
# Missing employee counts (NaN) are replaced by 0 in a separate "fill_0_employees"
# column; the original column is kept next to it for comparison.
df_London_nulls = df_London[['number_of_employees']].assign(
    fill_0_employees=lambda frame: frame['number_of_employees'].fillna(0)
)
df_London_nulls[['number_of_employees', 'fill_0_employees']]

Unnamed: 0,number_of_employees,fill_0_employees
0,,0.0
1,,0.0
2,55.0,55.0
3,23.0,23.0
4,4.0,4.0
5,3.0,3.0
6,10.0,10.0
7,4.0,4.0
8,10.0,10.0
9,100.0,100.0


In [92]:
# Look at the offices list of the row labelled 1 to see its structure.
df_London.offices[1]

[{'city': 'London', 'latitude': 51.500152, 'longitude': -0.126236}]

In [93]:
# City of the first office listed for each company (only element 0 of the list is read,
# so it is not necessarily the London office that matched the query).
city_L = [company_offices[0]['city'] for company_offices in df_London.offices]

In [94]:
# One-column frame with the extracted cities, ready to be concatenated later.
df_city_L = pd.DataFrame(city_L, columns=['city'])
df_city_L

Unnamed: 0,city
0,Paris
1,London
2,London
3,London
4,London
5,London
6,London
7,London
8,London
9,London


In [95]:
# Longitude of the first office listed for each company (may be None).
longitude_L = [company_offices[0]['longitude'] for company_offices in df_London.offices]

In [96]:
# One-column frame with the extracted longitudes.
df_longitude_L = pd.DataFrame(longitude_L, columns=['longitude'])
df_longitude_L

Unnamed: 0,longitude
0,2.350987
1,-0.126236
2,-0.146665
3,-81.280364
4,
5,
6,-0.102027
7,
8,
9,-0.172364


In [97]:
# Latitude of the first office listed for each company (may be None).
latitude_L = [company_offices[0]['latitude'] for company_offices in df_London.offices]

In [98]:
# One-column frame with the extracted latitudes.
df_latitude_L = pd.DataFrame(latitude_L, columns=['latitude'])
df_latitude_L

Unnamed: 0,latitude
0,48.856667
1,51.500152
2,51.494067
3,43.014006
4,
5,
6,51.519613
7,
8,
9,51.543337


In [99]:
# Place the extracted columns side by side with the original frame (aligned on the row index).
# "number_of_employees" appears twice in the result: once from `df_London` and once from `df_London_nulls`.
df_London_clean = pd.concat(
    [df_London, df_city_L, df_latitude_L, df_longitude_L, df_London_nulls],
    axis='columns',
)
df_London_clean

Unnamed: 0,name,category_code,number_of_employees,founded_year,total_money_raised,offices,city,latitude,longitude,number_of_employees.1,fill_0_employees
0,Twenga,ecommerce,,2006,€2.6M,"[{'city': 'Paris', 'latitude': 48.856667, 'lon...",Paris,48.856667,2.350987,,0.0
1,Seatwave,ecommerce,,2006,$53M,"[{'city': 'London', 'latitude': 51.500152, 'lo...",London,51.500152,-0.126236,,0.0
2,ProcServe,ecommerce,55.0,2006,$0,"[{'city': 'London', 'latitude': 51.494067, 'lo...",London,51.494067,-0.146665,55.0,55.0
3,Voices,ecommerce,23.0,2005,$0,"[{'city': 'London', 'latitude': 43.014006, 'lo...",London,43.014006,-81.280364,23.0,23.0
4,Veedow,ecommerce,4.0,2007,$0,"[{'city': 'London', 'latitude': None, 'longitu...",London,,,4.0,4.0
5,Spectackler,ecommerce,3.0,2008,$0,"[{'city': 'London', 'latitude': None, 'longitu...",London,,,3.0,3.0
6,Shutl,ecommerce,10.0,2009,£7.66M,"[{'city': 'London', 'latitude': 51.5196135, 'l...",London,51.519613,-0.102027,10.0,10.0
7,Oaven,ecommerce,4.0,2006,$0,"[{'city': 'London', 'latitude': None, 'longitu...",London,,,4.0,4.0
8,Aroxo,ecommerce,10.0,2006,$0,"[{'city': 'London', 'latitude': None, 'longitu...",London,,,10.0,10.0
9,Autoquake,ecommerce,100.0,2005,$39.9M,"[{'city': 'London', 'latitude': 51.543337, 'lo...",London,51.543337,-0.172364,100.0,100.0


In [100]:
# Drop the raw "offices" column and the unfilled employee count: their contents were
# already extracted into city / latitude / longitude / fill_0_employees.
# Columns are dropped by name rather than by position so the cell does not depend on the
# field order returned by MongoDB. "number_of_employees" is present twice after the
# concat, and dropping by label removes both copies.
df_London_super_clean = df_London_clean.drop(columns=['number_of_employees', 'offices']).copy()
df_London_super_clean

Unnamed: 0,name,category_code,founded_year,total_money_raised,city,latitude,longitude,fill_0_employees
0,Twenga,ecommerce,2006,€2.6M,Paris,48.856667,2.350987,0.0
1,Seatwave,ecommerce,2006,$53M,London,51.500152,-0.126236,0.0
2,ProcServe,ecommerce,2006,$0,London,51.494067,-0.146665,55.0
3,Voices,ecommerce,2005,$0,London,43.014006,-81.280364,23.0
4,Veedow,ecommerce,2007,$0,London,,,4.0
5,Spectackler,ecommerce,2008,$0,London,,,3.0
6,Shutl,ecommerce,2009,£7.66M,London,51.519613,-0.102027,10.0
7,Oaven,ecommerce,2006,$0,London,,,4.0
8,Aroxo,ecommerce,2006,$0,London,,,10.0
9,Autoquake,ecommerce,2005,$39.9M,London,51.543337,-0.172364,100.0


In [101]:
# Strip the currency symbols and the "M" (millions) suffix, leaving only the numeric text.
# regex=False makes every replacement literal: "$" is a regex anchor, and the default
# of `regex` has changed between pandas versions.
# NOTE(review): amounts in $, £ and € are stripped to bare numbers without any currency
# conversion, so the resulting values are not strictly comparable with each other.
df_London_super_clean["total_money_raised"] = (
    df_London_super_clean["total_money_raised"]
    .str.replace('$', '', regex=False)
    .str.replace('£', '', regex=False)
    .str.replace('€', '', regex=False)
    .str.replace('M', '', regex=False)
)
df_London_super_clean["total_money_raised"]

0      2.6
1       53
2        0
3        0
4        0
5        0
6     7.66
7        0
8        0
9     39.9
10       0
11       0
Name: total_money_raised, dtype: object

In [102]:
# Convert "total_money_raised" from text to float: the map styles need a numeric variable, not a categorical one.
df_London_super_clean["total_money_raised"] = df_London_super_clean["total_money_raised"].astype(float)
df_London_super_clean["total_money_raised"]

0      2.60
1     53.00
2      0.00
3      0.00
4      0.00
5      0.00
6      7.66
7      0.00
8      0.00
9     39.90
10     0.00
11     0.00
Name: total_money_raised, dtype: float64

In [103]:
# The project is location-based, so companies without usable London coordinates are removed.
# Rows are dropped by index label: 0 has its first office in Paris, 3 has coordinates
# far from the UK (latitude 43.01, longitude -81.28), and 4, 5, 7, 8 have no coordinates.
# NOTE(review): row 11 is not visible in the displayed tables; presumably it also lacks
# coordinates — confirm.
df_London_limpio = df_London_super_clean.drop(index=[0, 3, 4, 5, 7, 8, 11])

In [104]:
# Build a GeoDataFrame with one point per remaining company (x = longitude, y = latitude).
gdf_London = gpd.GeoDataFrame(
    df_London_limpio,
    geometry=gpd.points_from_xy(df_London_limpio.longitude, df_London_limpio.latitude),
)
# Report the type of the frame just created (the original referenced an undefined `gdf`).
print(f'Tipo: {type(gdf_London)}')
gdf_London

Tipo: <class 'geopandas.geodataframe.GeoDataFrame'>


Unnamed: 0,name,category_code,founded_year,total_money_raised,city,latitude,longitude,fill_0_employees,geometry
1,Seatwave,ecommerce,2006,53.0,London,51.500152,-0.126236,0.0,POINT (-0.12624 51.50015)
2,ProcServe,ecommerce,2006,0.0,London,51.494067,-0.146665,55.0,POINT (-0.14666 51.49407)
6,Shutl,ecommerce,2009,7.66,London,51.519613,-0.102027,10.0,POINT (-0.10203 51.51961)
9,Autoquake,ecommerce,2005,39.9,London,51.543337,-0.172364,100.0,POINT (-0.17236 51.54334)
10,Schway,ecommerce,2007,0.0,London,51.519816,-0.136162,5.0,POINT (-0.13616 51.51982)


In [106]:
# Persist the cleaned London companies to the shared output folder.
gdf_London.to_csv('../output/London_clean.csv')

In [148]:
# Table created only so it can be imported later in the APIs notebook:
# it keeps just the company name and its latitude / longitude.
# Columns are dropped by name instead of by position so the cell does not depend on
# column order.
gdf_coodenadas_London = gdf_London.drop(
    columns=[
        'category_code',
        'founded_year',
        'total_money_raised',
        'city',
        'fill_0_employees',
        'geometry',
    ]
)
gdf_coodenadas_London

Unnamed: 0,name,latitude,longitude
1,Seatwave,51.500152,-0.126236
2,ProcServe,51.494067,-0.146665
6,Shutl,51.519613,-0.102027
9,Autoquake,51.543337,-0.172364
10,Schway,51.519816,-0.136162


In [149]:
# Export the name/coordinates table for the APIs notebook.
gdf_coodenadas_London.to_csv('../output/London_coordenadas_clean.csv')

#### Ubicación de las cinco empresas

In [53]:
# Plot the London companies; hovering over a point shows the company name.
Map(Layer(gdf_London, popup_hover=[popup_element("name")]))

#### Ubicación de las empresas y proporción del total money raised de cada una de ellas
* Así podemos ver de manera mucho más visual la diferencia del total money raised entre todas ellas

In [54]:
# Point size is proportional to the money raised by each company.
Map(
    Layer(
        gdf_London,
        style=size_continuous_style('total_money_raised', size_range=[10, 40]),
        title='Total money raised ($/Units of millon)',
    )
)

#### QUEREMOS DAR UN MAPA MÁS DETALLADO Y POR ELLO EN ESTE:
* vemos la ubicación de nuestras empresas
* vemos el total money raised de cada una y además podemos compararlas visualmente entre ellas
* y añadimos el nº de trabajadores por cada empresa para poder razonar un poco el total money raised de cada una de ellas

In [152]:
# Two layers over the same companies: colour encodes the number of employees,
# point size encodes the money raised.
map_london_companies = Map(
    layers=[
        Layer(
            gdf_London,
            style=color_continuous_style('fill_0_employees'),
            title='Number of employees',
        ),
        Layer(
            gdf_London,
            style=size_continuous_style('total_money_raised', size_range=[20, 50]),
            title='Total money raised ($/units of million)',
        ),
    ]
)
map_london_companies

### RECURSOS UTILIZADOS PARA ESTE ANÁLISIS
* El dataset de "Companies" de Ironhack --> utilizado mediante la herramienta Mongo
* Apuntes proporcionados de Ironhack --> https://github.com/leticia-sobrino/teaching-ironhack-data-madrid-2020
* Research en Google 
* Para la realización de mapas/visualizaciones ayuda de la siguiente documentación --> https://carto.com/developers/cartoframes/