# First Filter: select 3 cities  🌆🌇

<img width=500 src=https://assets.hongkiat.com/uploads/creative-world-maps/10-creative-world-map-remake.jpg>

### Importing libraries  and creating connection with Mongo DB

In [23]:
import re
import pandas as pd
import numpy as np
import src.dataframes as f
from pymongo import MongoClient
client = MongoClient("localhost:27017")
import folium
from folium import Choropleth, Circle, Marker, Icon, Map
from folium.plugins import HeatMap, MarkerCluster
import src.maps as m

In [24]:
db = client.get_database("ironhack")

**"companies" collection has information on 18.8K companies all over the world 🌎🌍🌏**

In [25]:
companies = db.get_collection("companies")

### Doing some queries to filter

#### We want to keep those cities with: 
* Many design companies
* Tech startups that have raised at least 1 Million dollars

In [26]:
query_1 = {'$and' : [{"tag_list" : {'$regex':'design'}}, {"tag_list" : {'$regex':'(?s)^((?!fashion).)*'}},
                     {"tag_list" : {'$regex':'(?s)^((?!interior).)*'}}]}
query_2 = {'$and' : [{"total_money_raised" : {'$regex':'M'}},{"tag_list" : {'$regex':'tech'}}]}
proj = {'name': 1, '_id': 0, 'total_money_raised' : 1, 'offices.city' : 1, 'offices.latitude': 1, 'offices.longitude': 1}
companies_filtered = list(companies.find({'$or' : [query_2, query_1]}, proj))

In [27]:
companies_filtered[0]

{'name': 'Digg',
 'total_money_raised': '$45M',
 'offices': [{'city': 'San Francisco',
   'latitude': 37.764726,
   'longitude': -122.394523}]}

### Creating a Dataframe with the companies filtered

In [28]:
name = []
city = []
latitude = []
longitude = []
zip_code = []
for i in companies_filtered:
    name.append(i['name'])
    try: 
        if i['offices'][0]['city'] == '':
            city.append(np.nan)
        else:
            city.append(i['offices'][0]['city'])
        latitude.append(i['offices'][0]['latitude'])
        longitude.append(i['offices'][0]['longitude'])
    except:
        city.append(np.nan)
        latitude.append(np.nan)
        longitude.append(np.nan)
        zip_code.append(np.nan)

In [29]:
dict_ = {'company' : name, 'city' : city, 'latitude' : latitude, 'longitude': longitude}

In [30]:
companies_df = pd.DataFrame.from_dict(dict_, orient='columns')

In [31]:
companies_df.head()

Unnamed: 0,company,city,latitude,longitude
0,Digg,San Francisco,37.764726,-122.394523
1,Powerset,San Francisco,37.778613,-122.395289
2,CastTV,San Francisco,37.780716,-122.393913
3,eBuddy,Amsterdam,52.364093,4.891946
4,Grockit,San Francisco,37.775196,-122.419204


### Selecting the cities with more companies and creating a subdata frame with them

In [32]:
dict_of_cities = f.vc_to_dict(companies_df.city)

In [33]:
cities = f.subdata(11,dict_of_cities)

In [34]:
cities

['San Francisco', 'New York', 'London']

### 🏆🏆 The selected cities are `San Francisco`, `New York` and `London` 🏆🏆

In [35]:
df = f.create(companies_df,'city',cities)

In [36]:
df.sample(4)

Unnamed: 0,company,city,latitude,longitude
24,Gilt Groupe,New York,40.74727,-73.980064
487,Rhxo Technology Group,New York,40.762897,-73.974714
463,Make It Rain,London,51.4626,-0.139082
179,Squiz UK,London,51.521603,-0.083444


### Exporting the dataframe

In [37]:
df.to_csv("data/df.csv", index=False)

## Let's make maps with those cities and their companies!! 🗺 

**First, we need the coordinates of those cities:**

In [39]:
san_francisco = {'type': 'Point', 'coordinates': [37.773972, -122.431297]}
london = {'type': 'Point', 'coordinates': [51.509865, -0.118092]}
new_york = {'type': 'Point', 'coordinates': [40.730610, -73.935242]}

In [40]:
map_sf = Map(location=san_francisco['coordinates'],zoom_start=15)
map_lnd = Map(location=london['coordinates'],zoom_start=15)
map_ny = Map(location=new_york['coordinates'],zoom_start=15)

In [41]:
m.institution(df, 'London', map_lnd)

In [42]:
m.institution(df, 'San Francisco', map_sf)

In [43]:
m.institution(df, 'New York', map_ny)

### Saving the maps

In [44]:
map_lnd.save("maps/map_london.html")

In [45]:
map_sf.save("maps/map_san_francisco.html")

In [46]:
map_ny.save("maps/map_new_york.html")