In [1]:
import os
import requests
import json

from dotenv import load_dotenv
import pandas as pd
import numpy as np

from pymongo import MongoClient
from pymongo import GEOSPHERE

## Load token

In [2]:
load_dotenv()


True

In [3]:
token_fsq = os.getenv("foursquare_token")

## Connection to MongoDB to check companies

In [4]:
client = MongoClient("localhost:27017")

In [5]:
db = client.get_database("ironhack")

In [6]:
c = db.get_collection("companies")

In [7]:
# Para filtrar la mongodb
condition_money = {"total_money_raised": {"$regex": '[$|€].*[M|B]'}}
condition_gaming = {"category_code": {"$regex": "[.*]?game[.*]?"}}
condition_design = {"tag_list": {"$regex": "[.*]?design[.*]?"}}
condition_money_gaming = [condition_money, condition_gaming]

projection = {"_id": 0, "name": 1,"total_money_raised": 1, "offices": 1}

In [8]:
companies_df = list(c.find({"$or": 
                 [{"$and": condition_money_gaming}, condition_design]}, 
                    projection))

In [9]:
companies_df = pd.DataFrame(companies_df)

In [10]:
companies_df = companies_df.explode("offices")

In [11]:
companies_df = pd.concat([companies_df, companies_df["offices"].apply(pd.Series)[["city", "country_code", "latitude", "longitude"]]], axis=1)

In [12]:
companies_df.drop("offices", axis=1, inplace=True)

In [14]:
def return_money(value):
    """ input: a string containing the money rised by a company
        output: a float with the money properly formated turning M to milions and B to billions
    """
    if value.startswith("$") or value.startswith("€"):
        if value.endswith("k"):
            return round(float(value[1:-1])*100)
        elif value.endswith("M"):
            return round(float(value[1:-1])*100000)
        elif value.endswith("B"):
            return round(float(value[1:-1])*100000000)
        else:
            return 0
        

In [15]:
companies_df["total_money_raised"] = companies_df["total_money_raised"].apply(return_money)

In [16]:
companies_df["city"].replace(to_replace="", value=np.nan, inplace=True)

In [17]:
companies_df["city"].value_counts(dropna=True).head(10)

San Francisco    62
New York         53
London           32
Los Angeles      21
Palo Alto        13
San Diego        13
Paris            13
Santa Monica     11
Seattle          11
Bangalore        11
Name: city, dtype: int64

In [18]:
companies_df.groupby("city").sum()["total_money_raised"].sort_values(ascending=False)

city
San Francisco     254178000.0
Los Angeles       111248000.0
Baltimore          86000000.0
New York           76653000.0
Philadelphia       72500000.0
                     ...     
Idukki                    0.0
Hyderabad                 0.0
Hove                      0.0
Houston                   0.0
virginia beach            0.0
Name: total_money_raised, Length: 453, dtype: float64

### Since both San Francisco and New York appear in the top of the list of cities with more design companies and with more gaming companies with higher total money raised we will investigate those 2.

In [19]:
san_fancisco_companies = companies_df[companies_df["city"] == "San Francisco"].dropna(axis=0, subset=["latitude", "longitude"])

In [20]:
new_york_companies = companies_df[companies_df["city"] == "New York"].dropna(axis=0, subset=["latitude", "longitude"])

In [21]:
san_fancisco_companies.to_csv("../data/san_francisco_companies.csv", index=False)
new_york_companies.to_csv("../data/new_york_companies.csv", index=False)

In [22]:
san_fancisco_companies

Unnamed: 0,name,total_money_raised,city,country_code,latitude,longitude
0,Digg,4500000.0,San Francisco,USA,37.764726,-122.394523
4,Kyte,2340000.0,San Francisco,USA,37.788482,-122.409173
9,Ustream,6010000.0,San Francisco,USA,37.392936,-122.07948
12,Revision3,900000.0,San Francisco,USA,37.757758,-122.388243
13,CastTV,310000.0,San Francisco,USA,37.780716,-122.393913
19,hi5,5200000.0,San Francisco,USA,37.788668,-122.400558
20,Curse,1200000.0,San Francisco,USA,37.787092,-122.399972
31,Metacafe,5000000.0,San Francisco,USA,37.437328,-122.159928
34,Kongregate,1900000.0,San Francisco,USA,37.786942,-122.401245
35,DanceJam,450000.0,San Francisco,USA,37.781557,-122.407959
