In [10]:
#Import needed libraries 

In [11]:
import requests
import json
import pandas as pd
from getpass import getpass

In [12]:
# Open a connection to the local MongoDB server and keep a handle on the
# "companies" collection of the "London" database.

import pymongo

client = pymongo.MongoClient("localhost:27017")
db = client.get_database("London")
c = db["companies"]  # collection handle queried by the cells below

In [13]:
# Count how often each office city appears across all companies, to pick a
# city that has plenty of data to work with.

# Only the office city names are needed, so everything else is projected away.
list_ = list(c.find(projection={"_id": 0, "offices.city": 1}))

# .get() keeps the tally from crashing on a company without an "offices"
# field or an office without a "city" field (same defensive access as the
# finalists DataFrame cell). Missing cities come through as None, which
# value_counts() leaves out of the result by default.
city_count = [
    office.get("city")
    for company in list_
    for office in (company.get("offices") or [])
]

# NOTE: blank city names ("") are still counted as their own entry.
count = pd.Series(city_count).value_counts()
print("City Count")
print(count.head())

City Count
San Francisco    906
New York         837
                 646
London           616
Seattle          221
Name: count, dtype: int64


In [14]:
# Tally the companies per category to see which categories we can filter on.

# Only the category code is needed from each company document.
list_ = list(c.find(projection={"_id": 0, "category_code": 1}))

# .get() tolerates documents that lack a "category_code" field; they come
# through as None, which value_counts() leaves out of the result by default.
category_count = [company.get("category_code") for company in list_]

count = pd.Series(category_count).value_counts()

# Display the tally so the category choice below can be checked against it.
count

##Decision: Using the design and games_video categories

In [15]:
# List every company in the "design" category together with its office cities.
condition_1 = {"category_code": "design"}
projection = {
    "_id": 0,
    "name": 1,
    "category_code": 1,
    "offices.city": 1,
}
list(c.find(filter=condition_1, projection=projection))

[{'name': '99designs',
  'category_code': 'design',
  'offices': [{'city': 'San Francisco'},
   {'city': 'Collingwood'},
   {'city': 'Berlin'}]},
 {'name': 'Graticle',
  'category_code': 'design',
  'offices': [{'city': 'Ellensburg'}]},
 {'name': 'Kickstarter',
  'category_code': 'design',
  'offices': [{'city': 'Brooklyn'}]},
 {'name': 'Moonfruit',
  'category_code': 'design',
  'offices': [{'city': 'London'}]}]

In [16]:
# Check that design and games_video companies are spread over cities in a
# similar way to all companies --> NY, SF and London as finalists.
condition_2 = {"category_code": {"$in": ["design", "games_video"]}}
projection = {"_id": 0, "name": 1, "category_code": 1, "offices.city": 1}
category_list = list(c.find(condition_2, projection))

# .get() keeps the tally from crashing on a company without an "offices"
# field or an office without a "city" field (same defensive access as the
# finalists DataFrame cell). Missing cities come through as None, which
# value_counts() leaves out of the result by default.
category_city = [
    office.get("city")
    for company in category_list
    for office in (company.get("offices") or [])
]

# NOTE: blank city names ("") are still counted as their own entry.
count = pd.Series(category_city).value_counts()
print("Category Count")
print(count.head())

Category Count
New York         75
San Francisco    69
                 54
London           37
Los Angeles      31
Name: count, dtype: int64


In [17]:
# Build the shortlist: design / games_video companies that have an office in a
# city named "London" and a funding round with raised_amount >= 1,000,000.
condition_category = {"category_code": {"$in": ["design", "games_video"]}}
condition_city = {"offices.city": "London"}
condition_raised_amount = {"funding_rounds.raised_amount": {"$gte": 1000000}}

# Keep only the fields used to describe and locate each candidate.
projection = {
    "_id": 0,
    "name": 1,
    "category_code": 1,
    "offices.city": 1,
    "funding_rounds.raised_amount": 1,
    "founded_year": 1,
    "offices.longitude": 1,
    "offices.latitude": 1,
}

# All three conditions must hold; results are sorted by founded_year, descending.
cursor = c.find(
    {"$and": [condition_category, condition_city, condition_raised_amount]},
    projection,
)
finalists = list(cursor.sort("founded_year", -1))
finalists

[{'name': 'Babelgum',
  'category_code': 'games_video',
  'founded_year': 2007,
  'funding_rounds': [{'raised_amount': 13200000}],
  'offices': [{'city': 'London',
    'latitude': 53.344104,
    'longitude': -6.267494}]},
 {'name': 'Playfish',
  'category_code': 'games_video',
  'founded_year': 2007,
  'funding_rounds': [{'raised_amount': 3000000},
   {'raised_amount': 1000000},
   {'raised_amount': 17000000}],
  'offices': [{'city': 'London',
    'latitude': 51.4991094,
    'longitude': -0.1984802},
   {'city': 'TromsÃ¸', 'latitude': 69.636216, 'longitude': 18.931062}]},
 {'name': 'Pikum',
  'category_code': 'games_video',
  'founded_year': 2007,
  'funding_rounds': [{'raised_amount': 650000}, {'raised_amount': 5270000}],
  'offices': [{'city': 'London',
    'latitude': 51.539778,
    'longitude': -0.152998}]},
 {'name': 'MUBI',
  'category_code': 'games_video',
  'founded_year': 2007,
  'funding_rounds': [{'raised_amount': 750000},
   {'raised_amount': 2400000},
   {'raised_amount': 

In [18]:
# Table of the finalists' London offices (one row per office with known
# coordinates) that might be a good fit for the company offices.
data = []
for company in finalists:
    company_name, company_category = company["name"], company["category_code"]
    for office in company.get("offices", []):
        # Skip offices that are not in a city named "London".
        if office.get("city", "") != "London":
            continue
        lat, lon = office.get("latitude", None), office.get("longitude", None)
        # Skip offices without both coordinates.
        if lat is None or lon is None:
            continue
        data.append(
            {
                "Company": company_name,
                "Latitude": lat,
                "Longitude": lon,
                "Category": company_category,
            }
        )

df = pd.DataFrame(data)
print(df.to_string(index=False))

     Company  Latitude  Longitude    Category
    Babelgum 53.344104  -6.267494 games_video
    Playfish 51.499109  -0.198480 games_video
       Pikum 51.539778  -0.152998 games_video
     WorldTV 51.500152  -0.126236 games_video
Wazoo Sports 37.128600 -84.083400 games_video


In [19]:
#Babelgum in Ireland
#WorldTV and Kublax same location
#Wazoo Sports is in the USA (its coordinates point to London, Kentucky, not London, UK)

#Chosen companies: 
    #Playfish
    #Pikum
    #WorldTV
    

In [20]:
#The companies are located in Camden, Kensington and Westminster; we will check their surroundings to decide.