In [1]:
import pandas as pd
import src.search_functions as sf
import src.cleaning as cl
import src.visualization as vz

In [2]:
# available conditions to search for:

num_emjpl = {"number_of_employees": {"$lt": 10000}}
founded_year = {"founded_year": {"$gt": 2007}}
money_raised_gt1M = {"total_money_raised": {"$regex": ".*[MB].*"}}
games_video = {"category_code": "games_video"}
web = {"category_code": "web"}
#web_and_games = {"category_code": ["games_video", "web"]}

In [3]:
q1_web_startups =sf.mongo_search(founded_year, money_raised_gt1M, web)
q1_web_startups.head(2)


Unnamed: 0,name,category_code,number_of_employees,founded_year,total_money_raised,city1,city2,lat,lon
0,Fixya,web,30.0,2013,$8M,San Mateo,,37.566879,-122.323895
42,Tongxue,web,,2013,$6M,,,,


In [4]:
q1_games_startups =sf.mongo_search(founded_year, money_raised_gt1M, games_video)
q1_games_startups.head(2)

Unnamed: 0,name,category_code,number_of_employees,founded_year,total_money_raised,city1,city2,lat,lon
1,Social Gaming Network,games_video,100.0,2011,$17.1M,Los Angeles,Beverly Hills,37.446823,-122.161523
6,Titan Gaming,games_video,18.0,2010,$1M,Santa Monica,,53.544711,-113.515769


In [5]:
startup_top10_cities = cl.merge_and_get_top10_cities(q1_web_startups,q1_games_startups)

In [6]:
startup_top10_cities

Unnamed: 0,City,Count
0,San Francisco,14
1,New York,12
2,Palo Alto,5
3,London,4
4,San Mateo,3
5,Los Angeles,3
6,Menlo Park,3
7,Tel Aviv,3
8,Istanbul,3
9,South Pasadena,2


In [7]:
q2_game =sf.mongo_search(games_video)
q2_web =sf.mongo_search(web)
top10_cities=cl.merge_and_get_top10_cities(q2_game,q2_web)

In [8]:
cities = cl.merge_cities(startup_top10_cities,top10_cities)
cities

Unnamed: 0,City,Count_startups,Count_comp
0,San Francisco,14,274
1,New York,12,258
2,Palo Alto,5,64
3,London,4,140
4,Los Angeles,3,90


#### Finding the average coordinates from all games and video companies in a City which are 75km from city centre

In [9]:
SF_avg_coor = cl.get_avg_coordinates('San Francisco', 37.7749, -122.4194, q1_web_startups, q1_games_startups,q2_game,q2_web)
SF_avg_coor

(37.761929979999955, -122.39218991688902)

In [10]:
NY_avg_coor = cl.get_avg_coordinates ( 'New York',40.7128,-74.0060,q1_web_startups, q1_games_startups,q2_game,q2_web)
NY_avg_coor

(40.74314475163043, -73.98603873478262)

In [11]:
PA_avg_coor = cl.get_avg_coordinates ( 'Palo Alto',  37.4419,-122.1430,q1_web_startups, q1_games_startups,q2_game,q2_web)
PA_avg_coor

(37.453118378947366, -122.15274914035086)

In [12]:
LND_avg_coor = cl.get_avg_coordinates ( 'London',51.5074,-0.1278,q1_web_startups, q1_games_startups,q2_game,q2_web)
LND_avg_coor

(51.51345527285715, -0.1320104214285714)

In [13]:
LA_coord = cl.get_avg_coordinates ( 'Los Angeles', 34.0522,-118.2437,q1_web_startups, q1_games_startups,q2_game,q2_web)
LA_coord

(34.04855827746479, -118.35274467887322)

#### Finding the coordinates from all games and video companies in a City which are 75km from city centre 

In [14]:
SF_coord=cl.df_city_coordinates('San Francisco', 37.7749, -122.4194,q1_web_startups, q1_games_startups,q2_game,q2_web)

In [15]:
NY_coord=cl.df_city_coordinates( 'New York',40.7128,-74.0060,q1_web_startups, q1_games_startups,q2_game,q2_web)

In [16]:
PA_coord=cl.df_city_coordinates( 'Palo Alto',  37.4419,-122.1430,q1_web_startups, q1_games_startups,q2_game,q2_web)

In [17]:
LND_coord=cl.df_city_coordinates( 'London',51.5074,-0.1278,q1_web_startups, q1_games_startups,q2_game,q2_web)

In [18]:
LA_coord=cl.df_city_coordinates( 'Los Angeles', 34.0522,-118.2437,q1_web_startups, q1_games_startups,q2_game,q2_web)

#### Creating visual maps 

In [19]:
SF_map = vz.crear_mapa (37.761929979999955,-122.39218991688902, SF_coord,zoom = 13)
SF_map

In [20]:
NY_map = vz.crear_mapa (40.74314475163043, -73.98603873478262, NY_coord,zoom = 13)
NY_map

In [21]:
PA_map = vz.crear_mapa ( 37.453118378947366,  -122.15274914035086, PA_coord,zoom = 13)

In [22]:
LND_map = vz.crear_mapa (51.51689597746479, -0.1473155197183098, LND_coord,zoom = 13)

In [23]:
STB_SF = sf.requests_for_foursquare ('Starbucks', 37.761929979999955,-122.39218991688902, radius=1000, limit=5)

#### Quering for the rest of the data:
    first group: looking for those that should be closer than 500m
    1- vegan restaurants
    2- distance to the airport
    3- shcools
    4- bars
    5- Karaokes 
    6- dog hairdresser


In [24]:
queries = ('Starbucks','vegan restaurant','school', 'Pet Grooming', 'bar','karaoke')

In [25]:
SF_queries = sf.queries_for_a_city (37.761929979999955, -122.39218991688902, 'Starbucks','vegan restaurant','school', 'Pet Grooming', 'bar','karaoke', 'Basket Stadium')

In [26]:
NY_queries = sf.queries_for_a_city (40.74314475163043, -73.98603873478262, 'Starbucks','vegan restaurant','school', 'Pet Grooming', 'bar','karaoke', 'Basket Stadium')

In [27]:
PA_queries = sf.queries_for_a_city (37.453118378947366, -122.15274914035086, 'Starbucks','vegan restaurant','school', 'Pet Grooming', 'bar','karaoke')

In [28]:
LND_queries = sf.queries_for_a_city (51.51689597746479, -0.1473155197183098, 'Starbucks', 'Pet Grooming','vegan restaurant','Middle School', 'Elementary School','bar','karaoke')

In [29]:
LA_queries = sf.queries_for_a_city (34.04855827746479, -118.35274467887322, 'Starbucks','vegan restaurant','school', 'Pet Grooming', 'bar','karaoke')


Now summarizing above queries in df to see average distance of each available category

In [30]:
SF_venues = cl.distance_venue_city (queries, SF_queries)
SF_venues

Unnamed: 0_level_0,avg_dist,category_qty
category,Unnamed: 1_level_1,Unnamed: 2_level_1
Pet Grooming,344.6,5
bar,117.0,5
school,256.2,5
vegan restaurant,417.5,4


In [31]:
NY_venues = cl.distance_venue_city (queries, NY_queries)
NY_venues

Unnamed: 0_level_0,avg_dist,category_qty
category,Unnamed: 1_level_1,Unnamed: 2_level_1
Basket Stadium,304.4,5
Pet Grooming,136.8,5
Starbucks,271.8,5
bar,68.6,5
karaoke,336.2,5
school,112.0,5
vegan restaurant,175.0,5


In [32]:
PA_venues = cl.distance_venue_city (queries, PA_queries)
PA_venues

Unnamed: 0_level_0,avg_dist,category_qty
category,Unnamed: 1_level_1,Unnamed: 2_level_1
bar,459.0,2


In [33]:
LND_venues = cl.distance_venue_city (queries, LND_queries)
LND_venues

Unnamed: 0_level_0,avg_dist,category_qty
category,Unnamed: 1_level_1,Unnamed: 2_level_1
Pet Grooming,93.6,5
Starbucks,321.6,5
bar,71.0,5
karaoke,392.333333,3
school,333.333333,3
vegan restaurant,260.8,5


In [34]:
LA_venues = cl.distance_venue_city (queries, LA_queries)
LA_venues

Unnamed: 0_level_0,avg_dist,category_qty
category,Unnamed: 1_level_1,Unnamed: 2_level_1
Pet Grooming,202.0,1
bar,83.8,5
school,227.6,5
vegan restaurant,97.0,1


Palo Alto is not longer an option as it lacks many of the requirements as well as LA

In a second step we search for the distance to a Basketball stadium and we accept a distance up to 10km for the 3 remaining cities