In [1]:
 # SETUP NEEDED LIBS

import requests
from pymongo import MongoClient
import pandas as pd
import os
import src.transforming as transf
import matplotlib.pyplot as plt
import seaborn as sns


#### 1. Connect to dataset **companies** stored in MongoDB

In [2]:
# 1.1 Open the local MongoDB server and grab the "Companies" collection
# of the "IronHack" database.
MONGO_ADDRESS = "localhost:27017"
DATABASE_NAME = "IronHack"
COLLECTION_NAME = "Companies"

client = MongoClient(MONGO_ADDRESS)
db = client.get_database(DATABASE_NAME)
companies = db.get_collection(COLLECTION_NAME)



In [3]:
# 2. Build the working set: startups that raised more than 1,000,000.
# Developer companies are discarded because I did not find a match between them;
# online channels are a good option to talk about or discuss development.

# Both conditions must hold:
#   - the funding string contains an "M" or a "B" (e.g. "$4.8M"),
#   - the tag list contains "start". Note that this substring also matches
#     tags such as "startpage", not only "startup"/"start-up".
filters = {
    "$and": [
        {"total_money_raised": {"$regex": ".*[MB].*"}},
        {"tag_list": {"$regex": ".*start.*"}},
    ]
}

df_context = transf.filtering_companies(companies, filters)

# Drop the companies without coordinates and renumber the remaining rows from zero.
df_companies = df_context.dropna(subset=["lat"]).reset_index(drop=True)

df_companies





Unnamed: 0,name,category_code,number_of_employees,founded_year,tag_list,total_money_raised,city1,city2,country,lat,lon
0,Getyoo,mobile,10.0,2009,"social-networks, networking, web, real-life, w...",€1.13M,Brussels,,,50.83997,4.346472
1,Samasource,enterprise,25.0,2008,"outsourcing, crowdsourcing, non-profit-outsour...",$1.21M,San Francisco,,,37.778991,-122.401803
2,Echo,enterprise,50.0,2007,"platform, real-time, website, digital, social-...",$4.8M,San francisco,,,38.072936,-122.187963
3,Seedcamp,finance,4.0,2007,"seedcamp, business, startup, incubator, entrep...",€5M,London,,,51.51088,-0.141897
4,Idea Shower,public_relations,1.0,2007,"ideas, startups, web-development, programming",$2.3M,Minneapolis,,,44.977482,-93.264351
5,Pageflakes,web,20.0,2006,"homepage, ajaxhomepage, ajax, startpage",$4.1M,San Francisco,,,37.758113,-122.414689
6,Netvibes,web,35.0,2005,"homepage, ajaxhomepage, ajax, startpage, pagef...",$16M,Paris,San Francisco,USA,48.870806,2.34668
7,Moviestorm,software,10.0,2005,"animation, start-up, consumer-software, cambridge",£4.3M,Cambridge,,,52.197471,0.130588
8,Kareo,software,250.0,2004,"vc-firm, startup, cloud-based-solutions, healt...",$51.4M,Irvine,,,33.676444,-117.8587
9,SecondMarket,finance,140.0,2004,"private-company-stock, auction-rate-securities...",$34.2M,New York,San Francsico,USA,40.705439,-74.012822


#### The base dataset is now built and ready to be explored

#### **Query Foursquare for nearby services that match the project conditions**

In [None]:
# Fetch every required topic from Foursquare so we can check which of the
# filtered companies is best positioned to meet the project conditions.
topics = [
    "Elementary School",
    "Airport Terminal",
    "Basketball Court",
    "Starbucks",
    "Coffee Shop",
    "Vegan Restaurant",
    "Bar",
    "pets",
]

result = transf.get_topics(df_context, topics)



In [None]:
# Persist the Foursquare results. The row index is written too (pandas default),
# so it comes back as an extra "Unnamed: 0" column when the file is re-read.
result.to_csv(
    path_or_buf="../project-III-geospatial-data/data/from_f4s.csv",
    index=True,
)

#### Checkpoint: all Foursquare (FSQ) results are saved to a CSV file so the API does not need to be queried again, which avoids the risk of being blocked.

In [4]:
# Reload the Foursquare checkpoint instead of querying the API again.
result = pd.read_csv(
    filepath_or_buffer="../project-III-geospatial-data/data/from_f4s.csv"
)

In [5]:
# Foursquare returns venues under heterogeneous categories, so normalise them:
# build one subset per topic, relabel where needed, and concatenate everything
# into a single frame holding all relevant venues around the candidate companies.


def _venues_matching(venues, column, pattern, label=None):
    """Return an independent copy of the rows whose `column` contains `pattern`.

    Parameters
    ----------
    venues : pandas.DataFrame
        Venue table; must contain `column` (and "Category" when `label` is set).
    column : str
        Name of the text column to search.
    pattern : str
        Pattern searched case-insensitively anywhere in the value.
    label : str, optional
        When given, the "Category" of every selected row is overwritten with it.

    Returns
    -------
    pandas.DataFrame
        The selected rows, safe to modify without touching `venues`.
    """
    # na=False: rows with a missing value simply do not match, instead of
    # producing a NaN mask that cannot be used for boolean indexing.
    mask = venues[column].str.contains(pattern, case=False, na=False)
    # Explicit copy so the relabelling below never writes into a view of
    # `venues` (this is what raised SettingWithCopyWarning before).
    subset = venues[mask].copy()
    if label is not None:
        subset["Category"] = label
    return subset


# Schools are selected by exact category name; everything else by substring.
rs_school = result[result["Category"] == 'Elementary School']
rs_vegan = _venues_matching(result, "Category", "vegan")
# Starbucks is recognised by venue name, then given its own category.
rs_starbucks = _venues_matching(result, "Name", "starbucks", label="Starbucks")
rs_basket = _venues_matching(result, "Category", "basketball", label="Basket")
rs_airport = _venues_matching(result, "Category", "Airport Terminal")
# NOTE(review): the substring "bar" matches any category containing those
# letters (it would also match e.g. "Barbershop") - confirm this is intended.
rs_bars = _venues_matching(result, "Category", "bar", label="Bar")
rs_pets = _venues_matching(result, "Category", "grooming")

df_final = pd.concat(
    [rs_school, rs_vegan, rs_starbucks, rs_basket, rs_airport, rs_bars, rs_pets],
    ignore_index=True,
)



In [6]:
# Inspect the combined frame, then remove the "Unnamed: 0" column
# (presumably the index that was written to the CSV checkpoint).
df_final.info()
# errors="ignore" keeps the cell re-runnable: once the column is gone a second
# execution is a no-op instead of raising KeyError.
df_final = df_final.drop(columns=['Unnamed: 0'], errors="ignore")
df_final



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 926 entries, 0 to 925
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  926 non-null    int64  
 1   Name        926 non-null    object 
 2   Lat         926 non-null    float64
 3   Long        926 non-null    float64
 4   Distance    926 non-null    float64
 5   Category    926 non-null    object 
 6   Company     926 non-null    object 
 7   City        926 non-null    object 
dtypes: float64(3), int64(1), object(4)
memory usage: 58.0+ KB


Unnamed: 0,Name,Lat,Long,Distance,Category,Company,City
0,Basisschool St-Jan Berchmanscollege,50.841114,4.345791,143.0,Elementary School,Getyoo,Brussels
1,Vrije Basisschool - Sint-Joris,50.838791,4.342871,255.0,Elementary School,Getyoo,Brussels
2,Ecole d apllication charles buls,50.838187,4.342025,369.0,Elementary School,Getyoo,Brussels
3,Sint-Joris Basisschool,50.844420,4.348585,505.0,Elementary School,Getyoo,Brussels
4,Bsgo De Bron,50.834831,4.340237,719.0,Elementary School,Getyoo,Brussels
...,...,...,...,...,...,...,...
921,Woof in Boots | Doggy Day Care | Boarding | Gr...,39.729080,-104.996898,1967.0,Pet Grooming Service,Stratavia,Denver
922,Urban Dogg,39.746304,-104.978933,1009.0,Pet Grooming Service,Stratavia,Denver
923,Kriser's Natural Pet,39.756777,-104.999448,1346.0,Pet Grooming Service,Stratavia,Denver
924,Peaceful Touch Pet Salon and Massage,39.740222,-104.971794,1780.0,Pet Grooming Service,Stratavia,Denver


In [7]:
# Cross-tabulate venue counts (rows: category, columns: company) to see which
# candidate best meets the expected conditions.
pivot_company = pd.crosstab(
    index=df_final['Category'],
    columns=df_final['Company'],
    margins=False,
    margins_name="Total",
)
pivot_company


Company,Echo,Getyoo,Idea Shower,Kareo,Moviestorm,Netvibes,Pageflakes,Samasource,SecondMarket,Seedcamp,Stratavia
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Airport Terminal,0,0,1,6,0,0,0,0,0,0,0
Bar,1,41,26,9,17,34,27,17,22,23,36
Basket,0,2,4,0,1,2,3,2,8,2,0
Elementary School,6,47,22,6,14,50,50,50,43,32,24
Pet Grooming Service,0,1,4,0,0,2,9,10,8,1,5
Starbucks,9,12,17,18,6,40,9,39,34,29,21
Vegan and Vegetarian Restaurant,0,5,0,1,4,8,0,1,1,4,0


In [8]:
# The pivot table shows that Idea Shower (Minneapolis, USA) is a good candidate
# to cover our conditions, so keep only its row from the company table.
is_candidate = df_companies["name"] == 'Idea Shower'
df_candidate = df_companies[is_candidate]
df_candidate

Unnamed: 0,name,category_code,number_of_employees,founded_year,tag_list,total_money_raised,city1,city2,country,lat,lon
4,Idea Shower,public_relations,1.0,2007,"ideas, startups, web-development, programming",$2.3M,Minneapolis,,,44.977482,-93.264351


In [9]:
# Keep only the venues that were returned for our candidate company.
belongs_to_candidate = df_final['Company'] == "Idea Shower"
df_surround_candidate = df_final.loc[belongs_to_candidate]

# All venues close to this company (radius of 2500 m, per the author's note).
df_surround_candidate

Unnamed: 0,Name,Lat,Long,Distance,Category,Company,City
135,Emerson Elementary School,44.968294,-93.280364,1581.0,Elementary School,Idea Shower,Minneapolis
136,Hennepin Elementary School,44.961302,-93.271211,1902.0,Elementary School,Idea Shower,Minneapolis
137,Minneapolis Public Schools,44.981505,-93.262388,472.0,Elementary School,Idea Shower,Minneapolis
138,Downtown Open Elementary School,44.975665,-93.269897,480.0,Elementary School,Idea Shower,Minneapolis
139,Religion Information Resources,44.972300,-93.260737,642.0,Elementary School,Idea Shower,Minneapolis
...,...,...,...,...,...,...,...
764,Aster Cafe,44.984604,-93.255118,1077.0,Bar,Idea Shower,Minneapolis
898,Downtown Dogs,44.978620,-93.290687,2107.0,Pet Grooming Service,Idea Shower,Minneapolis
899,City Paws Pet Club,44.980918,-93.262398,394.0,Pet Grooming Service,Idea Shower,Minneapolis
900,The Good Pet Groomer,44.962471,-93.277028,1942.0,Pet Grooming Service,Idea Shower,Minneapolis


## LET'S PLOT

In [14]:
import folium
from folium import Choropleth, Circle, Marker, Icon, Map, TileLayer
from folium.plugins import HeatMap, MarkerCluster
import geopandas as gpd
import json


In [11]:
# Export the candidate's surrounding venues through the project helper.
# Presumably `create_geojson` writes the frame as GeoJSON to the given path -
# confirm against src/transforming.
transf.create_geojson(df_surround_candidate, '../project-III-geospatial-data/data/all_items_pivot.geojson')


In [12]:
# Read the exported GeoJSON back with pandas for a quick look at its content.
geo_json = "../project-III-geospatial-data/data/all_items_pivot.geojson"
df = pd.read_json(path_or_buf=geo_json)
df


Unnamed: 0,type,features
0,FeatureCollection,"{'type': 'Feature', 'properties': {'Name': 'Em..."
1,FeatureCollection,"{'type': 'Feature', 'properties': {'Name': 'He..."
2,FeatureCollection,"{'type': 'Feature', 'properties': {'Name': 'Mi..."
3,FeatureCollection,"{'type': 'Feature', 'properties': {'Name': 'Do..."
4,FeatureCollection,"{'type': 'Feature', 'properties': {'Name': 'Re..."
...,...,...
69,FeatureCollection,"{'type': 'Feature', 'properties': {'Name': 'As..."
70,FeatureCollection,"{'type': 'Feature', 'properties': {'Name': 'Do..."
71,FeatureCollection,"{'type': 'Feature', 'properties': {'Name': 'Ci..."
72,FeatureCollection,"{'type': 'Feature', 'properties': {'Name': 'Th..."


In [36]:
# Quick sanity check of the Starbucks subset. `rs_starbucks` is selected from
# `result` by venue name only, so it covers every company, not just the candidate.
rs_starbucks.info()

<class 'pandas.core.frame.DataFrame'>
Index: 234 entries, 769 to 2293
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  234 non-null    int64  
 1   Name        234 non-null    object 
 2   Lat         234 non-null    float64
 3   Long        234 non-null    float64
 4   Distance    234 non-null    float64
 5   Category    234 non-null    object 
 6   Company     234 non-null    object 
 7   City        234 non-null    object 
dtypes: float64(3), int64(1), object(4)
memory usage: 16.5+ KB


#### MAP SURROUNDING TARGET

In [117]:
# Overview map: centred on the mean position of all venues, with one ring per
# company and one bubble per venue.
v_lat = df_final["Lat"].mean()
v_lon = df_final["Long"].mean()
map_all = Map(location=[v_lat, v_lon], zoom_start=2.0)

# Ring drawn around every company (folium.Circle radius; see folium docs for units).
company_ring_style = dict(
    radius=2000,
    popup='Range distance',
    color="#e74c3c",       # red outline
    fill_color="#27ae60",  # green fill
    fill_opacity=0.2,
)
for company_lat, company_lon in zip(df_companies["lat"], df_companies["lon"]):
    folium.Circle([company_lat, company_lon], **company_ring_style).add_to(map_all)

# Bubble drawn for every venue, whatever its category.
venue_bubble_style = dict(
    radius=10,
    popup='Candidates location',
    color="#e74c3c",
    fill_color="#cc99ff",
    fill_opacity=0.7,
)
for venue_lat, venue_lon in zip(df_final["Lat"], df_final["Long"]):
    folium.CircleMarker([venue_lat, venue_lon], **venue_bubble_style).add_to(map_all)

folium.TileLayer('cartodbpositron').add_to(map_all)

map_all

In [114]:
# Detailed map around the chosen candidate: the office marker, two range
# circles, the nearby venues grouped by category, and the airport.


def _add_venue_markers(venues, target_map, **icon_style):
    """Add one marker per row of `venues` to `target_map`.

    Parameters
    ----------
    venues : pandas.DataFrame
        Must provide the "Lat", "Long" and "Name" columns.
    target_map : folium.Map
        Map that receives the markers.
    **icon_style
        Keyword arguments forwarded to ``folium.Icon`` (color, prefix, icon).
    """
    for lat, lon, name in zip(venues["Lat"], venues["Long"], venues["Name"]):
        folium.Marker(
            location=[lat, lon],
            popup=name,
            icon=folium.Icon(**icon_style),
        ).add_to(target_map)


v_lat = df_candidate.lat.values[0]
v_lon = df_candidate.lon.values[0]
candidate_name = df_candidate["name"].values[0]

map_start = Map(location=[v_lat, v_lon], zoom_start=15)
candidate = Marker(location=[v_lat, v_lon], tooltip="IDEA SHOWER - New office!")

# Outer circle: the wider range around the office.
circle = folium.Circle(
    [v_lat, v_lon],
    radius=2500,
    popup='Range distance',
    color="#e74c3c",
    fill_color="#cc99ff",
    fill_opacity=0.2,
)
# Inner circle: the immediate surroundings of the office.
circle_center = folium.Circle(
    [v_lat, v_lon],
    radius=500,
    popup='Range distance',
    color="#ffffff",
    fill_color="#ffff99",
    fill_opacity=0.5,
)
circle.add_to(map_start)
circle_center.add_to(map_start)
candidate.add_to(map_start)

# One entry per category: (venues, icon style). Order is the drawing order.
venue_layers = [
    (rs_basket, dict(color='orange', prefix="fa", icon='futbol')),
    # Icon name had a trailing space ('scissors '); use the clean name.
    (rs_pets, dict(color='pink', prefix="fa", icon='scissors')),
    (rs_bars, dict(color='black', prefix="fa", icon='beer')),
    (rs_school, dict(color='red', prefix="fa", icon='graduation-cap')),
    (rs_starbucks, dict(color='green', icon='star')),
    (rs_vegan, dict(color='red', prefix='fa', icon='pied-piper')),
]

for venues, icon_style in venue_layers:
    # The rs_* frames hold the venues of every company; only the candidate's
    # venues belong on this map, so filter before adding markers.
    nearby = venues[venues["Company"] == candidate_name]
    _add_venue_markers(nearby, map_start, **icon_style)

# The airport is placed by hand from fixed coordinates.
airport_location = (44.886401125849154, -93.21280029988941)
folium.Marker(
    location=list(airport_location),
    popup="Aeropuerto Internacional de Mineápolis-Saint Paul",
    icon=folium.Icon(color='purple', prefix='fa', icon='plane'),
).add_to(map_start)

# NOTE(review): the line starts at (44.968294, -93.280364), which is the
# position listed for "Emerson Elementary School", not the office at
# (v_lat, v_lon) - confirm which start point is intended.
folium.PolyLine([(44.968294, -93.280364), airport_location]).add_to(map_start)

# NOTE(review): recent folium releases reportedly no longer ship the Stamen
# tile sets - confirm 'Stamen Watercolor' still renders with the installed version.
folium.TileLayer('Stamen Watercolor').add_to(map_start)

map_start