In [1]:
import os
import requests
import json

from dotenv import load_dotenv
import pandas as pd
import numpy as np

from pymongo import MongoClient
from pymongo import GEOSPHERE

In [2]:
# Candidate companies located in San Francisco; later cells read the
# "latitude" and "longitude" columns of this frame.
# NOTE(review): the rendered result at the end of the notebook shows an
# "Unnamed: 0" column, which suggests the CSV was written with its index —
# confirm whether index_col=0 is wanted here.
san_fancisco_companies = pd.read_csv("../data/san_francisco_companies.csv")

In [3]:
# Candidate companies located in New York; later cells read the
# "latitude" and "longitude" columns of this frame.
new_york_companies = pd.read_csv("../data/new_york_companies.csv")

In [4]:
# Load the variables declared in the local .env file into the environment,
# so the Foursquare token can be read with os.getenv in the next cell.
load_dotenv()

True

In [5]:
# Foursquare API token, sent as the "Authorization" header by get_distance.
# os.getenv returns None when "foursquare_token" is not defined.
token_fsq = os.getenv("foursquare_token")

#### The Foursquare filters used are the following:

For the parents

    category = 12056,12058
        12056 - Preschool
        12058 - Elementary School

Travel Airport

    category = 19037
        19037 - Airport terminal

Travel Train

    category = "19022,19047"
        19022 - Platform
        19047 - Rail Station

For dancing

    category = 10032, 10013
        10032 - Night Club
        10013 - Dance Hall

For the maintenance guy

    query = "basketball"
    category = 10051
        10051 - Stadium

For the dog

    category = 11134
        11134 - Pet Grooming Service


Some lettuce for the CEO

    category = 13377
        13377 - Vegan Restaurant

In [6]:
def get_distance(latitude, longitude, query="", category="", limit=1):
    """
        Ask the Foursquare place search for the closest matching place around a point.

        inputs: latitude to search around.
                longitude to search around.
                *optional* query: a kind of place to search for.
                *optional* category: a foursquare category id, or several ids
                                     separated by commas.
                *optional* limit: limit of results to return, 1 by default.

        output: An integer, the "distance" reported by Foursquare for the first
                (closest) result.
                Note: If no location is found it returns 10000.

        raises: requests.HTTPError when Foursquare answers with an error status
                (for example a missing or invalid token), and the usual
                requests exceptions on connection problems or timeout.
    """

    # Let requests build and encode the query string instead of concatenating
    # it by hand, so free-text queries with spaces or symbols are sent intact.
    params = {
        "ll": f"{latitude},{longitude}",
        "sort": "DISTANCE",
        "limit": limit,
    }

    if query != "":
        params["query"] = query

    if category != "":
        # Drop whitespace around each id so "10032, 10013" is sent as "10032,10013".
        params["categories"] = ",".join(
            category_id.strip() for category_id in str(category).split(",")
        )

    headers = {
        "accept": "application/json",
        "Authorization": token_fsq,
    }

    # Doing the call for foursquare; the timeout keeps one stalled request
    # from hanging the whole notebook.
    response = requests.get(
        "https://api.foursquare.com/v3/places/search",
        params=params,
        headers=headers,
        timeout=10,
    )
    # Fail with the real HTTP error instead of a KeyError on "results".
    response.raise_for_status()

    results = response.json().get("results", [])

    if not results:
        return 10000

    return int(results[0]["distance"])

In [7]:
def add_distance_column(dataframe, new_column, query="", category=""):
    """
        Add to a DataFrame the distance from each row to the nearest matching place.

        inputs: dataframe: the DataFrame to extend; its "latitude" and "longitude"
                           columns give the point searched around for every row.
                new_column: the name of the column that will hold the distances.
                *optional* query: a kind of place to search for.
                *optional* category: a foursquare category to search for.

        output: The DataFrame is modified in place: it gains (or overwrites) the
                column new_column with one get_distance result per row.
        returns None
    """
    coordinates = zip(dataframe["latitude"], dataframe["longitude"])

    # One get_distance call per row, in row order.
    dataframe[f"{new_column}"] = [
        get_distance(latitude, longitude, query=query, category=category)
        for latitude, longitude in coordinates
    ]

    return None

####  Add distance to closest school

In [8]:
# Categories 12056 (Preschool) and 12058 (Elementary School).
add_distance_column(
    dataframe=san_fancisco_companies,
    new_column="school_distance",
    category="12056,12058",
)
add_distance_column(
    dataframe=new_york_companies,
    new_column="school_distance",
    category="12056,12058",
)

#### Add distance to closest airport

In [9]:
# Category 19037 (Airport terminal).
add_distance_column(
    dataframe=san_fancisco_companies,
    new_column="airport_distance",
    category="19037",
)
add_distance_column(
    dataframe=new_york_companies,
    new_column="airport_distance",
    category="19037",
)

#### Add distance to closest train station

In [10]:
# Categories 19022 (Platform) and 19047 (Rail Station).
add_distance_column(
    dataframe=san_fancisco_companies,
    new_column="train_distance",
    category="19022,19047",
)
add_distance_column(
    dataframe=new_york_companies,
    new_column="train_distance",
    category="19022,19047",
)

#### Add distance to closest night club

In [11]:
# Categories 10032 (Night Club) and 10013 (Dance Hall).
# The ids are written without whitespace, like every other category list in
# this notebook, so the second id is not sent with a leading space.
add_distance_column(san_fancisco_companies, "club_distance", category="10032,10013")
add_distance_column(new_york_companies, "club_distance", category="10032,10013")

#### Add distance to closest vegan restaurant

In [12]:
# Category 13377 (Vegan Restaurant).
add_distance_column(
    dataframe=san_fancisco_companies,
    new_column="vegan_distance",
    category="13377",
)
add_distance_column(
    dataframe=new_york_companies,
    new_column="vegan_distance",
    category="13377",
)

#### Add distance to basketball stadium

In [13]:
# Category 10051 (Stadium), narrowed with the free-text query "basketball".
add_distance_column(
    dataframe=san_fancisco_companies,
    new_column="stadium_distance",
    query="basketball",
    category="10051",
)
add_distance_column(
    dataframe=new_york_companies,
    new_column="stadium_distance",
    query="basketball",
    category="10051",
)

#### Add distance to pet groomer

In [14]:
# Category 11134 (Pet Grooming Service).
add_distance_column(
    dataframe=san_fancisco_companies,
    new_column="pet_groomer_distance",
    category="11134",
)
add_distance_column(
    dataframe=new_york_companies,
    new_column="pet_groomer_distance",
    category="11134",
)

#### Add location center per city

In [112]:
# Center of each city, taken as the mean coordinate of its candidate companies.
san_fancisco_latitude_mean, san_fancisco_longitude_mean = (
    san_fancisco_companies["latitude"].mean(),
    san_fancisco_companies["longitude"].mean(),
)
new_york_latitude_mean, new_york_longitude_mean = (
    new_york_companies["latitude"].mean(),
    new_york_companies["longitude"].mean(),
)

#### Concatenate DataFrames

In [67]:
# Discard any proximity index already present on the per-city frames; it is
# recomputed further down on the combined frame.
# errors="ignore" keeps this cell re-runnable: it no longer raises KeyError
# when the column is absent (fresh data, or the cell was already executed).
san_fancisco_companies = san_fancisco_companies.drop(columns="proximity_index", errors="ignore")
new_york_companies = new_york_companies.drop(columns="proximity_index", errors="ignore")

In [68]:
# Stack both cities into one frame so distances are normalized on a common scale.
# NOTE(review): the original row labels are kept, so labels can repeat across
# the two cities — confirm that no later step selects rows by label.
candidate_companies = pd.concat(
    objs=[san_fancisco_companies, new_york_companies],
    axis=0,
)

In [79]:
def add_normalized_column(dataframe, column_name):
    """
        Add a copy of a distance column scaled by the column's maximum.

        inputs: a DataFrame
                a column with distances to be normalized
                (distances are expected to be non-negative)

        output: the DataFrame gains (or overwrites) the column
                "<column_name>_normalized", holding each distance divided by
                the largest distance in the column. When the largest distance
                is 0, every normalized value is 0 instead of NaN.

        *returns None
    """
    distances = dataframe[f"{column_name}"]
    max_in_column = distances.max()

    if max_in_column == 0:
        # All distances are 0: dividing would give 0/0 = NaN, which would then
        # propagate into any score computed from this column.
        normalized_distances = distances * 0.0
    else:
        normalized_distances = distances / max_in_column

    # Assign the raw values so rows are matched by position; the index labels
    # of the frame do not need to be unique.
    dataframe[f"{column_name}_normalized"] = normalized_distances.to_numpy()

    return None

In [85]:
# Names of the distance columns created above, one per kind of place.
distances_list = [
    f"{place}_distance"
    for place in (
        "school", "airport", "train",
        "club", "vegan", "stadium",
        "pet_groomer",
    )
]

In [86]:
# Add a "<name>_normalized" column to the combined frame for every distance column.
for distance in distances_list:
    add_normalized_column(
        dataframe=candidate_companies,
        column_name=distance,
    )

In [119]:
def calculate_proximity_index(dataframe):
    """
        Score every row by how close it is to the places of interest.

        input: a DataFrame holding the seven "*_distance_normalized" columns
               listed in the weight table below.
        output: the proximity index of each row: 100 minus the weighted sum of
                the normalized distances expressed in percent, rounded to two
                decimals. Smaller distances give a higher index.
                The input DataFrame is not modified.
    """
    # Weight given to each normalized distance.
    weights = (
        ("school_distance_normalized", 0.25),
        ("airport_distance_normalized", 0.10),
        ("train_distance_normalized", 0.25),
        ("club_distance_normalized", 0.20),
        ("vegan_distance_normalized", 0.1),
        ("stadium_distance_normalized", 0.05),
        ("pet_groomer_distance_normalized", 0.05),
    )

    weighted_distance = None
    for column, weight in weights:
        term = weight * dataframe[column]
        # Accumulate left to right, in the order of the table.
        weighted_distance = term if weighted_distance is None else weighted_distance + term

    score = 100 - weighted_distance * 100
    return round(score, 2)

#### Calculating the proximity index

In [None]:
# Store the score returned by calculate_proximity_index (100 minus the weighted
# normalized distances, in percent) as a new column of the combined frame.
candidate_companies["proximity_index"] = calculate_proximity_index(candidate_companies)

In [123]:
# Show the row(s) holding the highest proximity index.
candidate_companies.loc[
    candidate_companies["proximity_index"].eq(candidate_companies["proximity_index"].max())
]

Unnamed: 0,name,total_money_raised,city,country_code,latitude,longitude,school_distance,airport_distance,train_distance,club_distance,...,stadium_distance,pet_groomer_distance,school_distance_normalized,airport_distance_normalized,train_distance_normalized,club_distance_normalized,vegan_distance_normalized,stadium_distance_normalized,pet_groomer_distance_normalized,proximity_index
37,Ramp Up Tech,0.0,New York,USA,40.746514,-73.993035,674,4454,338,226,...,472,376,0.085685,0.291759,0.036042,0.024961,0.0084,0.055412,0.029898,93.03


### The best location for the company is (40.746514, -73.993035) in New York