## Section 1 
### Set up

### Importing all relevant packages

In [27]:
from bs4 import BeautifulSoup as bs
from folium.plugins import HeatMap
import pandas as pd
import requests
import geocoder
import json
import folium
import os
import numpy as np

### Defining methods

In [3]:
def get_long_lat(p_code, max_attempts=10):
    """Geocode a Toronto postal code via ``geocoder.google``.

    The lookup string is ``'<p_code>, Toronto, Ontario'``. The lookup is
    retried while the result's ``latlng`` attribute is ``None``.

    Args:
        p_code: Postal code to look up.
        max_attempts: Upper bound on the number of lookups. Without a bound the
            retry loop never terminates when the service keeps returning no
            coordinates, which hangs the notebook.

    Returns:
        The ``latlng`` value reported by geocoder for the query.

    Raises:
        RuntimeError: If no coordinates were obtained within ``max_attempts``
            lookups.
    """
    query = '{}, Toronto, Ontario'.format(p_code)

    for _ in range(max_attempts):
        lat_lng_coords = geocoder.google(query).latlng
        if lat_lng_coords is not None:
            return lat_lng_coords

    raise RuntimeError(
        "No coordinates returned for {!r} after {} attempts".format(query, max_attempts)
    )

In [4]:
def get_local_places(lat_long, radius=600, limit=16):
    """Query the Foursquare ``venues/explore`` endpoint around a coordinate.

    Args:
        lat_long: Two-item sequence ``(latitude, longitude)``. Items may be
            strings or numbers; they are converted with ``str`` before being
            joined into the ``ll`` parameter.
        radius: Search radius in meters. The previous code sent this value as
            ``DISTANCE``, which is not an explore parameter, so the limit was
            never applied.
        limit: Maximum number of results requested.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    url = 'https://api.foursquare.com/v2/venues/explore'

    params = dict(
        # Credentials come from the environment when set; the placeholders
        # remain as a fallback so existing usage keeps working.
        client_id=os.environ.get('FOURSQUARE_CLIENT_ID', 'XXX'),
        client_secret=os.environ.get('FOURSQUARE_CLIENT_SECRET', 'XXX'),
        v='20180323',
        ll=",".join(str(coord) for coord in lat_long),
        radius=radius,
        limit=limit,
    )

    # A timeout keeps one stalled request from blocking the whole notebook.
    resp = requests.get(url=url, params=params, timeout=30)
    # Fail here with the HTTP status instead of with a KeyError in the caller.
    resp.raise_for_status()

    return resp.json()

In [5]:
def get_save_json_from_poi_id(poi_id, folder_loc=r"C:\Users\twmar\OneDrive\Desktop\can_poi_json"):
    """Return the Foursquare venue details for ``poi_id``, using a file cache.

    If ``<folder_loc>/<poi_id>.json`` already exists it is read and returned
    without calling the API. Otherwise the venue is requested, the response
    is written to that file, and the decoded response is returned.

    Args:
        poi_id: Foursquare venue id.
        folder_loc: Directory holding the cached responses. Created if missing.

    Returns:
        The decoded JSON response (previously ``None`` was returned whenever
        the cache file already existed).

    Raises:
        requests.HTTPError: If the API answers with an error status. Nothing is
            cached in that case, so the venue is retried on the next call
            instead of an error body being stored permanently.
    """
    file_name = os.path.join(folder_loc, f"{poi_id}.json")

    if os.path.exists(file_name):
        with open(file_name) as json_file:
            cached = json.load(json_file)
        # Cache files store the raw response text dumped as a JSON string, so
        # they need a second decode; a plain JSON object is accepted as well.
        return json.loads(cached) if isinstance(cached, str) else cached

    url = f'https://api.foursquare.com/v2/venues/{poi_id}'

    params = dict(
        # Credentials come from the environment when set; the placeholders
        # remain as a fallback so existing usage keeps working.
        client_id=os.environ.get('FOURSQUARE_CLIENT_ID', 'XXX'),
        client_secret=os.environ.get('FOURSQUARE_CLIENT_SECRET', 'XXX'),
        v='20180323',
    )

    resp = requests.get(url=url, params=params, timeout=30)
    resp.raise_for_status()
    data = json.loads(resp.text)

    os.makedirs(folder_loc, exist_ok=True)
    with open(file_name, 'w') as json_file:
        # Keep the existing on-disk format (response text dumped as a JSON
        # string) so previously cached files and their readers stay compatible.
        json.dump(resp.text, json_file)

    return data

### Importing Wikipedia page as a BeautifulSoup object

In [7]:
# Download the Wikipedia page listing the "M" postal codes and parse its HTML.
response = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,  # do not let a stalled connection hang the notebook
)
# Stop on an HTTP error instead of parsing an error page and failing later.
response.raise_for_status()
content = response.text
soup = bs(content, "html.parser")

### Finding the table and all entries of that table

In [8]:
# Locate the postal-code table and collect its rows.
chart = soup.find("table", {"class": "wikitable sortable"})
if chart is None:
    # find() returns None when nothing matches; report that directly rather
    # than failing with an AttributeError on the next line.
    raise ValueError("No table with class 'wikitable sortable' found on the page")
entries = chart.find_all("tr")

### Creating the list, filtering out "Not assigned" entries, and joining duplicate Postal Codes

##### NOTE: The parsing of this data is index-based, so it assumes the columns stay in the same order on Wikipedia. If the column order changes, this code will not work properly.

In [9]:
# Build one [postal code, borough, neighborhood] row per postal code.
entries_list = []
rows_by_code = {}  # postal code -> its row in entries_list, for direct duplicate lookup
for ent in entries[1:]:  # entries[0] is skipped (header row)
    fields = ent.text.strip().split("\n")

    if fields[1] == "Not assigned" and fields[2] == "Not assigned":
        continue
    elif fields[2] == "Not assigned":
        # A borough without a neighborhood uses the borough name for both.
        fields[2] = fields[1]

    existing = rows_by_code.get(fields[0])
    if existing is not None:
        # Same postal code seen before: merge the neighborhood names.
        existing[2] = ", ".join((existing[2], fields[2]))
    else:
        # Append the cleaned row itself. Re-splitting the raw text here would
        # throw away the "Not assigned" replacement made above.
        entries_list.append(fields)
        rows_by_code[fields[0]] = fields

### Creating DataFrame

In [10]:
# Turn the cleaned rows into a three-column frame.
column_names = ["PostalCode", "Borough", "Neighborhood"]
postal_codes = pd.DataFrame(data=entries_list, columns=column_names)

In [11]:
# Display the (rows, columns) size of the postal_codes frame as a sanity check.
postal_codes.shape

(103, 3)

### Adding Latitude and Longitude to postal_codes DataFrame

#### NOTE: The commented-out code tries to get the latitude/longitude data from the geocoder method defined above. Geocoder was not working (it timed out), so the data was imported from a .csv file instead.

In [12]:
# postal_codes.apply(lambda x: pd.Series(get_long_lat(x["PostalCode"]), index=['Latitude', 'Longitude']), axis=1)

In [13]:
# Load the coordinate table from the local CSV file.
geo_csv_path = r"C:\Users\twmar\Downloads\Geospatial_Coordinates.csv"
long_lat_df = pd.read_csv(geo_csv_path)

In [14]:
# Attach the coordinate columns by joining on the postal code.
# After the first run 'PostalCode' is the index rather than a column, so the
# guard makes re-running this cell a no-op instead of raising a KeyError.
if "PostalCode" in postal_codes.columns:
    postal_codes = (
        postal_codes
        .set_index('PostalCode')
        .join(long_lat_df.set_index('Postal Code'))
    )

### Creating Map

In [30]:
# Base map with one clickable circle marker per row of postal_codes.
canada_map = folium.Map(location=[43.70, -79.39], zoom_start=11)

marker_style = dict(
    radius=8,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
)

for _, row in postal_codes.iterrows():
    # The popup shows the neighborhood name(s) for the marker.
    popup = folium.Popup(row["Neighborhood"], parse_html=True)
    marker = folium.CircleMarker(
        [row["Latitude"], row["Longitude"]],
        popup=popup,
        **marker_style,
    )
    marker.add_to(canada_map)

canada_map


## Section 2 

### Introduction

### When choosing a neighborhood to live in, an important factor to consider is the cost of living. While most areas in this city are reachable within a quick bus or car ride, the most convenient places are the ones close by. If a tight budget is required, it would be beneficial to see areas that have more Points of Interest (POIs) with a lower cost. Conversely, if a higher budget can be allocated to food/activities, it would be convenient to be in an area that facilitates a higher quality of life through higher spending on activities.

### Data

### The map above already breaks down the neighborhoods of Toronto, Canada. The data collected to address the point made above comes from Foursquare (https://developer.foursquare.com/). For each of the neighborhoods on the map above, a search will be conducted for Points of Interest in the nearby area. All the points of interest will be queried for their price rating and separated into two lists, cheap and expensive. These points will then be placed on the map to compare neighborhoods surrounded by cheaper options with those surrounded by expensive options.

#### Iterating through postal codes and querying Foursquare for local POIs

In [56]:
# Gather the venues returned for each postal code's coordinates, keyed by
# venue id so a venue returned for several postal codes is stored only once.
all_places = dict()
for _, code in postal_codes.iterrows():
    coords = [str(code["Latitude"]), str(code["Longitude"])]
    nearby = get_local_places(coords)
    for item in nearby["response"]["groups"][0]["items"]:
        venue = item["venue"]
        venue_id = venue["id"]
        venue_lat = venue["location"]["lat"]
        venue_lng = venue["location"]["lng"]
        # setdefault keeps the first coordinates recorded for a venue id.
        all_places.setdefault(venue_id, [venue_lat, venue_lng])

In [57]:
# Number of distinct venue ids collected in all_places.
print(len(all_places))

1121


#### Get info for all POIs and save them to a local JSON file directory (due to the Foursquare query limit on the current developer plan, all query results are saved so that they can be searched once all are collected)

In [16]:
# Fetch (or reuse the cached copy of) the detail record for every venue.
# The return value is not needed here: the call is made for its side effect of
# writing the JSON file, so nothing is bound to a throwaway name.
for poi_id in all_places:
    get_save_json_from_poi_id(poi_id)

NameError: name 'all_places' is not defined

#### Go through each local json file and parse out the price rating as well as latitude and longitude.

In [18]:
# Split the cached venues into two [lat, lng] lists by price tier.
exp_heat_data = []
cheap_heat_data = []
poi_json_dir = r"C:\Users\twmar\OneDrive\Desktop\can_poi_json"

for file in os.listdir(poi_json_dir):
    # Only the per-venue "<id>.json" files are relevant; skip anything else
    # that may be in the folder instead of failing to decode it.
    if not file.endswith(".json"):
        continue

    with open(os.path.join(poi_json_dir, file)) as json_file:
        raw_data = json.load(json_file)
    # The cache stores the response text dumped as a JSON string, which needs
    # a second decode; a plain JSON object is accepted as well.
    if isinstance(raw_data, str):
        raw_data = json.loads(raw_data)

    try:
        venue = raw_data["response"]["venue"]
        lat = venue["location"]["lat"]
        lng = venue["location"]["lng"]
        tier = venue["price"]["tier"]
    except KeyError:
        # Records lacking any of these keys (e.g. no price information) are
        # deliberately left out of both lists.
        continue

    # Tiers above 2 count as expensive, everything else as cheap.
    if tier > 2:
        exp_heat_data.append([lat, lng])
    else:
        cheap_heat_data.append([lat, lng])

exp_heat_data = np.array(exp_heat_data)
cheap_heat_data = np.array(cheap_heat_data)

#### Plot data for cheap options on map

In [32]:
# Overlay the cheap-venue heat layer, write the map to HTML, and display it.
cheap_layer = HeatMap(cheap_heat_data)
cheap_layer.add_to(canada_map)
canada_map.save("cheap_heat.html")
canada_map

#### Reset map

In [20]:
# Rebuild the marker-only map so the next heat layer starts from a clean base.
canada_map = folium.Map(location=[43.70, -79.39], zoom_start=11)

for _, area in postal_codes.iterrows():
    area_popup = folium.Popup(area["Neighborhood"], parse_html=True)
    area_marker = folium.CircleMarker(
        [area["Latitude"], area["Longitude"]],
        radius=8,
        popup=area_popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    )
    area_marker.add_to(canada_map)

#### Plot data for expensive options on map

In [33]:
# Overlay the expensive-venue heat layer, write the map to HTML, and display it.
exp_layer = HeatMap(exp_heat_data)
exp_layer.add_to(canada_map)
canada_map.save("exp_heat.html")
canada_map

## Section 3 

### Results

### The data collected from foursquare of Points of Interest in the area were queried for their price rating, and separated into 2 lists, cheap and expensive. These points were placed on the map to compare neighborhoods which are surrounded by cheaper options from those surrounded by expensive options. 


### When choosing a neighborhood to live in, an important factor to consider is the cost of living. While most areas in this city are reachable within a quick bus or car ride, the most convenient places are the ones close by. If a tight budget is required, it would be beneficial to see areas that have more Points of Interest (POIs) with a lower cost. Conversely, if a higher budget can be allocated to food/activities, it would be convenient to be in an area that facilitates a higher quality of life through higher spending on activities.

### Conclusion

### While there appear to be cheap options throughout the city, you can at least see a few hot spots for higher cost of living POIs. This information would be useful for anyone who is looking for a higher quality of activities. Marketing teams could also find this useful if they are trying to reach a wealthier target audience.