# Analysis of Gas Prices in the GTA

In [16]:
"""
Analysis of Gas Prices in the Greater Toronto Area
Stages:
1. Get addresses of gas stations from yellowpages.ca, remove stations with no address or incomplete addresses.
2. Use geopy and the GoogleV3 engine to find the coordinates of each address.
3. Search google maps to get the all available prices (diesel, regular, premium, etc.) for all gas stations

4+. and more...
"""

from bs4 import BeautifulSoup
import urllib
import requests
import math
import re
import pandas as pd
import numpy as np
import datetime
import sys
import time
import json
from geopy.geocoders import GoogleV3
import seaborn as sns
from time import sleep
geolocator = GoogleV3()
%matplotlib inline
today = datetime.datetime.now()

In [2]:
# Get the names and addressess of gas stations listed in Yellow Pages in the Toronto region

def get_addresses (page_num):
    with open('gas_dict.json', 'r') as infile:
        gas_dict = json.load(infile)
        
    url = 'https://www.yellowpages.ca/search/si/'+str(page_num)+'/Gas%20Stations/Toronto+ON'
    try:
        html = urllib.request.urlopen(url).read()
    except:
        return page_num
    soup = BeautifulSoup(html, 'html.parser')
        
    content = soup.findAll('div',{'class':'listing__content__wrap'})

    for n in range(len(content)):
        try:
            name = content[n].findAll('a')[0].get('title').split('-')[0]
        except:
            name = '#NO NAME'
        
        try:
            addr_ele = content[n].findAll('span',{'class':'listing__address--full'})[0].findAll('span')
            addr = ','.join([addr_ele[i].string for i,e in enumerate(addr_ele)])
        except:
            addr = '#NO ADDRESS#'
        
        gas_dict[addr] = {'name':name}
 
    with open('gas_dict.json', 'w') as outfile:
        json.dump(gas_dict,outfile)
    
    return page_num + 1

def address_main():
    # If file doesn't exist, create it
    try:
        with open('gas_dict.json', 'r') as infile:
            gas_dict = json.load(infile)
    except:
        with open('gas_dict.json', 'w') as outfile:
            gas_dict={}
            json.dump(gas_dict,outfile)
    
    page_num = 1
    while page_num < 22:
        print('Fetching page ',page_num)
        page_num = get_addresses(page_num)
        sleep(np.random.random(1)*2+2)

In [3]:
# Get rid of incomplete or empty addresses
def clean_addresses():
    with open('gas_dict.json', 'r') as infile:
        gas_dict = json.load(infile)
    
    gas_dict.pop('#NO ADDRESS#',None)
    gas_dict.pop('ON',None)
    gas_dict.pop('12001 Hwy 400,Maple,ON,L7B 1A8',None)    # This is actually both redundant and the wrong city ...
    
    with open('gas_dict.json', 'w') as outfile:
        json.dump(gas_dict,outfile)

In [4]:
# for ontariogasprices.com scraping, we need a list of postal codes to search with
# the site focuses on finding the lowest price in an area and does not prioritize towards specific addresses
# but this works to our advantage since we can potentially find stations not included in the yellowpages data
# gas types are
#  A: regular, B: mid, C: premium, D: diesel

def get_area_prices(gas_type, postal_code):
    with open('gas_stations.json', 'r') as infile:
        gas_stations = json.load(infile)
    
    url = 'http://www.ontariogasprices.com/GasPriceSearch.aspx?fuel='+gas_type+'&qsrch='+postal_code[0]+'%20'+postal_code[1]
    print ('Fetching from ', url)    
    html = requests.get(url).text
    soup = BeautifulSoup(html, 'html.parser')    

    if len(soup.findAll('tbody')) == 0:
        pass
    else:
        data_rows = soup.findAll('tbody')[0].findAll('tr')

        for i in range(len(data_rows)):
            try:
                name = data_rows[i].findAll('td')[0].find('a').string
                st_addr = data_rows[i].findAll('td')[0].find('dd').string.split('&')[0].split('near')[0]
                city = data_rows[i].findAll('td')[2].find('a').string
                addr = st_addr + ', '+ city + ', Ontario'
                price = float(data_rows[i].find('a').string)
                if addr not in gas_stations:
                    gas_stations[addr] = {'name':name}
                else:
                    pass
                gas_stations[addr]['type_'+gas_type] = price
            except:
                # The "other stations in the area" row can just be ignored
                pass
        
        with open('gas_stations.json', 'w') as outfile:
            json.dump(gas_stations,outfile)
    
def price_main():
    # first get postal codes
    with open('gas_dict.json', 'r') as infile:
        gas_dict = json.load(infile)
        
    try:
        with open('gas_stations.json', 'r') as infile:
            gas_stations = json.load(infile)
    except:
        with open('gas_stations.json', 'w') as outfile:
            gas_stations={}
            json.dump(gas_stations,outfile)
        
    postal_list=[]
    for k,v in gas_dict.items():
        postal_list.append(k.split(',')[-1].split())
    
    # If no postal code, just ignore for now...
    postal_list = [postal_list[i] for i,p in enumerate(postal_list) if len(p) == 2]

    break_point = 0
    while (break_point < len(postal_list)):
        postal_code = postal_list[break_point]
        try:
            for gas_type in list('ABCD'):
                get_area_prices(gas_type, postal_code)
                sleep(np.random.random(1)*2+0.5)                 
                break_point += 1
        except:
            # Try again
            print('Trying again ...')
            sleep(np.random.random(1)*2+2)

In [5]:
# Get not only the coordinates but also the Google Maps address, which will likely clean up any messes left over
# from the ontariogasprices.com splits

def get_coord(addr):
    print ('Getting' , addr)
    geoloc = geolocator.geocode(addr)
    sleep(np.random.random(1)*2+2)
    print ('Got ', geoloc, '\n')
    return geoloc
            
def coords_main():
    # Start at the beginning of gas_dict
    break_point = 0

    # open gas_stations, and get the list of all the addresses
    with open('gas_stations.json', 'r') as infile:
        gas_dict = json.load(infile)
    addr_list = [k for k,v in gas_dict.items()]
        
    # do while there are still addresses to process
    while(break_point < len(addr_list)):
        # run collect_coords starting with the current breakpoint, and gets the
        # address that it's stuck on
        addr = addr_list[break_point]
        try: 
            geoloc = get_coord(addr)
            if geoloc == None:
                # GoogleV3 can't get a hold of it. Need to look at it later. Most likely it's a highway.
                break_point += 1
            else:
                gas_dict[addr]['address'] = geoloc.address
                gas_dict[addr]['longitude'] = geoloc.longitude
                gas_dict[addr]['latitude'] = geoloc.latitude
                # If it works, get to the next one. Once break_point gets to the length of the list, the conditions of
                # the loop will no longer be satisfied, and the loop ends.
                break_point +=1
                with open('gas_stations.json','w') as outfile:
                    json.dump(gas_dict,outfile)            
        except:
            # If it doesn't work, don't increase the break_point and try again
            print ('Could not get ', addr)

    with open('gas_stations.json','w') as outfile:
        json.dump(gas_dict,outfile)     
    #return gas_dict

In [118]:
def xy_distance(xlat,xlong,ylat,ylong):
    # The radius of the planet is 6371.01 km
    xlat = math.radians(xlat)
    xlong = math.radians(xlong)
    ylat = math.radians(ylat)
    ylong = math.radians(ylong)
    dist = 6371.01 * math.acos(math.sin(xlat)*math.sin(ylat) + math.cos(xlat)*math.cos(ylat)*math.cos(xlong - ylong))
    return dist

def xy_closest(coords, x):
    return (min(coords, key=lambda y: xy_distance(x[0],x[1],y[0],y[1])))

In [117]:
closest_coord

(-79.37059889999999, 43.7700037)

In [109]:
coords_df['combined']

 12731 Hwy 48   , Whitchurch-Stouffville, Ontario                   (-79.282378, 43.9763464)
 1610 Keele St , Toronto - Central, Ontario                        (-79.4720845, 43.6823199)
 6897 Finch Ave W , Toronto - West, Ontario                        (-79.6173637, 43.7346325)
1 Harwood Ave S , Ajax, Ontario                                    (-79.0251004, 43.8610274)
1 Thornhill Woods Dr , Vaughan, Ontario                     (-79.46388639999999, 43.8266602)
1 Westney Rd N , Ajax, Ontario                              (-79.0412386, 43.85815669999999)
10 Brisdale Dr , Brampton, Ontario                          (-79.81231919999999, 43.6830606)
10 Station St , Ajax, Ontario                                        (-79.0222625, 43.85174)
1000 Rowntree Dairy Rd , Vaughan, Ontario                   (-79.56811429999999, 43.7841385)
1000 The Queensway , Toronto - West, Ontario                        (-79.5153431, 43.623835)
10010 McLaughlin Rd N , Brampton, Ontario                          (-7

In [None]:
address_main()

In [None]:
clean_addresses()

In [None]:
price_main()

In [6]:
coords_main()

Getting 2145 Dundas St W , Toronto - South, Ontario
Got  2145 Dundas St W, Toronto, ON M6R 1X2, Canada 

Getting 2010 Dundas St W , Toronto - South, Ontario
Got  2010 Dundas St W, Toronto, ON M6R 1W6, Canada 

Getting Taymall Ave , Toronto - West, Ontario
Got  Taymall Ave, Etobicoke, ON M8Z, Canada 

Getting 840 Dupont St , Toronto - Central, Ontario
Got  840 Dupont St, Toronto, ON M6G 1Z8, Canada 

Getting 150 Dupont St , Toronto - Central, Ontario
Could not get  150 Dupont St , Toronto - Central, Ontario
Getting 150 Dupont St , Toronto - Central, Ontario
Got  150 Dupont St, Toronto, ON M5R 2E6, Canada 

Getting 1292 Dupont St , Toronto - Central, Ontario
Got  1292 Dupont St, Toronto, ON M6H 2A4, Canada 

Getting 1110 Bathurst St , Toronto - Central, Ontario
Got  1110 Bathurst St, Toronto, ON M5R 3H2, Canada 

Getting 875 The Queensway , Toronto - West, Ontario
Got  875 The Queensway, Etobicoke, ON M8Z 1N8, Canada 

Getting 3466 Dundas St W , Toronto - Central, Ontario
Got  3466 Dunda

Got  2699 Keele St, North York, ON M3M 2E9, Canada 

Getting 230 Lloyd Manor Rd , Toronto - West, Ontario
Got  230 Lloyd Manor Rd, Etobicoke, ON M9B 5K7, Canada 

Getting 3069 Bathurst St , Toronto - Central, Ontario
Could not get  3069 Bathurst St , Toronto - Central, Ontario
Getting 3069 Bathurst St , Toronto - Central, Ontario
Got  3069 Bathurst St, York, ON M5P 3L1, Canada 

Getting 613 Evans Ave , Toronto - West, Ontario
Got  613 Evans Ave, Etobicoke, ON M8W 2W5, Canada 

Getting 829 Lake Shore Blvd E , Toronto - South, Ontario
Got  829 Lake Shore Blvd E, Toronto, ON M4M 1B2, Canada 

Getting 1890 Jane St , Toronto - Central, Ontario
Got  1890 Jane St, York, ON M9N 2T5, Canada 

Getting 3639 Dufferin St , Toronto - North, Ontario
Got  3639 Dufferin St, North York, ON M3K, Canada 

Getting 2747 Keele St , Toronto - North, Ontario
Got  2747 Keele St, North York, ON M3M 2E9, Canada 

Getting 929 Queen St E , Toronto - South, Ontario
Got  929 Queen St E, Toronto, ON M4M 1J6, Canada 



Got  2669 Jane St, North York, ON M3L 1S3, Canada 

Getting 515 Drewry Ave , Toronto - North, Ontario
Got  515 Drewry Ave, North York, ON M2R 2K9, Canada 

Getting 1514 Steeles Ave W , Toronto - North, Ontario
Got  1514 Steeles Ave E, Toronto, ON L3R, Canada 

Getting 6000 Dufferin St , Toronto - North, Ontario
Got  6000 Dufferin St, North York, ON M3H 5T5, Canada 

Getting 7011 Bathurst St , Toronto - North, Ontario
Got  7011 Bathurst St, Thornhill, ON L4J 2J6, Canada 

Getting 6255 Bathurst St , Toronto - North, Ontario
Got  6255 Bathurst St, North York, ON M2R, Canada 

Getting 1869 Leslie St , Toronto - Central, Ontario
Got  Leslie St, Toronto, ON, Canada 

Getting 5571 Yonge St , Toronto - North, Ontario
Got  5571 Yonge St, North York, ON M2N 5S4, Canada 

Getting 1015 Sheppard Ave E , Toronto - North, Ontario
Could not get  1015 Sheppard Ave E , Toronto - North, Ontario
Getting 1015 Sheppard Ave E , Toronto - North, Ontario
Got  1015 Sheppard Ave E, Toronto, ON M2K, Canada 

Gett

Got  3405 Dixie Rd, Mississauga, ON L4Y 2A9, Canada 

Getting 5014 Dixie Rd , Mississauga, Ontario
Got  5014 Dixie Rd, Mississauga, ON L4W, Canada 

Getting 354 Dundas St E , Mississauga, Ontario
Got  354 Dundas St E, Mississauga, ON L5A 1X2, Canada 

Getting 5495 Eglinton Ave W , Toronto - West, Ontario
Got  5495 Eglinton Ave E, Scarborough, ON M1L 2T3, Canada 

Getting 585 Dixon Rd , Toronto - West, Ontario
Got  585 Dixon Rd, Etobicoke, ON M9W 1H7, Canada 

Getting 627 Dixon Rd , Toronto - West, Ontario
Got  627 Dixon Rd, Etobicoke, ON M9W 1H7, Canada 

Getting 2320 Lawrence Ave E , Toronto - East, Ontario
Got  2320 Lawrence Ave E, Scarborough, ON M1P 2P9, Canada 

Getting 640 Markham Rd , Toronto - East, Ontario
Got  640 Markham Rd, Scarborough, ON M1H, Canada 

Getting 3304 Lawrence Ave E , Toronto - East, Ontario
Got  3304 Lawrence Ave E, Scarborough, ON M1H, Canada 

Getting 1860 Ellesmere Rd , Toronto - East, Ontario
Got  1860 Ellesmere Rd, Scarborough, ON M1H 2V5, Canada 

Gett

Got  456 Lakeshore Rd E, Mississauga, ON L5G 1J1, Canada 

Getting 155 North Service Rd , Mississauga, Ontario
Got  155 N Service Rd, Mississauga, ON L5A 1A2, Canada 

Getting 1175 Hurontario St , Mississauga, Ontario
Got  1175 Hurontario St, Mississauga, ON L5G 3H1, Canada 

Getting 1553 Shawson Dr , Mississauga, Ontario
Got  1553 Shawson Dr, Mississauga, ON L4W 1T7, Canada 

Getting 3411 Mavis Rd , Mississauga, Ontario
Got  3411 Mavis Rd, Mississauga, ON L5B 4E8, Canada 

Getting 20 Bristol Rd W , Mississauga, Ontario
Got  20 Bristol Rd W, Mississauga, ON L5R 3K3, Canada 

Getting 5020 McLaughlin Rd , Mississauga, Ontario
Got  5020 McLaughlin Rd, Mississauga, ON L5R 3R8, Canada 

Getting 5008 Hurontario St , Mississauga, Ontario
Got  5008 Hurontario St, Mississauga, ON L5R, Canada 

Getting 695 Burnhamthorpe Rd W , Mississauga, Ontario
Got  695 Burnhamthorpe Rd W, Mississauga, ON L5C 3A6, Canada 

Getting 5555 Kennedy Rd , Mississauga, Ontario
Got  5555 Kennedy Rd, Mississauga, ON L4

Got  995 Eglinton Ave E, Mississauga, ON L4W 4H3, Canada 

Getting 2645 Steeles Ave E , Brampton, Ontario
Got  2645 Steeles Ave E, Brampton, ON L6T, Canada 

Getting 8155 Torbram Rd , Brampton, Ontario
Got  8155 Torbram Rd, Brampton, ON L6T 5C5, Canada 

Getting 2439 Steeles Ave E , Brampton, Ontario
Got  2439 Steeles Ave E, Brampton, ON L6T 5J9, Canada 

Getting 55 New Huntington Rd , Vaughan, Ontario
Got  55 New Huntington Rd, Woodbridge, ON L4H 3M9, Canada 

Getting 5445 Steeles Ave E , Toronto - East, Ontario
Got  5445 Steeles Ave E, Scarborough, ON M1V 5C2, Canada 

Getting 5270 Steeles Ave E , Toronto - East, Ontario
Got  5270 Steeles Ave E, Markham, ON L3S 1N1, Canada 

Getting 8330 Kennedy Rd , Markham, Ontario
Could not get  8330 Kennedy Rd , Markham, Ontario
Getting 8330 Kennedy Rd , Markham, Ontario
Could not get  8330 Kennedy Rd , Markham, Ontario
Getting 8330 Kennedy Rd , Markham, Ontario
Got  8330 Kennedy Rd, Unionville, ON L3R 0P5, Canada 

Getting 4780 Hwy 7 E , Markham

Got  1326 Eglinton Ave W, Mississauga, ON L5M 6J3, Canada 

Getting 3255 Dundas St W , Mississauga, Ontario
Got  3255 Dundas St W, Mississauga, ON L5L 5V7, Canada 

Getting 6015 McLaughlin Rd , Mississauga, Ontario
Got  6015 McLaughlin Rd, Mississauga, ON L5R 1B9, Canada 

Getting 3020 Unity Dr , Mississauga, Ontario
Got  3020 Unity Dr, Mississauga, ON L5L 4X5, Canada 

Getting 6625 Kennedy Rd , Mississauga, Ontario
Got  6625 Kennedy Rd, Mississauga, ON L5T 2W4, Canada 

Getting 6995 Dixie Rd , Mississauga, Ontario
Got  6995 Dixie Rd, Mississauga, ON L5T 1A8, Canada 

Getting 6009 Hurontario St , Mississauga, Ontario
Got  6009 Hurontario St, Mississauga, ON L4Z 1S9, Canada 

Getting 7015 Kennedy Rd , Mississauga, Ontario
Got  7015 Kennedy Rd, Mississauga, ON L5S 0A4, Canada 

Getting 480 Derry Rd E , Mississauga, Ontario
Got  480 Derry Rd E, Mississauga, ON L5T 2P3, Canada 

Getting 7280 Dixie Rd , Mississauga, Ontario
Got  7280 Dixie Rd, Mississauga, ON L5S 1E1, Canada 

Getting 5980 

Got  13735 Bathurst St, Richmond Hill, ON L4E 3Z1, Canada 

Getting 7800 Jane St , Vaughan, Ontario
Got  7800 Jane St, Concord, ON L4K 4R6, Canada 

Getting 2956 Queen St E , Brampton, Ontario
Got  2956 Queen St E, Brampton, ON L6S 5Y1, Canada 

Getting 2182 Queen St E , Brampton, Ontario
Got  2182 Queen St E, Brampton, ON L6S 4G9, Canada 

Getting 2963 Queen St E , Brampton, Ontario
Got  2963 Queen St E, Brampton, ON L6T 5J1, Canada 

Getting 1707 Queen St E , Brampton, Ontario
Got  1707 Queen St E, Brampton, ON L6T 2H2, Canada 

Getting 9431 Torbram Rd , Brampton, Ontario
Got  9431 Torbram Rd, Brampton, ON L6S 6E6, Canada 

Getting 9495 Airport Rd , Brampton, Ontario
Got  9495 Airport Rd, Brampton, ON L6S 6C7, Canada 

Getting 145 Clark Blvd , Brampton, Ontario
Got  145 Clark Blvd, Brampton, ON L6T 4G6, Canada 

Getting 2200 Credit Valley Rd , Mississauga, Ontario
Got  2200 Credit Valley Rd, Mississauga, ON L5M 3C9, Canada 

Getting 4530 Erin Mills Pkwy , Mississauga, Ontario
Got  45

Got  99 McEwan Dr E, Bolton, ON L7E 2Z7, Canada 

Getting 281 Richvale Dr S , Brampton, Ontario
Got  281 Richvale Dr S, Brampton, ON L6Z 4W5, Canada 

Getting  12731 Hwy 48   , Whitchurch-Stouffville, Ontario
Got  12731 ON-48, Whitchurch-Stouffville, ON L4A 7X3, Canada 

Getting 5842 Main St Stouffville , Whitchurch-Stouffville, Ontario
Got  5842 Main St, Whitchurch-Stouffville, ON L4A 2S8, Canada 

Getting 5946 Main St , Whitchurch-Stouffville, Ontario
Got  5946 Main St, Whitchurch-Stouffville, ON L4A 3A1, Canada 

Getting 2210 Stouffville Rd , Whitchurch-Stouffville, Ontario
Got  2210 Stouffville Rd, Whitchurch-Stouffville, ON L0H, Canada 

Getting 2329 Stouffville Rd , Whitchurch-Stouffville, Ontario
Could not get  2329 Stouffville Rd , Whitchurch-Stouffville, Ontario
Getting 2329 Stouffville Rd , Whitchurch-Stouffville, Ontario
Got  2329 Stouffville Rd, Gormley, ON L0H 1G0, Canada 

Getting 12717 Woodbine Ave , Whitchurch-Stouffville, Ontario
Got  12717 Woodbine Ave, Gormley, ON L0

Got  5600 Mainway, Burlington, ON L7L 6C4, Canada 

Getting 2267 Lakeshore Rd W , Oakville, Ontario
Got  2267 Lakeshore Rd W, Oakville, ON L6L 1H1, Canada 

Getting 620 Thompson Rd S , Milton, Ontario
Got  620 Thompson Rd S, Milton, ON L9T 0H1, Canada 

Getting 850 Appleby Line , Burlington, Ontario
Got  850 Appleby Line, Burlington, ON L7L 2Y7, Canada 

Getting 4499 Mainway , Burlington, Ontario
Got  4499 Mainway, Burlington, ON L7L 7P3, Canada 

Getting 120 Thompson Rd S , Milton, Ontario
Got  120 Thompson Rd S, Milton, ON L9T 0J4, Canada 

Getting 1515 Main St E , Milton, Ontario
Got  1515 Main St E, Milton, ON L9T 0R3, Canada 

Getting 4525 Dundas St , Burlington, Ontario
Got  4525 Dundas St, Burlington, ON L7M 5B4, Canada 

Getting 6788 Regional Rd 25 , Milton, Ontario
Got  6788 Regional Rd 25, Milton, ON L9T 2X5, Canada 

Getting 5267 Aurora Rd , Whitchurch-Stouffville, Ontario
Got  5267 Aurora Rd, Whitchurch-Stouffville, ON L4A 7X4, Canada 

Getting 15641 Hwy 48 , Whitchurch-Sto

In [44]:
with open('gas_stations.json', 'r') as infile:
    gas_stations = json.load(infile)
len(gas_stations)

713

In [28]:
gas_stations['12001 Hwy 400 NB , Vaughan, Ontario']['address'] = geoloc.address
gas_stations['12001 Hwy 400 NB , Vaughan, Ontario']['longitude'] = geoloc.longitude
gas_stations['12001 Hwy 400 NB , Vaughan, Ontario']['latitude'] =geoloc.latitude

In [46]:
gas_df = pd.DataFrame.from_dict(gas_stations, orient='index')

In [47]:
gas_df.head()

Unnamed: 0,name,type_A,type_B,type_C,type_D,address,longitude,latitude
"12731 Hwy 48 , Whitchurch-Stouffville, Ontario",Ultramar,115.6,130.9,132.9,117.9,"12731 ON-48, Whitchurch-Stouffville, ON L4A 7X...",-79.282378,43.976346
"1610 Keele St , Toronto - Central, Ontario",Shell,122.9,136.9,144.9,,"1610 Keele St, Toronto, ON M6M 3V9, Canada",-79.472084,43.68232
"6897 Finch Ave W , Toronto - West, Ontario",Esso,121.9,132.9,142.9,118.9,"6897 Finch Ave W, Etobicoke, ON M9W 0A6, Canada",-79.617364,43.734632
"1 Harwood Ave S , Ajax, Ontario",Pioneer,112.9,,131.9,118.6,"1 Harwood Ave S, Ajax, ON L1S 2C1, Canada",-79.0251,43.861027
"1 Thornhill Woods Dr , Vaughan, Ontario",Esso,122.9,135.9,141.9,118.9,"1 Thornhill Woods Dr, Thornhill, ON L4J 8Y2, C...",-79.463886,43.82666


In [48]:
gas_df.groupby('name')['type_A'].describe().sort_values(by='count', ascending=False)[:10]

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Petro-Canada,190.0,121.508947,2.095112,113.6,119.9,122.9,122.9,127.9
Esso,175.0,121.038857,2.496475,113.3,119.6,122.9,122.9,123.9
Shell,117.0,121.098291,2.340645,113.9,119.9,122.9,122.9,122.9
Husky,41.0,119.980488,2.414458,111.9,119.3,119.8,121.9,122.9
Canadian Tire,31.0,120.758065,2.52887,114.9,119.25,120.8,122.9,125.3
Pioneer,21.0,118.195238,2.109378,112.9,116.9,118.6,119.6,121.4
Ultramar,15.0,118.84,2.445345,114.9,117.9,118.4,120.4,122.9
7-Eleven,10.0,119.43,2.847631,113.9,117.6,119.45,122.175,122.6
Costco,7.0,110.9,1.414214,109.9,109.9,110.9,110.9,113.9
Global,7.0,118.828571,3.227339,113.9,117.8,117.9,120.25,123.9


In [49]:
gas_df.iloc[:5]

Unnamed: 0,name,type_A,type_B,type_C,type_D,address,longitude,latitude
"12731 Hwy 48 , Whitchurch-Stouffville, Ontario",Ultramar,115.6,130.9,132.9,117.9,"12731 ON-48, Whitchurch-Stouffville, ON L4A 7X...",-79.282378,43.976346
"1610 Keele St , Toronto - Central, Ontario",Shell,122.9,136.9,144.9,,"1610 Keele St, Toronto, ON M6M 3V9, Canada",-79.472084,43.68232
"6897 Finch Ave W , Toronto - West, Ontario",Esso,121.9,132.9,142.9,118.9,"6897 Finch Ave W, Etobicoke, ON M9W 0A6, Canada",-79.617364,43.734632
"1 Harwood Ave S , Ajax, Ontario",Pioneer,112.9,,131.9,118.6,"1 Harwood Ave S, Ajax, ON L1S 2C1, Canada",-79.0251,43.861027
"1 Thornhill Woods Dr , Vaughan, Ontario",Esso,122.9,135.9,141.9,118.9,"1 Thornhill Woods Dr, Thornhill, ON L4J 8Y2, C...",-79.463886,43.82666


In [36]:
gas_df.to_csv('gas_stations.csv')

In [37]:
with open('gas_stations.json','w') as outfile:
    json.dump(gas_stations,outfile) 

In [50]:
coords_df = gas_df[['longitude','latitude']].copy()

In [51]:
coords_df.head()

Unnamed: 0,longitude,latitude
"12731 Hwy 48 , Whitchurch-Stouffville, Ontario",-79.282378,43.976346
"1610 Keele St , Toronto - Central, Ontario",-79.472084,43.68232
"6897 Finch Ave W , Toronto - West, Ontario",-79.617364,43.734632
"1 Harwood Ave S , Ajax, Ontario",-79.0251,43.861027
"1 Thornhill Woods Dr , Vaughan, Ontario",-79.463886,43.82666


In [53]:
coords_df['long_rad'] = coords_df['longitude'].apply(lambda l:math.radians(l))
coords_df['lat_rad'] = coords_df['latitude'].apply(lambda l:math.radians(l))

In [119]:
coords_df.head()

Unnamed: 0,longitude,latitude,long_rad,lat_rad,within_25m,combined
"12731 Hwy 48 , Whitchurch-Stouffville, Ontario",-79.282378,43.976346,-1.383739,0.767532,,"(-79.282378, 43.9763464)"
"1610 Keele St , Toronto - Central, Ontario",-79.472084,43.68232,-1.38705,0.7624,,"(-79.4720845, 43.6823199)"
"6897 Finch Ave W , Toronto - West, Ontario",-79.617364,43.734632,-1.389585,0.763313,,"(-79.6173637, 43.7346325)"
"1 Harwood Ave S , Ajax, Ontario",-79.0251,43.861027,-1.379248,0.765519,,"(-79.0251004, 43.8610274)"
"1 Thornhill Woods Dr , Vaughan, Ontario",-79.463886,43.82666,-1.386906,0.76492,,"(-79.46388639999999, 43.8266602)"


In [76]:
coords_df['combined'] = list(zip(coords_df['longitude'],coords_df['latitude']))

In [148]:
within_100 = []
for i in range(len(coords_df)):
    closest_coord = xy_closest(coords_df['combined'][coords_df.index != coords_df.index[i]], coords_df.iloc[i]['combined'])
    within_100.append(xy_distance(coords_df.iloc[i]['combined'][0],coords_df.iloc[i]['combined'][1], closest_coord[0], closest_coord[1]) <= 0.1)

In [149]:
within_200 = []
for i in range(len(coords_df)):
    closest_coord = xy_closest(coords_df['combined'][coords_df.index != coords_df.index[i]], coords_df.iloc[i]['combined'])
    within_200.append(xy_distance(coords_df.iloc[i]['combined'][0],coords_df.iloc[i]['combined'][1], closest_coord[0], closest_coord[1]) <= 0.2)

In [154]:
within_500 = []
for i in range(len(coords_df)):
    closest_coord = xy_closest(coords_df['combined'][coords_df.index != coords_df.index[i]], coords_df.iloc[i]['combined'])
    within_500.append(xy_distance(coords_df.iloc[i]['combined'][0],coords_df.iloc[i]['combined'][1], closest_coord[0], closest_coord[1]) <= 0.5)

In [152]:
sum(within_100)

158

In [153]:
sum(within_200)

283

In [155]:
sum(within_500)

526

In [174]:
coords_df['within_100'] = np.asarray(within_100).astype(int)
coords_df['within_200'] = np.asarray(within_200).astype(int)
coords_df['within_500'] = np.asarray(within_500).astype(int)

In [175]:
coords_df.head()

Unnamed: 0,longitude,latitude,long_rad,lat_rad,combined,within_100,within_200,within_500
"12731 Hwy 48 , Whitchurch-Stouffville, Ontario",-79.282378,43.976346,-1.383739,0.767532,"(-79.282378, 43.9763464)",0,0,0
"1610 Keele St , Toronto - Central, Ontario",-79.472084,43.68232,-1.38705,0.7624,"(-79.4720845, 43.6823199)",0,0,1
"6897 Finch Ave W , Toronto - West, Ontario",-79.617364,43.734632,-1.389585,0.763313,"(-79.6173637, 43.7346325)",0,0,0
"1 Harwood Ave S , Ajax, Ontario",-79.0251,43.861027,-1.379248,0.765519,"(-79.0251004, 43.8610274)",1,1,1
"1 Thornhill Woods Dr , Vaughan, Ontario",-79.463886,43.82666,-1.386906,0.76492,"(-79.46388639999999, 43.8266602)",0,0,0


In [178]:
gas_df['within_100'] = coords_df['within_100']
gas_df['within_200'] = coords_df['within_200']
gas_df['within_500'] = coords_df['within_500']

In [179]:
gas_df.head()

Unnamed: 0,name,type_A,type_B,type_C,type_D,address,longitude,latitude,within_100,within_200,within_500
"12731 Hwy 48 , Whitchurch-Stouffville, Ontario",Ultramar,115.6,130.9,132.9,117.9,"12731 ON-48, Whitchurch-Stouffville, ON L4A 7X...",-79.282378,43.976346,0,0,0
"1610 Keele St , Toronto - Central, Ontario",Shell,122.9,136.9,144.9,,"1610 Keele St, Toronto, ON M6M 3V9, Canada",-79.472084,43.68232,0,0,1
"6897 Finch Ave W , Toronto - West, Ontario",Esso,121.9,132.9,142.9,118.9,"6897 Finch Ave W, Etobicoke, ON M9W 0A6, Canada",-79.617364,43.734632,0,0,0
"1 Harwood Ave S , Ajax, Ontario",Pioneer,112.9,,131.9,118.6,"1 Harwood Ave S, Ajax, ON L1S 2C1, Canada",-79.0251,43.861027,1,1,1
"1 Thornhill Woods Dr , Vaughan, Ontario",Esso,122.9,135.9,141.9,118.9,"1 Thornhill Woods Dr, Thornhill, ON L4J 8Y2, C...",-79.463886,43.82666,0,0,0


For general clustering, use longitude and latitude. For gasoline-specific clustering, also use the price of the specific type of gas. We can just use k means for general clustering, but when looking at only certain types of gas (such as regular type_A), maybe use separating hyperplane methods.

In [180]:
cluster_df = gas_df[['type_A','longitude','latitude']]

In [181]:
cluster_df.head()

Unnamed: 0,type_A,longitude,latitude
"12731 Hwy 48 , Whitchurch-Stouffville, Ontario",115.6,-79.282378,43.976346
"1610 Keele St , Toronto - Central, Ontario",122.9,-79.472084,43.68232
"6897 Finch Ave W , Toronto - West, Ontario",121.9,-79.617364,43.734632
"1 Harwood Ave S , Ajax, Ontario",112.9,-79.0251,43.861027
"1 Thornhill Woods Dr , Vaughan, Ontario",122.9,-79.463886,43.82666
