In [1]:
import requests
import os
import pandas as pd
from datetime import datetime

In [2]:
# Sample lookup inputs. Only `city` is passed to get_lat_lon in the cells shown here;
# `postal_code` and `address` are not referenced by any visible cell.
postal_code = "m5h 2n2"
address = "276 Queensdale Avenue, Toronto"
city = "Riverdale, ON"

In [3]:
def get_lat_lon(location: str, google_api_key=None, timeout=10):
    """
    Get latitude and longitude for a location from the Google Geocoding API.

    Parameters
    ----------
    @location [string]: Location entered by user from widget
    @google_api_key [string]: Api Key; when omitted, the
        GOOGLE_GEOCODING_API_KEY environment variable is used instead
    @timeout [float]: Seconds to wait for the API before giving up

    Returns
    -------
    [tuple]: (latitude, longitude) taken from geometry.location of the
        first result in the response

    Raises
    ------
    AssertionError: no API key was passed and the environment variable is unset
    KeyError: the response body has no 'results' entry
    IndexError: the API returned an empty result list for the location
    """
    # api-endpoint
    URL = "https://maps.googleapis.com/maps/api/geocode/json"
    google_api_key = google_api_key or os.environ.get("GOOGLE_GEOCODING_API_KEY")

    # Raised explicitly instead of via `assert` so the check is not stripped
    # when Python runs with -O; the exception type and message are unchanged.
    if not google_api_key:
        raise AssertionError(
            "Please set the GOOGLE_GEOCODING_API_KEY environment variable or pass in the API key."
        )

    # defining a params dict for the parameters to be sent to the API
    PARAMS = {'address': location, 'key': google_api_key}

    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the notebook.
    r = requests.get(url=URL, params=PARAMS, timeout=timeout)

    # get data from response
    data = r.json()

    # An empty result list used to surface as "list index out of range".
    # Keep the IndexError type but report what the API actually said.
    results = data['results']
    if not results:
        raise IndexError(
            "No geocoding results for {0!r} (status={1!r}, error_message={2!r})".format(
                location, data.get('status'), data.get('error_message')
            )
        )

    # location of the first (best) match
    location_dict = results[0]['geometry']['location']
    latitude = location_dict['lat']
    longitude = location_dict['lng']

    return latitude, longitude

In [4]:
# Geocode the sample city. No key is passed, so get_lat_lon falls back to the
# GOOGLE_GEOCODING_API_KEY environment variable.
lat, lon = get_lat_lon(city)
lat, lon

(43.678985, -79.34491009999999)

In [5]:
def get_city_statistics(lat, lon, api_key=None, timeout=10):
    """
    Get city statistics from canadian realtor api.

    Parameters
    ----------
    @lat [float]: Latitude
    @lon [float]: Longitude
    @api_key [string]: RapidAPI key; when omitted, the RAPID_API_KEY
        environment variable is used instead
    @timeout [float]: Seconds to wait for the API before giving up

    Returns
    -------
    [dict]: Decoded JSON body of the statistics response

    Raises
    ------
    AssertionError: no API key was passed and the environment variable is unset
    requests.HTTPError: the API answered with a 4xx/5xx status
    """
    # url for api
    url = "https://realty-in-ca1.p.rapidapi.com/properties/get-statistics"
    api_key = api_key or os.environ.get("RAPID_API_KEY")

    # Raised explicitly instead of via `assert` so the check is not stripped
    # when Python runs with -O; the exception type and message are unchanged.
    if not api_key:
        raise AssertionError(
            "Please set the RAPID_API_KEY environment variable or pass in the API key."
        )

    # enter parameters
    querystring = {
        "CultureId": "1",  # return in english
        "Latitude": str(lat),
        "Longitude": str(lon),
    }

    # header
    headers = {
        'x-rapidapi-host': "realty-in-ca1.p.rapidapi.com",
        'x-rapidapi-key': api_key,
    }

    # requests never times out by default; bound the wait so a stalled
    # connection cannot hang the notebook.
    response = requests.get(url, headers=headers, params=querystring, timeout=timeout)

    # Fail loudly on an error status instead of returning the error body
    # as if it were statistics.
    response.raise_for_status()

    return response.json()  # json format

In [6]:
# Fetch the statistics payload for the geocoded point. No key is passed, so
# get_city_statistics falls back to the RAPID_API_KEY environment variable.
city_stats = get_city_statistics(lat, lon)

In [7]:
# Print the key of every table in the payload, numbered from 1.
for table_number, table in enumerate(city_stats['Data'], start=1):
    heading = "Table {0}:".format(table_number)
    print(heading, table['key'])

Table 1: 
Table 2: Retail Sales
Table 3: Population by Age Group
Table 4: Population Growth/Projection
Table 5: Education
Table 6: Marital Status
Table 7: Languages
Table 8: Household income
Table 9: Children at Home
Table 10: Ownership
Table 11: Construction Date
Table 12: Occupations


In [8]:
# Alias the fetched payload; the cells below read it as `stats_response`.
stats_response = city_stats

In [9]:
# The timestamp is whatever follows the last "[" in ErrorCode.ProductName,
# with any "]" removed.
stats_date_str = stats_response['ErrorCode']['ProductName'].rpartition("[")[2].replace("]", "")
stats_date = datetime.strptime(stats_date_str, '%A, %B %d, %Y %I:%M:%S %p')
# Reduce the parsed timestamp to a YYYY-MM-DD string.
stats_data_concat_str = format(stats_date, '%Y-%m-%d')
stats_data_concat_str

'2021-12-01'

In [11]:
# Show the raw timestamp text extracted from ErrorCode.ProductName.
stats_date_str

'Wednesday, December 1, 2021 2:55:01 PM'

In [10]:
# First table of the payload (its key is blank in the table listing printed earlier).
df_table_1 = pd.DataFrame(data=stats_response["Data"][0]["value"])
df_table_1

Unnamed: 0,key,value
0,Daytime Population,775
1,Number of Businesses,50
2,Population size,625
3,Median age,48.7
4,Average Household Size,1.9
5,Average Household Income,"$117,659.24"
6,Households with Children (%),45
7,Households without Children (%),55
8,Number of Households,329


In [11]:
# Second table of the payload ("Retail Sales" in the table listing printed earlier).
df_table_2 = pd.DataFrame(data=stats_response["Data"][1]["value"])
df_table_2

Unnamed: 0,key,value
0,Unknown,14
1,< 1,26
2,1 - 4.9,7
3,5 - 19.9,2


In [12]:
# Third table of the payload ("Population by Age Group" in the table listing printed earlier).
df_table_3 = pd.DataFrame(data=stats_response["Data"][2]["value"])
df_table_3

Unnamed: 0,key,value
0,0 - 4 years old,22
1,5 - 9 years old,23
2,10 - 19 years old,44
3,20 - 34 years old,135
4,35 - 49 years old,161
5,50 - 54 years old,52
6,55 - 64 years old,68
7,65 - 69 years old,27
8,70 - 79 years old,68
9,80 - 84 years old,31


In [92]:
# Fourth table of the payload ("Population Growth/Projection" in the table listing printed earlier).
df_table_4 = pd.DataFrame(data=stats_response["Data"][3]["value"])
df_table_4

Unnamed: 0,key,value
0,2013,600
1,2018,625
2,2021,639
3,2023,642
4,2028,647


In [13]:
# Fifth table of the payload ("Education" in the table listing printed earlier).
df_table_5 = pd.DataFrame(data=stats_response["Data"][4]["value"])
df_table_5

Unnamed: 0,key,value
0,No cert. / Diploma / Degree,68
1,High school,90
2,Apprenticeship / Trade cert. / Diploma,78
3,Non-university cert. / Diploma,41
4,University cert. / Diploma below bachelor,23
5,University degree,255


In [14]:
# Sixth table of the payload ("Marital Status" in the table listing printed earlier).
df_table_6 = pd.DataFrame(data=stats_response["Data"][5]["value"])
df_table_6

Unnamed: 0,key,value
0,Married,198
1,Common law,52
2,Single,166
3,Separated,15
4,Divorced,33
5,Widowed,91


In [15]:
# Seventh table of the payload ("Languages" in the table listing printed earlier).
df_table_7 = pd.DataFrame(stats_response['Data'][6]['value'])
# Cast the counts with one vectorized astype instead of a row-wise apply(int).
# Presumably the API delivers them as strings — TODO confirm. The dtype is
# spelled out so the result does not depend on the platform's default int.
df_table_7['value'] = df_table_7['value'].astype('int64')
# Largest counts first, so head(10) shows the ten most common entries.
df_table_7 = df_table_7.sort_values(by=['value'], ascending=False)
print('Length of table:', len(df_table_7))
df_table_7.head(10)

Length of table: 16


Unnamed: 0,key,value
0,English,398
10,Greek,86
14,Other Languages,26
5,Spanish,20
2,Italian,13
1,French,12
4,Cantonese,10
15,English & Non-Official,10
6,Tagalog,8
12,Bengali,8


In [16]:
# Eighth table of the payload ("Household income" in the table listing printed earlier).
# Assumption (not verified here): the dollar brackets are in Canadian dollars.
df_table_8 = pd.DataFrame(data=stats_response["Data"][7]["value"])
df_table_8

Unnamed: 0,key,value
0,"$0 - $29,999",129
1,"$30,000 - $59,999",100
2,"$60,000 - $79,999",28
3,"$80,000 - $99,999",12
4,"$100,000 - $149,999",30
5,"$150,000 - $199,999",6
6,"$200,000+",24


In [17]:
# Ninth table of the payload ("Children at Home" in the table listing printed earlier).
df_table_9 = pd.DataFrame(data=stats_response["Data"][8]["value"])
df_table_9

Unnamed: 0,key,value
0,0 - 4 years old,18
1,5 - 9 years old,23
2,10 - 14 years old,22
3,15 - 19 years old,19
4,20 - 24 years old,11
5,25+ years old,18


In [18]:
# Tenth table of the payload ("Ownership" in the table listing printed earlier).
df_table_10 = pd.DataFrame(data=stats_response["Data"][9]["value"])
df_table_10

Unnamed: 0,key,value
0,Own,127
1,Rent,202


In [19]:
# Eleventh table of the payload ("Construction Date" in the table listing printed earlier).
df_table_11 = pd.DataFrame(data=stats_response["Data"][10]["value"])
df_table_11

Unnamed: 0,key,value
0,Before 1960,192
1,1961 - 1980,45
2,1981 - 1990,41
3,1991 - 2000,33
4,2006 - 2010,2
5,2011 - 2016,10
6,After 2016,6


In [20]:
# Twelfth table of the payload ("Occupations" in the table listing printed earlier).
df_table_12 = pd.DataFrame(stats_response['Data'][11]['value'])
# Cast the counts with one vectorized astype instead of a row-wise apply(int).
# Presumably the API delivers them as strings — TODO confirm. The dtype is
# spelled out so the result does not depend on the platform's default int.
df_table_12['value'] = df_table_12['value'].astype('int64')
# Largest counts first.
df_table_12 = df_table_12.sort_values(by=['value'], ascending=False)
df_table_12

Unnamed: 0,key,value
5,"Social Sciences, Education, Government, Religion",70
7,Sales and service,66
1,Management,58
2,"Business, Finance, Admin",49
6,"Art, Culture, Recreation, Sport",44
3,Sciences,36
4,Health,33
8,"Trades, Transport, Operators",7
0,Not Applicable,3


In [1]:
from real_estate_hub.data_generators.location_stats import LocationStatsGenerator

In [2]:
# Build the packaged generator for the same location string used in the exploratory cells.
# NOTE(review): the single-letter name `l` is easy to misread as `1`; consider a descriptive name.
l = LocationStatsGenerator("Riverdale, ON")

2022-01-09 03:09:14.375 | INFO     | real_estate_hub.data_generators.location_stats:_get_location_data:81 - Latitude: 43.678985, Longitude: -79.34491009999999


In [3]:
# NOTE(review): this call currently fails with "TypeError: data type 'int4' not understood"
# (see the output below). Presumably an invalid dtype string is used inside
# get_language — TODO confirm in real_estate_hub.data_generators.location_stats.
l.get_language()

TypeError: data type 'int4' not understood