# Canadian Rental Prices Data Collection

## Imports

In [1]:
from bs4 import BeautifulSoup
import pandas as pd
import requests

In [None]:
# Import custom code
from src.helper_functions import export_list_to_csv

In [3]:
# Site root, and the JSON endpoint that is queried for listings
root_url = 'https://www.rentfaster.ca'
map_url = f'{root_url}/api/map.json'

In [None]:
# Get all cities from the site map, then export each city's listings to CSV
cities_url = f'{root_url}/cities'

# Seconds to wait on the server before giving up. requests applies no timeout
# by default, so a stalled connection would otherwise hang the whole run.
REQUEST_TIMEOUT = 30

# Full province/territory names keyed by the abbreviation found in the first
# path segment of each city link. Unknown abbreviations resolve to ''.
PROVINCE_NAMES = {
    'ab': 'Alberta',
    'bc': 'British Columbia',
    'mb': 'Manitoba',
    'nb': 'New Brunswick',
    'nl': 'Newfoundland and Labrador',
    'nt': 'Northwest Territories',
    'ns': 'Nova Scotia',
    'nu': 'Nunavut',
    'on': 'Ontario',
    'pe': 'Prince Edward Island',
    'qc': 'Quebec',
    'sk': 'Saskatchewan',
    'yt': 'Yukon',
}


def _listing_to_rental(listing: dict, province: str) -> dict:
    """Flatten one listing from the map JSON into a row for the CSV export.

    Args:
        listing: A single entry of the response's 'listings' array.
        province: Full province name to record on the row.

    Returns:
        A dict with one entry per exported field. Fields absent from the
        listing are None; 'utilities_included' is a comma-separated string,
        empty when the listing has no utilities entry.
    """
    # A missing or null 'utilities_included' must not abort the whole run.
    utilities = listing.get('utilities_included') or []

    # Key order is kept stable in case the export helper derives its columns
    # from it (TODO confirm against export_list_to_csv).
    return {
        'ref_id': listing.get('ref_id'),
        'availability': listing.get('availability'),
        'v': listing.get('v'),
        'f': listing.get('f'),
        'vtour': listing.get('vtour'),
        'title': listing.get('title'),
        'intro': listing.get('intro'),
        'address': listing.get('address'),
        'city': listing.get('city'),
        'province': province,
        'community': listing.get('community'),
        'latitude': listing.get('latitude'),
        'longitude': listing.get('longitude'),
        'link': listing.get('link'),
        'type': listing.get('type'),
        'price': listing.get('price'),
        # dict.get already yields None for the optional second-unit fields
        'price2': listing.get('price2'),
        'beds': listing.get('beds'),
        'beds2': listing.get('beds2'),
        'sq_feet': listing.get('sq_feet'),
        'sq_feet2': listing.get('sq_feet2'),
        'baths': listing.get('baths'),
        'baths2': listing.get('baths2'),
        'cats': listing.get('cats'),
        'dogs': listing.get('dogs'),
        'utilities_included': ', '.join(utilities),
    }


response = requests.get(cities_url, timeout=REQUEST_TIMEOUT)
# Fail loudly on an error page instead of silently scraping zero cities
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

ul_items = soup.find_all('ul', {'class': 'bullets'})

# The same headers are sent with every city request
headers = {
    'Content-Type': 'application/x-www-form-urlencoded',
    'User-Agent': 'Mozilla/5.0',
}

for ul_item in ul_items:
    for li_item in ul_item.find_all('li'):
        a_item = li_item.find('a')
        if a_item is None or not a_item.get('href'):
            # List entry without a usable link: nothing to request
            continue

        # Get all listings for each city
        city_link = a_item['href']
        # Drop the surrounding slashes; strip() is safe even when the
        # trailing slash is absent, unlike slicing with [1:-1].
        city_cookie = city_link.strip('/')
        province_abbrv = city_cookie.split('/')[0]
        province = PROVINCE_NAMES.get(province_abbrv, '')

        # The city is selected through the 'lastcity' cookie
        cookies = {
            "lastcity": city_cookie
        }

        # Get JSON response with all listings for a specific city
        try:
            listing_response = requests.post(
                map_url,
                headers=headers,
                cookies=cookies,
                timeout=REQUEST_TIMEOUT,
            )
            listing_response.raise_for_status()
            listing_data = listing_response.json()
        except (requests.RequestException, ValueError) as err:
            # One failing city should not discard the rest of the run
            print(f'Skipping {city_cookie}: {err}')
            continue

        listings = listing_data.get('listings') or []

        # Add listings in batch to a CSV file
        rental_list = [
            _listing_to_rental(listing, province) for listing in listings
        ]

        export_list_to_csv(rental_list, '../data/canada_rent.csv')

In [None]:
# TODO: follow each listing's link and scrape the listing page itself instead of relying only on the raw JSON response