In [86]:
import time
import requests
import re
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
from bs4 import BeautifulSoup

In [87]:
labels = ['Long Term Rentals', 'Room Rentals & Roommates', 'Short Term Rentals']

In [88]:
base_url = 'https://www.kijiji.ca/'

In [89]:
start_page = 1
short_term_url = base_url + f'b-short-term-rental/ontario/page-{start_page}/c42l9004'
room_and_rental_term_url = base_url + f'b-room-rental-roommate/ontario/page-{start_page}/c36l9004'

In [90]:
def get_links(url: str) -> list:
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'lxml')
    ads = soup.find_all("div", attrs={'class': 'search-item'})
    # remove third-party ad
    ads = [x for x in ads if ("cas-channel" not in x["class"]) & ("third-party" not in x["class"])]

    # create a list to store all of the URLs from the 
    ad_links = []
    for ad in ads:
        # parse the link from the ad
        link = ad.find_all("a", {"class": "title"})
        # add the link to the list
        for l in link:
            ad_links.append(base_url[:-1] + l["href"])
    return ad_links

In [154]:
long_term_list = []
for i in range(1, 5):
    long_term_url = base_url + f'b-apartments-condos/city-of-toronto/page-{i}/c37l1700273?ll=43.653226%2C-79.383184&address=Toronto%2C+ON&radius=18.0'
    long_term_list.append(get_links(long_term_url))
# short_term_list = get_links(short_term_url)
# room_and_rental_term_list = get_links(room_and_rental_term_url)

In [205]:
def get_context(url: str) -> None:
    global df1
    print(url)
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    # get ad title
    try:
        title = soup.find("h1", class_=lambda cls: cls and 'title' in cls).text
    except AttributeError:
        
        title = None


    # get ad price
    try:
        # price = soup.find("span", attrs={"itemprop": "price"}).text
        price = soup.find("div", class_=lambda cls: cls and 'price' in cls).text
    except AttributeError:
        price = None

    # get date posted
    try:
        datetime_obj = soup.find("time")
        date_posted = datetime.fromisoformat(datetime_obj['datetime'][:-1])
    except (AttributeError, TypeError):
        date_posted = None

    # get ad description
    try:
        # description = soup.find("div", attrs={"itemprop": "description"}).text
        desc = soup.find("div", class_=lambda cls: cls and 'descriptionContainer' in cls)
        desc_html = desc.prettify()
        cleaned_text = re.sub('<.*?>', '', desc_html)
        description = re.sub(r'\n+\s+', '', cleaned_text)
    except AttributeError:
        description = None

    # get the ad city
    try:
        address = soup.find("span", attrs={"itemprop": "address"}).text
    except AttributeError:
        address = None
    # apend information to the dataframe
    df1 = df1.append({
        "title": title,
        "price": price,
        "description": description,
        "date_posted": date_posted,
        "address": address, 
        "url": url},
        ignore_index=True
    )


In [206]:
df = pd.DataFrame(columns=['title', 'description', 'price', 'date_posted', 'address', 'url'])

In [233]:
df1 = pd.DataFrame(columns=['title', 'description', 'price', 'date_posted', 'address', 'url'])

In [234]:
for url in long_term_list[3]:
    get_context(url)
    # context = get_context(url)
    # print(context)
    # df1.append(context, ignore_index=True)
# pd.concat([df, df1], ignore_index=True)


https://www.kijiji.ca/v-apartments-condos/city-of-toronto/renovated-1-bedroom-yonge-and-davisville/1646179670
https://www.kijiji.ca/v-apartments-condos/mississauga-peel-region/2-bdrm-10-blackfriar-ave-186-kingsview-blvd-15-55-brides/1631153619
https://www.kijiji.ca/v-apartments-condos/city-of-toronto/beautiful-one-bedroom/1651159435
https://www.kijiji.ca/v-apartments-condos/city-of-toronto/2-bedroom-apartments-for-rent-on-jackes-avenue/1646071531
https://www.kijiji.ca/v-apartments-condos/city-of-toronto/1-month-free-large-1-bedroom-toronto-apartment-near-subway/1649856290
https://www.kijiji.ca/v-apartments-condos/city-of-toronto/upscale-2-bedroom-suite-don-mills-sheppard-ave-e/1631277280
https://www.kijiji.ca/v-apartments-condos/city-of-toronto/2-bedroom-legal-basement-apt-april-1-new/1651145779
https://www.kijiji.ca/v-apartments-condos/city-of-toronto/bright-one-bedroom-penthouse-w-private-rooftop-terrace/1651094368
https://www.kijiji.ca/v-apartments-condos/city-of-toronto/brand-new-d

In [235]:
df1.isnull().sum()

title          5
description    5
price          5
date_posted    5
address        5
url            0
dtype: int64

In [236]:
df1

Unnamed: 0,title,description,price,date_posted,address,url
0,Renovated 1 Bedroom @ Yonge and Davisville,DescriptionViewings:Please call or text our Le...,"$2,395",2023-02-20 12:43:47,"155 Balliol Street, Toronto, ON, M4S 1C2",https://www.kijiji.ca/v-apartments-condos/city...
1,"2 Bdrm - 10 Blackfriar Ave., 186 Kingsview Blv...",DescriptionVisit our Website: https://www.capr...,"$2,385",2023-01-28 00:07:33,"10 Blackfriar Ave., 186 Kingsview Blvd., 15 & ...",https://www.kijiji.ca/v-apartments-condos/miss...
2,Beautiful One Bedroom,DescriptionSpacious and newly-renovated lovely...,"$2,380FEATURED",2023-02-21 17:59:05,"M1L 1W3, Canada",https://www.kijiji.ca/v-apartments-condos/city...
3,2 Bedroom Apartments for Rent on Jackes Avenue,DescriptionJoin Us for Our In-person Open Hous...,"$3,098",2023-02-21 14:12:46,"7 Jackes Avenue, Toronto, ON, M4T 1E3",https://www.kijiji.ca/v-apartments-condos/city...
4,1 MONTH FREE! Large 1-Bedroom Toronto Apartmen...,"DescriptionAt Last, This Is What You've Been S...","$2,299",2023-02-09 13:24:01,"377 Ridelle Avenue, Toronto, ON, M6B 1K2",https://www.kijiji.ca/v-apartments-condos/city...
5,Upscale 2 Bedroom Suite Don Mills & Sheppard A...,DescriptionOur renovated apartments at 25 Leit...,"$2,747",2023-02-21 16:01:47,"25 Leith Hill Road., Toronto, ON, M2J 1Z1",https://www.kijiji.ca/v-apartments-condos/city...
6,2 Bedroom - Legal Basement Apt (April 1) **NEW**,DescriptionBeautiful Newly Renovated 2 Bedroom...,"$2,000",2023-02-21 15:59:00,"Fairglen Avenue, Scarborough, ON",https://www.kijiji.ca/v-apartments-condos/city...
7,Bright One Bedroom Penthouse w Private Roofto...,DescriptionRenovated penthouse; top appliances...,"$3,500",2023-02-21 15:58:49,"Toronto, ON M4V 1Z6",https://www.kijiji.ca/v-apartments-condos/city...
8,Brand New Downtown 1 bedroom Condo w Lakeview ...,Description* Move in by March 1 and get one mo...,"$2,290",2023-02-21 15:54:45,"138 Downes Street, Toronto, ON",https://www.kijiji.ca/v-apartments-condos/city...
9,"3 Bdrm Townhouse - 1560 Bloor Street, 3455 Hav...",DescriptionVisit our Website: https://www.capr...,"$2,965",2023-02-21 15:53:47,"1560 Bloor Street, 3455 Havenwood Drive, Missi...",https://www.kijiji.ca/v-apartments-condos/miss...


In [237]:
df = pd.concat([df1, df], ignore_index=True)

In [238]:
df

Unnamed: 0,title,description,price,date_posted,address,url
0,Renovated 1 Bedroom @ Yonge and Davisville,DescriptionViewings:Please call or text our Le...,"$2,395",2023-02-20 12:43:47,"155 Balliol Street, Toronto, ON, M4S 1C2",https://www.kijiji.ca/v-apartments-condos/city...
1,"2 Bdrm - 10 Blackfriar Ave., 186 Kingsview Blv...",DescriptionVisit our Website: https://www.capr...,"$2,385",2023-01-28 00:07:33,"10 Blackfriar Ave., 186 Kingsview Blvd., 15 & ...",https://www.kijiji.ca/v-apartments-condos/miss...
2,Beautiful One Bedroom,DescriptionSpacious and newly-renovated lovely...,"$2,380FEATURED",2023-02-21 17:59:05,"M1L 1W3, Canada",https://www.kijiji.ca/v-apartments-condos/city...
3,2 Bedroom Apartments for Rent on Jackes Avenue,DescriptionJoin Us for Our In-person Open Hous...,"$3,098",2023-02-21 14:12:46,"7 Jackes Avenue, Toronto, ON, M4T 1E3",https://www.kijiji.ca/v-apartments-condos/city...
4,1 MONTH FREE! Large 1-Bedroom Toronto Apartmen...,"DescriptionAt Last, This Is What You've Been S...","$2,299",2023-02-09 13:24:01,"377 Ridelle Avenue, Toronto, ON, M6B 1K2",https://www.kijiji.ca/v-apartments-condos/city...
...,...,...,...,...,...,...
175,,,,NaT,,https://www.kijiji.ca/v-apartments-condos/city...
176,,,,NaT,,https://www.kijiji.ca/v-apartments-condos/city...
177,,,,NaT,,https://www.kijiji.ca/v-apartments-condos/city...
178,,,,NaT,,https://www.kijiji.ca/v-apartments-condos/city...


In [239]:
df.isnull().sum()

title          20
description    20
price          20
date_posted    20
address        20
url             0
dtype: int64

In [240]:
final_df = df.copy()
final_df.shape

(180, 6)

In [162]:
final_df.to_csv("kijiji_watch_data.csv")