In [18]:
# Import required libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import time
from datetime import date, datetime, timedelta
import re
import random


# Site root; prefixed to the hrefs scraped from each listing card.
Base_URL = 'https://privateproperty.ng'

# Result pages to visit: used as range(start_page, end_page), so end_page is exclusive.
start_page = 0
end_page = 16

# Request headers sent with every page fetch.
Headers = {
    "User-Agent": (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/122.0.6261.95 Safari/537.36'
    ),
    "Accept-Language": "en-US, en;q=0.9",
}


# ----Helpers
#------Function to parse dates--------
# A date token is either a relative day word or an absolute "12 jan 2024"-style
# date (the text is lowercased before matching).
_DATE_TOKEN_RE = re.compile(r'today|yesterday|\d{1,2} \w{3} \d{4}')


def parse_listing_date(text):
    """Return the listing date found in *text* as a ``datetime.date``.

    Priority: the date that follows "added", then the date that follows
    "updated", then the first absolute date anywhere in the text.  A date may
    be written as "today", "yesterday" or "<day> <3-letter month> <year>"
    (e.g. "05 Nov 2025"); matching is case-insensitive.

    Returns None when *text* is not a non-empty string or contains no
    parseable date.
    """
    if not isinstance(text, str) or not text:
        return None

    text = text.lower()
    today = datetime.today().date()

    def to_date(token):
        # Convert one matched token; None means the token only looked like a date.
        if token == 'today':
            return today
        if token == 'yesterday':
            return today - timedelta(days=1)
        try:
            return datetime.strptime(token, '%d %b %Y').date()
        except ValueError:
            # e.g. "12 xyz 2024" matches the pattern but is not a real month
            return None

    # Resolve each keyword to the first valid date written AFTER it, so that
    # "Added 16 Oct 2025, Updated Today" yields the added date and not today.
    for keyword in ('added', 'updated'):
        start = text.find(keyword)
        if start == -1:
            continue
        for match in _DATE_TOKEN_RE.finditer(text, start + len(keyword)):
            parsed = to_date(match.group())
            if parsed is not None:
                return parsed

    # No keyword-anchored date: fall back to the first absolute date.  Bare
    # "today"/"yesterday" without a keyword are deliberately not trusted here.
    for match in _DATE_TOKEN_RE.finditer(text):
        token = match.group()
        if token in ('today', 'yesterday'):
            continue
        parsed = to_date(token)
        if parsed is not None:
            return parsed

    return None


#---Function to extract the digits in a listing's property benefits----
def ex_property_features(listing):
    """Read the numbers out of a listing's ``ul.property-benefit`` element.

    The first integer found in each ``<li>`` is taken, and the values are
    assigned by position: first item -> bedrooms, second -> bathrooms,
    third -> toilets.  An item without digits, a missing item, or a missing
    list all yield None for the corresponding key.
    """
    feature_names = ('bedrooms', 'bathrooms', 'toilets')

    benefit_list = listing.find('ul', class_='property-benefit')
    if not benefit_list:
        return dict.fromkeys(feature_names)

    digit_matches = (
        re.search(r"\d+", item.get_text(strip=True))
        for item in benefit_list.find_all('li')
    )
    numbers = [int(found.group()) if found else None for found in digit_matches]

    # Pad with None so a short list still fills every key; zip ignores extras.
    numbers.extend([None] * len(feature_names))
    return dict(zip(feature_names, numbers))


        
data = []


def _parse_listing(listing):
    """Turn one listing card into a flat record; elements that are absent become None."""
    #----TITLE----
    title_tag = listing.find('h3')
    title = title_tag.text if title_tag else None

    #----Listing URL--------
    link_tag = listing.find('a', href=True)
    if link_tag:
        href = link_tag['href']
        # Only prefix site-relative links; an already absolute href would be corrupted.
        is_absolute = href.startswith(('http://', 'https://'))
        listing_url = href if is_absolute else Base_URL + href
    else:
        listing_url = None

    # -------location------
    location_tag = listing.find('p', class_='listings-location')
    location = (location_tag.text.strip() or None) if location_tag else None

    # -----price--------
    price_tag = listing.find('h4')
    spans = price_tag.find_all('span') if price_tag else []
    # The amount is read from the second <span>: commas are removed and only the
    # text before '/' is kept.  Cards with fewer than two spans yield None.
    price = spans[1].text.replace(',', '').split('/')[0] if len(spans) > 1 else None

    #---Listing date--------
    date_tag = listing.find('h5')
    listed_date = parse_listing_date(date_tag.get_text()) if date_tag else None

    # -----Property Benefits-------
    features = ex_property_features(listing)

    return {
        'Title': title,
        'Address': location,
        'Price (₦)': price,
        # Hard-coded; whatever follows '/' in the price text is discarded above.
        # NOTE(review): confirm every listing on these pages is priced per year.
        'Rent_period': 'Annually',
        'Bedrooms': features['bedrooms'],
        'Bathrooms': features['bathrooms'],
        'Toilets': features['toilets'],
        'Listing_link': listing_url,
        'Listed_Date': listed_date,
    }


for i in range(start_page, end_page):
    #----polite delay between consecutive requests------
    # Done before every request except the first so that pages skipped via
    # `continue` below are throttled too.
    if i > start_page:
        time.sleep(random.uniform(3, 6))

    print(f"Scraping page {i}...")
    url = f'{Base_URL}/flats-apartments-for-rent?search=Ibadan+%2C+Oyo&auto=&bedroom=&min_price=&max_price=&button=&page={i}'

    # Fetch the page; a timeout stops a stalled connection from hanging the run,
    # and a failed request skips this page instead of aborting the whole scrape.
    try:
        response = requests.get(url, headers=Headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Skipping page {i}: request failed ({exc})")
        continue

    soup = BeautifulSoup(response.content, "html.parser")

    # Listings are only collected from inside the results container
    page = soup.find('div', class_='result-listings')
    if not page:
        continue

    for listing in page.find_all('div', class_='similar-listings-item'):
        # Bypass sponsored listings
        if 'sponsored-listing' in listing.get('class', []):
            continue
        data.append(_parse_listing(listing))


# ----Convert to DataFrame----
# Naming the columns up front keeps the schema stable even when nothing was
# scraped: an empty `data` list would otherwise give a frame with no columns
# and the casts below would fail with KeyError.
LISTING_COLUMNS = [
    'Title',
    'Address',
    'Price (₦)',
    'Rent_period',
    'Bedrooms',
    'Bathrooms',
    'Toilets',
    'Listing_link',
    'Listed_Date',
]
df = pd.DataFrame(data, columns=LISTING_COLUMNS)

# Nullable integer dtype: missing counts stay <NA> instead of turning the column into floats
for count_column in ('Bedrooms', 'Bathrooms', 'Toilets'):
    df[count_column] = df[count_column].astype('Int64')




#-----To get the specific area/location of the property:

# Importing a dataset of known areas in Ibadan.
# NOTE(review): this path is machine-specific; the notebook only runs where the file exists.
streets_df = pd.read_excel(r"C:\Users\USER\OneDrive\Documents\ibadan_areas_reference_only.xlsx")

# Normalise the reference names before they are used as regex search terms:
#  - blank cells are dropped (a NaN entry would crash re.escape later),
#  - surrounding whitespace is stripped (a trailing space defeats the \b...\b match),
#  - empty names are removed (an empty pattern would match every address).
street_list = [
    name
    for name in streets_df["area_name"].dropna().astype(str).str.strip().str.lower()
    if name
]




#--Match the known areas against a listing's address and return the specific area found in it
def extract_street(location, street_list):
    """Return the first entry of *street_list* that occurs in *location*.

    The comparison is made against the lowercased address and each name must
    match as a whole term (word boundaries on both sides), so the names in
    *street_list* are expected to be lowercase.  The hit is returned
    title-cased; None is returned for a missing address or when nothing matches.
    """
    if pd.isna(location):
        return None

    address = location.lower()
    hits = (
        name.title()
        for name in street_list
        # word boundaries avoid partial matches inside longer words
        if re.search(rf"\b{re.escape(name)}\b", address)
    )
    return next(hits, None)




# New column holding the area extracted from each listing's address
address_column = df["Address"]
df["Location"] = address_column.apply(extract_street, args=(street_list,))




# Extracting the property type from the listing title

# Property types looked for in the titles, in order of precedence
property_types = [
    "block of flats",
    "mini flat",
    "self contain",
    "shared apartment",
]


# Match the property types above against a listing title to extract its 'property type'
def get_property_type(text):
    """Return the first entry of ``property_types`` found in *text*, title-cased.

    Matching is case-insensitive and requires the whole term (word boundaries
    on both sides).  Returns None for a missing title or when no type matches.
    """
    if pd.isna(text):
        return None

    lowered = text.lower()

    def mentions(kind):
        # whole-term match, so a type is not picked up from inside a longer word
        return re.search(r"\b" + re.escape(kind) + r"\b", lowered) is not None

    first_hit = next(filter(mentions, property_types), None)
    if first_hit is None:
        return None
    return first_hit.title()  # e.g., "Block Of Flats"




# Derive the property type from each listing's title
title_column = df["Title"]
df["property_type"] = title_column.apply(get_property_type)


# -----Save the result as a csv file------
output_file = 'Ibadan_city_homes_for_rent.csv'
df.to_csv(output_file, index=False)

listing_count = len(df)
print(f"Done ✅ Scraped {listing_count} listings.")


Scraping page 0...
Scraping page 1...
Scraping page 2...
Scraping page 3...
Scraping page 4...
Scraping page 5...
Scraping page 6...
Scraping page 7...
Scraping page 8...
Scraping page 9...
Scraping page 10...
Scraping page 11...
Scraping page 12...
Scraping page 13...
Scraping page 14...
Scraping page 15...
Done ✅ Scraped 227 listings.
