<h1> Project Titile - Web Scrapping - Real Estate </h1>
<p> The project focuses on extracting the properties information from each page of a Real Estate website (www.propertypro.ng) using BeautifulSoup Package and saves it on CSV. </p>

In [183]:
# importing required libraries

import pandas as pd
from bs4 import BeautifulSoup
import requests
import time

In [184]:
# target URL to scrap

base_url = 'https://www.propertypro.ng/property-for-rent/in/lagos'

In [185]:
headers = {'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Mobile Safari/537.36'}

In [186]:
# send request to download the data

req = requests.get (base_url, headers=headers)

In [187]:
req.status_code

200

In [188]:
req.text[1:200]

'!DOCTYPE html>\n<html lang="en">\n<head>\n<title> Property &amp; Houses for rent  in Lagos   (25,160 listings) | PropertyPro.ng</title>\n\n<meta charset="UTF-8" />\n<meta name="viewport" content="width=dev'

In [189]:
# parse the downloaded data

soup = BeautifulSoup(req.text,'html.parser')

In [190]:
soup.text[1:200]

'\n\n Property & Houses for rent  in Lagos   (25,160 listings) | PropertyPro.ng\n\n\n\n\n\n\n\n\n\n\n\n\n            #ajaxWait {\n                display: none;\n                width: 100%;\n                position: '

# find all location of houses by descrict

In [191]:
#storing the features:

locations = soup.select('.single-room-text > h4')

In [192]:
#confirming the length of location features

len(locations)

22

In [193]:
locations[1]

<h4><img alt="icon" src="/assets/assets/img/resultpage/a6b70a0c1c4423f60780bdda189b91e3-location.svg" title="icon"/> Akoka Yaba Lagos</h4>

In [195]:
#fixing all loaction into a list

all_location = []

for i in locations:
    location = i.get_text().split()
    location = location[-2]
    location = location.replace(',', '')
    all_location.append(location)

# find all house prices 

In [118]:
#storing the features

prices = soup.select('.n50 > h3 > span')

In [119]:
#confirming the length of prices features

len(prices)

44

In [120]:
prices[2]

<span content="NGN" itemprop="priceCurrency">₦</span>

In [146]:
# fixing all the prices data into a list

all_price = []

for i,price in enumerate(prices):
    if i % 2 != 0:
        all_price.append(price.getText().replace('/year', ''))

In [147]:
len(all_price)

22

# Number of bedrooms and house description

In [205]:
#storing the features

titles = soup.select('.single-room-text > a > h2')

In [206]:
#confirming the length of titles features

len(titles)

22

In [207]:
titles[5]

<h2 class="listings-property-title">3 BEDROOM FLAT / APARTMENT FOR RENT</h2>

In [226]:
#fixing all bedrooms and description into a list

all_description = []
all_bedroom = []

for i in titles:
    title_split = i.get_text().split()
    bedroom = title_split[0]
    description = title_split[2:4]
    f_house = ' '.join(description)
    all_description.append((f_house).replace('/', ''))
    all_bedroom.append(bedroom)

In [228]:
#confirming the length of description features

len(all_description)

22

In [229]:
#confirming the length of bedroom features

len(all_bedroom)

22

In [230]:
#creating a function to be able to return all list generated

def all_data(url):
    req = requests.get (url, headers=headers)
    soup = BeautifulSoup(req.text,'html.parser')
    
    
    locations = soup.select('.single-room-text > h4')
    all_location = []
    for i in locations:
        location = i.get_text().split()
        location = location[-2]
        location = location.replace(',', '')
        all_location.append(location)
    
    
    
    prices = soup.select('.n50 > h3 > span')
    all_price = []
    for i,price in enumerate(prices):
        if i % 2 != 0:
            all_price.append(price.getText().replace('/year', ''))
    
    
    
    titles = soup.select('.single-room-text > a > h2')
    all_description = []
    all_bedroom = []
    for i in titles:
        title_split = i.get_text().split()
        bedroom = title_split[0]
        description = title_split[2:4]
        f_house = ' '.join(description)
        all_description.append((f_house).replace('/', ''))
        all_bedroom.append(bedroom)
    
    

    return (all_location, all_price, all_description, all_bedroom)   

In [231]:
#crawls all the pages on the website from page 1 to 100

urls = [base_url]
for i in range(1,100):
    url = base_url + '?page=' + str(i)
    urls.append(url)

In [232]:
urls[60]

'https://www.propertypro.ng/property-for-rent/in/lagos?page=60'

In [233]:
master_location = []
master_price = []
master_description = []
master_bedroom = []

for url in urls:
    all_location, all_price, all_description, all_bedroom = all_data(url)
    
    master_location += all_location
    master_price += all_price
    master_description += all_description
    master_bedroom += all_bedroom
    
    time.sleep(30)

In [235]:
#confirming the length of each list of attributes created

len(master_bedroom)

2200

In [243]:
# create a data frame from the list of dictionaries

properties = pd.DataFrame ({"House Details": master_description,
                            "Location": master_location,
                            "Price (N)": master_price,
                            "Bedroom": master_bedroom
                           })

In [244]:
#converting dataframe to CSV

properties.to_csv("house_property.csv")

In [245]:
house_to_rent = pd.read_csv('house_property.csv')

In [246]:
house_to_rent

Unnamed: 0.1,Unnamed: 0,House Details,Location,Price (N),Bedroom
0,0,DETACHED DUPLEX,Lekki,9000000,6
1,1,FLAT,Yaba,1100000,3
2,2,FLAT,Ikeja,1800000,2
3,3,DETACHED DUPLEX,Lekki,9000000,6
4,4,FLAT,Ojodu,2200000,3
...,...,...,...,...,...
2195,2195,MINI FLAT,Yaba,700000,1
2196,2196,BLOCKS OF,Lekki,2750000,3
2197,2197,FLAT,Ikoyi,15000000,4
2198,2198,FLAT,Ajah,900000,2
