In [1]:
# import libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from requests import get
import numpy as np

#### Beautiful Soup will help us read this HTML. It takes the text of the response and parses it in a way that makes it easier for us to navigate its structure and get its contents.
#### The requests module allows you to send HTTP requests using Python.

In [2]:
# URL (Uniform Resource Locator) of the listings page the data will be scraped from.
base_url = 'https://www.propertypro.ng/property-for-rent?search=Lagos&type=&bedroom=&min_price=&max_price='
# Request headers: send a desktop-browser User-Agent string with every request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}

#### Now, let's test whether we can communicate with the website. The request can return several HTTP status codes, but if we get '200' it's usually a sign that we're good to go.

In [3]:
# Request the page at base_url, sending the headers defined in the previous cell.
r = requests.get(base_url, headers=headers)
# Show the HTTP status code of the response as the cell output.
r.status_code

200

In [4]:
# Parse the response body with the 'html.parser' backend.
soup = BeautifulSoup(r.text, 'html.parser')
# Print the text of the page's <title> element as a quick check of what was downloaded.
print(soup.title.get_text())

 Property & Houses for rent  in Nigeria (27,200 listings) | PropertyPro.ng


In [5]:
# Select only the listing containers: <div> elements carrying the
# "single-room-text" class. `select` takes a CSS selector, so the class has to
# be written into the selector itself; `class_=` is a `find_all` keyword.
house_containers = soup.select('div.single-room-text')

In [6]:
# Select every <h2> that is a child of an <a> sitting directly inside an
# element with class "single-room-text".
titles = soup.select('.single-room-text > a > h2')

In [7]:
# Count the heading elements matched by the title selector.
len(titles)

22

In [8]:
# Create one empty list per feature extracted from the page; the scraped
# values are meant to be appended to these lists.
# There are four features: title, location, price and amenities.
title, location, price, Amenities = [], [], [], []

In [9]:
# Shorten every listing heading to its first three whitespace-separated words.
titles_list = []
for title in titles:
    title = ' '.join(title.get_text().split()[0:3])
    titles_list.append(title)
# Show the cleaned titles as the cell output.
titles_list

['COMMERCIAL PROPERTY FOR',
 '1 BEDROOM MINI',
 '3 BEDROOM FLAT',
 '5 BEDROOM DETACHED',
 '4 BEDROOM HOUSE',
 '4 BEDROOM TERRACED',
 '3 BEDROOM FLAT',
 '3 BEDROOM FLAT',
 '4 BEDROOM MASSIONETTE',
 '3 BEDROOM FLAT',
 '5 BEDROOM DETACHED',
 '3 BEDROOM FLAT',
 '4 BEDROOM DETACHED',
 '4 BEDROOM SEMI',
 '3 BEDROOM FLAT',
 '1 BEDROOM MINI',
 '3 BEDROOM BLOCKS',
 '3 BEDROOM FLAT',
 '3 BEDROOM FLAT',
 'SELF CONTAIN FLAT',
 'SELF CONTAIN FLAT',
 'SELF CONTAIN FLAT']

In [10]:
# Find all price elements: every <span> inside an <h3> that is a direct child
# of an element with class "n50".
prices = soup.select('.n50 > h3 > span')

In [11]:
# Clean the price texts: skip very short spans, strip thousands separators and
# convert to int where the remaining text is a plain number.
prices_list = []
for i, price in enumerate(prices):
    price = price.get_text()
    if len(price) <= 2:
        # Texts of at most two characters are not kept as prices.
        continue
    price = price.replace(',', '')
    try:
        price = int(price)
    except ValueError:
        # Not a plain integer (e.g. carries a "/year" suffix): keep the text.
        price = str(price)
    prices_list.append(price)

In [12]:
# Count every <span> matched by the price selector, including the short ones
# that the cleaning loop skipped.
len(prices)

44

In [13]:
# Display the cleaned prices.
prices_list

['6500000/year',
 800000,
 2000000,
 5500000,
 5000000,
 13000000,
 12000000,
 8000000,
 15000000,
 6500000,
 320000000,
 7000000,
 4000000,
 4000000,
 3500000,
 700000,
 1200000,
 1200000,
 1500000,
 120000,
 150000,
 120000]

In [14]:
# Select every <h4> that is a direct child of an element with class
# "single-room-text".
locations = soup.select('.single-room-text > h4')

In [15]:
# Count the elements matched by the location selector.
len(locations)

22

In [16]:
# Reduce each location line to a single word: take the second-to-last
# whitespace-separated token and remove any commas from it.
locations_list = []
for i in locations:
    loc = i.get_text().split()[-2].replace(',', '')
    locations_list.append(loc)
# Show the extracted locations as the cell output.
locations_list

['2',
 'Ikeja',
 'Maryland',
 'Lekki',
 'Lekki',
 'Ikoyi',
 'Ikoyi',
 'Island',
 'Ikoyi',
 'Ikoyi',
 'Lekki',
 'Ikoyi',
 'Lekki',
 'TollgateLekki',
 'Lekki',
 'Ojodu',
 'Isolo',
 'Isolo',
 'Isolo',
 'Isolo',
 'Isolo',
 'Isolo']

In [18]:
# Select every <span> that is a direct child of an element with class
# "fur-areea". This rebinds `Amenities`, replacing the empty list created earlier.
Amenities = soup.select('.fur-areea > span')

In [19]:
# Count the spans matched by the amenities selector.
len(Amenities)

66

In [20]:
# Split the text of every amenity span into its whitespace-separated words,
# producing one list of words per span.
Amenities_list = []
for i in Amenities:
    Amenities_split = i.get_text().split()
    Amenities_list.append(Amenities_split)
# Show the result as the cell output.
Amenities_list

[['beds'],
 ['baths'],
 ['3', 'Toilets'],
 ['1', 'beds'],
 ['1', 'baths'],
 ['2', 'Toilets'],
 ['3', 'beds'],
 ['3', 'baths'],
 ['4', 'Toilets'],
 ['5', 'beds'],
 ['5', 'baths'],
 ['6', 'Toilets'],
 ['4', 'beds'],
 ['4', 'baths'],
 ['5', 'Toilets'],
 ['4', 'beds'],
 ['4', 'baths'],
 ['5', 'Toilets'],
 ['3', 'beds'],
 ['3', 'baths'],
 ['4', 'Toilets'],
 ['3', 'beds'],
 ['3', 'baths'],
 ['4', 'Toilets'],
 ['4', 'beds'],
 ['4', 'baths'],
 ['5', 'Toilets'],
 ['3', 'beds'],
 ['3', 'baths'],
 ['4', 'Toilets'],
 ['5', 'beds'],
 ['6', 'baths'],
 ['7', 'Toilets'],
 ['3', 'beds'],
 ['3', 'baths'],
 ['3', 'Toilets'],
 ['4', 'beds'],
 ['4', 'baths'],
 ['5', 'Toilets'],
 ['4', 'beds'],
 ['4', 'baths'],
 ['5', 'Toilets'],
 ['3', 'beds'],
 ['3', 'baths'],
 ['4', 'Toilets'],
 ['1', 'beds'],
 ['1', 'baths'],
 ['2', 'Toilets'],
 ['3', 'beds'],
 ['3', 'baths'],
 ['4', 'Toilets'],
 ['3', 'beds'],
 ['3', 'baths'],
 ['4', 'Toilets'],
 ['3', 'beds'],
 ['3', 'baths'],
 ['4', 'Toilets'],
 ['0', 'beds'],
 ['0',

In [25]:
# Create a function that returns all of the generated lists for a given page URL

def all_data(url):
    """Scrape one listings page and return its titles, prices, locations and amenities.

    The page at ``url`` is downloaded with the module-level ``headers`` and
    parsed with Beautiful Soup; each feature is then extracted with the same
    selectors and cleaning steps used in the exploratory cells.

    Parameters
    ----------
    url : str
        Address of the listings page to download.

    Returns
    -------
    tuple
        ``(titles_list, prices_list, locations_list, Amenities_list)``:

        * ``titles_list`` - first three words of each heading, space-joined.
        * ``prices_list`` - one entry per price text longer than two
          characters, with commas removed; an ``int`` when the remaining text
          is a plain number, otherwise the comma-free string.
        * ``locations_list`` - second-to-last word of each location line,
          commas removed.
        * ``Amenities_list`` - one list of whitespace-separated words per
          amenity span.

    Raises
    ------
    IndexError
        If a location line contains fewer than two words.
    """
    # Download and parse the page that was asked for. Use the `url` argument
    # and this call's own response, not the notebook-level base_url / r.
    req = requests.get(url, headers=headers)
    soup = BeautifulSoup(req.text, 'html.parser')

    # Titles: keep the first three words of every heading.
    titles_list = [
        ' '.join(heading.get_text().split()[0:3])
        for heading in soup.select('.single-room-text > a > h2')
    ]

    # Prices: every matched span is examined; texts of at most two characters
    # are skipped, the rest are stripped of thousands separators.
    prices_list = []
    for span in soup.select('.n50 > h3 > span'):
        price = span.get_text()
        if len(price) > 2:
            price = price.replace(',', '')
            try:
                price = int(price)
            except ValueError:
                # Not a plain integer (e.g. "6500000/year"): keep the text.
                pass
            prices_list.append(price)

    # Locations: second-to-last word of every location line, without commas.
    locations_list = [
        line.get_text().split()[-2].replace(',', '')
        for line in soup.select('.single-room-text > h4')
    ]

    # Amenities: the words of every amenity span.
    Amenities_list = [
        span.get_text().split()
        for span in soup.select('.fur-areea > span')
    ]

    return (titles_list, prices_list, locations_list, Amenities_list)