In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from requests import get
import numpy as np

#### Beautiful Soup will help us read this html. It picks the text from the response and parses the information in a way that makes it easier for us to navigate in its structure and get its contents.
#### The requests module allows you to send HTTP requests using Python

In [2]:
# Assigning the url(Uniform Resource Locator) of website from which data needs to be scarpped.
base_url = 'https://www.propertypro.ng/property-for-rent?search=Lagos&type=&bedroom=&min_price=&max_price='
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}

#### Now, let's test if we can communicate with the website. You can get several codes from this command, but if we get '200' it’s usually a sign that we’re good to go.

In [3]:
#  Requesting the data  from url.
r = requests.get(base_url, headers=headers)
r.status_code

200

In [4]:
soup = BeautifulSoup(r.text, 'html.parser')
print(soup.title.get_text())

 Property & Houses for rent  in Nigeria (26,720 listings) | PropertyPro.ng


In [5]:
print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <title>
   Property &amp; Houses for rent  in Nigeria (26,720 listings) | PropertyPro.ng
  </title>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <link href="/assets/image/42484569b600758112d45bad3d876f78-favicon.ico" rel="icon" type="image/ico"/>
  <meta content="Real Estate for rent in  Nigeria,  Properties for rent  in  Nigeria - Flats, Houses, Apartments, Duplexes, land, Office space, self-contain, Commercial property for rent in Nigeria" name="description"/>
  <meta content="Real Estate for  rent in  Nigeria,  Properties for rent  in  Nigeria - Flats, Houses, Apartments, Duplexes, land, Commercial property, Office space, self-contain" name="keywords"/>
  <meta content=" Property &amp; Houses for rent  in Nigeria" property="og:title"/>
  <meta content="Real Estate for rent in  Nigeria,  Properties for rent  in  Nigeria - Flats, Houses, Apartments, Duplexes, land, Office space, self-cont

In [6]:
house_containers = soup.find_all('div', class_="single-room-text")

In [7]:
titles = soup.select('.single-room-text > a > h2')

In [8]:
len(titles)

22

In [9]:
# Creating empty lists in-order to append data scrapped from url's.
# The number of lists depends on the number of features we are extracting from the url.

title=[]
location=[]
price=[]
Property_type=[]
Property_features=[]

In [10]:
titles_list = []
for title in titles:
    title = title.get_text()
    title = title.split()
    title = title[0:3]
    title = ' '.join(title)
    titles_list.append(title)
titles_list

['COMMERCIAL PROPERTY FOR',
 '1 BEDROOM MINI',
 '3 BEDROOM FLAT',
 '3 BEDROOM FLAT',
 '4 BEDROOM DETACHED',
 '4 BEDROOM DETACHED',
 'OFFICE SPACE COMMERCIAL',
 '3 BEDROOM FLAT',
 '4 BEDROOM SEMI',
 '3 BEDROOM FLAT',
 '4 BEDROOM HOUSE',
 '4 BEDROOM TERRACED',
 'OFFICE SPACE COMMERCIAL',
 '4 BEDROOM TERRACED',
 'OFFICE SPACE COMMERCIAL',
 '2 BEDROOM FLAT',
 '2 BEDROOM FLAT',
 '3 BEDROOM FLAT',
 '4 BEDROOM TERRACED',
 '2 BEDROOM FLAT',
 '3 BEDROOM FLAT',
 'OFFICE SPACE COMMERCIAL']

In [11]:
# find all prices
prices = soup.select('.n50 > h3 > span')

In [12]:
# fixing all prices
prices_list = []
for i, price in enumerate(prices):
    price = price.get_text()
    if len(price)> 2:
        price = price.replace(',','')
        try :
            price = int(price)
        except ValueError:
            price = str(price)    
        prices_list.append(price)

In [13]:
len(prices)

44

In [14]:
prices_list

['6500000/year',
 500000,
 800000,
 3000000,
 5000000,
 5000000,
 5500000,
 6000000,
 4000000,
 2500000,
 100000,
 5000000,
 45000,
 10000000,
 40000,
 12000000,
 8000000,
 4000000,
 6000000,
 8000000,
 2500000,
 25000]

In [15]:
locations = soup.select('.single-room-text > h4')

In [16]:
len(locations)

22

In [17]:
# find all locations
locations_list = []
for i in locations:
    loc = i.get_text().split()
    loc = loc[-2]
    loc = loc.replace(',', '')
    locations_list.append(loc)
locations_list[1]

'Ikosi'

In [18]:
propertyTypes = soup.select('.listings-property-title2')

In [19]:
len(propertyTypes)

22

In [83]:
# find number of bedrooms
propertyTypes_list = []
for propertyType in propertyTypes:
    propertyType_list.append(propertyType.get_text())
    propertyTypes_list