# Import the necessary libraries

In [4]:
import requests
from bs4 import BeautifulSoup as soup
import pandas as pd
import re
import time

# Get property website links and listing dates

This code picks data from the website immediately after specifying the area we need. In our case, NW8 or Mayfair

In [9]:
#the link to the home page
base_url = 'https://www.zoopla.co.uk/' 

#create an empty dictionary
property_links = {}

#specify the page range. (5,11) means it will scrap data from page 5 to page 10
for x in range (1,21):
    #add the page link after f' and remember to put {x} where there is a page number
    page = requests.get(f'https://www.zoopla.co.uk/for-sale/property/london/nw8/st-johns-wood/?q=NW8&results_sort=newest_listings&search_source=home&pn={x}')
    bsobj = soup(page.content,'lxml')
    
    #scrap tags thatn have the listings of all items on a page
    property_list = bsobj.findAll("div", id=re.compile("^listing_"))
    
    #get link and date from each tag listed above
    for item in property_list:
        #scrap data from tags with link to each property
        for link in item.findAll("a", {"class":"e9kuaf124 css-1rzeb2c-StyledLink-Link-StyledLink e33dvwd0"}, href=True):
            lin = base_url + link['href']
            
        #scrap data from tags with the listing dates
        for date in item.findAll("div", {"class":"css-y69cjd-DateDetailWrapper e9kuaf127"}):
            dt = date.text
            property_links[lin] = dt

#create a dataframe with the website links to each property and listing dates
data = pd.DataFrame(property_links.keys(),columns = ["Website_Link"])
data["Listing_Date"] = property_links.values()
#check the number of columns and rows (2,3) means 2 rows and 3 columns
data.shape

(500, 2)

In [10]:
#print out the first 5 rows
data.head()

Unnamed: 0,Website_Link,Listing_Date
0,https://www.zoopla.co.uk//for-sale/details/614...,Listed on 9th May 2022
1,https://www.zoopla.co.uk//for-sale/details/614...,Listed on 9th May 2022
2,https://www.zoopla.co.uk//for-sale/details/614...,Listed on 9th May 2022
3,https://www.zoopla.co.uk//for-sale/details/614...,Listed on 7th May 2022
4,https://www.zoopla.co.uk//for-sale/details/614...,Listed on 7th May 2022


# Get property details from each property link

This code carries the two columns previously created and adds other columns/attributes of a property from each link

In [11]:
#create an empty dataframe with specified column names
property_finallist = pd.DataFrame(columns=['Website Link','Listing Date','Building Name','Description','Features and description',
                              'Street Name','Asking Price','Total Square Feet of the Property/ Approx (Sq. ft)','Agent'])
d=0

#carry forwad the columns from previous dataframe and add other columns
for link,date in zip(data.Website_Link,data.Listing_Date):
    page=requests.get(link)
    bsobj = soup(page.content,'lxml')
    
    #start and empty list with link and date in preparation of adding other columns/properties
    info = []
    info.append(link)
    info.append(date)
    
    time.sleep(3)
    
    #add other properties
    try:
        building_name=bsobj.find("div", attrs={"class": "c-PJLV c-PJLV-ieHhfWi-css"}).text.strip()
        info.append(building_name)
    except: 
        building_name = 'No Info'   
        info.append(building_name)
    try:
        description=bsobj.find("div", class_ = "c-PJLV c-PJLV-iiNveLf-css").text.strip()
        info.append(description)
    except: 
        description = 'No Info'
        info.append(description)
    try:
        fnd=bsobj.find("div", class_ = "css-1x8rn37-FeaturesContainer e1cyjrz33").text.strip()
        info.append(fnd)
    except: 
        fnd = 'No Info'
        info.append(fnd)
    try:
        street_name=bsobj.find("div", class_ = "PJLV PJLV-ieIPARB-css").text.strip()
        info.append(street_name)
    except: 
        street_name = 'No Info'
        info.append(street_name)
    try:
        asking_price=bsobj.find("div", class_ = "c-PJLV c-PJLV-igTumRt-css").text.strip()
        info.append(asking_price)
    except: 
        asking_price = 'No Info'
        info.append(asking_price)
    try:
        sq_ft=bsobj.find("div", class_ = "c-PJLV c-PJLV-ieVYfkQ-css").text.strip()
        info.append(sq_ft)
    except: 
        sq_ft = 'No Info'
        info.append(sq_ft)
    try:
        agent=bsobj.find("p", class_ = "css-lnx84k-AgentName e5vsiog3").text.strip()
        info.append(agent)
    except: 
        agent = 'No Info'
        info.append(agent)
    
    
    property_finallist.loc[d]=info
    d+=1
    print('Saving: ', property['Building Name'])

Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed flat for sale
Saving:  2 bed

In [12]:
#check number of rows and columns
property_finallist.shape

(500, 9)

In [13]:
#check first few rows
property_finallist.head(20)

Unnamed: 0,Website Link,Listing Date,Building Name,Description,Features and description,Street Name,Asking Price,Total Square Feet of the Property/ Approx (Sq. ft),Agent
0,https://www.zoopla.co.uk//for-sale/details/614...,Listed on 9th May 2022,2 bed flat for sale,2 beds2 baths1 reception866 sq. ft,24 Hour ConciergeParkingEn Suite Shower RoomFu...,"Wellington Court, Wellington Road, St John's W...","£1,300,000","(£1,501/sq. ft)",Fox Gregory Ltd
1,https://www.zoopla.co.uk//for-sale/details/614...,Listed on 9th May 2022,2 bed flat for sale,2 beds1 bath1 reception,LeaseholdTwo Double BedroomsBathroomReception ...,"Fettes House, Wellington Road NW8","£675,000",No Info,Winkworth - St Johns Wood
2,https://www.zoopla.co.uk//for-sale/details/614...,Listed on 9th May 2022,1 bed flat for sale,1 bed1 bath1 reception,Reception roomSeparate fully fitted kitchenDou...,"Grove End Road, London NW8","£515,000",No Info,Charmill Residential
3,https://www.zoopla.co.uk//for-sale/details/614...,Listed on 7th May 2022,5 bed flat for sale,5 beds2 baths2 receptions,5 bedrooms2 reception rooms2 bathroomsLiftPeri...,"Hanover House, St. Johns Wood High Street, Lon...","£3,650,000",No Info,Knight Frank - St John's Wood Sales
4,https://www.zoopla.co.uk//for-sale/details/614...,Listed on 7th May 2022,2 bed flat for sale,2 beds1 bath1 reception,LeaseholdSt Johns Wood LocationClose to Amenit...,"Boundary Road, London NW8",POA,No Info,Tyron Ash
5,https://www.zoopla.co.uk//for-sale/details/614...,Listed on 6th May 2022,3 bed property for sale,"3 beds3 baths1 reception2,036 sq. ft",FreeholdTriple glazed windows and doorsSecurit...,"Abbey Road, London NW8","£3,500,000","(£1,719/sq. ft)",Vantage Residential
6,https://www.zoopla.co.uk//for-sale/details/614...,Listed on 6th May 2022,2 bed flat for sale,2 beds2 baths1 reception,Share of freehold2 Bathrooms2 Bedrooms1 Recept...,"Clifton Court, Northwick Terrace NW8","£1,050,000",No Info,Goldschmidt & Howland - Little Venice
7,https://www.zoopla.co.uk//for-sale/details/613...,Listed on 6th May 2022,3 bed flat for sale,3 beds2 baths,Bright aspects throughoutPeriod featuresEn-sui...,"Hamilton Terrace, London NW8","£1,650,000",No Info,Kinleigh Folkard & Hayward - St John's Wood Sales
8,https://www.zoopla.co.uk//for-sale/details/613...,Listed on 5th May 2022,Studio for sale,1 bath1 reception,Share of freeholdStudio ApartmentThird FloorPo...,"Abbey Road, London NW8","£349,950",No Info,Dexters - St Johns Wood
9,https://www.zoopla.co.uk//for-sale/details/495...,Listed on 5th May 2022,Studio for sale,1 bath1 reception,Share of freeholdStudio flat in a purpose buil...,"Grove End Road, London NW8","£367,500",No Info,EweMove Sales & Lettings - Bexleyheath


# Print scrapped data to a csv file

specify the path to where the file will be stored on local machine

In [15]:
property_finallist.to_csv('C:/Users/Makena/Desktop/zooplanw8.csv')