In [1]:
import csv
import time
from urllib.parse import urljoin

import lxml
import requests
from bs4 import BeautifulSoup


class ZooplaScraper:
    """Scrape Zoopla's London for-sale search pages into ``zoopla.csv``.

    ``run`` walks the paginated search results, ``parse`` turns every listing
    card on a page into a dict appended to ``self.results``, and ``to_csv``
    writes the collected dicts to ``zoopla.csv`` in the working directory.
    """

    # Site root used to turn listing hrefs into absolute links.
    BASE_URL = 'https://www.zoopla.co.uk/'
    # Search URL; the 1-based page number is appended to the ``pn=`` parameter.
    SEARCH_URL = ('https://www.zoopla.co.uk/for-sale/property/london/'
                  '?identifier=london&q=London&search_source=for-sale'
                  '&radius=0&pn=')
    # Seconds before a request is abandoned; without a timeout a stalled
    # connection would block the crawl indefinitely.
    REQUEST_TIMEOUT = 30
    # Placeholder stored for any field that cannot be read from a card.
    MISSING = 'NA'

    def __init__(self):
        """Create a scraper with its own, initially empty, result list."""
        # Per-instance list: a class-level list would be shared (and mutated)
        # by every scraper instance.
        self.results = []

    def fetch(self, url):
        """Send an HTTP GET request to *url* and return the response.

        Progress (URL and status code) is printed on a single line.

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        print('HTTP GET request to URL: %s' % url, end='')
        res = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        print(' | Status code: %s' % res.status_code)

        return res

    @classmethod
    def _extract(cls, getter):
        """Return ``getter()``, or ``MISSING`` when the markup is absent.

        Only the lookup failures produced by missing tags, attributes or text
        fragments are caught, so unrelated errors still surface.
        """
        try:
            return getter()
        except (AttributeError, TypeError, KeyError, IndexError):
            return cls.MISSING

    def _parse_card(self, card):
        """Build the result dict for one listing card.

        Every field falls back to ``MISSING`` when it cannot be read, so one
        incomplete card cannot abort the crawl.
        """
        get = self._extract

        def span_text(css_class):
            return get(lambda: card.find('span', {'class': css_class}).text)

        return {
            # urljoin avoids a doubled slash when the href is root-relative.
            'link': get(lambda: urljoin(
                self.BASE_URL,
                str(card.find(
                    'a', {'class': 'listing-results-price text-price'}
                )['href']),
            )),
            # Keep only the first space-separated token of the price text.
            'price': get(lambda: card.find(
                'a', {'class': 'listing-results-price'}
            ).text.strip().split(' ')[0].strip()),
            'bedrooms': span_text('num-icon num-beds'),
            'bathrooms': span_text('num-icon num-baths'),
            'receptions': span_text('num-icon num-reception'),
            'area': span_text('num-icon num-sqft'),
            'address': get(lambda: card.find(
                'a', {'class': 'listing-results-address'}
            ).text),
            # Text between "Listed on" and "by" in the marketed paragraph.
            'date': get(lambda: card.find(
                'p', {'class': 'listing-results-marketed'}
            ).text.split('Listed on')[1].split('by')[0].strip()),
            'description': get(lambda: card.find('p').text.strip()),
            'title': get(lambda: card.find(
                'a', {'style': 'text-decoration:underline;'}
            ).text),
            'image': get(lambda: card.find(
                'a', {'class': 'photo-hover'}
            ).find('img')['data-src']),
        }

    def parse(self, html):
        """Parse one search-results page and append its listings to results.

        Args:
            html: markup of a search-results page.
        """
        content = BeautifulSoup(html, 'lxml')
        cards = content.find_all('div', {'class': 'listing-results-wrapper'})

        self.results.extend(self._parse_card(card) for card in cards)

    def to_csv(self):
        """Write the collected results to ``zoopla.csv``.

        The header row comes from the keys of the first result. When nothing
        has been collected, no file is written and a notice is printed.
        """
        if not self.results:
            print('No results to store; "zoopla.csv" was not written')
            return

        with open('zoopla.csv', 'w', newline='') as csv_file:
            writer = csv.DictWriter(
                csv_file, fieldnames=list(self.results[0].keys())
            )
            writer.writeheader()
            writer.writerows(self.results)
        print('Stored results to "zoopla.csv"')

    def run(self, pages=100, delay=2):
        """Fetch and parse search pages 1..*pages*, then write the CSV.

        Args:
            pages: number of result pages to request (default 100).
            delay: seconds to sleep after each request (default 2).
        """
        for page in range(1, pages + 1):
            try:
                res = self.fetch(self.SEARCH_URL + str(page))
            except requests.RequestException as exc:
                # Finish the progress line that fetch() started, then move on
                # so one failed page does not discard earlier results.
                print(' | Request failed: %s' % exc)
            else:
                # Error responses are skipped rather than parsed.
                if res.ok:
                    self.parse(res.text)
            time.sleep(delay)

        self.to_csv()
        

# Script entry point: crawl the search pages and write the CSV when this file
# is executed directly rather than imported.
if __name__ == "__main__":
    scraperZoop = ZooplaScraper()
    scraperZoop.run()
    

HTTP GET request to URL: https://www.zoopla.co.uk/for-sale/property/london/?identifier=london&q=London&search_source=for-sale&radius=0&pn=1 | Status code: 200
HTTP GET request to URL: https://www.zoopla.co.uk/for-sale/property/london/?identifier=london&q=London&search_source=for-sale&radius=0&pn=2 | Status code: 200
HTTP GET request to URL: https://www.zoopla.co.uk/for-sale/property/london/?identifier=london&q=London&search_source=for-sale&radius=0&pn=3 | Status code: 200
HTTP GET request to URL: https://www.zoopla.co.uk/for-sale/property/london/?identifier=london&q=London&search_source=for-sale&radius=0&pn=4 | Status code: 200
HTTP GET request to URL: https://www.zoopla.co.uk/for-sale/property/london/?identifier=london&q=London&search_source=for-sale&radius=0&pn=5 | Status code: 200
HTTP GET request to URL: https://www.zoopla.co.uk/for-sale/property/london/?identifier=london&q=London&search_source=for-sale&radius=0&pn=6 | Status code: 200
HTTP GET request to URL: https://www.zoopla.co