In [78]:
# All imports
import requests
from bs4 import BeautifulSoup
import pandas as pd
import sqlalchemy
import time
import random

In [3]:
# Create one requests.Session; the request cells below reuse it for every page fetch
session = requests.Session()

In [4]:
# Request headers sent with every fetch: an Accept list and a desktop Chrome user-agent string
headers = {
    'accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.9'
    ),
    'user-agent': (
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
    ),
}

In [5]:
# URL of the page to scrape: the Zoopla "for sale" search for London
# (no `pn` page parameter is given here)
url = (
    'https://www.zoopla.co.uk/for-sale/property/london/'
    '?q=London&search_source=home'
)

In [35]:
# Fetch the search page through the shared session.
# The timeout (seconds) stops a stalled connection from blocking the kernel
# forever; requests applies no timeout unless one is passed explicitly.
res = session.get(url=url, headers=headers, timeout=30)

In [36]:
# Display the HTTP status code of the response fetched above
res.status_code

200

In [37]:
# Parse the response body into a BeautifulSoup tree using the 'lxml' parser
soup = BeautifulSoup(res.text, 'lxml')

In [38]:
# Collect the listing cards: every <div> carrying data-testid="search-result"
results = soup.find_all(name='div', attrs={'data-testid': "search-result"})

In [40]:
# Headline of each card: stripped text of the <h2> marked data-testid="listing-title"
titles = [
    card.find('h2', attrs={"data-testid": "listing-title"}).text.strip()
    for card in results
]
titles

['2 bed flat for sale',
 '1 bed flat for sale',
 '3 bed maisonette for sale',
 '2 bed flat for sale',
 '1 bed flat for sale',
 '4 bed flat for sale',
 '3 bed end terrace house for sale',
 '2 bed flat for sale',
 '2 bed flat for sale',
 '3 bed semi-detached house for sale',
 'Land for sale',
 '2 bed flat for sale',
 '1 bed maisonette for sale',
 '2 bed flat for sale',
 '2 bed flat for sale',
 '2 bed end terrace house for sale',
 '2 bed flat for sale',
 '3 bed detached house for sale',
 '2 bed flat for sale',
 '2 bed flat for sale',
 '1 bed flat for sale',
 '4 bed semi-detached house for sale',
 '4 bed town house for sale',
 '4 bed semi-detached house for sale',
 '1 bed flat for sale']

In [46]:
# Address of each card: stripped text of the <p> marked data-testid="listing-description"
address = [
    card.find('p', attrs={"data-testid": "listing-description"}).text.strip()
    for card in results
]
address

['Wharf Street, Deptford SE8',
 'Pratt Street, London NW1',
 'St. Quintin Avenue, London W10',
 'Steele Road, Leytonstone E11',
 'Canning Road, Stratford, London E15',
 'The Pryors, East Heath Road, London NW3',
 'Tottenhall Road, London N13',
 'High Road, London NW10',
 'Kennington Lane, London SE11',
 'Leesons Hill, Orpington BR5',
 'New Drum Street, London E1',
 'Sancroft Street, London SE11',
 'Macgregor Road, Canning Town, London E16',
 'Chapman Square, London SW19',
 'Woodington Close, Eltham SE9',
 'Stockton Road, London N17',
 'Jefferson Plaza, London E3',
 'West Avenue, Pinner HA5',
 'Jerome Place, Kingston Upon Thames KT1',
 'Carlingford Road, London N15',
 'Barratt House, 20 Regent Road TW3',
 'Mayhew Close, London E4',
 'Rose Park Close, Yeading, Hayes UB4',
 'Tilbrook Road, London SE3',
 'City Road, London EC1V']

In [66]:
# Room counts per card (beds, baths, chairs).
# For each card, walk the children of the "listing-spec" <div> and map the
# data-testid of the child's presentation <span> to the child's text.
room_furnitures = [
    {
        item.find('span', attrs={"role": "presentation"}).get('data-testid'): item.text
        for item in card.find('div', attrs={"data-testid": "listing-spec"})
    }
    for card in results
]

# One list per spec key; a card lacking that key contributes None.
beds = [spec.get('bed') for spec in room_furnitures]
baths = [spec.get('bath') for spec in room_furnitures]
chairs = [spec.get('chair') for spec in room_furnitures]

beds, baths, chairs

(['2',
  '1',
  '3',
  '2',
  '1',
  '4',
  '3',
  '2',
  '2',
  '3',
  '2',
  '2',
  '1',
  '2',
  '2',
  '2',
  '2',
  '3',
  '2',
  '2',
  '1',
  '4',
  '4',
  '4',
  '1'],
 ['2',
  '1',
  '3',
  '1',
  '1',
  '2',
  '2',
  '2',
  '2',
  '1',
  '2',
  '2',
  '1',
  '2',
  '1',
  '1',
  None,
  '1',
  '2',
  '2',
  '1',
  '1',
  '2',
  '2',
  '1'],
 ['1',
  '1',
  '1',
  '1',
  '1',
  '2',
  '2',
  '1',
  '1',
  '2',
  '1',
  '1',
  '1',
  '1',
  '1',
  '2',
  '1',
  '2',
  '1',
  '1',
  '1',
  '1',
  '1',
  '2',
  '1'])

In [54]:
# Price of each card: text of the "listing-price" <div>, keeping only what
# follows the last '£' (the whole text is kept when no '£' is present)
prices = [
    card.find('div', attrs={"data-testid": "listing-price"}).text.strip().rpartition('£')[2]
    for card in results
]
prices

['500,000',
 '485,000',
 '1,950,000',
 '400,000',
 '250,000',
 '2,000,000',
 '675,000',
 '325,000',
 '965,000',
 '450,000',
 '1,115,225',
 '675,000',
 '270,000',
 '675,000',
 '325,000',
 '485,000',
 '449,000',
 '880,000',
 '675,000',
 '475,000',
 '357,000',
 '675,000',
 '600,000',
 '575,000',
 '825,000']

In [57]:
# Agent contact link of each card. Despite the variable name these are absolute
# URLs of the contact page (site root + the link's href), not e-mail addresses.
emails = [
    'https://www.zoopla.co.uk' + card.find('a', attrs={"data-testid": "agent-contact-link"}).get('href')
    for card in results
]
emails

['https://www.zoopla.co.uk/for-sale/details/contact/62434743/',
 'https://www.zoopla.co.uk/for-sale/details/contact/62434739/',
 'https://www.zoopla.co.uk/for-sale/details/contact/62434740/',
 'https://www.zoopla.co.uk/for-sale/details/contact/62434733/',
 'https://www.zoopla.co.uk/for-sale/details/contact/62434724/',
 'https://www.zoopla.co.uk/for-sale/details/contact/55249134/',
 'https://www.zoopla.co.uk/for-sale/details/contact/62434715/',
 'https://www.zoopla.co.uk/for-sale/details/contact/62434708/',
 'https://www.zoopla.co.uk/new-homes/details/contact/62434508/',
 'https://www.zoopla.co.uk/for-sale/details/contact/62434683/',
 'https://www.zoopla.co.uk/for-sale/details/contact/62434665/',
 'https://www.zoopla.co.uk/for-sale/details/contact/62434662/',
 'https://www.zoopla.co.uk/for-sale/details/contact/62434652/',
 'https://www.zoopla.co.uk/for-sale/details/contact/62434646/',
 'https://www.zoopla.co.uk/for-sale/details/contact/62434645/',
 'https://www.zoopla.co.uk/for-sale/det

In [59]:
# Absolute URL of each card's detail page (site root + the "listing-details-link" href)
details = [
    'https://www.zoopla.co.uk' + card.find('a', attrs={"data-testid": "listing-details-link"}).get('href')
    for card in results
]
details

['https://www.zoopla.co.uk/for-sale/details/62434743/?search_identifier=8076c5a18641e64870023fe382cd0cd7',
 'https://www.zoopla.co.uk/for-sale/details/62434739/?search_identifier=8076c5a18641e64870023fe382cd0cd7',
 'https://www.zoopla.co.uk/for-sale/details/62434740/?search_identifier=8076c5a18641e64870023fe382cd0cd7',
 'https://www.zoopla.co.uk/for-sale/details/62434733/?search_identifier=8076c5a18641e64870023fe382cd0cd7',
 'https://www.zoopla.co.uk/for-sale/details/62434724/?search_identifier=8076c5a18641e64870023fe382cd0cd7',
 'https://www.zoopla.co.uk/for-sale/details/55249134/?search_identifier=8076c5a18641e64870023fe382cd0cd7',
 'https://www.zoopla.co.uk/for-sale/details/62434715/?search_identifier=8076c5a18641e64870023fe382cd0cd7',
 'https://www.zoopla.co.uk/for-sale/details/62434708/?search_identifier=8076c5a18641e64870023fe382cd0cd7',
 'https://www.zoopla.co.uk/new-homes/details/62434508/?search_identifier=8076c5a18641e64870023fe382cd0cd7',
 'https://www.zoopla.co.uk/for-sale/

In [69]:
# Assemble the scraped lists into one DataFrame, one row per listing card.
# The pairs are listed in output-column order.
df_real_estate = pd.DataFrame(dict([
    ('title', titles),
    ('address', address),
    ('bedrooms', beds),
    ('bathrooms', baths),
    ('chairs', chairs),
    ('price_£', prices),
    ('email', emails),
    ('property_link', details),
]))

df_real_estate

Unnamed: 0,title,address,bedrooms,bathrooms,chairs,price_£,email,property_link
0,2 bed flat for sale,"Wharf Street, Deptford SE8",2,2.0,1,500000,https://www.zoopla.co.uk/for-sale/details/cont...,https://www.zoopla.co.uk/for-sale/details/6243...
1,1 bed flat for sale,"Pratt Street, London NW1",1,1.0,1,485000,https://www.zoopla.co.uk/for-sale/details/cont...,https://www.zoopla.co.uk/for-sale/details/6243...
2,3 bed maisonette for sale,"St. Quintin Avenue, London W10",3,3.0,1,1950000,https://www.zoopla.co.uk/for-sale/details/cont...,https://www.zoopla.co.uk/for-sale/details/6243...
3,2 bed flat for sale,"Steele Road, Leytonstone E11",2,1.0,1,400000,https://www.zoopla.co.uk/for-sale/details/cont...,https://www.zoopla.co.uk/for-sale/details/6243...
4,1 bed flat for sale,"Canning Road, Stratford, London E15",1,1.0,1,250000,https://www.zoopla.co.uk/for-sale/details/cont...,https://www.zoopla.co.uk/for-sale/details/6243...
5,4 bed flat for sale,"The Pryors, East Heath Road, London NW3",4,2.0,2,2000000,https://www.zoopla.co.uk/for-sale/details/cont...,https://www.zoopla.co.uk/for-sale/details/5524...
6,3 bed end terrace house for sale,"Tottenhall Road, London N13",3,2.0,2,675000,https://www.zoopla.co.uk/for-sale/details/cont...,https://www.zoopla.co.uk/for-sale/details/6243...
7,2 bed flat for sale,"High Road, London NW10",2,2.0,1,325000,https://www.zoopla.co.uk/for-sale/details/cont...,https://www.zoopla.co.uk/for-sale/details/6243...
8,2 bed flat for sale,"Kennington Lane, London SE11",2,2.0,1,965000,https://www.zoopla.co.uk/new-homes/details/con...,https://www.zoopla.co.uk/new-homes/details/624...
9,3 bed semi-detached house for sale,"Leesons Hill, Orpington BR5",3,1.0,2,450000,https://www.zoopla.co.uk/for-sale/details/cont...,https://www.zoopla.co.uk/for-sale/details/6243...


In [72]:
# Write the single-page listings to an Excel workbook without the index column
df_real_estate.to_excel(excel_writer='real_estate_singel.xlsx', index=False)

In [92]:
# Multiple parse - 20 pages
# Walk result pages 1..20 of the London search (`pn` query parameter) and
# collect the same fields as the single-page cells above into parallel lists,
# one entry per listing card. The lists are then combined into a DataFrame
# and written to an Excel workbook.
titles = []
address = []
beds, baths, chairs = [], [], []
prices = []
emails = []
details = []

print('Start parsing https://www.zoopla.co.uk')


for i in range(1, 21):
    print(f'Starting parse page №{i}')
    url = f'https://www.zoopla.co.uk/for-sale/property/london/?q=London&search_source=home&pn={i}'
    # The timeout (seconds) keeps one stalled connection from hanging the whole run.
    res = session.get(url=url, headers=headers, timeout=30)
    if res.ok:
        soup = BeautifulSoup(res.text, 'lxml')
        results = soup.find_all('div', {'data-testid': "search-result"})
        titles.extend([result.find('h2', {"data-testid": "listing-title"}).text.strip() for result in results])
        address.extend([result.find('p', {"data-testid": "listing-description"}).text.strip() for result in results])
        # Per card: map each spec item's data-testid (bed / bath / chair) to its text.
        room_furnitures = [{bed.find('span', {"role":"presentation"}).get('data-testid'): bed.text for bed in result.find('div', {"data-testid": "listing-spec"})} for result in results]
        for furnitures in room_furnitures:
            # .get() yields None when a card lacks that spec key.
            beds.append(furnitures.get('bed'))
            baths.append(furnitures.get('bath'))
            chairs.append(furnitures.get('chair'))
        prices.extend([result.find('div', {"data-testid": "listing-price"}).text.strip().split('£')[-1] for result in results])
        emails.extend(['https://www.zoopla.co.uk' + result.find('a', {"data-testid": "agent-contact-link"}).get('href') for result in results])
        details.extend(['https://www.zoopla.co.uk' + result.find('a', {"data-testid": "listing-details-link"}).get('href') for result in results])
        print(f'Ending parse page №{i}')
    else:
        # An HTTP error response contributes no rows; report it rather than
        # logging the page as parsed.
        print(f'Skipping page №{i}: HTTP status {res.status_code}')
    # Random 2-5 second pause between page requests.
    time.sleep(random.randint(2, 5))

df_real_estate_multi = pd.DataFrame({
    'title': titles,
    'address': address,
    'bedrooms': beds,
    'bathrooms': baths,
    'chairs': chairs,
    'price_£': prices,
    'email': emails,
    'property_link': details
})

df_real_estate_multi.to_excel('real_estate_multi.xlsx', index=False)
print('All job is done!')


Start parsing https://www.zoopla.co.uk
Starting parse page №1
Ending parse page №1
Starting parse page №2
Ending parse page №2
Starting parse page №3
Ending parse page №3
Starting parse page №4
Ending parse page №4
Starting parse page №5
Ending parse page №5
Starting parse page №6
Ending parse page №6
Starting parse page №7
Ending parse page №7
Starting parse page №8
Ending parse page №8
Starting parse page №9
Ending parse page №9
Starting parse page №10
Ending parse page №10
Starting parse page №11
Ending parse page №11
Starting parse page №12
Ending parse page №12
Starting parse page №13
Ending parse page №13
Starting parse page №14
Ending parse page №14
Starting parse page №15
Ending parse page №15
Starting parse page №16
Ending parse page №16
Starting parse page №17
Ending parse page №17
Starting parse page №18
Ending parse page №18
Starting parse page №19
Ending parse page №19
Starting parse page №20
Ending parse page №20
All job is done!


In [94]:
# Adding data in postgres
# NOTE(review): the connection URL embeds a username and password; consider
# loading it from configuration before this notebook is shared.
engine = sqlalchemy.create_engine('postgresql://postgres:postgres@localhost:5432')
# if_exists='replace' makes the cell re-runnable: with the default ('fail'),
# to_sql raises ValueError as soon as the 'zoopla' table already exists.
# The table is rebuilt from the current DataFrame on every run.
df_real_estate_multi.to_sql('zoopla', engine, index=False, if_exists='replace')

500