In [1]:
# Fetch apartment rental listings from imovirtual.com
# using BeautifulSoup and requests, then tabulate them with pandas

from bs4 import BeautifulSoup as bs
import grequests
import requests
import pandas as pd

In [2]:
# Scrape every result page of the apartment-rental search and collect, for
# each advert, its price, typology, location, size and link into `all_data`.
BASE_URL = 'https://www.imovirtual.com/arrendar/apartamento/'
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests.get can block forever


def fetch_soup(url):
    """Download `url` and return the page parsed as a BeautifulSoup tree.

    Raises requests.HTTPError for 4xx/5xx responses, so an error page is never
    parsed as if it were a (seemingly empty) listing page.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)  # request http
    response.raise_for_status()
    # Name the parser explicitly: a bare bs(html) picks whichever parser happens
    # to be installed (and warns about it), so results could vary by machine.
    return bs(response.text, 'html.parser')


def parse_listings(page_soup):
    """Extract the advert fields from one parsed results page.

    Returns five lists in the order price, type, location, size, link. Each
    list comes from its own find_all() query, one entry per matching tag.
    """
    # price: text before the euro sign, with the inner spaces removed
    page_prices = [
        tag.string.split('€')[0].replace(' ', '').strip()
        for tag in page_soup.find_all('li', class_="offer-item-price")
    ]

    # type: converted to a plain str because a NavigableString keeps a
    # reference to its whole parse tree, which would keep every page in memory
    page_types = [
        None if tag.string is None else str(tag.string)
        for tag in page_soup.find_all('li', class_="offer-item-rooms hidden-xs")
    ]

    # location: whatever follows the fixed advert-title prefix
    page_locations = [
        tag.text.split('Apartamento para arrendar: ')[1]
        for tag in page_soup.find_all('p', class_="text-nowrap")
    ]

    # size (m2): first space-separated token of the node right after the tag
    # (next_element is the documented name of the legacy `.next` attribute)
    page_sizes = [
        tag.next_element.split(' ')[0]
        for tag in page_soup.find_all('li', class_="hidden-xs offer-item-area")
    ]

    # link: href of the anchor inside each advert header
    page_links = [
        tag.a['href']
        for tag in page_soup.find_all('header', class_="offer-item-header")
    ]

    return page_prices, page_types, page_locations, page_sizes, page_links


first_page = fetch_soup(BASE_URL)

# find number of pages: the last whitespace-separated token of the pager is
# read as the page count; fall back to a single page when no pager (or no
# numeric token) is present instead of failing later with a NameError
pages = 1
for pager in first_page.find_all('ul', class_='pager'):
    tokens = pager.text.split()
    if tokens and tokens[-1].isdecimal():
        pages = int(tokens[-1])

# url list: the first page followed by the numbered pages
urls = [BASE_URL]
urls.extend(BASE_URL + '?page=' + str(page) for page in range(2, pages + 1))

# lists for house prices, types, locations and sizes. We'll also store links
columns = ['price', 'type', 'location', 'size', 'link']  # dataframe column names
prices, types, location, sizes, links = [], [], [], [], []
scraped = dict(zip(columns, (prices, types, location, sizes, links)))

soup = first_page  # already downloaded to read the pager; do not fetch it twice
for page_index, u in enumerate(urls):
    if page_index > 0:
        soup = fetch_soup(u)

    fields = parse_listings(soup)

    # The five lists are only row-aligned if every advert contributed to each
    # of them. Fail loudly rather than build a frame with shifted rows.
    counts = [len(values) for values in fields]
    if len(set(counts)) > 1:
        raise ValueError(
            'Field counts differ on ' + u + ': '
            + str(dict(zip(columns, counts)))
            + ' - rows would be misaligned'
        )

    for column, values in zip(columns, fields):
        scraped[column].extend(values)

all_data = pd.DataFrame(scraped, columns=columns)  # one row per advert
all_data = all_data.drop_duplicates()  # drop duplicate rows
all_data = all_data[all_data['price'] != 'Preçosobconsulta']  # drop houses with negotiable prices

In [3]:
# Clean the scraped frame: numeric typology, location split into municipality
# and district, numeric size/price, then save the result to a CSV file.

# replace house types with numeric values
type_dict = {'T2':2, 'T3':3, 'T1':1, 'T4':4, 'T0':0, 'T5':5, 'T6':6, 'T8':8, 'T7':7, 'T10 ou superior':10, 'T9':9}
# map() with a dict.get fallback leaves labels missing from type_dict untouched,
# as replace() did, without depending on replace()'s deprecated downcasting of
# object columns
all_data['type'] = all_data['type'].map(lambda label: type_dict.get(label, label))

# split location into municipality and district columns:
# district is the last comma-separated part, municipality the one before it.
# When there is only one part it is used for both columns. A missing location
# yields missing values in both columns instead of raising.
location_parts = all_data['location'].str.split(', ')
district = location_parts.str[-1]
has_municipality = location_parts.str.len() > 1
municipality = location_parts.str[-2].where(has_municipality, district)

all_data['municipality'] = municipality
all_data['district'] = district
all_data = all_data.drop(columns='location')

# change size column to float and price to int (decimal commas become dots)
all_data['size'] = all_data['size'].replace(',', '.', regex=True).astype('float')
all_data['price'] = all_data['price'].replace(',', '.', regex=True).astype('float').round().astype('int')


# reset index to match number of posts
# (reset_index() without drop=True also keeps the previous row labels as an
# 'index' column, so that column is written to the CSV as well)
all_data = all_data.reset_index()

# save data to local csv file
all_data.to_csv('house_data.csv', index=False)
all_data

Unnamed: 0,index,price,type,size,link,municipality,district
0,0,650,2,85.0,https://www.imovirtual.com/pt/anuncio/t2-1-vis...,Lordelo do Ouro e Massarelos,Porto
1,1,450,0,30.0,https://www.imovirtual.com/pt/anuncio/t0-cozin...,Bonfim,Porto
2,2,800,3,115.0,https://www.imovirtual.com/pt/anuncio/t3-terra...,Vila Nova de Gaia,Porto
3,3,700,3,86.0,https://www.imovirtual.com/pt/anuncio/t3-mobil...,Vila Nova de Gaia,Porto
4,4,600,1,63.0,https://www.imovirtual.com/pt/anuncio/t1-bons-...,Vila Nova de Gaia,Porto
...,...,...,...,...,...,...,...
11888,11995,1650,3,140.0,https://www.imovirtual.com/pt/anuncio/arrenda-...,Alvalade,Lisboa
11889,11996,450,1,45.0,https://www.imovirtual.com/pt/anuncio/casa-par...,Oeiras,Lisboa
11890,11997,680,2,70.0,https://www.imovirtual.com/pt/anuncio/t2-em-qu...,Sintra,Lisboa
11891,11998,450,0,21.0,https://www.imovirtual.com/pt/anuncio/apartame...,Seixal,Setúbal
