# Airbnb Data Scraping 

In [1]:
# Airbnb (UK site) search-results URL for homes in Italy; this is the first
# results page handed to the scraper.
url = 'https://www.airbnb.co.uk/s/Italy/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&flexible_trip_lengths%5B%5D=one_week&price_filter_input_type=0&price_filter_num_nights=5&query=Italy&place_id=ChIJA9KNRIL-1BIRb15jJFz1LOI&date_picker_type=calendar&source=structured_search_input_header&search_type=autocomplete_click'

In [2]:
import requests 
from bs4 import BeautifulSoup 

In [3]:
def open_url(url, timeout=30):
    """Download ``url`` and return its HTML parsed with BeautifulSoup.

    Parameters
    ----------
    url : str
        Address of the page to fetch.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests.get``.
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    BeautifulSoup
        The response body parsed with the built-in ``"html.parser"``.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within ``timeout``.
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error response instead of parsing an error page,
    # which would otherwise surface later as silently empty scrape results.
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")

# Fetch and parse the first results page once at notebook level.
# NOTE(review): scrape_airbnb() downloads `url` again on its own, so this
# `page` does not appear to be needed by the cells visible here - confirm.
page = open_url(url)


In [4]:
def scrape_package_title(page):
    """Collect the title text of every listing on a parsed results page.

    Parameters
    ----------
    page : object exposing ``find_all(attrs=...)``
        The parsed results page (a BeautifulSoup document in this notebook).

    Returns
    -------
    list of str
        ``get_text()`` of each element whose class is ``t1jojoys``, in the
        order ``find_all`` yields them; empty when nothing matches.
    """
    titles = []
    for title_tag in page.find_all(attrs={'class': 't1jojoys'}):
        titles.append(title_tag.get_text())
    return titles



In [5]:
def scrape_package_description(page):
    """Collect the description text of every listing on a parsed results page.

    Parameters
    ----------
    page : object exposing ``find_all(attrs=...)``
        The parsed results page (a BeautifulSoup document in this notebook).

    Returns
    -------
    list of str
        ``get_text()`` of each element whose class is
        ``t6mzqp7 dir dir-ltr``, in the order ``find_all`` yields them.
    """
    description_tags = page.find_all(attrs={'class': 't6mzqp7 dir dir-ltr'})
    return list(map(lambda tag: tag.get_text(), description_tags))

In [6]:
def scrape_package_rating(page):
    """Collect the rating text of every listing on a parsed results page.

    For each element whose class is ``g1qv1ctd cb4nyux dir dir-ltr`` the
    text is cut at the last occurrence of ``'price breakdown'`` and only the
    part after it is kept. When the marker is absent the whole text is kept.

    Parameters
    ----------
    page : object exposing ``find_all(attrs=...)``
        The parsed results page (a BeautifulSoup document in this notebook).

    Returns
    -------
    list of str
        One (possibly empty) string per matching element.
    """
    ratings = []
    for card in page.find_all(attrs={'class': 'g1qv1ctd cb4nyux dir dir-ltr'}):
        card_text = card.get_text()
        # rpartition(...)[2] is the tail after the last marker, or the full
        # text when the marker never occurs.
        ratings.append(card_text.rpartition('price breakdown')[2])
    return ratings

In [7]:
def scrape_package_price(page):
    """Collect the price text of every listing on a parsed results page.

    For each element whose class is ``_1jo4hgw`` the ``'\\xa0night'``
    suffix is stripped out, and of the remaining text only the part after the
    last non-breaking space is kept.

    Parameters
    ----------
    page : object exposing ``find_all(attrs=...)``
        The parsed results page (a BeautifulSoup document in this notebook).

    Returns
    -------
    list of str
        One price string per matching element.
    """
    prices = []
    for price_tag in page.find_all(attrs={'class': '_1jo4hgw'}):
        without_suffix = price_tag.get_text().replace('\xa0night', '')
        # Keep only the final non-breaking-space separated token.
        prices.append(without_suffix.rpartition('\xa0')[2])
    return prices

In [8]:
def scrape_package_duration(page):
    """Extract host type and available dates for every listing on a page.

    The text of all elements with class ``f15liw5s s1cjsi4j dir dir-ltr`` is
    read as a flat sequence of consecutive ``(host type, available dates)``
    pairs.

    Parameters
    ----------
    page : object exposing ``find_all(attrs=...)``
        The parsed results page (a BeautifulSoup document in this notebook).

    Returns
    -------
    tuple of (list of str, list of str)
        ``(type_of_host, available_dates)``, always of equal length. A host
        entry whose text does not contain "host" (case-insensitive) is
        reported as ``"information missing"``; the same placeholder is used
        for the dates when the final pair has no second element.
    """
    fragments = [
        tag.get_text()
        for tag in page.find_all(attrs={'class': 'f15liw5s s1cjsi4j dir dir-ltr'})
    ]

    # Only the FIRST empty fragment is dropped (list.remove semantics); this
    # is kept as-is so the pairing of all later fragments does not shift.
    if '' in fragments:
        fragments.remove('')

    type_of_host, available_dates = [], []
    for i in range(0, len(fragments), 2):
        host_text = fragments[i]
        if 'host' in host_text.lower():
            type_of_host.append(host_text)
        else:
            type_of_host.append("information missing")

        # An odd number of fragments leaves the last pair without dates;
        # use the placeholder instead of raising IndexError.
        if i + 1 < len(fragments):
            available_dates.append(fragments[i + 1])
        else:
            available_dates.append("information missing")

    return type_of_host, available_dates

In [9]:
import pandas as pd 

# Total number of result pages to scrape, counting the first page
# (scrape_airbnb follows the "Next" link pages - 1 times).
pages = 2

def _scrape_results_page(page):
    """Build a DataFrame with one row per listing from one parsed results page."""
    type_of_host, available_dates = scrape_package_duration(page)
    return pd.DataFrame({
        "Title": scrape_package_title(page),
        "Description": scrape_package_description(page),
        "Rating": scrape_package_rating(page),
        "Price": scrape_package_price(page),
        "Type of Host": type_of_host,
        "Available Dates": available_dates,
    })


def scrape_airbnb(url, pages):
    """Scrape up to ``pages`` consecutive Airbnb search-result pages.

    Parameters
    ----------
    url : str
        Address of the first results page.
    pages : int
        Total number of pages to scrape, counting the first one. Values of
        1 or less scrape only the first page.

    Returns
    -------
    pandas.DataFrame
        Columns ``Title``, ``Description``, ``Rating``, ``Price``,
        ``Type of Host`` and ``Available Dates``; rows of all scraped pages
        stacked in page order with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the per-column lists scraped from one page differ in length.

    Notes
    -----
    Scraping stops early, returning what was collected so far, when a page
    has no element labelled "Next" or that element carries no ``href``.
    """
    page = open_url(url)
    # Collect one frame per page and concatenate once at the end rather than
    # growing a DataFrame inside the loop.
    frames = [_scrape_results_page(page)]

    for _ in range(pages - 1):
        next_links = page.find_all(attrs={'aria-label': 'Next'})
        next_href = next_links[-1].attrs.get('href') if next_links else None
        if not next_href:
            # Last results page reached: nothing further to follow.
            break
        page = open_url('https://www.airbnb.co.uk' + next_href)
        frames.append(_scrape_results_page(page))

    return pd.concat(frames, axis=0).reset_index(drop=True)

# Run the scraper over `pages` result pages starting at `url`; the bare
# `data_frame` expression on the last line renders the table as cell output.
data_frame = scrape_airbnb(url, pages)
data_frame
    


Unnamed: 0,Title,Description,Rating,Price,Type of Host,Available Dates
0,Farm stay in Pierantonio,La Stalla - Casa San Gabriel,4.95 (39),£115,Individual Host,1–6 Feb
1,Flat in Rome,Stylish Roman Loft with Piano | Steps to Colos...,4.98 (199),£200,Individual Host,28 Mar – 2 Apr
2,Tiny home in Bolzano,Mirror House North,4.93 (68),£227,Individual Host,1–6 Feb
3,Flat in Stresa,Villa Niobe | Exclusive Flat With Private Beach,4.83 (35),£211,Individual Host,1–8 Mar
4,Private room in Vigo di Fassa,Room Stella Alpina in Agriturismo Ecogreen,4.93 (30),£159,Professional Host,5–10 Feb
5,Farm stay in Provincia di Siena,"La Corte, 1 bedroom cozy apartment",,£83,Professional Host,1–8 Mar
6,Trullo in Locorotondo,Private Trullo Suite Martodda,4.85 (157),£88,Professional Host,12–17 Apr
7,Castle in Tuscania (VT),San Giusto Abbey { medieval Tower },4.99 (318),£203,Professional Host,3–8 Feb
8,Farm stay in Sillico,Romantic stay where Tuscany meets the sky!,4.96 (111),£103,Professional Host,1–6 Feb
9,Home in Cerreto Guidi,Casa del Giardino,4.95 (349),£86,Individual Host,1–6 Feb
