In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests

In [2]:
def extract_basic_features(listing_html):
    """Parse one search-result listing card and record its fields.

    Parameters
    ----------
    listing_html
        A parsed HTML element for a single listing card, exposing
        ``find`` / ``find_all`` (presumably a ``bs4`` Tag -- the caller passes
        items returned by ``soup.find_all``).

    Returns
    -------
    None
        Nothing is returned. One value is appended to each of the lists
        ``header``, ``url``, ``name``, ``price``, ``beds``, ``rate``,
        ``review`` and ``pic`` of the module-level ``features_dict``, which
        must already exist with those keys before this function is called.

    Notes
    -----
    Only the rating badge is treated as optional: when it is absent or has an
    unexpected shape, ``rate`` and ``review`` are recorded as ``np.nan``.
    Every other element is mandatory, and the error raised when one is missing
    is not caught here. All values are appended only after every field has
    been extracted, so a failure never leaves the lists with unequal lengths.
    """
    # NOTE(review): the CSS class names below are hard-coded from the page
    # markup; presumably they change whenever the site is redeployed -- verify.
    url = "https://www.airbnb.com.sg" + listing_html.find('a').get('href')
    header = listing_html.find("div", {"class": "t1jojoys dir dir-ltr"}).get_text()
    name = listing_html.find("div", {"class": "nquyp1l s1cjsi4j dir dir-ltr"}).get_text()

    # The price text is split on non-breaking spaces; the third token from the
    # end is taken as the amount and its "$" sign is stripped.
    price_text = listing_html.find("div", {"class": "_1jo4hgw"}).get_text()
    price = price_text.split("\xa0")[-3].replace("$", "")

    # First whitespace-separated token of the first matching span.
    beds = listing_html.find_all("span", class_="dir dir-ltr")[0].get_text().split(" ")[0]

    try:
        ratings = listing_html.find(
            "span", class_="t5eq1io r4a59j5 dir dir-ltr"
        ).get("aria-label").split(",")
        rate = ratings[0].split(" ")[0]
        review = ratings[1].split(" ")[-2]
    except (AttributeError, IndexError):
        # AttributeError: badge (or its aria-label) is missing, so we called a
        # method on None. IndexError: the label lacks the expected
        # "<rating>, <n> reviews" parts. Anything else is a real bug and must
        # propagate instead of being silently recorded as NaN.
        rate = np.nan
        review = np.nan

    pic = listing_html.find_all("source")[0].get("srcset")

    record = {
        'header': header,
        'url': url,
        'name': name,
        'price': price,
        'beds': beds,
        'rate': rate,
        'review': review,
        'pic': pic,
    }
    for key, value in record.items():
        features_dict[key].append(value)

In [3]:
def extract_page(url):
    """Download one search-results page and record every listing card on it.

    The page at ``url`` is fetched with ``requests``, parsed with the
    ``html.parser`` backend, and each ``div`` matched by the listing-card
    class is handed to ``extract_basic_features``. Nothing is returned; the
    results are whatever ``extract_basic_features`` records.
    """
    response = requests.get(url)
    page = BeautifulSoup(response.content, 'html.parser')

    for card in page.find_all('div', 'cy5jw6o dir dir-ltr'):
        extract_basic_features(card)

In [4]:
# Destinations to scrape; each name is spliced into the search URL as-is.
locations = [
    'Amsterdam',
    'Paris',
    'Lucerne',
    'Interlaken',
    'Milan',
]

# Collects one DataFrame per destination, in the same order as `locations`.
df_list = list()

In [5]:
# For every location: scrape the first search page plus up to two pages linked
# from its pagination bar, and store the result as one DataFrame in `df_list`.
# NOTE: re-running this cell without re-running the cell that resets `df_list`
# appends a second copy of every frame.
for location in locations:
    airbnb_url = 'https://www.airbnb.com.sg/s/' + location + '/homes'
    soup = BeautifulSoup(requests.get(airbnb_url).content, 'html.parser')

    # The pagination container can be absent (e.g. a single page of results or
    # changed markup); treat that as "no further pages" instead of crashing on
    # None. `href=True` skips anchors that carry no link target.
    pagination = soup.find("div", class_="_jro6t0")
    if pagination is None:
        next_page = []
    else:
        next_page = [
            'https://www.airbnb.com.sg' + x.get('href')
            for x in pagination.find_all('a', href=True)
        ]

    # `extract_basic_features` appends into this module-level dict, so it has
    # to be rebuilt with empty lists before each location is scraped.
    features = ['header','url','name','price','beds','rate','review','pic']
    features_dict = {feat: [] for feat in features}

    extract_page(airbnb_url)
    # Follow at most the first two pagination links; fewer are fine.
    for page_url in next_page[:2]:
        extract_page(page_url)

    df = pd.DataFrame(features_dict)

    df_list.append(df)

In [6]:
# Display the first DataFrame in `df_list` -- the one for locations[0],
# assuming the scrape loop was run once on a freshly reset `df_list`.
df_list[0]

Unnamed: 0,header,url,name,price,beds,rate,review,pic
0,Loft in Stadsdeel Oost,https://www.airbnb.com.sg/rooms/38012872?adult...,Beautiful Studio by the Amstel,171,1,4.97,68.0,https://a0.muscache.com/im/pictures/miso/Hosti...
1,Private room in Stadsdeel West,https://www.airbnb.com.sg/rooms/19689332?adult...,Private Room. Garden Apartment. Old West Amste...,79,1,4.87,225.0,https://a0.muscache.com/im/pictures/b0e68619-6...
2,Private room in Stadsdeel Centrum,https://www.airbnb.com.sg/rooms/29051?adults=1...,Comfortable single room,90,1,4.79,601.0,https://a0.muscache.com/im/pictures/162009/bd6...
3,Private room in Stadsdeel West,https://www.airbnb.com.sg/rooms/44374975?adult...,Cozy Houseboat Homeward Amsterdam,268,1,4.88,173.0,https://a0.muscache.com/im/pictures/miso/Hosti...
4,Apartment in Stadsdeel Centrum,https://www.airbnb.com.sg/rooms/1320353?adults...,W Spacious split-level 100m2 Jordaan apartment C,233,1,4.91,57.0,https://a0.muscache.com/im/pictures/24494657/f...
5,Flat in Stadsdeel Centrum,https://www.airbnb.com.sg/rooms/22649619?adult...,★ Typical Apartment in the Heart of Amsterdam ★,298,1,5.0,48.0,https://a0.muscache.com/im/pictures/0fb83e89-4...
6,Private room in Stadsdeel Zuid,https://www.airbnb.com.sg/rooms/8660698?adults...,Exclusive studio. Sarphatipark de Pijp,186,1,4.93,388.0,https://a0.muscache.com/im/pictures/110141486/...
7,Flat in Stadsdeel Noord,https://www.airbnb.com.sg/rooms/11857316?adult...,Waterfront / Lots of Privacy / Free Parking!,210,1,4.93,713.0,https://a0.muscache.com/im/pictures/miso/Hosti...
8,Flat in Stadsdeel Centrum,https://www.airbnb.com.sg/rooms/3911407?adults...,Canal View Amsterdam Brouwersgracht apartment,206,2,4.88,76.0,https://a0.muscache.com/im/pictures/49203327/a...
9,Private room in Stadsdeel Centrum,https://www.airbnb.com.sg/rooms/38835232?adult...,Cozy dormer room in the old center,118,1,4.59,139.0,https://a0.muscache.com/im/pictures/c721181e-b...


In [11]:
def getairbnb(destination):
    """Scrape the first search-results page for ``destination``.

    Parameters
    ----------
    destination : str
        Place name inserted verbatim into the search URL
        (``https://www.airbnb.com.sg/s/<destination>/homes``).

    Returns
    -------
    list of dict
        One dict per listing card found on the page, with the keys ``City``,
        ``Name``, ``Rating``, ``url``, ``imageurl`` and ``review``.
        ``Rating`` and ``review`` are strings taken from the rating badge's
        ``aria-label``, or ``np.nan`` when the badge is missing or malformed.
        ``imageurl`` is the first space-separated entry of the first
        ``<source>`` element's ``srcset``.

    Notes
    -----
    Only fields that end up in the returned dicts are extracted, so a card
    that lacks some other piece of markup does not abort the scrape. The link,
    header and image are still mandatory; the error raised when one of them is
    missing is not caught here.
    """
    base_url = 'https://www.airbnb.com.sg'
    airbnb_url = base_url + '/s/' + destination + '/homes'
    soup = BeautifulSoup(requests.get(airbnb_url).content, 'html.parser')
    airbnb = []

    # NOTE(review): the CSS class names are hard-coded from the page markup;
    # presumably they change whenever the site is redeployed -- verify.
    for listing_html in soup.find_all('div', 'cy5jw6o dir dir-ltr'):
        url = base_url + listing_html.find('a').get('href')
        header = listing_html.find(
            "div", {"class": "t1jojoys dir dir-ltr"}).get_text()

        try:
            ratings = listing_html.find(
                "span", class_="t5eq1io r4a59j5 dir dir-ltr").get("aria-label").split(",")
            rate = ratings[0].split(" ")[0]
            review = ratings[1].split(" ")[-2]
        except (AttributeError, IndexError):
            # AttributeError: badge or its aria-label is missing (method call
            # on None). IndexError: label lacks the expected comma-separated
            # parts. Other exceptions indicate real bugs and must propagate.
            rate = np.nan
            review = np.nan

        pic = listing_html.find_all("source")[0].get("srcset")

        airbnb.append({
            'City': destination,
            'Name': header,
            'Rating': rate,
            'url': url,
            'imageurl': pic.split(' ')[0],
            'review': review,
        })

    return airbnb

In [12]:
# Try the scraper on one destination; the cell output is the returned list of
# per-listing dicts (live network request, so results vary between runs).
getairbnb('Amsterdam')

[{'City': 'Amsterdam',
  'Name': 'Flat in Stadsdeel Noord',
  'Rating': '4.93',
  'url': 'https://www.airbnb.com.sg/rooms/11857316?adults=1&children=0&infants=0&pets=0&check_in=2023-08-28&check_out=2023-09-02&previous_page_section_name=1000',
  'imageurl': 'https://a0.muscache.com/im/pictures/miso/Hosting-11857316/original/8cd2920f-2910-4ebc-9a7f-e02c845a90e9.jpeg?im_w=720',
  'review': '713'},
 {'City': 'Amsterdam',
  'Name': 'Private room in Stadsdeel Centrum',
  'Rating': '4.79',
  'url': 'https://www.airbnb.com.sg/rooms/29051?adults=1&children=0&infants=0&pets=0&check_in=2023-07-04&check_out=2023-07-09&previous_page_section_name=1000',
  'imageurl': 'https://a0.muscache.com/im/pictures/162009/bd6be2f8_original.jpg?im_w=720',
  'review': '601'},
 {'City': 'Amsterdam',
  'Name': 'Private room in Stadsdeel West',
  'Rating': '4.87',
  'url': 'https://www.airbnb.com.sg/rooms/19689332?adults=1&children=0&infants=0&pets=0&check_in=2023-01-09&check_out=2023-01-16&previous_page_section_nam