## Scraping Data from Airbnb

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import re
from IPython.display import HTML

### Information per one page

In [2]:
site = 'https://www.airbnb.com/s/Sofia-Center--Sofia--Bulgaria/homes?tab_id=home_tab&refinement_paths[]=%2Fhomes&flexible_trip_lengths[]=one_week&price_filter_input_type=0&price_filter_num_nights=5&query=Sofia%20Center%2C%20Sofia&place_id=ChIJhXxR0W2FqkARqt8NNxumtf8&date_picker_type=calendar&source=structured_search_input_header&search_type=autocomplete_click&federated_search_session_id=a45ecbd5-968b-4aa9-89d5-8729f35f6606&pagination_search=true&cursor=eyJzZWN0aW9uX29mZnNldCI6MCwiaXRlbXNfb2Zmc2V0IjowLCJ2ZXJzaW9uIjoxfQ%3D%3D'

In [3]:
response = requests.get(site)

In [4]:
print(response)

<Response [200]>


In [5]:
# soup object
soup = BeautifulSoup(response.text, 'html.parser')

In [6]:
# results
results = soup.findAll('div', class_ = 'cy5jw6o dir dir-ltr')
len(results)

20

In [7]:
# title
title = results[0].find(class_ = 't1jojoys dir dir-ltr').text
title

'Apartment in Oborishte'

In [8]:
# information
information = results[0].find('span', class_ = 't6mzqp7 dir dir-ltr').text
information

'Designer One-bedroom Maisonette with Best Location'

In [9]:
# bedroom
bedroom = results[0].find('span', class_ = 'dir dir-ltr').text
bedroom

'2 beds'

In [10]:
# price per night
price = results[0].find('span', class_ = 'a8jt5op dir dir-ltr').text
price

'$60 per night, originally $82'

In [11]:
# rating
rating = results[0].find(class_ = 'r1dxllyb dir dir-ltr').text
rating

'4.8 (5)'

In [12]:
# link
link_first_part = 'https://www.airbnb.com'
link_second_part = results[0].find('a').get('href')
link_second_part
link = link_first_part + link_second_part
link

'https://www.airbnb.com/rooms/623662489144351480?adults=1&children=0&infants=0&pets=0&check_in=2022-11-27&check_out=2022-12-02&previous_page_section_name=1000'

In [13]:
titles = []
informations = []
beds = []
prices = []
ratings = []
links = []

In [14]:
for result in results:
    try: 
        title = result.find(class_ = 't1jojoys dir dir-ltr').text
        titles.append(title)
    except:
        titles.append(np.nan)
    
    try:
        information = result.find('span', class_ = 't6mzqp7 dir dir-ltr').text
        informations.append(information)
    except:
        informations.append(np.nan)
        
    try:
        bed = result.find('span', class_ = 'dir dir-ltr').text
        beds.append(bed)
    except:
        beds.append(np.nan)
    
    try:
        price = result.find('span', class_ = 'a8jt5op dir dir-ltr').text
        prices.append(price)
    except:
        prices.append(np.nan)
    
    try:
        rating = result.find(class_ = 'r1dxllyb dir dir-ltr').text
        ratings.append(rating)
    except:
        ratings.append(np.nan)
        
    try:
        link_first_part = 'https://www.airbnb.com'
        link_second_part = results[0].find('a').get('href')
        link = link_first_part + link_second_part
        links.append(link)
    except:
        links.append(np.nan)
        

In [15]:
flats = pd.DataFrame({'Title': titles, 
                                     'Information' : informations, 
                                     'Bedrooms' : beds,
                                     'Price' : prices,
                                     'Rating' : ratings,
                                     'Link' : links})
flats.head()

Unnamed: 0,Title,Information,Bedrooms,Price,Rating,Link
0,Apartment in Oborishte,Designer One-bedroom Maisonette with Best Loca...,2 beds,"$60 per night, originally $82",4.8 (5),https://www.airbnb.com/rooms/62366248914435148...
1,Apartment in Oborishte,Rila - Top Center Apartment,1 double bed,$52 per night,4.88 (210),https://www.airbnb.com/rooms/62366248914435148...
2,Apartment in Oborishte,Disain&Qaliti at the Best Location (Vitosha Blvd),1 bed,$58 per night,4.9 (20),https://www.airbnb.com/rooms/62366248914435148...
3,Apartment in Oborishte,Trendy & Comfy Studio at Vitosha BLVD(Top Center),1 bed,$66 per night,4.9 (240),https://www.airbnb.com/rooms/62366248914435148...
4,Apartment in Oborishte,UNIQUE Atmosphere AUTHENTIC CENTER,2 beds,"$43 per night, originally $52",4.82 (148),https://www.airbnb.com/rooms/62366248914435148...


### Paganation

In [16]:
titles_ = []
informations_ = []
beds_ = []
prices_ = []
ratings_ = []
links_ = []

break_ = False
page = 'https://www.airbnb.com/s/Sofia-Center--Sofia--Bulgaria/homes?tab_id=home_tab&refinement_paths[]=%2Fhomes&flexible_trip_lengths[]=one_week&price_filter_input_type=0&price_filter_num_nights=5&query=Sofia%20Center%2C%20Sofia&place_id=ChIJhXxR0W2FqkARqt8NNxumtf8&date_picker_type=calendar&source=structured_search_input_header&search_type=autocomplete_click&federated_search_session_id=a45ecbd5-968b-4aa9-89d5-8729f35f6606&pagination_search=true&cursor=eyJzZWN0aW9uX29mZnNldCI6MCwiaXRlbXNfb2Zmc2V0IjowLCJ2ZXJzaW9uIjoxfQ%3D%3D'
while True:
    if break_:
        break
        
    response = requests.get(page)
    # soup object
    soup = BeautifulSoup(response.text, 'html.parser')
    
    for result in results:
        try: 
            title = result.find(class_ = 't1jojoys dir dir-ltr').text
            titles_.append(title)
        except:
            titles_.append(np.nan)

        try:
            information = result.find('span', class_ = 't6mzqp7 dir dir-ltr').text
            informations_.append(information)
        except:
            informations_.append(np.nan)

        try:
            bed = result.find('span', class_ = 'dir dir-ltr').text
            beds_.append(bed)
        except:
            beds_.append(np.nan)

        try:
            price = result.find('span', class_ = 'a8jt5op dir dir-ltr').text
            prices_.append(price)
        except:
            prices_.append(np.nan)

        try:
            rating = result.find(class_ = 'r1dxllyb dir dir-ltr').text
            ratings_.append(rating)
        except:
            ratings_.append(np.nan)

        try:
            link_first_part = 'https://www.airbnb.com'
            link_second_part = result.find('a').get('href')
            link = link_first_part + link_second_part
            links_.append(link)
        except:
            links_.append(np.nan)
            
    try:
        navigation = soup.find('a', class_ = '_1bfat5l').get('href')
        next_page = 'https://www.airbnb.com' + navigation
        page = next_page
    except:
        break_ = True

In [17]:
flats_df = pd.DataFrame({'Title': titles_, 
                     'Information' : informations_, 
                     'Beds' : beds_,
                     'Price $' : prices_,
                     'Rating' : ratings_,
                     'Link' : links_})
flats_df.head()

Unnamed: 0,Title,Information,Beds,Price $,Rating,Link
0,Apartment in Oborishte,Designer One-bedroom Maisonette with Best Loca...,2 beds,"$60 per night, originally $82",4.8 (5),https://www.airbnb.com/rooms/62366248914435148...
1,Apartment in Oborishte,Rila - Top Center Apartment,1 double bed,$52 per night,4.88 (210),https://www.airbnb.com/rooms/21230782?adults=1...
2,Apartment in Oborishte,Disain&Qaliti at the Best Location (Vitosha Blvd),1 bed,$58 per night,4.9 (20),https://www.airbnb.com/rooms/63607034505952489...
3,Apartment in Oborishte,Trendy & Comfy Studio at Vitosha BLVD(Top Center),1 bed,$66 per night,4.9 (240),https://www.airbnb.com/rooms/35245061?adults=1...
4,Apartment in Oborishte,UNIQUE Atmosphere AUTHENTIC CENTER,2 beds,"$43 per night, originally $52",4.82 (148),https://www.airbnb.com/rooms/21658495?adults=1...


In [18]:
flats_df.tail()

Unnamed: 0,Title,Information,Beds,Price $,Rating,Link
295,Apartment in Sofia,"Hip Studio in the Clouds, Supreme Location",Nov 20 – 25,$42 per night,4.51 (76),https://www.airbnb.com/rooms/45812997?adults=1...
296,Apartment in Oborishte,Cozy One-bedroom Apartment with Amazing Terrace,1 queen bed,"$45 per night, originally $65",4.48 (23),https://www.airbnb.com/rooms/52546849?adults=1...
297,Apartment in Oborishte,Georgi's Guest studio,1 double bed,$30 per night,4.86 (168),https://www.airbnb.com/rooms/44777551?adults=1...
298,Apartment in Oborishte,Luxury furnishings top center Vitosha blvd for...,2 beds,$72 per night,4.82 (34),https://www.airbnb.com/rooms/54776695559782153...
299,Apartment in Sofia,The Bookies Apartment with terrace NDK,2 beds,$51 per night,4.92 (53),https://www.airbnb.com/rooms/20022758?adults=1...


### Data Cleaning

In [19]:
flats_df['New_Price $'] = flats_df['Price $'].str.split().str[0].str.split('$').str[1]

In [20]:
flats_df = flats_df.drop(columns = 'Price $' )
flats_df.head()

Unnamed: 0,Title,Information,Beds,Rating,Link,New_Price $
0,Apartment in Oborishte,Designer One-bedroom Maisonette with Best Loca...,2 beds,4.8 (5),https://www.airbnb.com/rooms/62366248914435148...,60
1,Apartment in Oborishte,Rila - Top Center Apartment,1 double bed,4.88 (210),https://www.airbnb.com/rooms/21230782?adults=1...,52
2,Apartment in Oborishte,Disain&Qaliti at the Best Location (Vitosha Blvd),1 bed,4.9 (20),https://www.airbnb.com/rooms/63607034505952489...,58
3,Apartment in Oborishte,Trendy & Comfy Studio at Vitosha BLVD(Top Center),1 bed,4.9 (240),https://www.airbnb.com/rooms/35245061?adults=1...,66
4,Apartment in Oborishte,UNIQUE Atmosphere AUTHENTIC CENTER,2 beds,4.82 (148),https://www.airbnb.com/rooms/21658495?adults=1...,43


In [21]:
# define function to swap columns
def swap_columns(df, col1, col2):
    col_list = list(df.columns)
    x, y = col_list.index(col1), col_list.index(col2)
    col_list[y], col_list[x] = col_list[x], col_list[y]
    df = df[col_list]
    return df
 
# swap points and rebounds columns
flats_df = swap_columns(flats_df, 'Link', 'New_Price $')
flats_df.head()

Unnamed: 0,Title,Information,Beds,Rating,New_Price $,Link
0,Apartment in Oborishte,Designer One-bedroom Maisonette with Best Loca...,2 beds,4.8 (5),60,https://www.airbnb.com/rooms/62366248914435148...
1,Apartment in Oborishte,Rila - Top Center Apartment,1 double bed,4.88 (210),52,https://www.airbnb.com/rooms/21230782?adults=1...
2,Apartment in Oborishte,Disain&Qaliti at the Best Location (Vitosha Blvd),1 bed,4.9 (20),58,https://www.airbnb.com/rooms/63607034505952489...
3,Apartment in Oborishte,Trendy & Comfy Studio at Vitosha BLVD(Top Center),1 bed,4.9 (240),66,https://www.airbnb.com/rooms/35245061?adults=1...
4,Apartment in Oborishte,UNIQUE Atmosphere AUTHENTIC CENTER,2 beds,4.82 (148),43,https://www.airbnb.com/rooms/21658495?adults=1...


In [22]:
# rename column
flats_df = flats_df.rename(columns = {'New_Price $' : 'Price $'})
flats_df.head()

Unnamed: 0,Title,Information,Beds,Rating,Price $,Link
0,Apartment in Oborishte,Designer One-bedroom Maisonette with Best Loca...,2 beds,4.8 (5),60,https://www.airbnb.com/rooms/62366248914435148...
1,Apartment in Oborishte,Rila - Top Center Apartment,1 double bed,4.88 (210),52,https://www.airbnb.com/rooms/21230782?adults=1...
2,Apartment in Oborishte,Disain&Qaliti at the Best Location (Vitosha Blvd),1 bed,4.9 (20),58,https://www.airbnb.com/rooms/63607034505952489...
3,Apartment in Oborishte,Trendy & Comfy Studio at Vitosha BLVD(Top Center),1 bed,4.9 (240),66,https://www.airbnb.com/rooms/35245061?adults=1...
4,Apartment in Oborishte,UNIQUE Atmosphere AUTHENTIC CENTER,2 beds,4.82 (148),43,https://www.airbnb.com/rooms/21658495?adults=1...


In [23]:
flats_df['Price $'] = pd.to_numeric(flats_df['Price $'], errors = 'coerce')
flats_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Title        300 non-null    object
 1   Information  300 non-null    object
 2   Beds         300 non-null    object
 3   Rating       300 non-null    object
 4   Price $      300 non-null    int64 
 5   Link         300 non-null    object
dtypes: int64(1), object(5)
memory usage: 14.2+ KB


In [24]:
flats_df.head()

Unnamed: 0,Title,Information,Beds,Rating,Price $,Link
0,Apartment in Oborishte,Designer One-bedroom Maisonette with Best Loca...,2 beds,4.8 (5),60,https://www.airbnb.com/rooms/62366248914435148...
1,Apartment in Oborishte,Rila - Top Center Apartment,1 double bed,4.88 (210),52,https://www.airbnb.com/rooms/21230782?adults=1...
2,Apartment in Oborishte,Disain&Qaliti at the Best Location (Vitosha Blvd),1 bed,4.9 (20),58,https://www.airbnb.com/rooms/63607034505952489...
3,Apartment in Oborishte,Trendy & Comfy Studio at Vitosha BLVD(Top Center),1 bed,4.9 (240),66,https://www.airbnb.com/rooms/35245061?adults=1...
4,Apartment in Oborishte,UNIQUE Atmosphere AUTHENTIC CENTER,2 beds,4.82 (148),43,https://www.airbnb.com/rooms/21658495?adults=1...


In [25]:
HTML(flats_df.to_html(render_links=True, escape=False))

Unnamed: 0,Title,Information,Beds,Rating,Price $,Link
0,Apartment in Oborishte,Designer One-bedroom Maisonette with Best Location,2 beds,4.8 (5),60,https://www.airbnb.com/rooms/623662489144351480?adults=1&children=0&infants=0&pets=0&check_in=2022-11-27&check_out=2022-12-02&previous_page_section_name=1000
1,Apartment in Oborishte,Rila - Top Center Apartment,1 double bed,4.88 (210),52,https://www.airbnb.com/rooms/21230782?adults=1&children=0&infants=0&pets=0&check_in=2022-11-20&check_out=2022-11-25&previous_page_section_name=1000
2,Apartment in Oborishte,Disain&Qaliti at the Best Location (Vitosha Blvd),1 bed,4.9 (20),58,https://www.airbnb.com/rooms/636070345059524894?adults=1&children=0&infants=0&pets=0&check_in=2022-12-02&check_out=2022-12-08&previous_page_section_name=1000
3,Apartment in Oborishte,Trendy & Comfy Studio at Vitosha BLVD(Top Center),1 bed,4.9 (240),66,https://www.airbnb.com/rooms/35245061?adults=1&children=0&infants=0&pets=0&check_in=2022-11-27&check_out=2022-12-02&previous_page_section_name=1000
4,Apartment in Oborishte,UNIQUE Atmosphere AUTHENTIC CENTER,2 beds,4.82 (148),43,https://www.airbnb.com/rooms/21658495?adults=1&children=0&infants=0&pets=0&check_in=2022-12-14&check_out=2022-12-21&previous_page_section_name=1000
5,Apartment in Sofia,Top Central Supreme Apartment next to NDK,1 queen bed,4.99 (68),65,https://www.airbnb.com/rooms/41302921?adults=1&children=0&infants=0&pets=0&check_in=2022-11-16&check_out=2022-11-21&previous_page_section_name=1000
6,Apartment in Oborishte,B(11) Smart&Modern/Top Central/Free Parking/NEW!,2 beds,4.95 (87),95,https://www.airbnb.com/rooms/41578678?adults=1&children=0&infants=0&pets=0&check_in=2022-11-30&check_out=2022-12-05&previous_page_section_name=1000
7,Apartment in Oborishte,Cosy and modern apartment in the heart of the city,3 queen beds,4.89 (163),42,https://www.airbnb.com/rooms/38271301?adults=1&children=0&infants=0&pets=0&check_in=2023-01-07&check_out=2023-01-12&previous_page_section_name=1000
8,Apartment in Oborishte,"Beautiful, stylish and new apt on Vitosha Blvd",1 bed,5.0 (4),59,https://www.airbnb.com/rooms/612711283066492886?adults=1&children=0&infants=0&pets=0&check_in=2022-11-28&check_out=2022-12-03&previous_page_section_name=1000
9,Apartment in Oborishte,STYLISH FLAT in the heart of Sofia,1 queen bed,4.92 (59),49,https://www.airbnb.com/rooms/34331295?adults=1&children=0&infants=0&pets=0&check_in=2022-11-27&check_out=2022-12-02&previous_page_section_name=1000


In [27]:
flats_df.to_excel('flats_Sofia_Center.xlsx', index = False)