### Import Web-Scraping Libraries

In [1]:
import requests
from bs4 import BeautifulSoup

### Let's Scrape the Airbnb Website

In [2]:
# Function that collects the listing elements from one search-results page
def get_listings(search_page):
    """Download a search-results page and return its listing elements.

    Parameters
    ----------
    search_page : str
        URL of the search-results page to request.

    Returns
    -------
    The result of ``find_all`` on the parsed page: the ``<div>`` elements
    matched against the class string 'cm4lcvy dir dir-ltr'.
    """
    # The request is made with a 5-second timeout.
    response = requests.get(search_page, timeout=5)
    parsed_page = BeautifulSoup(response.content, 'html.parser')
    return parsed_page.find_all('div', 'cm4lcvy dir dir-ltr')

In [3]:
# To simplify the scraping process, the dates, number of guests and
# destination were entered on the website and the resulting URL was copied.

# Example search URL: destination Hawaii, check-in 2022-08-26,
# check-out 2022-09-06, 4 adults:
#https://www.airbnb.ca/s/Hawaii--United-States/homes?place_id=ChIJBeB5Twbb_3sRKIbMdNKCd0s&refinement_paths%5B%5D=%2Fhomes&tab_id=home_tab&query=Hawaii%2C%20United%20States&flexible_trip_lengths%5B%5D=weekend_trip&date_picker_type=calendar&source=structured_search_input_header&search_type=filter_change&checkin=2022-08-26&checkout=2022-09-06&adults=4

# Search URL actually used for scraping. It carries the same place_id, checkin,
# checkout and adults values as the example above; some other query parameters
# differ (it adds flexible_trip_dates and uses a different search_type).
airbnb_url = 'https://www.airbnb.ca/s/Hawaii--United-States/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&flexible_trip_dates%5B%5D=april&flexible_trip_dates%5B%5D=may&flexible_trip_lengths%5B%5D=weekend_trip&date_picker_type=calendar&query=Hawaii%2C%20United%20States&place_id=ChIJBeB5Twbb_3sRKIbMdNKCd0s&checkin=2022-08-26&checkout=2022-09-06&adults=4&source=structured_search_input_header&search_type=autocomplete_click'

In [4]:
# Pass the search URL to get_listings to get the listing elements as parsed HTML
listings = get_listings(airbnb_url)

In [5]:
# Inspect the scraped data (uncomment to print it)
#print(listings)

In [6]:
# Scrape each listing field by its CSS class.
# Every field is stored under its own dictionary key (one column per field),
# so less cleaning is required at the end.

# Output key -> (tag name, CSS class string) used to locate that field inside
# one listing element. The order here is the key order of the returned dict.
_FEATURE_SELECTORS = {
    'listing_Location': ('div', 'cuu4odx c1frjvtt dir dir-ltr'),
    'Listing_Name': ('div', 'c1bx80b8 dir dir-ltr'),
    'Features': ('div', 'i1wgresd dir dir-ltr'),
    'Price': ('span', 'a8jt5op dir dir-ltr'),
    'Rating_Review': ('div', 'sglmc5a dir dir-ltr'),
}


def _text_or_empty(listing_html, tag, css_class):
    """Return the text of the first matching element, or 'empty' if there is none."""
    try:
        return listing_html.find(tag, {"class": css_class}).text
    except AttributeError:
        # Reading .text failed because there was no element to read it from
        # (e.g. find() returned None). Only this case is treated as missing
        # data; anything else, such as KeyboardInterrupt, propagates.
        return 'empty'


def extract_basic_features(listing_html):
    """Extract the basic fields of one listing into a dictionary.

    Parameters
    ----------
    listing_html
        A parsed listing element exposing ``find(tag, attrs)``, such as the
        elements returned by ``get_listings``.

    Returns
    -------
    dict
        Keys 'listing_Location', 'Listing_Name', 'Features', 'Price' and
        'Rating_Review', in that order. Each value is the text of the matching
        element, or the string 'empty' when the element is not found.
    """
    return {
        key: _text_or_empty(listing_html, tag, css_class)
        for key, (tag, css_class) in _FEATURE_SELECTORS.items()
    }

In [7]:
# Function that builds the URLs of the paginated search pages (15 by default).
def build_urls(main_url, listings_per_page=20, pages_per_location=15):
    """Build one search URL per results page by appending ``items_offset``.

    Parameters
    ----------
    main_url : str
        Base search URL, with or without an existing query string.
    listings_per_page : int, default 20
        Step between the offsets of consecutive pages.
    pages_per_location : int, default 15
        Number of page URLs to generate.

    Returns
    -------
    list of str
        ``pages_per_location`` URLs with offsets 0, ``listings_per_page``,
        ``2 * listings_per_page``, ... (an empty list when the page count is 0).
    """
    # items_offset is the first query parameter when main_url has no '?' yet;
    # otherwise it is appended to the existing parameters with '&'.
    separator = '&' if '?' in main_url else '?'
    return [
        main_url + f'{separator}items_offset={listings_per_page * page_index}'
        for page_index in range(pages_per_location)
    ]

In [8]:
# Build the list of paginated search URLs from the base search URL
url_list = build_urls(airbnb_url)

In [9]:
# Inspect the URL list for all 15 pages (uncomment to display it)
#url_list

In [10]:
# Run get_listings on every page URL and extract the features of each listing
def process_search_pages(url_list):
    """Scrape every search page and return one feature dict per listing.

    Parameters
    ----------
    url_list : iterable of str
        Search-page URLs, processed in order.

    Returns
    -------
    list of dict
        The ``extract_basic_features`` result for each listing, in page order
        and then in listing order within each page.
    """
    return [
        extract_basic_features(listing_card)
        for search_url in url_list
        for listing_card in get_listings(search_url)
    ]

In [11]:
# Scrape every URL in url_list and collect the extracted features of each listing
Airbnb_Data = process_search_pages(url_list)

In [12]:
# Inspect the features of all listings (uncomment to display them)
#Airbnb_Data

In [13]:
# import pandas library
import pandas as pd

In [14]:
# Convert the scraped data into a DataFrame.
# Note: this rebinds Airbnb_Data from the scraped result to a DataFrame.
Airbnb_Data = pd.DataFrame(Airbnb_Data)
#Airbnb_Data

In [15]:
# Store the data in a CSV file.
# NOTE(review): the destination is a hardcoded absolute path under one user's
# home directory; on another machine it presumably will not exist - confirm,
# and consider a path relative to the notebook instead.
Airbnb_Data.to_csv('C:/Users/pooja/OneDrive/Desktop/AirBnb - Web Scraping & Data Analysis/Airbnb_WebScrap_Data.csv')