### Import the web scraping libraries

In [1]:
import requests
from bs4 import BeautifulSoup

### Let's scrape the Airbnb website

In [2]:
# Create a function that collects the listing elements from a search results page
def get_listings(search_page):
    """Download one search-results page and return its listing elements.

    Parameters
    ----------
    search_page : str
        URL of the search-results page to request.

    Returns
    -------
    The result of ``find_all`` on the parsed page for ``div`` tags
    selected with the class string ``'cm4lcvy dir dir-ltr'``.
    """
    # The request is abandoned if the server does not answer within 5 seconds.
    response = requests.get(search_page, timeout=5)
    parsed_page = BeautifulSoup(response.content, 'html.parser')
    return parsed_page.find_all('div', 'cm4lcvy dir dir-ltr')

In [3]:
# To simplify the scraping process,
# I entered the dates, number of people and destination on the website.

# Below is the URL with Destination: Hawaii, Dates: Aug 26 to Sep 6, Number of people: 4
#https://www.airbnb.ca/s/Hawaii--United-States/homes?place_id=ChIJBeB5Twbb_3sRKIbMdNKCd0s&refinement_paths%5B%5D=%2Fhomes&tab_id=home_tab&query=Hawaii%2C%20United%20States&flexible_trip_lengths%5B%5D=weekend_trip&date_picker_type=calendar&source=structured_search_input_header&search_type=filter_change&checkin=2022-08-26&checkout=2022-09-06&adults=4

# Search URL used for scraping
# (Hawaii, check-in 2022-08-26, check-out 2022-09-06, 4 adults)
airbnb_url = 'https://www.airbnb.ca/s/Hawaii--United-States/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&flexible_trip_dates%5B%5D=april&flexible_trip_dates%5B%5D=may&flexible_trip_lengths%5B%5D=weekend_trip&date_picker_type=calendar&query=Hawaii%2C%20United%20States&place_id=ChIJBeB5Twbb_3sRKIbMdNKCd0s&checkin=2022-08-26&checkout=2022-09-06&adults=4&source=structured_search_input_header&search_type=autocomplete_click'

In [4]:
# Pass the URL to the function to get the listing data in HTML format
listings = get_listings(airbnb_url)

In [5]:
# Check the scraped data.
# Note: this prints the raw HTML of every listing element, so the output is long.
print(listings)

[<div class="cm4lcvy dir dir-ltr" role="group"><a aria-labelledby="title_7651691" class="l8au1ct dir dir-ltr" href="/rooms/7651691?adults=4&amp;check_in=2022-08-26&amp;check_out=2022-09-06&amp;previous_page_section_name=1000&amp;federated_search_id=2548e800-d63e-4ffc-b71d-4c9e1367f6cf" lang="en" rel="noopener noreferrer" target="listing_7651691"></a><div class="m28cx3m dir dir-ltr"><div class="cza9200 cf0wwk1 cccwndf cof42p7 c1ex7ql7 dir dir-ltr" style="--media-container_border-radius:var(--i-g-gvoq)"><div class="_1mx6kqf" style="background:var(--d-nc-lt-s);--dls-basecard-padding-top:66.6667%"><div class="_1szwzht"><div class="_v0gz4uz" style="--dls-liteimage-padding-top:66.6667%"><div class="_4626ulj"><picture><source media="(max-width: 743px)" srcset="https://a0.muscache.com/im/pictures/72a1c236-b44b-4f12-bb3c-2f99b27e72e0.jpg?im_w=720 1x"/><source media="(min-width: 743.1px) and (max-width: 1127px)" srcset="https://a0.muscache.com/im/pictures/72a1c236-b44b-4f12-bb3c-2f99b27e72e0.jpg

In [6]:
# Let's scrape the data based on CSS class.
# Extracting each field into its own column this way means less cleaning is required at the end.
# All the data is stored in a dictionary keyed by heading.

def extract_basic_features(listing_html):
    """Pull the basic text fields out of one listing element.

    Each field is looked up with ``listing_html.find(tag, {"class": ...})``
    and its ``.text`` is stored. When the lookup yields nothing usable
    (for example ``find`` returns ``None``, so reading ``.text`` raises
    ``AttributeError``) the field is stored as the placeholder ``'empty'``.

    Parameters
    ----------
    listing_html
        Object exposing ``find(tag, attrs)`` whose results have a ``.text``
        attribute (presumably a BeautifulSoup tag for a single listing).

    Returns
    -------
    dict
        Keys ``'listing_Location'``, ``'Listing_Name'``, ``'Features'``,
        ``'Price'`` and ``'Rating_Review'``, inserted in that order.
    """
    # (output key, tag name, CSS class string) for every scraped field.
    field_selectors = (
        ('listing_Location', 'div', 'cuu4odx c1frjvtt dir dir-ltr'),
        ('Listing_Name', 'div', 'c1bx80b8 dir dir-ltr'),
        ('Features', 'div', 'i1wgresd dir dir-ltr'),
        ('Price', 'span', 'a8jt5op dir dir-ltr'),
        ('Rating_Review', 'div', 'sglmc5a dir dir-ltr'),
    )

    features_dict = {}
    for key, tag, css_class in field_selectors:
        try:
            features_dict[key] = listing_html.find(tag, {"class": css_class}).text
        except AttributeError:
            # Only the "element not available" case maps to the placeholder.
            # A bare ``except`` would also swallow KeyboardInterrupt and
            # SystemExit, making a long scrape impossible to interrupt.
            features_dict[key] = 'empty'

    return features_dict

In [7]:
# Create a function that builds the URLs of the result pages (15 pages by default).
def build_urls(main_url, listings_per_page=20, pages_per_location=15):
    """Build the paginated search URLs derived from one base search URL.

    Page ``i`` (starting at 0) gets the query parameter
    ``items_offset = listings_per_page * i``.

    Parameters
    ----------
    main_url : str
        Base search URL, with or without an existing query string.
    listings_per_page : int, default 20
        Offset step between consecutive pages.
    pages_per_location : int, default 15
        Number of URLs to generate.

    Returns
    -------
    list of str
        One URL per page, in increasing offset order.
    """
    # Extend an existing query string with '&'; start one with '?' otherwise,
    # so a base URL without parameters still yields a valid URL.
    separator = '&' if '?' in main_url else '?'
    return [
        f'{main_url}{separator}items_offset={listings_per_page * page_index}'
        for page_index in range(pages_per_location)
    ]

In [8]:
# Build the list of paginated search URLs from the Airbnb search URL
url_list = build_urls(airbnb_url)

In [9]:
# Let's check the generated URL list (one URL per results page)
url_list

['https://www.airbnb.ca/s/Hawaii--United-States/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&flexible_trip_dates%5B%5D=april&flexible_trip_dates%5B%5D=may&flexible_trip_lengths%5B%5D=weekend_trip&date_picker_type=calendar&query=Hawaii%2C%20United%20States&place_id=ChIJBeB5Twbb_3sRKIbMdNKCd0s&checkin=2022-08-26&checkout=2022-09-06&adults=4&source=structured_search_input_header&search_type=autocomplete_click&items_offset=0',
 'https://www.airbnb.ca/s/Hawaii--United-States/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&flexible_trip_dates%5B%5D=april&flexible_trip_dates%5B%5D=may&flexible_trip_lengths%5B%5D=weekend_trip&date_picker_type=calendar&query=Hawaii%2C%20United%20States&place_id=ChIJBeB5Twbb_3sRKIbMdNKCd0s&checkin=2022-08-26&checkout=2022-09-06&adults=4&source=structured_search_input_header&search_type=autocomplete_click&items_offset=20',
 'https://www.airbnb.ca/s/Hawaii--United-States/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&flexible_trip_dates%5B%5D

In [10]:
# Use the get_listings function to collect all the listing data from each page
def process_search_pages(url_list):
    """Scrape every search page and gather the features of each listing.

    Parameters
    ----------
    url_list : iterable of str
        Search-page URLs, processed in the order given.

    Returns
    -------
    list
        One ``extract_basic_features`` result per listing element that
        ``get_listings`` returns, page by page.
    """
    return [
        extract_basic_features(listing_element)
        for page_url in url_list
        for listing_element in get_listings(page_url)
    ]

In [11]:
# Scrape every URL in url_list and collect the extracted features of each listing
Airbnb_Data = process_search_pages(url_list)

In [12]:
# Let's check the extracted features of all listings
Airbnb_Data

[{'listing_Location': 'Entire rental unit in Kailua-Kona',
  'Listing_Name': '★ Huge 3br/3ba ★ Amazing Ocean Views ★ Top rated ★',
  'Features': '8 guests · 3 bedrooms · 5 beds · 3 baths',
  'Price': ',&nbsp;',
  'Rating_Review': '4.87\xa0(187 reviews)'},
 {'listing_Location': 'Entire home in Mountain View',
  'Listing_Name': 'A/C All Rms/WorkStations/Dishwasher AlohaHaleNohea',
  'Features': '8 guests · 3 bedrooms · 3 beds · 2 baths',
  'Price': '$332 CAD per night, originally $358',
  'Rating_Review': '4.96\xa0(217 reviews)'},
 {'listing_Location': 'Entire bungalow in Pepeekeo',
  'Listing_Name': 'Heavenly Hakalau: Oceanfront Cliff House',
  'Features': '4 guests · 2 bedrooms · 2 beds · 2 baths',
  'Price': '$742 CAD per night',
  'Rating_Review': '4.97\xa0(154 reviews)'},
 {'listing_Location': 'Entire rental unit in Kihei',
  'Listing_Name': '3 minutes walk to the beach 2BR/2BA Top floor',
  'Features': '4 guests · 2 bedrooms · 3 beds · 2 baths',
  'Price': '$496 CAD per night',
  '

In [13]:
# import pandas library
import pandas as pd

In [14]:
# Convert the data into a DataFrame and display it.
# Note: this rebinds Airbnb_Data from the scraped list to a DataFrame.
Airbnb_Data = pd.DataFrame(Airbnb_Data)
Airbnb_Data

Unnamed: 0,listing_Location,Listing_Name,Features,Price,Rating_Review
0,Entire rental unit in Kailua-Kona,★ Huge 3br/3ba ★ Amazing Ocean Views ★ Top rat...,8 guests · 3 bedrooms · 5 beds · 3 baths,",&nbsp;",4.87 (187 reviews)
1,Entire home in Mountain View,A/C All Rms/WorkStations/Dishwasher AlohaHaleN...,8 guests · 3 bedrooms · 3 beds · 2 baths,"$332 CAD per night, originally $358",4.96 (217 reviews)
2,Entire bungalow in Pepeekeo,Heavenly Hakalau: Oceanfront Cliff House,4 guests · 2 bedrooms · 2 beds · 2 baths,$742 CAD per night,4.97 (154 reviews)
3,Entire rental unit in Kihei,3 minutes walk to the beach 2BR/2BA Top floor,4 guests · 2 bedrooms · 3 beds · 2 baths,$496 CAD per night,4.85 (171 reviews)
4,Entire guest suite in Honolulu,Honolulu Hale with free parking,6 guests · 2 bedrooms · 2 beds · 1 bath,$379 CAD per night,4.83 (124 reviews)
...,...,...,...,...,...
295,Entire loft in Honolulu,Unique opportunity to rent a New York style loft!,6 guests · 2 bedrooms · 2 beds · 1.5 baths,"$834 CAD per night, originally $904",4.72 (18 reviews)
296,Entire cottage in Paia,LunaHouse In The Heart of Paia,4 guests · 2 bedrooms · 2 beds · 1 bath,"$466 CAD per night, originally $504",5.0 (74 reviews)
297,Entire condo in Honolulu,NEW 2-bedroom condo at Waikiki/Wifi/Kitchen,5 guests · 2 bedrooms · 3 beds · 1 bath,$419 CAD per night,empty
298,Entire condo in Kihei,OCEAN VIEW 2 BR 2 BATH PENTHOUSE KAM 2/3 BEACH,4 guests · 2 bedrooms · 2 beds · 2 baths,$635 CAD per night,4.60 (5 reviews)


In [15]:
# Store the data in a CSV file.
# The path is relative to the notebook's working directory so the cell runs on
# any machine; the previous absolute path only existed inside one Windows
# user profile.
output_csv_path = 'airbnb_WebScrap_data.csv'
Airbnb_Data.to_csv(output_csv_path)