# Airbnb Scrape

In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Fetch one Airbnb search-results page, pull a handful of fields out of the
# HTML and write them as a single-row CSV (airbnb_data.csv).
#
# NOTE: every lookup below uses select_one, which returns only the first
# matching element, so at most one listing is recorded per run.

# URL to scrape
url = "https://www.airbnb.com/s/Southwest-Florida-International-Airport-RSW--Terminal-Access-Road--Fort-Myers--FL--USA/homes?tab_id=home_tab&refinement_paths%5B%5D=%2Fhomes&flexible_trip_lengths%5B%5D=one_week&monthly_start_date=2024-08-01&monthly_length=3&monthly_end_date=2024-11-01&price_filter_input_type=0&channel=EXPLORE&query=Southwest%20Florida%20International%20Airport%20%28RSW%29%2C%20Fort%20Myers%2C%20FL&place_id=ChIJgQUlvz8T24gRRk4VTw6fGPw&location_bb=QdRQtcKjgZ1B1CH%2BwqODFw%3D%3D&date_picker_type=calendar&checkin=2024-11-01&checkout=2024-12-31&source=structured_search_input_header&search_type=autocomplete_click"

# Send a GET request to the URL. Without a timeout, requests waits
# indefinitely on a stalled connection and the cell never finishes.
response = requests.get(url, timeout=30)
soup = BeautifulSoup(response.content, 'html.parser')


def _first_text(selector):
    """Return the text of the first element matching *selector*, or None."""
    element = soup.select_one(selector)
    return None if element is None else element.text


def _meta_content(property_name):
    """Return the ``content`` attribute of the first matching <meta> tag.

    Returns None when either the tag or its ``content`` attribute is absent.
    """
    element = soup.select_one(f'meta[property="{property_name}"]')
    return None if element is None else element.get('content')


# Split the URL once so the path and the query string are handled separately
# (a '/' inside the query can then never be mistaken for a path separator).
from urllib.parse import parse_qs, urlsplit

url_parts = urlsplit(url)
query_params = parse_qs(url_parts.query)

# Last path segment of the URL.
# NOTE(review): for the search URL above this is the literal "homes", not a
# listing identifier - confirm what "Property ID" is meant to hold.
property_id = url_parts.path.split('/')[-1]

# Extract data using the provided HTML paths and classes; each value is None
# when the element is not present in the fetched page.
title = _first_text('div.t1jojoys')
price = _first_text('span.a8jt5op')

total_price = price  # Assuming total price is the same as price for simplicity

latitude = _meta_content('airbedandbreakfast:location:latitude')
longitude = _meta_content('airbedandbreakfast:location:longitude')

latlong = f"{latitude},{longitude}" if latitude and longitude else None

rating_badge = _first_text('span[data-testid="rating-badge"]')
rating_description = _first_text('span[data-testid="rating-description"]')
number_of_reviews = _first_text('span[data-testid="number-of-reviews"]')

# Stay dates come from the same URL that was requested, so they cannot drift
# out of sync with it; None when the URL carries no such parameter.
checkin_date = query_params.get('checkin', [None])[0]
checkout_date = query_params.get('checkout', [None])[0]
state = "FL"  # Assuming state is Florida

# The subtitle is looked up once and split on single spaces; tokens 0, 2 and 4
# are taken as sleeps / bedrooms / bathrooms.
# NOTE(review): assumes the subtitle reads like
# "<n> guests <n> bedrooms <n> baths" - TODO confirm against the live markup.
subtitle_text = _first_text('div[data-testid="listing-card-subtitle"]')
subtitle_parts = subtitle_text.split(' ') if subtitle_text is not None else []


def _subtitle_part(position):
    """Return the subtitle token at *position*, or None if there are too few."""
    return subtitle_parts[position] if position < len(subtitle_parts) else None


sleeps = _subtitle_part(0)
bedrooms = _subtitle_part(2)
bathrooms = _subtitle_part(4)

# Create a DataFrame
data = [[property_id, title, url, price, total_price, latitude, longitude, latlong, rating_badge, rating_description, number_of_reviews, checkin_date, checkout_date, state, sleeps, bedrooms, bathrooms]]
columns = ["Property ID", "Title", "URL", "Price", "TotalPrice", "Latitude", "Longitude", "LatLong", "Rating Badge", "Rating Description", "Number of Reviews", "Check-in Date", "Check-out Date", "State", "Sleeps", "Bedrooms", "Bathrooms"]
df = pd.DataFrame(data, columns=columns)

# Save to CSV
df.to_csv('airbnb_data.csv', index=False)




In [16]:
import pandas as pd

# Convert listings-3.csv into the airbnb_data.csv column layout.

# Load the source listings export
df = pd.read_csv('listings-3.csv')

# Placeholder written to every output field that has no source column
not_available = 'N/A'

# (output column, value) pairs in output order. A value is either a column of
# the source frame or the placeholder, which is repeated on every row.
output_fields = [
    ('Property ID', df['id']),
    ('Title', df['name']),
    ('URL', df['listing_url']),
    ('Price', df['price']),
    # TotalPrice is assumed to equal Price
    ('TotalPrice', df['price']),
    ('Latitude', df['latitude']),
    ('Longitude', df['longitude']),
    # "latitude, longitude" as text
    ('LatLong', df['latitude'].astype(str) + ', ' + df['longitude'].astype(str)),
    # Rating Badge and Rating Description are both assumed to be
    # review_scores_rating
    ('Rating Badge', df['review_scores_rating']),
    ('Rating Description', df['review_scores_rating']),
    ('Number of Reviews', df['number_of_reviews']),
    ('Check-in Date', not_available),
    ('Check-out Date', not_available),
    ('Region ID', not_available),
    ('Destination', df['neighbourhood_cleansed']),
    # Price (top_dp) is assumed to equal Price
    ('Price (top_dp)', df['price']),
    ('Currency (top_cur)', not_available),
    ('Room Type', df['room_type']),
    ('Rate Plan', not_available),
    # Neighborhood ID is assumed to be neighbourhood_cleansed
    ('Neighborhood ID', df['neighbourhood_cleansed']),
    ('Privacy Tracking State', not_available),
    ('Search ID', not_available),
    ('User Intent', not_available),
    ('Referrer URL', not_available),
    ('PWA Timestamp', not_available),
    ('Sleeps', df['accommodates']),
    ('Bedrooms', df['bedrooms']),
    ('Bathrooms', df['bathrooms_text']),
]

# Assemble the output frame and write it out without the index column
new_df = pd.DataFrame(dict(output_fields))
new_df.to_csv('airbnb_data.csv', index=False)


In [19]:
import pandas as pd

# Map listings.csv onto the airbnb_data.csv column layout and append the
# result to the existing airbnb_data.csv.

# Read the listings.csv file (comma-delimited, the pandas default separator)
listings_df = pd.read_csv('listings.csv')

# Print the column names to verify
print(listings_df.columns)

# Create a new DataFrame with the required columns from listings.csv
new_listings_df = pd.DataFrame({
    'Property ID': listings_df['id'],
    'Title': listings_df['name'],
    'URL': 'N/A',  # Marking as N/A
    'Price': listings_df['price'],
    'TotalPrice': listings_df['price'],  # Assuming TotalPrice is the same as Price
    'Latitude': listings_df['latitude'],
    'Longitude': listings_df['longitude'],
    'LatLong': listings_df['latitude'].astype(str) + ', ' + listings_df['longitude'].astype(str),
    'Rating Badge': 'N/A',  # Marking as N/A
    'Rating Description': 'N/A',  # Marking as N/A
    'Number of Reviews': listings_df['number_of_reviews'],
    'Check-in Date': 'N/A',  # Marking as N/A
    'Check-out Date': 'N/A',  # Marking as N/A
    'Region ID': 'N/A',  # Marking as N/A
    'Destination': listings_df['neighbourhood'],
    'Price (top_dp)': listings_df['price'],  # Assuming Price (top_dp) is the same as Price
    'Currency (top_cur)': 'N/A',  # Marking as N/A
    'Room Type': listings_df['room_type'],
    'Rate Plan': 'N/A',  # Marking as N/A
    'Neighborhood ID': listings_df['neighbourhood'],  # Assuming Neighborhood ID is the same as neighbourhood
    'Privacy Tracking State': 'N/A',  # Marking as N/A
    'Search ID': 'N/A',  # Marking as N/A
    'User Intent': 'N/A',  # Marking as N/A
    'Referrer URL': 'N/A',  # Marking as N/A
    'PWA Timestamp': 'N/A',  # Marking as N/A
    # minimum_nights is a minimum stay length, not a guest capacity, and this
    # file has no capacity column, so Sleeps is left unavailable.
    'Sleeps': 'N/A',
    'Bedrooms': 'N/A',  # Marking as N/A
    'Bathrooms': 'N/A'  # Marking as N/A
})

# Read the existing airbnb_data.csv file.
# By default read_csv treats the text 'N/A' as a missing value, which would
# turn the existing rows' 'N/A' markers into empty cells when written back
# while the newly appended rows keep theirs. Reading every cell as plain text
# with default NA parsing disabled round-trips the existing rows unchanged.
airbnb_df = pd.read_csv('airbnb_data.csv', dtype=str, keep_default_na=False)

# Append the new listings to the existing airbnb_data.csv
updated_airbnb_df = pd.concat([airbnb_df, new_listings_df], ignore_index=True)

# Write the updated DataFrame back to airbnb_data.csv
updated_airbnb_df.to_csv('airbnb_data.csv', index=False)


Index(['id', 'name', 'host_id', 'host_name', 'neighbourhood_group',
       'neighbourhood', 'latitude', 'longitude', 'room_type', 'price',
       'minimum_nights', 'number_of_reviews', 'last_review',
       'reviews_per_month', 'calculated_host_listings_count',
       'availability_365'],
      dtype='object')


In [21]:
import pandas as pd

# Pick the rows of airnb.csv whose title mentions Florida, map them onto the
# airbnb_data.csv column layout and append them to airbnb_data.csv.

# Read the airnb.csv file
airbnb_new_df = pd.read_csv('airnb.csv')

# Filter for properties in Florida: case-insensitive match on the title;
# rows with a missing title are excluded (na=False)
florida_properties = airbnb_new_df[airbnb_new_df['Title'].str.contains('Florida', case=False, na=False)]

# Split the 'Date' text once; part 0 is used as check-in, part 1 as check-out.
# NOTE(review): assumes values look like "<check-in> - <check-out>" - confirm.
stay_dates = florida_properties['Date'].str.split(' - ')

# Create a new DataFrame with the required columns
new_florida_df = pd.DataFrame({
    'Property ID': 'N/A',  # Marking as N/A
    'Title': florida_properties['Title'],
    'URL': 'N/A',  # Marking as N/A
    'Price': florida_properties['Price(in dollar)'],
    # Use the offer price where present, otherwise fall back to the list price
    'TotalPrice': florida_properties['Offer price(in dollar)'].fillna(florida_properties['Price(in dollar)']),
    'Latitude': 'N/A',  # Marking as N/A
    'Longitude': 'N/A',  # Marking as N/A
    'LatLong': 'N/A',  # Marking as N/A
    # First decimal number in the text; values without a decimal point
    # (for example a bare "5") do not match and come out missing
    'Rating Badge': florida_properties['Review and rating'].str.extract(r'(\d+\.\d+)')[0],
    'Rating Description': florida_properties['Review and rating'],
    # First run of digits enclosed in parentheses
    'Number of Reviews': florida_properties['Review and rating'].str.extract(r'\((\d+)\)')[0],
    'Check-in Date': stay_dates.str[0],
    'Check-out Date': stay_dates.str[1],
    'Region ID': 'N/A',  # Marking as N/A
    'Destination': 'Florida',
    'Price (top_dp)': florida_properties['Price(in dollar)'],
    'Currency (top_cur)': 'USD',  # Assuming currency is USD
    'Room Type': 'N/A',  # Marking as N/A
    'Rate Plan': 'N/A',  # Marking as N/A
    'Neighborhood ID': 'N/A',  # Marking as N/A
    'Privacy Tracking State': 'N/A',  # Marking as N/A
    'Search ID': 'N/A',  # Marking as N/A
    'User Intent': 'N/A',  # Marking as N/A
    'Referrer URL': 'N/A',  # Marking as N/A
    'PWA Timestamp': 'N/A',  # Marking as N/A
    'Sleeps': 'N/A',  # Marking as N/A
    # First run of digits in 'Number of bed'
    # NOTE(review): the source column counts beds - confirm it is acceptable
    # to store that under Bedrooms.
    'Bedrooms': florida_properties['Number of bed'].str.extract(r'(\d+)')[0],
    'Bathrooms': 'N/A'  # Marking as N/A
})

# Read the existing airbnb_data.csv file.
# By default read_csv treats the text 'N/A' as a missing value, which would
# turn the existing rows' 'N/A' markers into empty cells when written back
# while the newly appended rows keep theirs. Reading every cell as plain text
# with default NA parsing disabled round-trips the existing rows unchanged.
airbnb_df = pd.read_csv('airbnb_data.csv', dtype=str, keep_default_na=False)

# Append the new Florida listings to the existing airbnb_data.csv
updated_airbnb_df = pd.concat([airbnb_df, new_florida_df], ignore_index=True)

# Write the updated DataFrame back to airbnb_data.csv
updated_airbnb_df.to_csv('airbnb_data.csv', index=False)
