In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plotting defaults: seaborn's "whitegrid" style first, then the
# default matplotlib figure size (width, height).
sns.set(style="whitegrid")
plt.rcParams.update({'figure.figsize': (10, 6)})


In [None]:
# Path to the Airbnb workbook. NOTE(review): this is an absolute path (looks like
# a Colab upload location - confirm); adjust it when running the notebook elsewhere.
file_path = '/content/1730285881-Airbnb_Open_Data.xlsx'
df = pd.read_excel(file_path)

print("🔹 Dataset Preview:")
display(df.head())

print("\n🔹 Dataset Information:")
# DataFrame.info() writes its report straight to stdout and returns None, so it
# must not be wrapped in print() - that would add a stray "None" line.
df.info()

print("\n🔹 Missing Values:")
# Per-column count of missing cells.
print(df.isnull().sum())


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid")
plt.rcParams['figure.figsize'] = (10,6)

# Resolve the columns by name rather than hard-coding them:
#   * review_col - a review *date* column: its name mentions "review" together
#     with "last" or "date". The monthly chart needs real timestamps; feeding a
#     numeric review column (counts, rates, scores) to pd.to_datetime would read
#     the numbers as epoch offsets and yield meaningless months.
#   * avail_col  - a column whose name mentions availability.
# The first match wins, so a later unrelated column cannot replace an earlier hit.
review_col = None
avail_col = None

for col in df.columns:
    c = str(col).lower()  # str(): column labels are not guaranteed to be strings
    if 'review' in c and ('last' in c or 'date' in c):
        if review_col is None:
            review_col = col
    elif 'availability' in c or 'available' in c:
        if avail_col is None:
            avail_col = col


if review_col is not None:
    # Parse into a separate Series so the source column keeps its original
    # values for any later cell that reads it. Unparseable entries become NaT.
    review_dates = pd.to_datetime(df[review_col], errors='coerce')
    df['month'] = review_dates.dt.month
    # value_counts() drops the NaN months produced by NaT dates.
    monthly_bookings = df['month'].value_counts().sort_index()

    if monthly_bookings.empty:
        print(f"⚠️ No valid dates found in column '{review_col}'.")
    else:
        # Months come back as floats when NaT rows exist; use ints for clean tick labels.
        monthly_bookings.index = monthly_bookings.index.astype(int)

        plt.figure()
        sns.barplot(x=monthly_bookings.index, y=monthly_bookings.values, color='skyblue')
        plt.title('Bookings by Month')
        plt.xlabel('Month')
        plt.ylabel('Number of Bookings')
        plt.show()
else:
    print("⚠️ No review date column (e.g. 'last review') found in dataset.")

if avail_col is not None:
    plt.figure()
    sns.histplot(df[avail_col], bins=30, kde=True, color='orange')
    plt.title('Distribution of Booking Lead Time (Availability Days)')
    plt.xlabel('Days Available in a Year')
    plt.ylabel('Frequency')
    plt.show()
else:
    print("⚠️ No availability column found in dataset.")


In [None]:
print("\n💰 PRICING STRATEGIES ANALYSIS")

# Find the relevant columns by name.
#   * price_col / room_col: the first matching column wins.
#   * neigh_col: a group-level column (name contains "group") is preferred over
#     a fine-grained one, because the bar chart below draws one bar per category
#     and becomes unreadable with a large number of categories.
price_col = None
room_col = None
neigh_col = None

for col in df.columns:
    col_lower = str(col).lower()  # str(): column labels are not guaranteed to be strings
    if 'price' in col_lower:
        if price_col is None:
            price_col = col
    elif 'room' in col_lower and 'type' in col_lower:
        if room_col is None:
            room_col = col
    elif ('neighbourhood' in col_lower or 'neighborhood' in col_lower
          or 'location' in col_lower or 'area' in col_lower):
        is_group = 'group' in col_lower
        current_is_group = neigh_col is not None and 'group' in str(neigh_col).lower()
        if neigh_col is None or (is_group and not current_is_group):
            neigh_col = col

if price_col is not None:
    prices = df[price_col]
    if not pd.api.types.is_numeric_dtype(prices):
        # Text prices such as "$1,200 " would all be coerced to NaN below;
        # strip currency symbols, thousands separators and whitespace first.
        prices = prices.astype(str).str.replace(r'[$,\s]', '', regex=True)
    # Written back in place on purpose: the price column is numeric from here on.
    df[price_col] = pd.to_numeric(prices, errors='coerce')


if price_col is not None and room_col is not None:
    plt.figure()
    sns.boxplot(x=room_col, y=price_col, data=df)
    plt.title('Price Distribution by Room Type')
    plt.xlabel('Room Type')
    plt.ylabel('Price')
    plt.show()
else:
    print("⚠️ Missing 'room_type' or 'price' column in dataset.")


if price_col is not None and neigh_col is not None:
    # Mean price per area, most expensive first.
    avg_price = df.groupby(neigh_col)[price_col].mean().sort_values(ascending=False)
    plt.figure()
    avg_price.plot(kind='bar', color='orange')
    plt.title('Average Price by Location/Neighbourhood')
    plt.ylabel('Average Price')
    plt.xlabel('Location')
    plt.show()
else:
    print("⚠️ 'neighbourhood_group' or equivalent column not found.")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set(style="whitegrid")
plt.rcParams['figure.figsize'] = (10, 6)


def _names_room_type(label):
    """Return True when the column label mentions both "room" and "type" (any case)."""
    text = label.lower()
    return 'room' in text and 'type' in text


def _names_minimum_nights(label):
    """Return True when the column label mentions both "minimum" and "night" (any case)."""
    text = label.lower()
    return 'minimum' in text and 'night' in text


# Room-type labels take precedence: a label matching both patterns counts as a
# room-type column only. When several labels match, the last one is used.
room_hits = [label for label in df.columns if _names_room_type(label)]
night_hits = [
    label for label in df.columns
    if not _names_room_type(label) and _names_minimum_nights(label)
]
room_col = room_hits[-1] if room_hits else None
night_col = night_hits[-1] if night_hits else None


# Pie chart: share of listings per room type.
if room_col is not None:
    room_pref = df[room_col].value_counts()
    plt.figure()
    pie_ax = room_pref.plot(
        kind='pie',
        autopct='%1.1f%%',
        startangle=90,
        colors=sns.color_palette('pastel'),
    )
    pie_ax.set_title('Guest Room Type Preferences')
    pie_ax.set_ylabel('')  # hide the default y-axis label on the pie
    plt.show()


# Histogram (with KDE overlay) of the minimum-nights column.
if night_col is not None:
    plt.figure()
    hist_ax = sns.histplot(df[night_col], bins=30, color='green', kde=True)
    hist_ax.set_title('Distribution of Minimum Nights Booked')
    hist_ax.set_xlabel('Minimum Nights')
    hist_ax.set_ylabel('Frequency')
    plt.show()


In [None]:
print("\n📈 SUMMARY INSIGHTS")

# The analysis cells locate columns by fuzzy name matching, so the summary must
# not depend on one exact spelling either. Build a lookup from a normalised key
# (trimmed, lower-case, whitespace runs replaced by "_") to the real column
# label, so that e.g. "Room Type" and "room type" both resolve for "room_type".
columns_by_key = {}
for col in df.columns:
    key = '_'.join(str(col).strip().lower().split())
    columns_by_key.setdefault(key, col)  # first column wins on key collisions


def _find_column(name):
    """Return the actual column label for `name`, or None if there is none.

    An exact match on `name` takes priority; otherwise the normalised lookup
    built above is consulted.
    """
    if name in df.columns:
        return name
    return columns_by_key.get(name)


price_name = _find_column('price')
if price_name is not None:
    avg_price = df[price_name].mean()
    print(f"✅ Average Price: ${avg_price:.2f}")

room_name = _find_column('room_type')
if room_name is not None:
    room_counts = df[room_name].value_counts()
    if not room_counts.empty:  # idxmax() raises on an empty Series
        top_room = room_counts.idxmax()
        print(f"✅ Most Preferred Room Type: {top_room}")

area_name = _find_column('neighbourhood_group')
if area_name is not None:
    area_counts = df[area_name].value_counts()
    if not area_counts.empty:  # idxmax() raises on an empty Series
        top_area = area_counts.idxmax()
        print(f"✅ Most Popular Area: {top_area}")

reviews_name = _find_column('reviews_per_month')
if reviews_name is not None:
    avg_reviews = df[reviews_name].mean()
    print(f"✅ Average Reviews per Month: {avg_reviews:.2f}")

availability_name = _find_column('availability_365')
if availability_name is not None:
    avg_availability = df[availability_name].mean()
    print(f"✅ Average Availability (days/year): {avg_availability:.1f}")

