# In [None]:
### Airbnb Price and Review Analysis - EDA Template

# 1️⃣ Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium

# 2️⃣ Load Dataset
# Replace with your file path
df = pd.read_csv('listings.csv')
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell; here it sits mid-cell, so print the preview explicitly.
print(df.head())

# 3️⃣ Basic Data Info
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit an extra "None" line).
df.info()
# Summary statistics for the numeric columns.
print(df.describe())
# Count of missing values per column.
print(df.isnull().sum())

# 4️⃣ Price Trends
# Convert price to numeric if needed.
# Strip the currency symbol and thousands separators in one regex pass, then
# cast to float. The pattern is a raw string: the previous '\$' / '\,' literals
# were invalid escape sequences, which newer Python versions warn about.
# The regex replacement only touches string values, so an already-numeric
# column passes through unchanged.
df['price'] = df['price'].replace(r'[$,]', '', regex=True).astype(float)

# Average price by neighbourhood: rank the per-neighbourhood mean price from
# highest to lowest and chart the first ten as horizontal bars.
mean_price_by_area = df.groupby('neighbourhood')['price'].mean()
priciest_areas = mean_price_by_area.sort_values(ascending=False).head(10)
area_axes = priciest_areas.plot(kind='barh')
area_axes.set_title('Top 10 Expensive Neighbourhoods')
plt.show()

# Room type distribution: one box of prices per room type.
room_axes = sns.boxplot(data=df, x='room_type', y='price')
room_axes.set_title('Price Distribution by Room Type')
plt.show()

# 5️⃣ Location Heatmap
# Draws one small circle marker per listing on an interactive folium map and
# writes the result to 'airbnb_map.html'.

# Centre the map on the mean coordinate (Series.mean skips missing values).
m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=12)

# Listings with a missing latitude or longitude cannot be placed (folium
# rejects NaN locations), so keep only rows where both coordinates are present.
listing_coords = df[['latitude', 'longitude']].dropna()

# Iterate the two coordinate columns directly instead of iterrows(), which
# builds a full Series for every row and yields an index we never use.
for lat, lon in zip(listing_coords['latitude'], listing_coords['longitude']):
    folium.CircleMarker([lat, lon],
                        radius=2,
                        color='blue',
                        fill=True,
                        fill_opacity=0.4).add_to(m)

m.save('airbnb_map.html')

# 6️⃣ Amenities Impact
# Example: Check impact of Wi-Fi on price
# Match "wifi", "WiFi", "Wi-Fi", ... regardless of case. na=False makes
# listings with no amenities text count as "no Wi-Fi" (a clean boolean column)
# instead of producing NaN flags that would silently drop out of the plot.
df['has_wifi'] = df['amenities'].str.contains(r'wi-?fi', case=False, na=False)

sns.boxplot(x='has_wifi', y='price', data=df)
plt.title('Price vs Wi-Fi Availability')
plt.show()

# 7️⃣ Reviews Analysis
# Histogram of how many reviews each listing has received (50 bins).
review_counts = df['number_of_reviews']
reviews_axes = sns.histplot(review_counts, bins=50)
reviews_axes.set_title('Number of Reviews Distribution')
plt.show()

# Rating against price; skipped when the dataset has no
# 'review_scores_rating' column.
if 'review_scores_rating' in df:
    rating_axes = sns.scatterplot(data=df, x='review_scores_rating', y='price')
    rating_axes.set_title('Price vs Review Rating')
    plt.show()

# 8️⃣ Save Cleaned Data
# df.to_csv('cleaned_listings.csv', index=False)
