In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
%matplotlib inline


In [None]:
import pandas as pd
import numpy as np
# Read the listings CSV (path is relative to the notebook's working directory)
# into `df`, then preview the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='../data/AB_NYC_2019.csv')
df.head(n=5)


In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Handle missing values in two steps, binding the result back to `df`:
#   1. drop rows where 'name' or 'host_name' is missing;
#   2. fill missing 'reviews_per_month' with 0 (assuming no reviews).
# The fill goes through DataFrame.fillna with a per-column dict rather than
# `df['reviews_per_month'].fillna(0, inplace=True)`: the in-place call acts on
# a column selection, which warns on pandas 2.x and leaves `df` unchanged once
# Copy-on-Write is active.
df = (
    df
    .dropna(subset=['name', 'host_name'])
    .fillna({'reviews_per_month': 0})
)

# Remaining null counts per column. Only the three columns handled above are
# guaranteed to be zero here; any other column with missing values still shows up.
df.isnull().sum()


In [None]:
# Summary statistics of the 'price' column, displayed as the cell output.
df['price'].describe()

In [None]:
# Keep only listings priced at 500 or less.
df = df.loc[df['price'].le(500)]

In [None]:
# Keep only listings with a strictly positive price.
df = df.loc[df['price'].gt(0)]

In [None]:
# Renumber the rows 0..n-1 and discard the old index labels.
df = df.reset_index(drop=True)

In [None]:
# Print the frame's structural summary (columns, dtypes, non-null counts).
df.info()

In [None]:
# Descriptive statistics for the frame's columns, displayed as the cell output.
df.describe()


Room Type Distribution

In [None]:
# Bar chart of the number of listings per room type.
# seaborn 0.13 deprecates passing `palette` without `hue` (removal planned for
# 0.14). Mapping `hue` to the same column as `x` and turning the legend off
# keeps one pastel colour per bar without the warning.
# Requires seaborn >= 0.13 for the `legend` argument.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(
    data=df,
    x='room_type',
    hue='room_type',
    palette='pastel',
    legend=False,
    ax=ax,
)
ax.set_title("Room Type Distribution")
ax.set_xlabel("Room Type")
ax.set_ylabel("Number of Listings")
plt.show()

Listings By Neighbourhood Group

In [None]:
# Bar chart of the number of listings per neighbourhood group.
# seaborn 0.13 deprecates passing `palette` without `hue` (removal planned for
# 0.14). Mapping `hue` to the same column as `x` and turning the legend off
# keeps one Set2 colour per bar without the warning.
# Requires seaborn >= 0.13 for the `legend` argument.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(
    data=df,
    x='neighbourhood_group',
    hue='neighbourhood_group',
    palette='Set2',
    legend=False,
    ax=ax,
)
ax.set_title('Listings by Neighbourhood Group')
ax.set_xlabel("Area")
ax.set_ylabel("Number Of Listings")
plt.show()

Price Distribution

In [None]:
# Histogram of listing prices in 50 bins, with the x-axis fixed to 0-500.
fig, ax = plt.subplots(figsize=(7, 4))
sns.histplot(data=df, x='price', bins=50, color='skyblue', ax=ax)
ax.set_title("Price Distribution")
ax.set_xlabel("Price ($)")
ax.set_ylabel("Number Of Listings")
ax.set_xlim(0, 500)
plt.show()

Average Price by Room Type

In [None]:
# Bar chart of price aggregated per room type. No estimator is passed, so
# seaborn's default aggregation applies (the mean, per the seaborn docs).
# seaborn 0.13 deprecates passing `palette` without `hue` (removal planned for
# 0.14). Mapping `hue` to the same column as `x` and turning the legend off
# keeps one muted colour per bar without the warning.
# Requires seaborn >= 0.13 for the `legend` argument.
fig, ax = plt.subplots(figsize=(6, 4))
sns.barplot(
    data=df,
    x='room_type',
    y='price',
    hue='room_type',
    palette='muted',
    legend=False,
    ax=ax,
)
ax.set_title("Average Price by Room Type")
ax.set_xlabel("Room Type")
ax.set_ylabel('Avg Price ($)')
plt.show()

Reviews vs Room Type

In [None]:
# Boxen (letter-value) plot of 'reviews_per_month' for each room type.
# seaborn 0.13 deprecates passing `palette` without `hue` (removal planned for
# 0.14). Mapping `hue` to the same column as `x` and turning the legend off
# keeps one colour per category without the warning.
# Requires seaborn >= 0.13 for the `legend` argument.
# Axis labels are set explicitly so the figure matches the other plots.
fig, ax = plt.subplots(figsize=(6, 4))
sns.boxenplot(
    data=df,
    x='room_type',
    y='reviews_per_month',
    hue='room_type',
    palette='cool',
    legend=False,
    ax=ax,
)
ax.set_title("Reviews per Month by Room Type")
ax.set_xlabel("Room Type")
ax.set_ylabel("Reviews per Month")
plt.show()

Map Of Listings

In [None]:
import pandas as pd
import plotly.express as px

# Interactive map of a random sample of listings, written to a standalone HTML file.
#
# This cell uses the `df` built by the earlier cells and does not re-read the
# CSV. Reloading here would rebind `df` to the unfiltered data, so the map
# would ignore the cleaning steps, and the export cell that follows would
# write raw rows to 'cleaned_airbnb_data.csv'.

# Only show up to 1000 points for speed. Capping at len(df) avoids the
# ValueError sample() raises when asked for more rows than exist, and the
# fixed seed makes the map identical on every run.
n_points = min(1000, len(df))
map_sample = df.sample(n=n_points, random_state=42)

# Create the scatter map plot
fig = px.scatter_map(
    map_sample,
    lat="latitude",
    lon="longitude",
    color="price",
    size="price",
    hover_name="name",
    hover_data=["neighbourhood", "room_type", "price"],
    color_continuous_scale=px.colors.cyclical.IceFire,
    zoom=10,
    height=500,
    # scatter_map draws on `layout.map`, so the basemap is chosen with
    # `map_style`; setting `mapbox_style` on this figure has no visible effect.
    map_style="carto-positron",
    title="Airbnb Listings in NYC (Sample View)",
)

# Save to HTML (works in all environments)
fig.write_html("airbnb_map.html")


In [None]:
# Write the current contents of `df` to a CSV file in the working directory;
# index=False leaves the row index out of the file.
df.to_csv('cleaned_airbnb_data.csv', index=False)
