In [76]:
import pandas as pd

# Read both input tables; the paths are relative to the working directory.
airbnbprice_df, neighbourhoods_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'NYC Airbnb Trend/airbnbprice.csv',
        'NYC Airbnb Trend/neighbourhoods.csv',
    )
)

# Optional sanity check: show each table's leading rows under its heading.
print("Airbnb Price Data", airbnbprice_df.head(), sep="\n")
print("Neighbourhoods Data", neighbourhoods_df.head(), sep="\n")

# Alias -> canonical spelling, built from (canonical, known spellings) pairs.
# Labels that have no entry in this table are left exactly as they are.
# NOTE(review): every key is a borough name, yet the table is applied to the
# 'neighbourhood' column below — confirm that this is the intended column.
neighbourhood_mapping = {
    alias: canonical
    for canonical, aliases in (
        ("Manhattan", ("Manhat", "Manhattan")),
        ("Brooklyn", ("Brooklyn", "Brook")),
        ("Queens", ("Queens",)),
        ("Bronx", ("Bronx",)),
        ("Staten Island", ("Staten Island",)),
    )
    for alias in aliases
}

# Translate known aliases; wherever the lookup misses, fall back to the
# original label so nothing is blanked out.
raw_labels = airbnbprice_df['neighbourhood']
mapped_labels = raw_labels.map(neighbourhood_mapping)
airbnbprice_df['neighbourhood'] = mapped_labels.fillna(raw_labels)

# Summary statistics for both tables, listings first.
print(airbnbprice_df.describe(), neighbourhoods_df.describe(), sep="\n")

# Reference list: the distinct values found in the 'neighbourhood' column of
# the neighbourhoods table.
nyc_neighbourhoods = neighbourhoods_df['neighbourhood'].unique()
print("NYC Neighbourhoods:", nyc_neighbourhoods)

# Keep only the listings whose neighbourhood appears in the reference list.
in_reference_list = airbnbprice_df['neighbourhood'].isin(nyc_neighbourhoods)
nyc_listings = airbnbprice_df.loc[in_reference_list]
print("Filtered NYC Listings:", nyc_listings, sep="\n")

# Discard rows with a missing price first, then rows that repeat an earlier
# row in every column.
nyc_listings = nyc_listings.dropna(subset=['price']).drop_duplicates()
print("Filtered NYC Listings after removing duplicates:", nyc_listings, sep="\n")

# Price distribution before any outlier handling.
print(
    "Summary Statistics before Removing Outliers:",
    nyc_listings['price'].describe(),
    sep="\n",
)

# 1.5 * IQR rule: a price is kept only if it lies within 1.5 interquartile
# ranges of the first and third quartiles (both bounds inclusive).
q1, q3 = (nyc_listings['price'].quantile(level) for level in (0.25, 0.75))
iqr = q3 - q1
lower_bound, upper_bound = q1 - 1.5 * iqr, q3 + 1.5 * iqr
nyc_listings = nyc_listings[nyc_listings['price'].between(lower_bound, upper_bound)]

# Price distribution of the rows that survived the filter.
print(
    "Summary Statistics after Removing Outliers:",
    nyc_listings['price'].describe(),
    sep="\n",
)

# Mean price of the remaining listings, reported to two decimal places.
average_price_nyc = nyc_listings['price'].mean()
print(f"The average Airbnb listing price in New York City is ${average_price_nyc:.2f}")


Airbnb Price Data
                    id                                        name    host_id  \
0   794696802406920855            Lovely studio in Hell's Kitchen!   43305568   
1              6713005                           Large one bedroom   35144920   
2             54294086  Luxury 1- bedroom rental unit with Hot tub  440355290   
3  1039262508734112133                   Private room close to LGA  430854617   
4   922527181364298444               New Flushing Modern Apartment  420975523   

  host_name neighbourhood_group    neighbourhood   latitude  longitude  \
0      Rüya           Manhattan   Hell's Kitchen  40.762272 -73.995836   
1     Kevin           Manhattan  Upper East Side  40.767510 -73.955170   
2    Tavian              Queens          Jamaica  40.697390 -73.792790   
3     Bryan              Queens    East Elmhurst  40.758789 -73.867923   
4      Will              Queens         Flushing  40.754404 -73.832370   

         room_type  price  minimum_nights  number_