In [None]:
# Importing necessary libraries
import streamlit as st
import pandas as pd
import plotly.express as px

# Set the page title and wide layout first, then render the dashboard heading
st.set_page_config(
    layout="wide",
    page_title="Airbnb NYC Dashboard",
)
st.title(" Airbnb New York City Dashboard")

# Loading cleaned Airbnb dataset
@st.cache_data
def load_data(path='/content/drive/My Drive/cleaned_airbnb.csv'):
    """Load the cleaned Airbnb listings and derive the review year.

    Args:
        path: Location of the cleaned CSV file. Defaults to the original
            Google Drive path, so existing ``load_data()`` calls behave
            exactly as before.

    Returns:
        A DataFrame with ``last_review`` parsed to datetimes and an extra
        ``review_year`` column holding the year of ``last_review``.
    """
    df = pd.read_csv(path)
    # errors='coerce' turns unparseable dates into NaT instead of raising,
    # so the derived year is missing for those rows.
    df['last_review'] = pd.to_datetime(df['last_review'], errors='coerce')
    df['review_year'] = df['last_review'].dt.year
    return df

df = load_data()

# Sidebar filters
st.sidebar.header("Filters")
neighbourhood_group = st.sidebar.selectbox(
    "Select Neighbourhood Group",
    options=df['neighbourhood_group'].unique(),
)

# Cascading filters: the neighbourhood choices depend on the selected group
st.sidebar.markdown("### Cascading Filters")
filtered_neighbourhoods = df.loc[
    df['neighbourhood_group'] == neighbourhood_group, 'neighbourhood'
].unique()
neighbourhood = st.sidebar.selectbox("Select Neighbourhood", options=filtered_neighbourhoods)

room_type_options = df['room_type'].unique()
room_types = st.sidebar.multiselect(
    "Select Room Type(s)",
    options=room_type_options,
    default=room_type_options,
)

# The slider rejects a default value outside [min_value, max_value], so the
# preferred (50, 500) window is clamped to the price range present in the data.
min_price = int(df['price'].min())
max_price = int(df['price'].max())
default_low = min(max(50, min_price), max_price)
default_high = max(min(500, max_price), default_low)
price_range = st.sidebar.slider(
    "Select Price Range ($)",
    min_value=min_price,
    max_value=max_price,
    value=(default_low, default_high),
)

# Year filter in sidebar (rows without a review year are left out of the options)
available_years = sorted(int(year) for year in df['review_year'].dropna().unique())

selected_year = st.sidebar.selectbox("Select Review Year:", options=available_years)


# Build the filtered dataset: a listing is kept only if it satisfies every
# sidebar filter (group, neighbourhood, room type, price window, review year)
matches_filters = (
    (df['neighbourhood_group'] == neighbourhood_group)
    & (df['neighbourhood'] == neighbourhood)
    & df['room_type'].isin(room_types)
    & df['price'].between(price_range[0], price_range[1])
    & (df['review_year'] == selected_year)
)
df_selection = df[matches_filters]

# Quick stats for the current selection: average price and number of listings
st.subheader("Summary Metrics")

col1, col2 = st.columns(2)
# The mean of an empty selection is NaN; show "N/A" rather than "nan".
mean_price = df_selection['price'].mean()
average_price = "N/A" if pd.isna(mean_price) else round(mean_price, 2)
col1.metric(label="Average Price ($)", value=average_price)
col2.metric(label="Total Listings", value=df_selection.shape[0])

# Connected visualisations: every chart in this section is driven by df_selection
st.markdown("###  Connected Visualisations")


# Interactive map of the selected listings, coloured and sized by price
with st.container():
    if df_selection.empty:
        st.info("No listings found for the selected filters.")
    else:
        map_options = dict(
            lat="latitude",
            lon="longitude",
            color="price",
            size="price",
            hover_name="name",
            mapbox_style="open-street-map",
            color_continuous_scale="Plasma",
            zoom=10,
            height=500,
            title="Airbnb Listings Map",
        )
        fig_map = px.scatter_mapbox(df_selection, **map_options)
        st.plotly_chart(fig_map, use_container_width=True)

# Bar chart of the number of selected listings per room type
with st.container():
    if df_selection.empty:
        st.info("No data available to show Room Type Distribution.")
    else:
        listings_per_type = df_selection['room_type'].value_counts()
        room_type_counts = pd.DataFrame({
            'room_type': listings_per_type.index,
            'count': listings_per_type.values,
        })

        fig_room = px.bar(
            room_type_counts,
            x='room_type',
            y='count',
            title="Room Type Distribution",
            labels={'room_type': 'Room Type', 'count': 'Number of Listings'},
        )
        st.plotly_chart(fig_room, use_container_width=True)

# Scatter plot relating price to review count, coloured by room type
with st.container():
    if df_selection.empty:
        st.info("No data available to show Scatter Plot.")
    else:
        scatter_options = dict(
            x='number_of_reviews',
            y='price',
            color='room_type',
            hover_data=['name'],
            title="Price vs Number of Reviews",
        )
        fig_scatter = px.scatter(df_selection, **scatter_options)
        st.plotly_chart(fig_scatter, use_container_width=True)

# Smoothed line chart comparing the price distribution of each selected
# neighbourhood group; this section uses the full dataset, not df_selection
st.markdown("### Dynamic Configuration")
with st.container():
    st.subheader(" Price Distribution")

    selected_places = st.multiselect(
        "Select Neighbourhoods to Compare:",
        options=df['neighbourhood_group'].unique(),
        default=df['neighbourhood_group'].unique()
    )

    bins = st.slider(
        "Select Number of Bins",
        min_value=5,
        max_value=100,
        value=30,
        step=5,
        help="Adjust price bin granularity"
    )

    smoothed_frames = []

    for place in selected_places:
        place_prices = df.loc[df['neighbourhood_group'] == place, 'price']
        if place_prices.empty:
            continue

        # Bin this group's prices into `bins` equal-width intervals and count
        # the listings per interval, ordered from cheapest to most expensive.
        price_counts = pd.cut(place_prices, bins=bins).value_counts().sort_index()
        bin_labels = [interval.mid for interval in price_counts.index]

        temp_df = pd.DataFrame({
            'Price Bin Midpoint': bin_labels,
            'Listing Count': price_counts.values,
            'Borough': place
        })

        # A centred 3-bin rolling mean leaves the first and last bins as NaN;
        # back-fill then forward-fill those edges. The result is assigned back
        # because an in-place fillna on a column selection does not reliably
        # update the parent frame, and fillna(method=...) is deprecated.
        temp_df['Smoothed Count'] = (
            temp_df['Listing Count']
            .rolling(window=3, center=True)
            .mean()
            .bfill()
            .ffill()
        )

        smoothed_frames.append(temp_df)

    if smoothed_frames:
        combined_df = pd.concat(smoothed_frames)

        fig_multi = px.line(
            combined_df,
            x='Price Bin Midpoint',
            y='Smoothed Count',
            color='Borough',
            title="Price Distribution",
            labels={
                'Price Bin Midpoint': 'Price ($)',
                'Smoothed Count': 'Number of Listings',
                'Borough': 'Neighbourhood Group'
            },
            markers=True
        )
        st.plotly_chart(fig_multi, use_container_width=True)
    else:
        st.info("No data available for selected borough(s).")

# Conditional content: listings priced above $500 are shown only while the
# checkbox is ticked, and are drawn from the current filtered selection
with st.container():
    st.markdown("### Conditional Content")
    st.subheader("Luxury Listings (Price > $500)")
    show_luxury = st.checkbox("Show Luxury Listings")
    if show_luxury:
        luxury = df_selection.loc[df_selection['price'] > 500]
        if luxury.empty:
            st.info("No luxury listings available based on selected filters.")
        else:
            luxury_columns = ['name', 'neighbourhood', 'price', 'room_type']
            st.dataframe(luxury[luxury_columns])


# Expandable view of the filtered records
with st.expander("View Raw Data"):
    st.dataframe(df_selection)


# Footer captions: author details followed by the data source
for footer_line in (
    "Dashboard created by Mohamed Shaz Pathiattu Valappil.",
    "Student ID: 24300262",
    "Mail ID: mohamed.pathiattuvalappil@ucdconnect.ie",
    "Data source: Airbnb NYC 2019.",
):
    st.caption(footer_line)