## Visualize your data with Streamlit

Create interactive charts using Streamlit, directly in your Notebook

Make sure you upload `environment.yml` and `diamonds.csv` to this folder!

In [None]:
import streamlit as st
import altair as alt
import pandas as pd

In [None]:
# Load the diamonds dataset from the CSV file in the working folder.
df = pd.read_csv(filepath_or_buffer='diamonds.csv')

# Preview the first five rows as the cell output.
df.head(n=5)

In [None]:
import streamlit as st
import altair as alt

# Page heading shown above the scatter plot.
st.title("Diamond Carat vs Price Analysis")

# Scatter plot built from df: carat on the x axis, price on the y axis,
# points colored by cut, with the listed fields shown in the tooltip.
chart = alt.Chart(df).mark_circle()
chart = chart.encode(
    x='carat',
    y='price',
    color='cut',
    tooltip=['carat', 'price', 'cut', 'color'],
)
chart = chart.properties(width=600, height=400)

# Hand the finished chart to Streamlit for rendering.
st.altair_chart(chart, use_container_width=True)

In [None]:
import streamlit as st
import altair as alt


st.title("Diamond Analysis Dashboard")

# Headline statistics, laid out in three side-by-side columns.
prices = df['price']
avg_price_col, avg_carat_col, price_span_col = st.columns(3)
avg_price_col.metric("Average Price", f"${prices.mean():,.2f}")
avg_carat_col.metric("Average Carat", f"{df['carat'].mean():.2f}")
price_span_col.metric("Price Range", f"${prices.min():,} - ${prices.max():,}")

# Filter widgets: cut and color pickers side by side, with every
# available value preselected.
cut_options = sorted(df['cut'].unique())
color_options = sorted(df['color'].unique())
cut_picker_col, color_picker_col = st.columns(2)
cut_selection = cut_picker_col.multiselect(
    "Select Cut",
    options=cut_options,
    default=cut_options,
)
color_selection = color_picker_col.multiselect(
    "Select Color",
    options=color_options,
    default=color_options,
)

# Price slider covering the full observed price range; both handles start
# at the extremes so nothing is filtered out initially.
lowest_price = int(prices.min())
highest_price = int(prices.max())
price_range = st.slider(
    "Price Range",
    min_value=lowest_price,
    max_value=highest_price,
    value=(lowest_price, highest_price),
)

# Wrapped in st.cache_data so the filtering result is cached by Streamlit.
@st.cache_data
def filter_data(df, cuts, colors, price_min, price_max):
    """Return the rows of ``df`` that match the selected cuts, colors and prices.

    Args:
        df: DataFrame with ``cut``, ``color`` and ``price`` columns.
        cuts: Values of ``cut`` to keep.
        colors: Values of ``color`` to keep.
        price_min: Lower price bound passed to ``Series.between``.
        price_max: Upper price bound passed to ``Series.between``.

    Returns:
        The subset of ``df`` whose rows satisfy all three conditions.
    """
    cut_matches = df['cut'].isin(cuts)
    color_matches = df['color'].isin(colors)
    price_matches = df['price'].between(price_min, price_max)
    return df[cut_matches & color_matches & price_matches]

# Run the cached filter with the current widget selections.
price_low, price_high = price_range
filtered_df = filter_data(df, cut_selection, color_selection, price_low, price_high)

# Two charts side by side, both drawn from the filtered rows.
col1, col2 = st.columns(2)

with col1:
    st.subheader("Price Distribution by Cut")
    # Box plot of price for each cut, colored by cut.
    chart1 = alt.Chart(filtered_df).mark_boxplot()
    chart1 = chart1.encode(x='cut:N', y='price:Q', color='cut:N')
    chart1 = chart1.properties(height=300)
    st.altair_chart(chart1, use_container_width=True)

with col2:
    st.subheader("Carat vs Price")
    # Scatter plot of carat against price, colored by cut.
    chart2 = alt.Chart(filtered_df).mark_circle()
    chart2 = chart2.encode(
        x='carat:Q',
        y='price:Q',
        color='cut:N',
        tooltip=['cut', 'color', 'price', 'carat'],
    )
    chart2 = chart2.properties(height=300)
    st.altair_chart(chart2, use_container_width=True)

# Heatmap of mean price for every cut/color combination in the filtered rows.
st.subheader("Average Price by Cut and Color")
avg_price_chart = alt.Chart(filtered_df).mark_rect()
avg_price_chart = avg_price_chart.encode(
    x='cut:N',
    y='color:N',
    color=alt.Color('mean(price):Q', scale=alt.Scale(scheme='viridis')),
    tooltip=[
        'cut',
        'color',
        alt.Tooltip('mean(price):Q', format='$,.2f'),
    ],
)
avg_price_chart = avg_price_chart.properties(height=200)
st.altair_chart(avg_price_chart, use_container_width=True)

Push this update to GitHub!