In [2]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.43.1-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.43.1-py2.py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m53.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m67.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[

In [9]:
import streamlit as st
import plotly.express as px
import plotly.figure_factory as ff
import pandas as pd
import os
import warnings
warnings.filterwarnings('ignore')

st.set_page_config(page_title="Superstore!",
                   page_icon=":bar_chart:",
                   layout="wide"
                   )

st.title(" :bar_chart: Superstore EDA")
st.markdown('<style>div.block-containeer{padding-top:1rem;}</style>', unsafe_allow_html = True)
fl = st.file_uploader(":file_folder Upload a file", type=(["csv", "txt", "xlsx", "xls"]))
if fl:
    filename = fl.name
    st.write(filename)
    df = pd.read_csv(filename, encoding="ISO-8859-1")
else:
  df = pd.read_csv("/content/Sample - Superstore.csv", encoding="ISO-8859-1")

col1, col2 = st.columns((2))
df['Order Date'] = pd.to_datetime(df['Order Date'])

startDate = pd.to_datetime(df['Order Date']).min()
endDate = pd.to_datetime(df['Order Date']).max()

with col1:
  date1 = pd.to_datetime(st.date_input("Start Date", startDate))
with col2:
   date2 = pd.to_datetime(st.date_input("End Date", endDate))

df = df[(df['Order Date'] >= date1) & (df['Order Date'] <= date2)].copy()

st.sidebar.header("Choose your filter:")
region = st.sidebar.multiselect("Pick your region", df['Region'].unique())

if not region:
  df2 = df.copy()
else:
  df2 = df[df['Region'].isin(region)]

state = st.sidebar.multiselect("Pick your state", df2['State'].unique())
if not state:
  df3 = df2.copy()
else:
  df3 = df2[df2['State'].isin(state)]

city = st.sidebar.multiselect("Pick your city", df3['City'].unique())

if not region and not state and not city:
  filtered_df = df
elif not state and not city:
  filtered_df = df[df['Region'].isin(region)]
elif not region and not city:
  filtered_df = df[df['State'].isin(state)]
elif state and city:
  filtered_df = df3[df["State"].isin(state) & df3['City'].isin(city)]
elif region and city:
  filtered_df = df3[df["Region"].isin(region) & df3['City'].isin(city)]
elif region and state:
  filtered_df = df3[df["Region"].isin(region) & df3['State'].isin(state)]
elif city:
  filtered_df = df3[df3['City'].isin(city)]
else:
  filtered_df = df3[df3["Region"].isin(region) & df3["State"].isin(state) & df3['City'].isin(city)]

category_df = filtered_df.groupby(by=["Category"], as_index=False)["Sales"].sum()
with col1:
  st.subheader("Category-wise Sales")
  fig = px.bar(category_df, x="Category", y="Sales", text=['${:,.2f}'.format(x) for x in category_df["Sales"]], template = 'seaborn')
  st.plotly_chart(fig, use_container_width = True, height = 200)

with col2:
  st.subheader("Region-wise Sales")
  fig = px.pie(filtered_df, values="Sales", names="Region", hole=0.5)
  fig.update_traces(text = filtered_df["Region"], textposition = 'inside')
  st.plotly_chart(fig, use_container_width = True)

cl1, cl2 = st.columns(2)
with cl1:
  with st.expander("Category_ViewData"):
    st.write(category_df.style.background_gradient(cmap="Blues"))
    csv = category_df.to_csv(index=False).encode('utf-8')
    st.download_button("Download Data", data=csv, file_name="Category.csv", mime="text/csv",
                       help='Click here to download the data as a CSV file')

with cl2:
  with st.expander("Region_ViewData"):
    region = filtered_df.groupby(by="Region", as_index=False)["Sales"].sum()
    st.write(region.style.background_gradient(cmap="Oranges"))
    csv = region.to_csv(index=False).encode('utf-8')
    st.download_button("Download Data", data=csv, file_name="Region.csv", mime="text/csv",
                       help='Click here to download the data as a CSV file')

filtered_df['month_year'] = filtered_df["Order Date"].dt.to_period("M")
st.subheader("Time Series Analysis")

line_chart = pd.DataFrame(filtered_df.groupby(filtered_df["month_year"].dt.strftime("%Y : %b"))["Sales"].sum()).reset_index()
fig2 = px.line(line_chart, x="month_year", y="Sales", labels={"Sales":"Amount"}, height=500, width=1000, template="gridon")
st.plotly_chart(fig2) # Pass the fig2 object to st.plotly_chart
# st.line_chart(line_chart, x="month_year", y="Sales", labels={"Sales":"Amount"}, height=500, width=1000, template="gridon") # Remove or comment out this line

with st.expander("View Data of Time Series Analysis"):
  st.write(line_chart.T.style.background_gradient(cmap="Blues"))
  csv = line_chart.to_csv(index=False).encode('utf-8')
  st.download_button("Download Data", data=csv, file_name="TimeSeries.csv", mime="text/csv")
st.subheader("Hierarchical view of Sales using TreeMap")
fig3 = px.treemap(filtered_df, path = ["Region","Category","Sub-Category"], values = "Sales",hover_data = ["Sales"],
                  color = "Sub-Category")
fig3.update_layout(width = 800, height = 650)
st.plotly_chart(fig3, use_container_width=True)

chart1, chart2 = st.columns((2))

with chart1:
    st.subheader('Segment wise Sales')
    fig = px.pie(filtered_df, values = "Sales", names = "Segment", template = "plotly_dark")
    fig.update_traces(text = filtered_df["Segment"], textposition = "inside")
    st.plotly_chart(fig,use_container_width=True)

with chart2:
    st.subheader('Category wise Sales')
    fig = px.pie(filtered_df, values = "Sales", names = "Category", template = "gridon")
    fig.update_traces(text = filtered_df["Category"], textposition = "inside")
    st.plotly_chart(fig,use_container_width=True)

st.subheader(":point_right: Month wise Sub-Category Sales Summary")
with st.expander("Summary_Table"):
    df_sample = df[0:5][["Region","State","City","Category","Sales","Profit","Quantity"]]
    fig = ff.create_table(df_sample, colorscale = "Cividis")
    st.plotly_chart(fig, use_container_width=True)

    st.markdown("Month wise sub-Category Table")
    filtered_df["month"] = filtered_df["Order Date"].dt.month_name()
    sub_category_Year = pd.pivot_table(data = filtered_df, values = "Sales", index = ["Sub-Category"],columns = "month")
    st.write(sub_category_Year.style.background_gradient(cmap="Blues"))

data1 = px.scatter(filtered_df, x = "Sales", y = "Profit", size = "Quantity")
data1['layout'].update(title="Relationship between Sales and Profits using Scatter Plot.",
                       titlefont = dict(size=20),xaxis = dict(title="Sales",titlefont=dict(size=19)),
                       yaxis = dict(title = "Profit", titlefont = dict(size=19)))
st.plotly_chart(data1,use_container_width=True)

with st.expander("View Data"):
    st.write(filtered_df.iloc[:500,1:20:2].style.background_gradient(cmap="Oranges"))

csv = df.to_csv(index = False).encode('utf-8')
st.download_button('Download Data', data = csv, file_name = "Data.csv",mime = "text/csv")

2025-03-10 22:06:36.069 Serialization of dataframe to Arrow table was unsuccessful. Applying automatic fixes for column types to make the dataframe Arrow-compatible.
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/streamlit/dataframe_util.py", line 822, in convert_pandas_df_to_arrow_bytes
    table = pa.Table.from_pandas(df)
            ^^^^^^^^^^^^^^^^^^^^^^^^
  File "pyarrow/table.pxi", line 4751, in pyarrow.lib.Table.from_pandas
  File "/usr/local/lib/python3.11/dist-packages/pyarrow/pandas_compat.py", line 625, in dataframe_to_arrays
    arrays = [convert_column(c, f)
             ^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pyarrow/pandas_compat.py", line 625, in <listcomp>
    arrays = [convert_column(c, f)
              ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/pyarrow/pandas_compat.py", line 612, in convert_column
    raise e
  File "/usr/local/lib/python3.11/dist-packages/pyarrow/pandas_comp

False