<h3>Data loading and cleaning</h3>

In [None]:
import pandas as pd
from datasets import load_dataset
# Read the Superstore CSV (decoded as latin1) and trim stray whitespace from
# the header names so columns can be addressed by their clean labels.
df = pd.read_csv('Superstore.csv', encoding='latin1')
df.columns = [column_name.strip() for column_name in df.columns]

# Parse the order dates once, store them back as datetimes, and derive the
# full month name (e.g. "November") as its own column.
order_dates = pd.to_datetime(df["Order Date"])
df["Order Date"] = order_dates
df['Month'] = order_dates.dt.strftime('%B')

# Preview the first few rows.
df.head()

Unnamed: 0,Row ID,Order ID,Order Date,Ship Date,Ship Mode,Customer ID,Customer Name,Segment,Country,City,...,Region,Product ID,Category,Sub-Category,Product Name,Sales,Quantity,Discount,Profit,Month
0,1,CA-2016-152156,2016-11-08,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,South,FUR-BO-10001798,Furniture,Bookcases,Bush Somerset Collection Bookcase,261.96,2,0.0,41.9136,November
1,2,CA-2016-152156,2016-11-08,11/11/2016,Second Class,CG-12520,Claire Gute,Consumer,United States,Henderson,...,South,FUR-CH-10000454,Furniture,Chairs,"Hon Deluxe Fabric Upholstered Stacking Chairs,...",731.94,3,0.0,219.582,November
2,3,CA-2016-138688,2016-06-12,6/16/2016,Second Class,DV-13045,Darrin Van Huff,Corporate,United States,Los Angeles,...,West,OFF-LA-10000240,Office Supplies,Labels,Self-Adhesive Address Labels for Typewriters b...,14.62,2,0.0,6.8714,June
3,4,US-2015-108966,2015-10-11,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,South,FUR-TA-10000577,Furniture,Tables,Bretford CR4500 Series Slim Rectangular Table,957.5775,5,0.45,-383.031,October
4,5,US-2015-108966,2015-10-11,10/18/2015,Standard Class,SO-20335,Sean O'Donnell,Consumer,United States,Fort Lauderdale,...,South,OFF-ST-10000760,Office Supplies,Storage,Eldon Fold 'N Roll Cart System,22.368,2,0.2,2.5164,October


In [19]:
# Headline KPIs shown at the top of the dashboard.
total_sales = df["Sales"].sum()
total_profit = df["Profit"].sum()

# Each row is one order *line item*: the same Order ID can appear on several
# rows (one per product). Counting rows would overstate the number of orders,
# so count distinct Order IDs instead.
total_orders = df["Order ID"].nunique()

# Display the profit figure as the cell output.
total_profit


np.float64(286397.0217)

In [20]:
# Monthly sales, summed across all years and ordered January -> December.
# Grouping on the month *name* alone sorts the result alphabetically
# (April, August, December, ...), which scrambles the x-axis of a line chart.
# The calendar month number is therefore used as the leading group key (groupby
# sorts by its keys) and dropped afterwards, leaving the columns Month / Sales.
monthly_sales = (
    df.assign(month_number=df["Order Date"].dt.month)
    .groupby(["month_number", "Month"], as_index=False, sort=True)["Sales"]
    .sum()
    .drop(columns="month_number")
    .reset_index(drop=True)
)

# Top 10 sub-categories by total sales (grouped by Sub-Category, not by
# individual product).
top_products = df.groupby("Sub-Category")["Sales"].sum().nlargest(10).reset_index()

# Total sales per region
region_sales = df.groupby("Region")["Sales"].sum().reset_index()

# Total sales per product category
category_sales = df.groupby("Category")["Sales"].sum().reset_index()


In [22]:
from dash import Dash, html, dcc
import plotly.express as px

app = Dash(__name__)

# Charts: one Plotly Express figure per aggregated frame.
fig_monthly = px.line(monthly_sales, x="Month", y="Sales", title="Monthly Sales Trend")
# This frame is aggregated by Sub-Category, so the title names sub-categories
# rather than claiming to rank individual products.
fig_top_products = px.bar(top_products, x="Sub-Category", y="Sales", title="Top 10 Sub-Categories by Sales")
fig_region = px.pie(region_sales, names="Region", values="Sales", title="Sales by Region")
fig_category = px.bar(category_sales, x="Category", y="Sales", title="Sales by Category")

# Layout: page title, a centered KPI strip, then the four charts stacked
# vertically in the order they are listed.
app.layout = html.Div([
    html.H1("🛍️ E-Commerce Sales Dashboard", style={"textAlign": "center"}),

    html.Div([
        html.Div([
            html.H4(f"Total Sales: ${total_sales:,.0f}"),
            html.H4(f"Total Profit: ${total_profit:,.0f}"),
            html.H4(f"Total Orders: {total_orders:,}")
        ], style={"textAlign": "center", "padding": "10px"})
    ]),

    dcc.Graph(figure=fig_monthly),
    dcc.Graph(figure=fig_top_products),
    dcc.Graph(figure=fig_region),
    dcc.Graph(figure=fig_category)
])

# Start the Dash server when this code is executed directly.
# NOTE(review): debug=True is intended for local development; confirm it is
# switched off before the dashboard is shared or deployed.
if __name__ == "__main__":
    app.run(debug=True)
