In [113]:
import pandas as pd
import numpy as np

from bokeh.io import output_notebook, show, output_file, save
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, Legend, LegendItem
from bokeh.transform import factor_cmap, linear_cmap
from bokeh.palettes import Category10, Category20, Viridis256, BrBG, RdBu
from bokeh.layouts import row, gridplot

output_notebook()

In [80]:
# Load the sales export; the relative path is resolved against the notebook's working directory.
df = pd.read_csv("sales_data.csv")
df.head()

Unnamed: 0,Retailer,Retailer ID,Invoice Date,Region,State,City,Product,Price per Unit,Units Sold,Total Sales,Operating Profit,Sales Method
0,Walmart,1128299,6/17/2021,Southeast,Florida,Orlando,Women's Apparel,$103.00,218,2245,"$1,257",Online
1,West Gear,1128299,7/16/2021,South,Louisiana,New Orleans,Women's Apparel,$103.00,163,1679,$806,Online
2,Sports Direct,1197831,8/25/2021,South,Alabama,Birmingham,Men's Street Footwear,$10.00,700,7000,"$3,150",Outlet
3,Sports Direct,1197831,8/27/2021,South,Alabama,Birmingham,Women's Street Footwear,$15.00,575,8625,"$3,881",Outlet
4,Sports Direct,1197831,8/21/2021,South,Alabama,Birmingham,Women's Street Footwear,$15.00,475,7125,"$3,206",Outlet


In [81]:
# (row count, column count) of the loaded frame
df.shape

(9641, 12)

In [82]:
# Summarise only the object-dtype columns: count, number of unique values, most frequent value and its frequency.
df.describe(include = object)

Unnamed: 0,Retailer,Invoice Date,Region,State,City,Product,Price per Unit,Units Sold,Total Sales,Operating Profit,Sales Method
count,9641,9641,9641,9641,9641,9641,9639,9641,9641,9641,9641
unique,6,724,5,50,52,7,94,361,1710,1530,3
top,Foot Locker,1/17/2021,West,California,Portland,Men's Street Footwear,$50.00,225,10000,"$6,300",Online
freq,2634,77,2446,430,360,1610,674,207,66,39,4889


In [83]:
# Missing-value count per column
df.isna().sum()

Retailer            0
Retailer ID         0
Invoice Date        0
Region              0
State               0
City                0
Product             0
Price per Unit      2
Units Sold          0
Total Sales         0
Operating Profit    0
Sales Method        0
dtype: int64

In [84]:
# Drop the rows that have no unit price. The result is reassigned rather than
# mutated in place, and the assert makes the "no nulls left" check explicit.
df = df.dropna(subset=["Price per Unit"])
assert df["Price per Unit"].notna().all(), "Price per Unit still contains missing values"
df.shape

(9639, 12)

In [85]:
df["Invoice Date"] = pd.to_datetime(df["Invoice Date"])

# "Total Sales" is loaded as text, so at least some entries are not plain numbers.
# Presumably these are currency-formatted (e.g. "$1,257", like "Operating Profit") - TODO confirm.
# Strip "$", thousands separators and whitespace first so such values are parsed
# instead of being silently coerced to NaN and dropped from every later sum.
total_sales_text = df["Total Sales"].astype(str).str.replace(r"[$,\s]", "", regex=True)
df["Total Sales"] = pd.to_numeric(total_sales_text, errors="coerce")
df.dtypes

Retailer                    object
Retailer ID                  int64
Invoice Date        datetime64[ns]
Region                      object
State                       object
City                        object
Product                     object
Price per Unit              object
Units Sold                  object
Total Sales                float64
Operating Profit            object
Sales Method                object
dtype: object

## Monthly sales trend

In [105]:
# Sum sales per calendar month. Each monthly period is converted back to a
# timestamp so Bokeh can place it on a datetime axis.
monthly = (df
           .assign(month=df["Invoice Date"].dt.to_period("M").dt.to_timestamp())
           .groupby("month", as_index=False)["Total Sales"].sum()
          )

source = ColumnDataSource(monthly)

p = figure(
    x_axis_type="datetime",
    height=320, width=850,
    title="Monthly Sales",
    tools="pan,wheel_zoom,box_zoom,reset,save"
)

# Shaded region under the curve
p.varea(
    x="month",
    y1=0,
    y2="Total Sales",
    source=source,
    fill_color="dodgerblue",
    fill_alpha=0.25
)

# Line on top; the renderer is kept so the hover tool can be bound to it
trend_line = p.line(
    x="month",
    y="Total Sales",
    source=source,
    line_width=3,
    color="dodgerblue"
)

p.scatter("month", "Total Sales", source=source, marker="circle", size=5, color="dodgerblue")

# Hover tooltip, bound to the line only. Left on the default (all renderers),
# the line and the markers would each show their own tooltip for the same month.
p.add_tools(HoverTool(
    renderers=[trend_line],
    tooltips=[
        ("Month", "@month{%b %Y}"),
        ("Total Sales", "@{Total Sales}{0,0}")
    ],
    formatters={"@month": "datetime"},
    mode="vline"
))

# Light styling: faint grid lines, no plot outline
p.grid.grid_line_alpha = 0.2
p.outline_line_alpha = 0.0

show(p)


## Sales by Region and Retailer

In [112]:

def make_donut(df, group_col, value_col="Total Sales", top_n=6, title=None, palette_name="BrBG"):
    """Build a Bokeh donut chart of ``value_col`` summed per ``group_col``.

    The ``top_n`` largest groups each get their own slice; any remaining
    groups are merged into a single "Other" slice. Every slice is drawn by
    its own renderer so that clicking its legend entry hides only that slice.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``group_col`` and ``value_col``.
    group_col : str
        Column whose distinct values become the slices.
    value_col : str, default "Total Sales"
        Column summed within each group (expected to be numeric).
    top_n : int, default 6
        Number of groups shown individually. Capped at 10 so that, together
        with the "Other" slice, the chart never has more than 11 slices.
    title : str, optional
        Figure title; defaults to "<value_col> by <group_col>".
    palette_name : {"BrBG", "RdBu"}, default "BrBG"
        Name of the Bokeh palette family used to colour the slices.

    Returns
    -------
    The configured Bokeh figure (not yet shown).

    Raises
    ------
    KeyError
        If ``palette_name`` is not one of the supported names.
    """
    # The palette lookup below only uses sizes 3..11, so 11 is the slice limit.
    max_slices = 11
    top_n = min(top_n, max_slices - 1)

    totals = (df.groupby(group_col, as_index=False)[value_col].sum()
                .sort_values(value_col, ascending=False))

    if len(totals) > top_n:
        other_sum = totals.iloc[top_n:][value_col].sum()
        other_row = pd.DataFrame([{group_col: "Other", value_col: other_sum}])
        plot_df = pd.concat([totals.head(top_n), other_row], ignore_index=True)
    else:
        plot_df = totals.reset_index(drop=True)

    total = plot_df[value_col].sum()
    if total:
        plot_df["pct"] = plot_df[value_col] / total
    else:
        # A zero total would turn every share (and angle) into NaN;
        # fall back to empty slices with a 0% share instead.
        plot_df["pct"] = 0.0
    plot_df["angle"] = plot_df["pct"] * 2*np.pi
    plot_df["end_angle"] = plot_df["angle"].cumsum()
    plot_df["start_angle"] = plot_df["end_angle"] - plot_df["angle"]

    palettes = {"BrBG": BrBG, "RdBu": RdBu}
    pal = palettes[palette_name]

    n = len(plot_df)
    # Use the smallest available palette size (minimum 3) that covers n slices.
    size = min(max_slices, max(3, n))
    palette = pal[size][:n]
    plot_df["color"] = list(palette)

    p = figure(
        height=360, width=520,
        title=title or f"{value_col} by {group_col}",
        tools="pan,wheel_zoom,box_zoom,reset,save,hover",
        toolbar_location="right",
        x_range=(-1.7, 1.7), y_range=(-1.7, 1.7)
    )

    wedge_renderers = []
    legend_items = []

    for i in range(n):
        # A single-row slice keeps the column dtypes intact.
        src = ColumnDataSource(plot_df.iloc[[i]])

        wedge = p.annular_wedge(
            x=0, y=0,
            inner_radius=0.55, outer_radius=0.9,
            start_angle="start_angle", end_angle="end_angle",
            color="color",
            line_color="white", line_width=2,
            source=src
        )
        wedge_renderers.append(wedge)

        # Marker placed far outside the fixed ranges: it is never visible in
        # the plot and exists only to give the legend entry a round bullet.
        bullet = p.scatter(x=[999], y=[999], marker="circle", size=10,
                           color=palette[i], alpha=1.0)

        legend_items.append(LegendItem(label=str(plot_df[group_col].iloc[i]),
                                       renderers=[wedge, bullet]))

    # Tooltips only on the wedges, not on the off-screen legend bullets.
    p.hover.renderers = wedge_renderers
    p.hover.tooltips = [
        (group_col, f"@{{{group_col}}}"),
        (value_col, f"@{{{value_col}}}{{0,0}}"),
        ("Share", "@pct{0.0%}")
    ]

    legend = Legend(items=legend_items, orientation="horizontal", location="center")
    legend.click_policy = "hide"
    p.add_layout(legend, "below")

    p.axis.visible = False
    p.grid.visible = False
    p.outline_line_alpha = 0

    return p

# One donut per grouping column, both built from the same cleaned frame.
p_region = make_donut(
    df,
    group_col="Region",
    top_n=6,
    title="Total Sales by Region",
)
p_retailer = make_donut(
    df,
    group_col="Retailer",
    top_n=10,
    title="Total Sales by Retailer",
    palette_name="RdBu",
)

# Lay the two charts out side by side.
donut_row = row(p_region, p_retailer)
show(donut_row)