# Module 4.2

## Imports

In [3]:
import pandas as pd
import plotly.express as px

## Load Data

In [4]:
# Load the sales sample. `Date` is parsed into real datetimes (instead of being
# left as plain strings) so that grouping by date and the time axis of the line
# charts work on timestamps rather than relying on string ordering/inference.
df = pd.read_csv("csvsample.csv", parse_dates=["Date"])

# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,Date,Category,Product,Region,Sales_Rep,Units_Sold,Unit_Price,Total_Sales,Cost_Per_Unit,Customer_Age,Customer_Satisfaction
0,2025-01-01,Electronics,Laptop,North,Alice,5,899.99,4499.95,650.0,34,4.5
1,2025-01-01,Electronics,Phone,South,Bob,8,599.99,4799.92,420.0,28,4.2
2,2025-01-01,Clothing,Jacket,East,Carol,12,79.99,959.88,45.0,42,4.8
3,2025-01-01,Furniture,Chair,West,David,6,149.99,899.94,85.0,51,3.9
4,2025-01-02,Electronics,Tablet,North,Alice,10,399.99,3999.9,280.0,29,4.6


## Let's Plot

### Bar Chart

In [9]:
# Sum Total_Sales within each Product, then flatten the result back into a
# two-column table (Product, Total_Sales) that plotly can consume directly.
sales_by_product = (
    df
    .groupby('Product')['Total_Sales']
    .sum()
    .reset_index()  # move Product from the index back to a regular column
)

sales_by_product

Unnamed: 0,Product,Total_Sales
0,Bookshelf,3239.82
1,Chair,4949.67
2,Desk,6599.78
3,Dress,4599.54
4,Headphones,9118.86
5,Jacket,3759.53
6,Laptop,37799.58
7,Pants,4679.22
8,Phone,40199.33
9,Shirt,3268.91


In [13]:
# Bar chart of summed sales per product; each product gets its own bar colour.
fig = px.bar(
    sales_by_product,
    x='Product',
    y='Total_Sales',
    title='Total Sales by Item',
    color='Product',
    # Show a readable y-axis label with units instead of the raw column name,
    # consistent with the category/region bar chart in this notebook.
    labels={'Total_Sales': 'Total Sales ($)'},
)

fig.show()

In [14]:
# Sum Total_Sales for every (Category, Region) pair present in the data and
# return it as a flat table with one row per pair.
grouped = (
    df
    .groupby(['Category', 'Region'])['Total_Sales']
    .sum()
    .reset_index()  # Category and Region become ordinary columns again
)

grouped

Unnamed: 0,Category,Region,Total_Sales
0,Clothing,East,8038.84
1,Clothing,North,6098.92
2,Clothing,South,1349.85
3,Clothing,West,6758.93
4,Electronics,North,43799.06
5,Electronics,South,33999.08
6,Electronics,West,36518.95
7,Furniture,East,10049.57
8,Furniture,South,8639.64
9,Furniture,West,899.94


In [18]:
# Grouped bar chart: categories along the x-axis, one coloured bar per region
# within each category.
fig = px.bar(
    data_frame=grouped,
    title='Total Sales by Category and Region',
    x='Category',
    y='Total_Sales',
    color='Region',
    labels={'Total_Sales': 'Total Sales ($)'},
    barmode='group',  # side-by-side bars; switch to 'stack' for stacked bars
)

fig.show()

### Line Chart

In [None]:
# Collapse all rows sharing a Date into a single summed Total_Sales value,
# giving one row per date for the trend line.
daily_sales = (
    df
    .groupby('Date')['Total_Sales']
    .sum()
    .reset_index()  # keep Date as a column so it can be used as the x-axis
)

daily_sales

In [None]:
# Line chart of summed sales per date.
fig = px.line(
    data_frame=daily_sales,
    title='Sales Trend',
    x='Date',
    y='Total_Sales',
    markers=True,  # draw a marker at each data point on the line
    labels={'Date': 'Date', 'Total_Sales': 'Total Sales ($)'},
)

fig.show()

In [None]:
# Sum Total_Sales for each (Date, Category) pair so that every category can be
# drawn as its own line over time.
daily_sales_by_category = (
    df
    .groupby(['Date', 'Category'])['Total_Sales']
    .sum()
    .reset_index()  # Date and Category become ordinary columns again
)

daily_sales_by_category

In [None]:
# Multi-line chart: summed sales per date, split into one line per category.
fig = px.line(
    data_frame=daily_sales_by_category,
    title='Sales Over Time by Category',
    x='Date',
    y='Total_Sales',
    color='Category',  # each distinct category gets its own coloured line
    markers=True,
    labels={'Date': 'Date', 'Total_Sales': 'Total Sales ($)'},
)

fig.show()

### Scatter Plot

In [None]:
# Scatter plot of the raw rows: customer age against satisfaction score.
fig = px.scatter(
    data_frame=df,
    title='Customer Age vs Customer Satisfaction',
    x='Customer_Age',
    y='Customer_Satisfaction',
    color='Category',  # points are coloured by their category
    hover_data=['Product'],  # extra field shown in the hover tooltip
    labels={
        'Customer_Satisfaction': 'Customer Satisfaction Score',
        'Customer_Age': 'Customer Age',
    },
)

fig.show()

### Pie Chart

In [5]:
# Sum Total_Sales per sales representative; each resulting row becomes one
# slice of the pie chart.
sales_by_rep = (
    df
    .groupby('Sales_Rep')['Total_Sales']
    .sum()
    .reset_index()  # keep Sales_Rep as a column so it can label the slices
)

sales_by_rep

Unnamed: 0,Sales_Rep,Total_Sales
0,Alice,49897.98
1,Bob,43988.57
2,Carol,18088.41
3,David,44177.82


In [8]:
# Pie chart of summed sales per representative: one slice per row of the table.
fig = px.pie(
    data_frame=sales_by_rep,
    title='Total Sales by Sales Representative',
    names='Sales_Rep',  # slice labels
    values='Total_Sales',  # slice sizes
    hover_data=['Total_Sales'],  # extra field shown in the hover tooltip
    hole=0,  # 0 gives a full pie; a larger value (e.g., 0.4) makes a donut
)

fig.show()