In [49]:
import pandas as pd
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go


We begin by loading the Adidas sales data.

Total Sales by Retailer

In [50]:
# Load the Adidas sales workbook into the main DataFrame and preview it.
df = pd.read_excel(io='data/Adidas.xlsx')
df.head(n=5)

Unnamed: 0,Retailer,RetailerID,InvoiceDate,Region,State,City,Product,PriceperUnit,UnitsSold,TotalSales,OperatingProfit,OperatingMargin,SalesMethod
0,Foot Locker,1185732,2020-02-03,Northeast,New York,New York,Women's Athletic Footwear,45.0,825,371250.0,129937.5,0.35,Outlet
1,Foot Locker,1185732,2020-02-04,Northeast,New York,New York,Men's Apparel,60.0,825,495000.0,148500.0,0.3,Outlet
2,Foot Locker,1185732,2020-02-05,Northeast,New York,New York,Women's Apparel,50.0,950,475000.0,118750.0,0.25,Outlet
3,Foot Locker,1185732,2020-02-06,Northeast,New York,New York,Men's Street Footwear,60.0,1220,732000.0,366000.0,0.5,Outlet
4,Foot Locker,1185732,2020-02-07,Northeast,New York,New York,Men's Athletic Footwear,55.0,925,508750.0,152625.0,0.3,Outlet


In [51]:
# Total sales per retailer, printed in reverse-alphabetical retailer order.
# There is one row per retailer, so the row count is the number of retailers.
# (A value sort before sort_index would be discarded: sort_index alone fixes
# the final order because every retailer name is unique.)
print(df.groupby('Retailer')['TotalSales'].sum().sort_index(ascending=False))


Retailer
West Gear        242964333.0
Walmart           74558410.0
Sports Direct    182470997.0
Kohl's           102114753.0
Foot Locker      219877220.0
Amazon            77698912.0
Name: TotalSales, dtype: float64


In [52]:
# Graph logic
# Total sales per retailer. reset_index() turns 'Retailer' back into a regular
# column (the index becomes 0..n-1), so the plots below refer to it by name.
total_sales = df.groupby('Retailer')['TotalSales'].sum().reset_index()
colors = ['blue']

# Interactive bar chart.
# x and color both come from the 'Retailer' column of the aggregated frame so
# that every bar and legend entry carries the name of the retailer whose total
# it shows. (Passing df['Retailer'].unique() would use order of first
# appearance, which does not match the alphabetical order of total_sales.)
fig = px.bar(
    data_frame=total_sales,
    x='Retailer',
    y='TotalSales',
    color='Retailer',
    color_discrete_sequence=colors,
)

# Graph title and settings
fig.update_layout(
    yaxis_title='Total Sales',
    xaxis_title='Retailers',
)

# Static PNG copy of the chart.
# plt.savefig only writes what was drawn with matplotlib; the plotly figure is
# invisible to it, so the same bars are drawn on a matplotlib Axes before saving.
png_fig, png_ax = plt.subplots(figsize=(7, 11))
png_ax.bar(total_sales['Retailer'], total_sales['TotalSales'], color=colors[0])
png_ax.set_xlabel('Retailers')
png_ax.set_ylabel('Total Sales')
png_ax.tick_params(axis='x', rotation=45)
png_fig.tight_layout()
png_fig.savefig('totalSalesofRetailers.png', dpi=300)
plt.close(png_fig)  # the PNG is written to disk; do not also render it inline

fig.show()
total_sales

Unnamed: 0,Retailer,TotalSales
0,Amazon,77698912.0
1,Foot Locker,219877220.0
2,Kohl's,102114753.0
3,Sports Direct,182470997.0
4,Walmart,74558410.0
5,West Gear,242964333.0


<Figure size 700x1100 with 0 Axes>

Total Sales over Time

In [53]:
# Preview the first ten rows, including the InvoiceDate column used next.
df.head(n=10)

Unnamed: 0,Retailer,RetailerID,InvoiceDate,Region,State,City,Product,PriceperUnit,UnitsSold,TotalSales,OperatingProfit,OperatingMargin,SalesMethod
0,Foot Locker,1185732,2020-02-03,Northeast,New York,New York,Women's Athletic Footwear,45.0,825,371250.0,129937.5,0.35,Outlet
1,Foot Locker,1185732,2020-02-04,Northeast,New York,New York,Men's Apparel,60.0,825,495000.0,148500.0,0.3,Outlet
2,Foot Locker,1185732,2020-02-05,Northeast,New York,New York,Women's Apparel,50.0,950,475000.0,118750.0,0.25,Outlet
3,Foot Locker,1185732,2020-02-06,Northeast,New York,New York,Men's Street Footwear,60.0,1220,732000.0,366000.0,0.5,Outlet
4,Foot Locker,1185732,2020-02-07,Northeast,New York,New York,Men's Athletic Footwear,55.0,925,508750.0,152625.0,0.3,Outlet
5,Foot Locker,1185732,2020-02-08,Northeast,New York,New York,Women's Street Footwear,50.0,900,450000.0,157500.0,0.35,Outlet
6,Foot Locker,1185732,2020-01-01,Northeast,New York,New York,Men's Street Footwear,50.0,1200,382500.0,133800.0,0.5,In-store
7,Foot Locker,1185732,2020-01-02,Northeast,New York,New York,Men's Athletic Footwear,50.0,1000,500000.0,150000.0,0.3,In-store
8,Foot Locker,1185732,2020-01-03,Northeast,New York,New York,Women's Street Footwear,40.0,1000,400000.0,140000.0,0.35,In-store
9,Foot Locker,1185732,2020-01-04,Northeast,New York,New York,Women's Athletic Footwear,45.0,850,382500.0,133875.0,0.35,In-store


In [54]:
# Graph logic

# 1. Parse 'InvoiceDate' and collapse it to a 'YYYY-MM' month key.
# NOTE: this overwrites df['InvoiceDate'] in place, so every later cell sees
# month strings instead of full dates.
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate']).dt.strftime('%Y-%m')

# 2. Total sales per month. Zero-padded 'YYYY-MM' strings sort chronologically,
# so the grouped result is already in time order.
total_sales_over_time = df.groupby('InvoiceDate')['TotalSales'].sum().reset_index()

# 3. Interactive line chart
fig = px.line(
    data_frame=total_sales_over_time,
    x='InvoiceDate',
    y='TotalSales',
    title='Total Sales Over Time',
)

# 4. Axis labels; the x axis has one point per month, not per year
fig.update_layout(
    xaxis_title='Month',
    yaxis_title='Total Sales',
    xaxis_tickformat="%Y-%m",
)

fig.show()

# 5. Static PNG copy of the chart.
# plt.savefig only writes what was drawn with matplotlib; the plotly figure is
# invisible to it, so the same line is drawn on a matplotlib Axes before saving.
png_fig, png_ax = plt.subplots()
png_ax.plot(total_sales_over_time['InvoiceDate'], total_sales_over_time['TotalSales'])
png_ax.set_title('Total Sales Over Time')
png_ax.set_xlabel('Month')
png_ax.set_ylabel('Total Sales')
png_ax.tick_params(axis='x', rotation=45)
png_fig.tight_layout()
png_fig.savefig('overtime.png', dpi=300)
plt.close(png_fig)  # the PNG is written to disk; do not also render it inline

total_sales_over_time

Unnamed: 0,InvoiceDate,TotalSales
0,2020-01,16036246.0
1,2020-02,14997988.0
2,2020-03,17660577.0
3,2020-04,24607006.0
4,2020-05,16918014.0
5,2020-06,8829819.0
6,2020-07,17146013.0
7,2020-08,19877980.0
8,2020-09,18304436.0
9,2020-10,10836269.0


<Figure size 640x480 with 0 Axes>

Total Sales and Units Sold by State

In [55]:
# Preview the frame again; InvoiceDate now holds 'YYYY-MM' strings.
df.head(n=5)

Unnamed: 0,Retailer,RetailerID,InvoiceDate,Region,State,City,Product,PriceperUnit,UnitsSold,TotalSales,OperatingProfit,OperatingMargin,SalesMethod
0,Foot Locker,1185732,2020-02,Northeast,New York,New York,Women's Athletic Footwear,45.0,825,371250.0,129937.5,0.35,Outlet
1,Foot Locker,1185732,2020-02,Northeast,New York,New York,Men's Apparel,60.0,825,495000.0,148500.0,0.3,Outlet
2,Foot Locker,1185732,2020-02,Northeast,New York,New York,Women's Apparel,50.0,950,475000.0,118750.0,0.25,Outlet
3,Foot Locker,1185732,2020-02,Northeast,New York,New York,Men's Street Footwear,60.0,1220,732000.0,366000.0,0.5,Outlet
4,Foot Locker,1185732,2020-02,Northeast,New York,New York,Men's Athletic Footwear,55.0,925,508750.0,152625.0,0.3,Outlet


In [56]:
##########################################
# Graph logic
# Sum both metrics per state in a single pass; the result is indexed by
# 'State' in alphabetical order with columns 'TotalSales' and 'UnitsSold'.
grand_df = df.groupby('State')[['TotalSales', 'UnitsSold']].sum().sort_index()

# Plotly Express looks columns up by name, so expose 'State' as a regular
# column. Each metric keeps its own column name, which is what the hover
# labels display.
state_totals = grand_df.reset_index()

###############################
# Create Plot 1: total sales per state (bar)
color = ['blue']
fig1 = px.bar(
    data_frame=state_totals,
    x='State',
    y='TotalSales',
    color_discrete_sequence=color,
)

# Create Plot 2: units sold per state (line)
fig2 = px.line(
    data_frame=state_totals,
    x='State',
    y='UnitsSold',
    color_discrete_sequence=color,
)

# Create Plot 3: both metrics in one figure, each on its own y-axis
sales_trace = go.Bar(
    x=grand_df.index,
    y=grand_df['TotalSales'],
    name='Total Sales',
    marker_color='royalblue',
)

units_trace = go.Scatter(
    x=grand_df.index,
    y=grand_df['UnitsSold'],
    name='Units Sold',
    line_color='orange',
    marker_opacity=0,  # hide markers for a cleaner line
    yaxis='y2',        # plotted against the secondary (right-hand) axis
)

fig3 = go.Figure(data=[sales_trace, units_trace])

##############################
# Customize Plots
fig1.update_layout(
    xaxis_title='State',
    yaxis_title='Total Sales',
)

fig2.update_layout(
    xaxis_title='State',
    yaxis_title='Units Sold',
)

fig3.update_layout(
    title='Total Sales and Units Sold by State',
    xaxis_title='State',
    xaxis_tickangle=-45,
    yaxis_title='Total Sales (Millions)',  # label for primary y-axis
    yaxis2=dict(
        title='Units Sold (Thousands)',  # label for secondary y-axis
        overlaying='y',
        side='right',
    ),
)

##############################
# Static PNG copy of the combined chart.
# plt.savefig only writes what was drawn with matplotlib; the plotly figures
# are invisible to it, so the combined chart is drawn on matplotlib Axes
# (bars on the left axis, line on a twin right axis) before saving.
png_fig, sales_ax = plt.subplots(figsize=(11, 15))
sales_ax.bar(grand_df.index, grand_df['TotalSales'], color='royalblue')
units_ax = sales_ax.twinx()
units_ax.plot(grand_df.index, grand_df['UnitsSold'], color='orange')
sales_ax.set_title('Total Sales and Units Sold by State')
sales_ax.set_xlabel('State')
sales_ax.set_ylabel('Total Sales')
units_ax.set_ylabel('Units Sold')
sales_ax.tick_params(axis='x', rotation=90)
png_fig.tight_layout()
png_fig.savefig('unitsSoldByState.png', dpi=300)
plt.close(png_fig)  # the PNG is written to disk; do not also render it inline

# Show Graph
fig1.show()
fig2.show()
fig3.show()
grand_df.head(5)

Unnamed: 0_level_0,TotalSales,UnitsSold
State,Unnamed: 1_level_1,Unnamed: 2_level_1
Alabama,17633424.0,63327
Alaska,14753103.0,30815
Arizona,15782221.0,46919
Arkansas,12639347.0,48468
California,60174133.0,163284


<Figure size 1100x1500 with 0 Axes>

## Total Sales by Region and City (Treemap)

In [57]:
# Preview the first ten rows, including the Region and City columns used next.
df.head(n=10)

Unnamed: 0,Retailer,RetailerID,InvoiceDate,Region,State,City,Product,PriceperUnit,UnitsSold,TotalSales,OperatingProfit,OperatingMargin,SalesMethod
0,Foot Locker,1185732,2020-02,Northeast,New York,New York,Women's Athletic Footwear,45.0,825,371250.0,129937.5,0.35,Outlet
1,Foot Locker,1185732,2020-02,Northeast,New York,New York,Men's Apparel,60.0,825,495000.0,148500.0,0.3,Outlet
2,Foot Locker,1185732,2020-02,Northeast,New York,New York,Women's Apparel,50.0,950,475000.0,118750.0,0.25,Outlet
3,Foot Locker,1185732,2020-02,Northeast,New York,New York,Men's Street Footwear,60.0,1220,732000.0,366000.0,0.5,Outlet
4,Foot Locker,1185732,2020-02,Northeast,New York,New York,Men's Athletic Footwear,55.0,925,508750.0,152625.0,0.3,Outlet
5,Foot Locker,1185732,2020-02,Northeast,New York,New York,Women's Street Footwear,50.0,900,450000.0,157500.0,0.35,Outlet
6,Foot Locker,1185732,2020-01,Northeast,New York,New York,Men's Street Footwear,50.0,1200,382500.0,133800.0,0.5,In-store
7,Foot Locker,1185732,2020-01,Northeast,New York,New York,Men's Athletic Footwear,50.0,1000,500000.0,150000.0,0.3,In-store
8,Foot Locker,1185732,2020-01,Northeast,New York,New York,Women's Street Footwear,40.0,1000,400000.0,140000.0,0.35,In-store
9,Foot Locker,1185732,2020-01,Northeast,New York,New York,Women's Athletic Footwear,45.0,850,382500.0,133875.0,0.35,In-store


In [58]:
# Distinct region labels, in order of first appearance.
df['Region'].unique()

array(['Northeast', 'South', 'West', 'Midwest', 'Southeast'], dtype=object)

In [59]:
# Treemap of total sales: regions are the outer tiles, cities nest inside them.
# plotly.express is already imported as `px` in the notebook's import cell.
# The tooltip's bold name slot (hover_name) is left unset: feeding it the
# numeric 'TotalSales' column would put a raw number where a name belongs, and
# `values` already adds TotalSales to the tooltip.
fig = px.treemap(
    df,
    path=['Region', 'City'],
    values='TotalSales',
    color='City',  # color tiles by city
    title='Total Sales by Region and City',
    color_discrete_sequence=['pink', 'red', 'green', 'orange', 'blue', 'purple'],  # custom palette
)
fig.update_traces(textinfo="label+value")  # print the label and summed value on each tile
fig.update_layout(margin=dict(t=50, l=50, r=50, b=50))  # adjust margins for better readability

fig.show()