# Sales analysis with Superstore Sales Data
The data is stored in 10 separate files. This analysis compiles them into a single dataset and uses plotly and ipywidgets to create interactive visualizations.

## Main steps
* Compiling data files
* Analyze overall sales trend and average monthly sales
* Look into sales by market, segment, and category
* Investigate the sales per sub category in each region
* Compare the % sales contribution of the sub categories in each region against global trend
* Explore the top 10 best selling products & the most preferred shipping modes

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import plotly.graph_objects as go
import plotly.express as px
from plotly.offline import plot
import plotly.io as pio
# Render plotly figures as static SVG unless a call names its own renderer
# (e.g. renderer='browser').
pio.renderers.default='svg'

In [None]:
#Compile all files into one dataset
# NOTE(review): this absolute path only exists on the original author's
# machine -- point it at your own copy of the project before running.
os.chdir('C:\\Users\\ASUS\\OneDrive\\Documents\\Programming\\Python\\practice\\Sales-Analysis-master')

# Build paths with os.path.join rather than gluing strings together with a
# hard-coded Windows separator.
data_dir=os.path.join(".","Sales data")

# os.listdir returns entries in arbitrary order and also lists
# sub-directories (e.g. notebook checkpoint folders). Keep regular files only
# and sort them so the row order of the compiled dataset is reproducible.
files=sorted(file for file in os.listdir(data_dir)
             if os.path.isfile(os.path.join(data_dir,file)))
if not files:
    raise FileNotFoundError("No data files found in "+data_dir)

# Read every file, then concatenate once: calling pd.concat inside the loop
# re-copies the accumulated frame on every iteration. ignore_index gives the
# combined frame unique row labels instead of repeating each file's 0..n-1.
data=pd.concat([pd.read_csv(os.path.join(data_dir,file)) for file in files],
               ignore_index=True)

# Only the last expression of a cell is rendered, so the summary statistics
# are printed explicitly.
print(data.describe())
data.head()

In [None]:
#Sales trend
# Parse the order dates once, then derive a sortable 'YYYY-MM' key per row
# with the vectorised .dt accessor (no per-row Python lambda).
data['order_date']=pd.to_datetime(data.order_date)
data['order_mm_yy']=data.order_date.dt.strftime('%Y-%m')
monthly_sales=data.groupby('order_mm_yy').sales.sum().reset_index()

plt.figure(figsize=(15,6))
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts the data
# vectors positionally. The former palette argument is dropped because a
# palette only applies when a hue variable is mapped, and none is used here.
sns.lineplot(data=monthly_sales, x='order_mm_yy', y='sales')
plt.title('Sales trend')
plt.xlabel('month-year')
plt.ylabel('total sales in USD')
plt.xticks(rotation=45,size=8)
plt.show()

In [None]:
#Sales by month, averaged for whole period of analysis
data['order_month']=data.order_date.dt.month

# Total sales per calendar month divided by the number of distinct years in
# the data gives the average sales for that month.
n_years=data.order_date.dt.year.nunique()
avg_monthly_sales=data.groupby('order_month').sales.sum()/n_years

# The aggregated index already holds the sorted month numbers, so there is no
# need to iterate a second groupby just to collect them.
months=avg_monthly_sales.index.tolist()

#line plot
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts the data
# vectors positionally.
sns.lineplot(x=months, y=avg_monthly_sales.values)
plt.xlabel('month')
plt.ylabel('average sales in USD')
plt.xticks(months)
plt.grid()
plt.show()

In [None]:
#Sales by market, segment, and category
#ref: https://towardsdatascience.com/bring-your-jupyter-notebook-to-life-with-interactive-widgets-bc12e03f0916
from ipywidgets import interact, interactive, fixed, interact_manual, Layout
import ipywidgets as widgets

def bar_plot(var, data):
    """Draw a bar chart of total sales for each value of column ``var``.

    Parameters
    ----------
    var : str
        Name of the column in ``data`` to group by; also used as the title.
    data : pandas.DataFrame
        Frame holding a ``sales`` column and the ``var`` column.

    The bars are ordered from the highest to the lowest total. The chart is
    drawn on a new matplotlib figure; nothing is returned.
    """
    # Aggregate once and reuse the result for both axes instead of running
    # the same groupby/sort separately for x and for y.
    totals = data.groupby(var).sales.sum().sort_values(ascending=False)

    plt.figure(figsize = (10,8))
    sns.barplot(x=totals.index, y=totals.values)
    plt.title(var, size = 15)

def inter_plot(x):
    """Callback for ``interact``: plot total sales grouped by column ``x``.

    Delegates to ``bar_plot`` with the module-level ``data`` frame and hands
    back whatever ``bar_plot`` returns.
    """
    chart = bar_plot(x, data)
    return chart

### Dropdown to pick the grouping column ###
# The options are categorical columns to group sales by, so the label says
# that rather than the former "Numeric Variable:", which was misleading.
widget_plot=widgets.Dropdown(
    options=['segment','market','category'],
    description="Group sales by:",
    style = {'description_width': 'initial'}
)
# Redraw the bar chart whenever the selection changes; the trailing semicolon
# keeps the notebook from echoing the interact object.
interact(inter_plot, x=widget_plot);

In [None]:
#Best selling sub categories by region
def _sub_category_sales(frame):
    """Return total sales per sub category of ``frame``, largest first."""
    return frame.groupby('sub_category').sales.sum().sort_values(ascending=False)


def _restyle_button(label, sales):
    """Build a dropdown entry that replaces the data of trace 0 with ``sales``."""
    return dict(method='restyle',
                label=label,
                visible=True,
                args=[{'y':[sales.values],
                       'x':[sales.index],
                       'type':'bar'}, [0]])


# Global totals: used for the initial bars and for the 'All regions' entry,
# so they are computed once instead of once per use.
sales_region=_sub_category_sales(data)

#make the general figure
fig = go.Figure(layout=go.Layout(title= go.layout.Title(text="Sales in each category by region"),
                                 xaxis=dict(title="Sub category"),
                                 yaxis=dict(title="Total sales USD")))

fig.add_trace(go.Bar(name= 'Region Selection', x= sales_region.index, y=sales_region.values))

#create one button for the global view and one per region
buttons = [_restyle_button('All regions', sales_region)]
for i in data.region.unique():
    # Aggregate each region once and reuse the result for both x and y.
    buttons.append(_restyle_button(i, _sub_category_sales(data[data.region==i])))

#format button
updatemenus = [
    dict(buttons=buttons,
         direction="down",
         pad={"r": 10, "t": 10},
         showactive=True,
         x=0.05,
         xanchor="left",
         y=1.06,
         yanchor="top")]

#add dropdown menu and its "Region" caption
fig.update_layout(
    showlegend=False,
    updatemenus=updatemenus,
    annotations=[
        dict(text="Region", x=0.01, xref="paper", y=1.04, yref="paper",
             align="left", showarrow=False)])

# Open in the browser so the dropdown stays interactive.
pio.show(fig,renderer='browser')

In [None]:
#Compare the sales contribution of the sub categories by region
def _sales_share(frame):
    """Return each sub category's % share of ``frame``'s total sales, largest first."""
    share=frame.groupby('sub_category').sales.sum()/frame.sales.sum()*100
    return share.sort_values(ascending=False)


def _share_button(label, share):
    """Build a dropdown entry that replaces the data of trace 0 with ``share``."""
    return dict(method='restyle',
                label=label,
                visible=True,
                args=[{'y':[share.values],
                       'x':[share.index],
                       'type':'bar'}, [0]])


# Global shares: drawn twice below, once as the switchable 'Regional' trace
# and once as the fixed 'All Data' reference.
sales_region=_sales_share(data)

fig = go.Figure(layout=go.Layout(title= go.layout.Title(text="% Sales contribution of each category by region, compared with global data"),
                                 xaxis=dict(title="Sub category"),
                                 yaxis=dict(title="% Sales contribution")))

# Trace 0 is the one the dropdown restyles.
fig.add_trace(go.Bar(name= 'Regional', x= sales_region.index, y=sales_region.values))

#add the second series of bars to demonstrate global %
fig.add_trace(go.Bar(name= 'All Data',x= sales_region.index, y=sales_region.values))

#make one button for the global view and one per region
buttons = [_share_button('All regions', sales_region)]
for i in data.region.unique():
    # Filter the region once; the share helper needs both its per-category
    # sums and its overall total.
    buttons.append(_share_button(i, _sales_share(data[data.region==i])))

#format button
updatemenus = [
    dict(buttons=buttons,
         direction="down",
         pad={"r": 10, "t": 10},
         showactive=True,
         x=0.05,
         xanchor="left",
         y=1.06,
         yanchor="top")]

# add dropdown menu and its "Region" caption
fig.update_layout(
    showlegend=True,
    updatemenus=updatemenus,
    annotations=[
        dict(text="Region", x=0.01, xref="paper", y=1.04, yref="paper",
             align="left", showarrow=False)])

# The default renderer is static SVG, where the dropdown cannot be operated,
# so open this figure in the browser like the previous interactive one.
pio.show(fig,renderer='browser')
#pio.show(fig,renderer='browser')

In [None]:
#Top 10 products by sales value
# Aggregate and sort once; the result feeds both axes of the chart. (The
# table expression that used to stand here was never rendered, because only
# the last expression of a cell is shown.)
product_sales=data.groupby('product_name').sales.sum().sort_values(ascending=False)
top10_by_sales=product_sales.head(10)

##Visualize data
plt.figure(figsize=(15,6))
sns.barplot(x=top10_by_sales.index, y=top10_by_sales.values)
plt.xlabel('Product')
plt.ylabel('Sales (USD)')
plt.xticks(rotation=90)
plt.show()

#Top 10 products by sales quantity
# Sorted ascending, so the best seller is the last row of the table.
data.groupby('product_name').quantity.sum().sort_values().reset_index().tail(10)

In [None]:
#The most preferred shipping mode
# Count distinct orders per ship mode once and reuse the sorted result for
# both axes. (The bare expression that used to stand here was never rendered,
# because only the last expression of a cell is shown.)
orders_by_ship_mode=data.groupby('ship_mode').order_id.nunique().sort_values(ascending=False)

#visualize
plt.figure(figsize=(10,8.5))
sns.barplot(x=orders_by_ship_mode.index, y=orders_by_ship_mode.values)
plt.xlabel('Ship mode')
plt.ylabel('Number of orders')
plt.show()