In [1]:
# Import Libs

# General
import pandas as pd
import numpy as np

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the sales CSV from the Kaggle input directory and preview the first rows
data_sub = pd.read_csv(filepath_or_buffer='/kaggle/input/sales-analysis/SalesKaggle3.csv')
data_sub.head(n=5)

Unnamed: 0,Order,File_Type,SKU_number,SoldFlag,SoldCount,MarketingType,ReleaseNumber,New_Release_Flag,StrengthFactor,PriceReg,ReleaseYear,ItemCount,LowUserPrice,LowNetPrice
0,2,Historical,1737127,0.0,0.0,D,15,1,682743.0,44.99,2015,8,28.97,31.84
1,3,Historical,3255963,0.0,0.0,D,7,1,1016014.0,24.81,2005,39,0.0,15.54
2,4,Historical,612701,0.0,0.0,D,0,0,340464.0,46.0,2013,34,30.19,27.97
3,6,Historical,115883,1.0,1.0,D,4,1,334011.0,100.0,2006,20,133.93,83.15
4,7,Historical,863939,1.0,1.0,D,2,1,1287938.0,121.95,2010,28,4.0,23.99


In [3]:
# Number of rows per distinct File_Type value
data_sub.loc[:, 'File_Type'].value_counts()

Active        122921
Historical     75996
Name: File_Type, dtype: int64

In [4]:
# Function Definition
def ABC_segmentation(perc, a_cutoff=0.6, b_cutoff=0.85):
    """
    Map a cumulative (running) share to one of the 3 classes A, B, C.

    With the default cut-offs the classes cover the first 60% (A),
    the next 25% (B) and the remaining 15% (C) of the running share.

    Parameters
    ----------
    perc : float
        Running share, expressed as a fraction (0.6 means 60%).
    a_cutoff : float, default 0.6
        Shares strictly below this value are class 'A'.
    b_cutoff : float, default 0.85
        Shares from ``a_cutoff`` up to, but excluding, this value are
        class 'B'; shares at or above it are class 'C'.

    Returns
    -------
    str or None
        'A', 'B' or 'C'; None when ``perc`` is NaN (undefined share).

    Raises
    ------
    ValueError
        If ``a_cutoff`` is greater than ``b_cutoff``.
    """
    if a_cutoff > b_cutoff:
        raise ValueError('a_cutoff must not be greater than b_cutoff')

    # NaN is the only value that is not equal to itself; an undefined share
    # cannot be classified, so it is explicitly left without a class.
    if perc != perc:
        return None

    # No lower bound on purpose: a share of exactly 0 (or below) sits at the
    # head of the ranking and must not fall through without a class.
    if perc < a_cutoff:
        return 'A'
    if perc < b_cutoff:
        return 'B'
    return 'C'

In [5]:
# Additive cost per SKU: regular price multiplied by the item count
data_sub['AddCost'] = data_sub['PriceReg'].mul(data_sub['ItemCount'])

# Rank the SKUs from the highest to the lowest additive cost, then derive the
# remaining columns in one chain (each step may use the columns created
# by the steps before it):
#   RunCumCost - running cumulative sum of AddCost in ranked order
#   TotSum     - grand total of AddCost, repeated on every row
#   RunPerc    - running cumulative cost as a fraction of the grand total
#   Class      - A/B/C label assigned from the running fraction
data_sub = (
    data_sub
    .sort_values('AddCost', ascending=False)
    .assign(
        RunCumCost=lambda ranked: ranked['AddCost'].cumsum(),
        TotSum=lambda ranked: ranked['AddCost'].sum(),
        RunPerc=lambda ranked: ranked['RunCumCost'] / ranked['TotSum'],
        Class=lambda ranked: ranked['RunPerc'].apply(ABC_segmentation),
    )
)

In [6]:
# Preview the first rows, now ranked by AddCost with the derived columns attached
data_sub.head(n=5)

Unnamed: 0,Order,File_Type,SKU_number,SoldFlag,SoldCount,MarketingType,ReleaseNumber,New_Release_Flag,StrengthFactor,PriceReg,ReleaseYear,ItemCount,LowUserPrice,LowNetPrice,AddCost,RunCumCost,TotSum,RunPerc,Class
112548,121659,Active,923197,,,S,0,0,429479.6,2999.99,1997,121,3.72,5.4,362998.79,362998.79,792481525.9,0.000458,A
79160,88271,Active,622234,,,D,1,0,1403206.0,6182.67,1985,43,4.0,51.74,265854.81,628853.6,792481525.9,0.000794,A
685,693,Historical,145889,1.0,1.0,D,9,1,2338.0,244.6,2010,851,135.9,48.98,208154.6,837008.2,792481525.9,0.001056,A
601,609,Historical,435034,1.0,2.0,D,9,1,1746.0,281.8,2012,616,173.3,48.99,173588.8,1010597.0,792481525.9,0.001275,A
196941,206052,Active,538479,,,S,3,1,275843.6,2645.3,2009,65,4.0,72.05,171944.5,1182541.5,792481525.9,0.001492,A


In [7]:
# Number of rows (SKUs) assigned to each class
data_sub.loc[:, 'Class'].value_counts()

C    101749
B     57170
A     39998
Name: Class, dtype: int64

In [8]:
# Total additive cost per class, printed in A, B, C order
for label in ('A', 'B', 'C'):
    class_cost = data_sub[data_sub['Class'] == label]['AddCost'].sum()
    print(f'Cost of Class {label}:', class_cost)

Cost of Class A: 475485669.73
Cost of Class B: 198121395.61
Cost of Class C: 118874460.56000002


In [9]:
# Share of the total additive cost held by each class.
# The value is printed as a fraction of 1 (it is not multiplied by 100).
total_cost = data_sub['AddCost'].sum()
for label in ('A', 'B', 'C'):
    class_cost = data_sub[data_sub['Class'] == label]['AddCost'].sum()
    print(f'Percentage of Cost of Class {label}:', class_cost / total_cost)

Percentage of Cost of Class A: 0.5999959042452171
Percentage of Cost of Class B: 0.25000127969544633
Percentage of Cost of Class C: 0.1500028160593365
