In [33]:
# load necessary modules
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [34]:
# Read the sales CSV into a DataFrame and preview its first five rows.
sales = pd.read_csv(filepath_or_buffer='data/sales.csv')
sales.head(n=5)

Unnamed: 0,order_number,order_date,customer_number,type,month,item_number,quantity,category,revenue,customer_source,order_source
0,KE0001,2024-01-01,CKE0539,RETAIL,2024-01,KE0895,12,DIABETES,5.098618,direct,App
1,KE0001,2024-01-01,CKE0539,RETAIL,2024-01,KE1000,23,HYPERTENSIVES,12.460205,direct,App
2,KE0001,2024-01-01,CKE0539,RETAIL,2024-01,KE0921,12,HYPERTENSIVES,8.012194,direct,App
3,KE0002,2024-01-01,CKE0580,WHOLESALE,2024-01,KE0015,23,DIABETES,13.854194,agent,call
4,KE0002,2024-01-01,CKE0580,WHOLESALE,2024-01,KE0224,21,HYPERTENSIVES,10.534678,agent,call


In [35]:
# How many distinct item numbers appear in the data (missing values excluded)?
print(sales['item_number'].nunique(dropna=True))

1000


In [36]:
# List the distinct values of the month column, in order of first appearance.
print(pd.unique(sales['month']))

['2024-01' '2024-02' '2024-03' '2024-04' '2024-05' '2024-06' '2024-07']


In [37]:
# Dimensions of the loaded sales frame as (rows, columns).
sales.shape

(61800, 11)

In [38]:
# Keep only the columns needed for the time-series analysis:
# order date, quantity and item number.
# .copy() makes the subset an independent frame, so the column assignments
# made in later cells modify it directly and cannot trigger pandas'
# SettingWithCopyWarning.
sales = sales[['order_date', 'quantity', 'item_number']].copy()
sales.head()

Unnamed: 0,order_date,quantity,item_number
0,2024-01-01,12,KE0895
1,2024-01-01,23,KE1000
2,2024-01-01,12,KE0921
3,2024-01-01,23,KE0015
4,2024-01-01,21,KE0224


In [39]:
# Count the missing entries in each column.
sales.isna().sum()

order_date     0
quantity       0
item_number    0
dtype: int64

In [40]:
# Prepare the data for time-series analysis:
# parse order_date into pandas datetimes, then confirm the column dtypes.
sales = sales.assign(order_date=pd.to_datetime(sales['order_date']))
sales.dtypes


order_date     datetime64[ns]
quantity                int64
item_number            object
dtype: object

In [42]:
# Make order_date the index of the frame.
# Reassigning (instead of inplace=True) and skipping the call when the index
# is already order_date lets this cell be re-run without raising a KeyError
# for the column that the first run moved into the index.
if sales.index.name != 'order_date':
    sales = sales.set_index('order_date')
sales.head()


Unnamed: 0_level_0,quantity,item_number
order_date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-01-01,12,KE0895
2024-01-01,23,KE1000
2024-01-01,12,KE0921
2024-01-01,23,KE0015
2024-01-01,21,KE0224


In [44]:
# Extract the ISO week number from the order_date index.
# DatetimeIndex.week is deprecated (and removed in pandas 2.0);
# isocalendar().week is its replacement. Converting the result to a plain
# int64 array keeps the integer dtype that .week produced and avoids any
# index alignment on the repeated dates.
# NOTE(review): week numbers repeat across years; the data shown covers
# 2024 only - confirm before reusing this on multi-year data.
sales['week'] = sales.index.isocalendar().week.to_numpy(dtype='int64')
sales.head()

  sales['week'] = sales.index.week


Unnamed: 0_level_0,quantity,item_number,week
order_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-01-01,12,KE0895,1
2024-01-01,23,KE1000,1
2024-01-01,12,KE0921,1
2024-01-01,23,KE0015,1
2024-01-01,21,KE0224,1


In [None]:
# Distinct week numbers present in the data, in order of first appearance.
print(pd.unique(sales['week']))

[ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30]


In [None]:
# Total quantity for each (week, item_number) pair. The order_date index is
# discarded and the two grouping keys become the index of the result.
sales = sales.groupby(by=['week', 'item_number']).agg('sum')
sales.head()



Unnamed: 0_level_0,Unnamed: 1_level_0,quantity
week,item_number,Unnamed: 2_level_1
1,KE0001,5
1,KE0002,11
1,KE0003,16
1,KE0004,39
1,KE0006,17
