Módulo 01
# Previsão de Demanda e Otimização de Estoque

Objetivos:
- Redução do excesso de estoque em 20% e do índice de ruptura em 15%, dentro de 6 meses.

## Import Libraries

In [368]:
import pandas as pd
import numpy as np
import os
import plotly.express as px
import plotly.io as pio

from smart_supply_chain_ai.data_processing import get_data

import warnings

# Silence every warning category for the rest of the session
warnings.filterwarnings('ignore')

# Default plotly theme applied to all figures
pio.templates.default = "plotly_white"

# Never truncate columns when a DataFrame is displayed
pd.options.display.max_columns = None

## Get Data

In [369]:
# Data folders, relative to the notebook's working directory
data_root = '../data'
raw_data_path = os.path.join(data_root, 'raw')
processed_data_path = os.path.join(data_root, 'processed')

In [370]:
# Kaggle dataset identifier in the form "<user>/<dataset-name>"
module_one = "salahuddinahmedshuvo/grocery-inventory-and-sales-dataset"
# Fetch the dataset into the raw-data folder; per the logged output the helper
# downloads from Kaggle, unzips and cleans up.
# NOTE(review): presumably needs Kaggle API credentials and network access — confirm.
get_data.download_kaggle_dataset(module_one, raw_data_path)

Starting the download of dataset 'salahuddinahmedshuvo/grocery-inventory-and-sales-dataset' from Kaggle...
Dataset URL: https://www.kaggle.com/datasets/salahuddinahmedshuvo/grocery-inventory-and-sales-dataset


Download, unzipping, and cleanup complete! The dataset was saved to: ../data/raw


In [371]:
# Load the raw CSV.
# The path is built with os.path.join, like the folder paths above, instead of
# concatenating a hard-coded '/' separator.
raw_csv_path = os.path.join(raw_data_path, 'Grocery_Inventory_and_Sales_Dataset.csv')
df_raw = pd.read_csv(raw_csv_path)

In [372]:
# Column names, non-null counts and dtypes of the raw frame
df_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 990 entries, 0 to 989
Data columns (total 16 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Product_ID               990 non-null    object
 1   Product_Name             990 non-null    object
 2   Catagory                 989 non-null    object
 3   Supplier_ID              990 non-null    object
 4   Supplier_Name            990 non-null    object
 5   Stock_Quantity           990 non-null    int64 
 6   Reorder_Level            990 non-null    int64 
 7   Reorder_Quantity         990 non-null    int64 
 8   Unit_Price               990 non-null    object
 9   Date_Received            990 non-null    object
 10  Last_Order_Date          990 non-null    object
 11  Expiration_Date          990 non-null    object
 12  Warehouse_Location       990 non-null    object
 13  Sales_Volume             990 non-null    int64 
 14  Inventory_Turnover_Rate  990 non-null    i

    • Expiration_Date: A data de validade do produto.
    • Last_Order_Date: A última data em que o produto foi encomendado.
    • Warehouse_Location: O local de armazenamento do produto.
    • Sales_Volume: Volume total de unidades vendidas.
    • Inventory_Turnover_Rate: Taxa na qual o produto vende e é reabastecido.
    • Status: Status atual (por exemplo, Ativo, Descontinuado, Em falta).


In [373]:
# Work on an independent copy so df_raw keeps the data exactly as loaded
df = df_raw.copy(deep=True)

In [374]:
# The source file misspells the column as "Catagory"; use the correct spelling
df = df.rename(columns={"Catagory": "Category"})

In [375]:
# Rows whose Category is missing
category_missing = df['Category'].isna()
df.loc[category_missing]

Unnamed: 0,Product_ID,Product_Name,Category,Supplier_ID,Supplier_Name,Stock_Quantity,Reorder_Level,Reorder_Quantity,Unit_Price,Date_Received,Last_Order_Date,Expiration_Date,Warehouse_Location,Sales_Volume,Inventory_Turnover_Rate,Status
685,10-378-9729,Cabbage,,83-941-9620,Rooxo,69,21,68,$66.55,12/23/2024,11/26/2024,9/21/2024,2 Butterfield Pass,36,35,Discontinued


In [376]:
# Distinct category labels (NaN included)
df['Category'].unique()

array(['Grains & Pulses', 'Beverages', 'Fruits & Vegetables',
       'Oils & Fats', 'Dairy', 'Bakery', 'Seafood', nan], dtype=object)

### Missing Values

In [377]:
# The only missing value is the Category of one Cabbage row; after researching
# online, cabbage is classified under 'Fruits & Vegetables'.
# Fill that column only: a frame-wide fillna would also write the category label
# into any other column that happened to contain a NaN.
df['Category'] = df['Category'].fillna('Fruits & Vegetables')

### Types

In [378]:
# Re-check non-null counts and dtypes after filling the missing category
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 990 entries, 0 to 989
Data columns (total 16 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Product_ID               990 non-null    object
 1   Product_Name             990 non-null    object
 2   Category                 990 non-null    object
 3   Supplier_ID              990 non-null    object
 4   Supplier_Name            990 non-null    object
 5   Stock_Quantity           990 non-null    int64 
 6   Reorder_Level            990 non-null    int64 
 7   Reorder_Quantity         990 non-null    int64 
 8   Unit_Price               990 non-null    object
 9   Date_Received            990 non-null    object
 10  Last_Order_Date          990 non-null    object
 11  Expiration_Date          990 non-null    object
 12  Warehouse_Location       990 non-null    object
 13  Sales_Volume             990 non-null    int64 
 14  Inventory_Turnover_Rate  990 non-null    i

In [379]:
# Preview the first five rows to see the raw value formats
df.head(n=5)

Unnamed: 0,Product_ID,Product_Name,Category,Supplier_ID,Supplier_Name,Stock_Quantity,Reorder_Level,Reorder_Quantity,Unit_Price,Date_Received,Last_Order_Date,Expiration_Date,Warehouse_Location,Sales_Volume,Inventory_Turnover_Rate,Status
0,29-205-1132,Sushi Rice,Grains & Pulses,38-037-1699,Jaxnation,22,72,70,$4.50,8/16/2024,6/29/2024,9/19/2024,48 Del Sol Trail,32,19,Discontinued
1,40-681-9981,Arabica Coffee,Beverages,54-470-2479,Feedmix,45,77,2,$20.00,11/1/2024,5/29/2024,5/8/2024,36 3rd Place,85,1,Discontinued
2,06-955-3428,Black Rice,Grains & Pulses,54-031-2945,Vinder,30,38,83,$6.00,8/3/2024,6/10/2024,9/22/2024,3296 Walton Court,31,34,Backordered
3,71-594-6552,Long Grain Rice,Grains & Pulses,63-492-7603,Brightbean,12,59,62,$1.50,12/8/2024,2/19/2025,4/17/2024,3 Westerfield Crossing,95,99,Active
4,57-437-1828,Plum,Fruits & Vegetables,54-226-4308,Topicstorm,37,30,74,$4.00,7/3/2024,10/11/2024,10/5/2024,15068 Scoville Court,62,25,Backordered


In [380]:
# Store the low-cardinality label columns as pandas categoricals
cat_columns = ['Category', 'Status']
df = df.astype({column: 'category' for column in cat_columns})

In [381]:
# Convert the date columns from text to datetime64.
# The raw values are written month/day/year (e.g. 8/16/2024), so the format is
# given explicitly instead of being inferred per column.
# Values that do not match still become NaT because of errors='coerce'.
date_columns = ['Date_Received', 'Last_Order_Date', 'Expiration_Date']
df[date_columns] = df[date_columns].apply(
    pd.to_datetime, format='%m/%d/%Y', errors='coerce'
)

In [382]:
# Confirm the category and datetime conversions took effect
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 990 entries, 0 to 989
Data columns (total 16 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Product_ID               990 non-null    object        
 1   Product_Name             990 non-null    object        
 2   Category                 990 non-null    category      
 3   Supplier_ID              990 non-null    object        
 4   Supplier_Name            990 non-null    object        
 5   Stock_Quantity           990 non-null    int64         
 6   Reorder_Level            990 non-null    int64         
 7   Reorder_Quantity         990 non-null    int64         
 8   Unit_Price               990 non-null    object        
 9   Date_Received            990 non-null    datetime64[ns]
 10  Last_Order_Date          990 non-null    datetime64[ns]
 11  Expiration_Date          990 non-null    datetime64[ns]
 12  Warehouse_Location       990 non-nul

In [383]:
# Strip the currency symbol and convert prices to float.
# regex=False makes '$' a literal character: as a regular expression it is the
# end-of-string anchor, and the default of `regex` differs between pandas versions.
df['Unit_Price'] = df['Unit_Price'].str.replace('$', '', regex=False).astype('float')

In [384]:
# Preview the first five rows after the dtype conversions
df.head(n=5)

Unnamed: 0,Product_ID,Product_Name,Category,Supplier_ID,Supplier_Name,Stock_Quantity,Reorder_Level,Reorder_Quantity,Unit_Price,Date_Received,Last_Order_Date,Expiration_Date,Warehouse_Location,Sales_Volume,Inventory_Turnover_Rate,Status
0,29-205-1132,Sushi Rice,Grains & Pulses,38-037-1699,Jaxnation,22,72,70,4.5,2024-08-16,2024-06-29,2024-09-19,48 Del Sol Trail,32,19,Discontinued
1,40-681-9981,Arabica Coffee,Beverages,54-470-2479,Feedmix,45,77,2,20.0,2024-11-01,2024-05-29,2024-05-08,36 3rd Place,85,1,Discontinued
2,06-955-3428,Black Rice,Grains & Pulses,54-031-2945,Vinder,30,38,83,6.0,2024-08-03,2024-06-10,2024-09-22,3296 Walton Court,31,34,Backordered
3,71-594-6552,Long Grain Rice,Grains & Pulses,63-492-7603,Brightbean,12,59,62,1.5,2024-12-08,2025-02-19,2024-04-17,3 Westerfield Crossing,95,99,Active
4,57-437-1828,Plum,Fruits & Vegetables,54-226-4308,Topicstorm,37,30,74,4.0,2024-07-03,2024-10-11,2024-10-05,15068 Scoville Court,62,25,Backordered


In [385]:
# Count Product_ID values that repeat an earlier row
product_ids = df['Product_ID']
product_ids.duplicated().sum()

np.int64(0)

In [386]:
# Count Supplier_ID values that repeat an earlier row
supplier_ids = df['Supplier_ID']
supplier_ids.duplicated().sum()

np.int64(0)

## EDA - Exploratory Data Analysis

In [387]:
# Summary statistics for the numeric columns (everything that is not a date, string or categorical)
non_numeric_dtypes = ['datetime', 'object', 'category']
df.describe(exclude=non_numeric_dtypes)

Unnamed: 0,Stock_Quantity,Reorder_Level,Reorder_Quantity,Unit_Price,Sales_Volume,Inventory_Turnover_Rate
count,990.0,990.0,990.0,990.0,990.0,990.0
mean,55.609091,51.215152,51.913131,5.924192,58.925253,50.150505
std,26.300775,29.095241,29.521059,6.49128,23.002318,28.798954
min,10.0,1.0,1.0,0.2,20.0,1.0
25%,33.0,25.25,25.0,2.5,39.0,25.0
50%,56.0,53.0,54.0,4.225,58.0,50.0
75%,79.0,77.0,77.0,7.0,78.0,74.75
max,100.0,100.0,100.0,98.43,100.0,100.0


In [388]:
# Count, cardinality and most frequent label for the categorical columns
categorical_dtypes = ['category']
df.describe(include=categorical_dtypes)

Unnamed: 0,Category,Status
count,990,990
unique,7,3
top,Fruits & Vegetables,Discontinued
freq,332,333


In [389]:
# Count, cardinality and most frequent value for the string (object) columns
string_dtypes = ['object']
df.describe(include=string_dtypes)

Unnamed: 0,Product_ID,Product_Name,Supplier_ID,Supplier_Name,Warehouse_Location
count,990,990,990,990,990
unique,990,121,990,350,990
top,29-205-1132,Bread Flour,38-037-1699,Katz,48 Del Sol Trail
freq,1,19,1,12,1


In [390]:
# Earliest and latest value of each date column.
# The column selection is computed outside the f-strings: reusing the enclosing
# quote character inside a replacement field is a SyntaxError before Python 3.12.
date_range_columns = ['Date_Received', 'Last_Order_Date', 'Expiration_Date']
date_min = df[date_range_columns].min()
date_max = df[date_range_columns].max()

print(f'Date Min value\n{date_min}')
print(30 * '-')
print(f'Date Max value\n{date_max}')

Date Min value
Date_Received     2024-02-25
Last_Order_Date   2024-02-25
Expiration_Date   2024-02-25
dtype: datetime64[ns]
------------------------------
Date Max value
Date_Received     2025-02-24
Last_Order_Date   2025-02-24
Expiration_Date   2025-02-24
dtype: datetime64[ns]


In [391]:
# Each product's share (in %) of the total sales volume.
# Position 14 places the new column directly after Sales_Volume.
# Note: df.insert raises if the column already exists, so this cell cannot be re-run as is.
sales_volume_total = df['Sales_Volume'].sum()
perc_sales_volume = (df['Sales_Volume'] / sales_volume_total) * 100
df.insert(14, 'Sales_Volume(%)', value=perc_sales_volume)

In [392]:
# Monetary value of the stock on hand and of one reorder, per product.
# Both columns are placed right after Unit_Price (positions 9 and 10).
stock_value = df['Stock_Quantity'] * df['Unit_Price']
reorder_value = df['Reorder_Quantity'] * df['Unit_Price']
df.insert(9, 'Stock_Value', stock_value)
df.insert(10, 'Reorder_Value', reorder_value)

In [393]:
# Each product's share (in %) of the total stock value, placed right after Stock_Value
stock_value_total = df['Stock_Value'].sum()
perc_stock = (df['Stock_Value'] / stock_value_total) * 100
df.insert(10, 'Stock_Value(%)', value=perc_stock)

# Same for the reorder value; position 12 is directly after Reorder_Value,
# which the insert above moved from position 10 to 11
reorder_value_total = df['Reorder_Value'].sum()
perc_reorder = (df['Reorder_Value'] / reorder_value_total) * 100
df.insert(12, 'Reorder_Value(%)', value=perc_reorder)

In [394]:
# Days from the last order date to the receipt date; the value is negative when
# the last order is dated after the receipt. Inserted right after Last_Order_Date.
diff = df['Date_Received'] - df['Last_Order_Date']
lead_time_days = diff.dt.days
df.insert(15, 'LeadTime(days)', lead_time_days)

In [395]:
# Purchase_Order flag: 'Active' when the lead time is negative (last order dated
# after the last receipt), otherwise 'Inactive'
order_after_receipt = df['LeadTime(days)'] < 0
df['Purchase_Order'] = np.where(order_after_receipt, 'Active', 'Inactive')

In [396]:
# Days between receipt and expiration; negative when the expiration date precedes
# the receipt date. Inserted right after Expiration_Date.
days_exp = df['Expiration_Date'] - df['Date_Received']
days_until_expiration = days_exp.dt.days
df.insert(17, 'Days_For_Expiration', days_until_expiration)

In [397]:
# Bucket products by Days_For_Expiration:
#   below 0  -> 'Expired'
#   below 30 -> 'Attention(<30d)'
#   else     -> 'Good'
days_left = df['Days_For_Expiration']
not_expired_label = np.where(days_left < 30, 'Attention(<30d)', 'Good')
expiration_status = np.where(days_left < 0, 'Expired', not_expired_label)
df.insert(18, 'Expiration_Status', expiration_status)

In [398]:
# Store the two derived label columns as categoricals
for label_column in ('Expiration_Status', 'Purchase_Order'):
    df[label_column] = df[label_column].astype('category')

In [399]:
# Display the enriched frame (rendered as a truncated preview of the first and last rows)
df

Unnamed: 0,Product_ID,Product_Name,Category,Supplier_ID,Supplier_Name,Stock_Quantity,Reorder_Level,Reorder_Quantity,Unit_Price,Stock_Value,Stock_Value(%),Reorder_Value,Reorder_Value(%),Date_Received,Last_Order_Date,LeadTime(days),Expiration_Date,Days_For_Expiration,Expiration_Status,Warehouse_Location,Sales_Volume,Sales_Volume(%),Inventory_Turnover_Rate,Status,Purchase_Order
0,29-205-1132,Sushi Rice,Grains & Pulses,38-037-1699,Jaxnation,22,72,70,4.5,99.0,0.029761,315.0,0.103115,2024-08-16,2024-06-29,48,2024-09-19,34,Good,48 Del Sol Trail,32,0.054855,19,Discontinued,Inactive
1,40-681-9981,Arabica Coffee,Beverages,54-470-2479,Feedmix,45,77,2,20.0,900.0,0.270551,40.0,0.013094,2024-11-01,2024-05-29,156,2024-05-08,-177,Expired,36 3rd Place,85,0.145708,1,Discontinued,Inactive
2,06-955-3428,Black Rice,Grains & Pulses,54-031-2945,Vinder,30,38,83,6.0,180.0,0.054110,498.0,0.163019,2024-08-03,2024-06-10,54,2024-09-22,50,Good,3296 Walton Court,31,0.053140,34,Backordered,Inactive
3,71-594-6552,Long Grain Rice,Grains & Pulses,63-492-7603,Brightbean,12,59,62,1.5,18.0,0.005411,93.0,0.030443,2024-12-08,2025-02-19,-73,2024-04-17,-235,Expired,3 Westerfield Crossing,95,0.162850,99,Active,Active
4,57-437-1828,Plum,Fruits & Vegetables,54-226-4308,Topicstorm,37,30,74,4.0,148.0,0.044491,296.0,0.096895,2024-07-03,2024-10-11,-100,2024-10-05,94,Good,15068 Scoville Court,62,0.106281,25,Backordered,Active
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
985,82-977-7752,Spinach,Fruits & Vegetables,57-473-8672,Shuffledrive,88,78,17,2.5,220.0,0.066135,42.5,0.013912,2024-09-06,2024-12-28,-113,2024-11-04,59,Good,58 Corscot Terrace,58,0.099424,21,Active,Active
986,62-393-9939,Cheddar Cheese,Dairy,93-877-9384,Gabcube,60,9,89,9.0,540.0,0.162330,801.0,0.262206,2024-06-01,2024-06-02,-1,2024-10-05,126,Good,5 Oxford Pass,95,0.162850,63,Active,Active
987,31-745-6850,Cabbage,Fruits & Vegetables,96-215-2767,Lajo,94,90,12,0.9,84.6,0.025432,10.8,0.003535,2024-10-03,2024-10-24,-21,2024-11-01,29,Attention(<30d),081 Jana Lane,98,0.167992,71,Active,Active
988,86-692-2312,Avocado Oil,Oils & Fats,77-783-4107,Dazzlesphere,30,48,52,10.0,300.0,0.090184,520.0,0.170221,2024-06-11,2024-12-07,-179,2024-04-30,-42,Expired,00616 Manitowish Parkway,22,0.037713,78,Active,Active


In [400]:
# Column summary after feature creation (the frame now has 25 columns)
df.info(max_cols=None)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 990 entries, 0 to 989
Data columns (total 25 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   Product_ID               990 non-null    object        
 1   Product_Name             990 non-null    object        
 2   Category                 990 non-null    category      
 3   Supplier_ID              990 non-null    object        
 4   Supplier_Name            990 non-null    object        
 5   Stock_Quantity           990 non-null    int64         
 6   Reorder_Level            990 non-null    int64         
 7   Reorder_Quantity         990 non-null    int64         
 8   Unit_Price               990 non-null    float64       
 9   Stock_Value              990 non-null    float64       
 10  Stock_Value(%)           990 non-null    float64       
 11  Reorder_Value            990 non-null    float64       
 12  Reorder_Value(%)         990 non-nul

In [401]:
# Number of products per category.
# Category is a categorical column; observed=False is passed explicitly because
# relying on the default for categorical groupers is deprecated in pandas, and
# this keeps the current behaviour of listing every category.
df.groupby(['Category'], observed=False)['Product_ID'].count()

Category
Bakery                  74
Beverages               75
Dairy                  180
Fruits & Vegetables    332
Grains & Pulses        162
Oils & Fats             77
Seafood                 90
Name: Product_ID, dtype: int64

In [402]:
# Total sales volume and stock value per category / expiration status / purchase-order flag.
# All three keys are categoricals. observed=False is passed explicitly so that every
# combination of labels keeps its row (summing to 0 when no product matches) even if
# the pandas default for `observed` changes; relying on the default is deprecated.
group_keys = ['Category', 'Expiration_Status', 'Purchase_Order']
df.groupby(group_keys, observed=False)[['Sales_Volume', 'Stock_Value']].sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Sales_Volume,Stock_Value
Category,Expiration_Status,Purchase_Order,Unnamed: 3_level_1,Unnamed: 4_level_1
Bakery,Attention(<30d),Active,160,275.8
Bakery,Attention(<30d),Inactive,32,265.5
Bakery,Expired,Active,590,1394.95
Bakery,Expired,Inactive,1463,5991.25
Bakery,Good,Active,1645,6271.4
Bakery,Good,Inactive,674,2589.9
Beverages,Attention(<30d),Active,373,2500.0
Beverages,Attention(<30d),Inactive,36,900.0
Beverages,Expired,Active,830,11189.5
Beverages,Expired,Inactive,1647,24438.5


In [416]:
# Sum of lead-time days per supplier name, largest total first
lead_time_by_supplier = df.groupby(['Supplier_Name'])[['LeadTime(days)']].sum()
lead_time_by_supplier.sort_values(by='LeadTime(days)', ascending=False)

Unnamed: 0_level_0,LeadTime(days)
Supplier_Name,Unnamed: 1_level_1
Gigashots,938
Devpulse,747
Rhyloo,710
Trunyx,682
Shufflester,632
...,...
Eidel,-556
Quatz,-566
Mydeo,-578
BlogXS,-847


In [418]:
# Inspect the products of the supplier with the largest summed lead time
is_gigashots = df['Supplier_Name'] == 'Gigashots'
df.loc[is_gigashots]

Unnamed: 0,Product_ID,Product_Name,Category,Supplier_ID,Supplier_Name,Stock_Quantity,Reorder_Level,Reorder_Quantity,Unit_Price,Stock_Value,Stock_Value(%),Reorder_Value,Reorder_Value(%),Date_Received,Last_Order_Date,LeadTime(days),Expiration_Date,Days_For_Expiration,Expiration_Status,Warehouse_Location,Sales_Volume,Sales_Volume(%),Inventory_Turnover_Rate,Status,Purchase_Order
258,19-214-5762,Pear,Fruits & Vegetables,88-108-3774,Gigashots,41,52,75,4.5,184.5,0.055463,337.5,0.11048,2024-04-06,2024-04-24,-18,2024-10-13,190,Good,7 Bonner Terrace,57,0.09771,62,Backordered,Active
332,65-145-9672,Long Grain Rice,Grains & Pulses,65-068-1200,Gigashots,71,10,49,1.5,106.5,0.032015,73.5,0.02406,2024-08-11,2024-03-16,148,2024-05-26,-77,Expired,8 Dovetail Junction,66,0.113138,43,Active,Inactive
539,93-015-0811,Pineapple,Fruits & Vegetables,07-055-5188,Gigashots,18,7,58,3.5,63.0,0.018939,203.0,0.066452,2024-11-26,2024-08-05,113,2024-06-26,-153,Expired,1 Pleasure Hill,88,0.15085,96,Discontinued,Inactive
752,85-835-3445,Egg (Goose),Dairy,13-433-4930,Gigashots,50,93,37,2.45,122.5,0.036825,90.65,0.029674,2025-01-11,2024-03-02,315,2024-05-12,-244,Expired,288 Fair Oaks Place,78,0.133708,79,Backordered,Inactive
814,10-555-5971,Sourdough Bread,Bakery,30-410-3509,Gigashots,94,56,80,4.0,376.0,0.11303,320.0,0.104751,2025-01-04,2024-12-02,33,2024-09-27,-99,Expired,8425 New Castle Parkway,47,0.080568,96,Backordered,Inactive
829,40-126-0515,Carrot,Fruits & Vegetables,78-379-0369,Gigashots,74,37,57,1.5,111.0,0.033368,85.5,0.027988,2025-02-14,2024-03-04,347,2024-12-18,-58,Expired,28351 Cascade Plaza,91,0.155993,18,Active,Inactive


In [420]:
# Keep only products whose Status is not 'Discontinued'
not_discontinued = df['Status'] != 'Discontinued'
df[not_discontinued]

Unnamed: 0,Product_ID,Product_Name,Category,Supplier_ID,Supplier_Name,Stock_Quantity,Reorder_Level,Reorder_Quantity,Unit_Price,Stock_Value,Stock_Value(%),Reorder_Value,Reorder_Value(%),Date_Received,Last_Order_Date,LeadTime(days),Expiration_Date,Days_For_Expiration,Expiration_Status,Warehouse_Location,Sales_Volume,Sales_Volume(%),Inventory_Turnover_Rate,Status,Purchase_Order
2,06-955-3428,Black Rice,Grains & Pulses,54-031-2945,Vinder,30,38,83,6.0,180.0,0.054110,498.0,0.163019,2024-08-03,2024-06-10,54,2024-09-22,50,Good,3296 Walton Court,31,0.053140,34,Backordered,Inactive
3,71-594-6552,Long Grain Rice,Grains & Pulses,63-492-7603,Brightbean,12,59,62,1.5,18.0,0.005411,93.0,0.030443,2024-12-08,2025-02-19,-73,2024-04-17,-235,Expired,3 Westerfield Crossing,95,0.162850,99,Active,Active
4,57-437-1828,Plum,Fruits & Vegetables,54-226-4308,Topicstorm,37,30,74,4.0,148.0,0.044491,296.0,0.096895,2024-07-03,2024-10-11,-100,2024-10-05,94,Good,15068 Scoville Court,62,0.106281,25,Backordered,Active
6,71-516-1996,Corn Oil,Oils & Fats,04-391-7610,Tagfeed,96,52,16,2.5,240.0,0.072147,40.0,0.013094,2024-03-18,2024-05-07,-50,2024-06-20,94,Good,12 Truax Court,67,0.114852,13,Active,Active
8,66-268-8345,Greek Yogurt,Dairy,32-182-1895,Thoughtstorm,91,84,11,3.0,273.0,0.082067,33.0,0.010802,2024-12-04,2024-06-02,185,2025-01-08,35,Good,550 Clemons Plaza,56,0.095996,90,Active,Inactive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
984,53-805-9523,Sourdough Bread,Bakery,99-864-0256,Rhyloo,40,30,65,4.5,180.0,0.054110,292.5,0.095749,2024-07-02,2024-03-19,105,2024-05-14,-49,Expired,94 Mallory Center,54,0.092567,29,Active,Inactive
985,82-977-7752,Spinach,Fruits & Vegetables,57-473-8672,Shuffledrive,88,78,17,2.5,220.0,0.066135,42.5,0.013912,2024-09-06,2024-12-28,-113,2024-11-04,59,Good,58 Corscot Terrace,58,0.099424,21,Active,Active
986,62-393-9939,Cheddar Cheese,Dairy,93-877-9384,Gabcube,60,9,89,9.0,540.0,0.162330,801.0,0.262206,2024-06-01,2024-06-02,-1,2024-10-05,126,Good,5 Oxford Pass,95,0.162850,63,Active,Active
987,31-745-6850,Cabbage,Fruits & Vegetables,96-215-2767,Lajo,94,90,12,0.9,84.6,0.025432,10.8,0.003535,2024-10-03,2024-10-24,-21,2024-11-01,29,Attention(<30d),081 Jana Lane,98,0.167992,71,Active,Active
