# Data Cleaning

In [66]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime as dt


Loading in the Datas

In [67]:
inbound_df = pd.read_csv("30June_EM_2025_HackathonData/Inbound.csv")
inventory_df = pd.read_csv("30June_EM_2025_HackathonData/Inventory.csv")
material_master_df = pd.read_csv("30June_EM_2025_HackathonData/MaterialMaster.csv")
outbound_df = pd.read_csv("30June_EM_2025_HackathonData/Outbound.csv")

In [68]:
print("Inventory DataFrame:")
print(inventory_df.head())
print("Inventory DataFrame:")
print("Inventory data columns:", inventory_df.columns.tolist())
print(f"Number of inventory data records: {len(inventory_df)}")

Inventory DataFrame:
  BALANCE_AS_OF_DATE       PLANT_NAME MATERIAL_NAME BATCH_NUMBER  \
0         12/31/2023  CHINA-WAREHOUSE      MAT-0045        SCRAP   
1         12/31/2023  CHINA-WAREHOUSE      MAT-0193         6024   
2         12/31/2023  CHINA-WAREHOUSE      MAT-0193         5755   
3         12/31/2023  CHINA-WAREHOUSE      MAT-0193         3142   
4         12/31/2023  CHINA-WAREHOUSE      MAT-0193         6734   

   UNRESRICTED_STOCK STOCK_UNIT  STOCK_SELL_VALUE CURRENCY  
0                164         KG               211      CNY  
1                  0         KG             87666      CNY  
2              70720         KG            414419      CNY  
3              12240         KG             71726      CNY  
4               2720         KG             15939      CNY  
Inventory DataFrame:
Inventory data columns: ['BALANCE_AS_OF_DATE', 'PLANT_NAME', 'MATERIAL_NAME', 'BATCH_NUMBER', 'UNRESRICTED_STOCK', 'STOCK_UNIT', 'STOCK_SELL_VALUE', 'CURRENCY']
Number of inventory da

In [69]:
# Display the first few rows of each DataFrame
print("Inbound DataFrame:")
print(inbound_df.head())
print("Inventory DataFrame:")
print("Inbound data columns:", inbound_df.columns.tolist())
print(f"Number of inbound data records: {len(inbound_df)}")

Inbound DataFrame:
  INBOUND_DATE           PLANT_NAME MATERIAL_NAME  NET_QUANTITY_MT
0   2023/12/15  SINGAPORE-WAREHOUSE      MAT-0354           23.375
1   2023/12/22  SINGAPORE-WAREHOUSE      MAT-0413            1.375
2   2023/12/22  SINGAPORE-WAREHOUSE      MAT-0413           23.375
3   2023/12/22  SINGAPORE-WAREHOUSE      MAT-0413           24.750
4   2023/12/22  SINGAPORE-WAREHOUSE      MAT-0413           19.250
Inventory DataFrame:
Inbound data columns: ['INBOUND_DATE', 'PLANT_NAME', 'MATERIAL_NAME', 'NET_QUANTITY_MT']
Number of inbound data records: 19593


In [70]:
print("Outbound DataFrame:")
print(outbound_df.head())
print("Outbound data columns:", outbound_df.columns.tolist())
print(f"Number of outbound data records: {len(outbound_df)}")

Outbound DataFrame:
  OUTBOUND_DATE       PLANT_NAME MODE_OF_TRANSPORT MATERIAL_NAME  \
0    2024/01/02  CHINA-WAREHOUSE             Truck      MAT-0013   
1    2024/01/02  CHINA-WAREHOUSE             Truck      MAT-0013   
2    2024/01/02  CHINA-WAREHOUSE             Truck      MAT-0268   
3    2024/01/02  CHINA-WAREHOUSE             Truck      MAT-0268   
4    2024/01/02  CHINA-WAREHOUSE             Truck      MAT-0268   

  CUSTOMER_NUMBER  NET_QUANTITY_MT  
0       CST-00001             25.5  
1       CST-00001             25.5  
2       CST-00002             25.5  
3       CST-00002             25.5  
4       CST-00002             25.5  
Outbound data columns: ['OUTBOUND_DATE', 'PLANT_NAME', 'MODE_OF_TRANSPORT', 'MATERIAL_NAME', 'CUSTOMER_NUMBER', 'NET_QUANTITY_MT']
Number of outbound data records: 26994


In [71]:
print("Material Master DataFrame:")
print(material_master_df.head())
print("Material Master data columns:", material_master_df.columns.tolist())
print(f"Number of Material Master data records: {len(material_master_df)}")

Material Master DataFrame:
  MATERIAL_NAME POLYMER_TYPE  SHELF_LIFE_IN_MONTH  \
0      MAT-0001        P-002                    3   
1      MAT-0002        P-001                    8   
2      MAT-0003        P-004                    2   
3      MAT-0004        P-002                    3   
4      MAT-0005        P-002                    5   

   DOWNGRADE_VALUE_LOST_PERCENT  
0                            40  
1                            15  
2                            35  
3                            35  
4                            20  
Material Master data columns: ['MATERIAL_NAME', 'POLYMER_TYPE', 'SHELF_LIFE_IN_MONTH', 'DOWNGRADE_VALUE_LOST_PERCENT']
Number of Material Master data records: 431


## Checking Null Values

In [72]:
print("Checking null values in Inbound DataFrame:")
print(inbound_df.isnull().sum())
print("Checking null values in Inventory DataFrame:")
print(inventory_df.isnull().sum())
print("Checking null values in Outbound DataFrame:")
print(outbound_df.isnull().sum())
print("Checking null values in Material Master DataFrame:")
print(material_master_df.isnull().sum())

Checking null values in Inbound DataFrame:
INBOUND_DATE       0
PLANT_NAME         0
MATERIAL_NAME      0
NET_QUANTITY_MT    0
dtype: int64
Checking null values in Inventory DataFrame:
BALANCE_AS_OF_DATE    0
PLANT_NAME            0
MATERIAL_NAME         0
BATCH_NUMBER          1
UNRESRICTED_STOCK     0
STOCK_UNIT            0
STOCK_SELL_VALUE      0
CURRENCY              0
dtype: int64
Checking null values in Outbound DataFrame:
OUTBOUND_DATE        0
PLANT_NAME           0
MODE_OF_TRANSPORT    0
MATERIAL_NAME        4
CUSTOMER_NUMBER      0
NET_QUANTITY_MT      0
dtype: int64
Checking null values in Material Master DataFrame:
MATERIAL_NAME                   0
POLYMER_TYPE                    0
SHELF_LIFE_IN_MONTH             0
DOWNGRADE_VALUE_LOST_PERCENT    0
dtype: int64


In [73]:
inventory_df = inventory_df.dropna()
outbound_df = outbound_df.dropna()
print("Checking null values in Inbound DataFrame:")
print(inbound_df.isnull().sum())
print("Checking null values in Inventory DataFrame:")
print(inventory_df.isnull().sum())
print("Checking null values in Outbound DataFrame:")
print(outbound_df.isnull().sum())
print("Checking null values in Material Master DataFrame:")
print(material_master_df.isnull().sum())

Checking null values in Inbound DataFrame:
INBOUND_DATE       0
PLANT_NAME         0
MATERIAL_NAME      0
NET_QUANTITY_MT    0
dtype: int64
Checking null values in Inventory DataFrame:
BALANCE_AS_OF_DATE    0
PLANT_NAME            0
MATERIAL_NAME         0
BATCH_NUMBER          0
UNRESRICTED_STOCK     0
STOCK_UNIT            0
STOCK_SELL_VALUE      0
CURRENCY              0
dtype: int64
Checking null values in Outbound DataFrame:
OUTBOUND_DATE        0
PLANT_NAME           0
MODE_OF_TRANSPORT    0
MATERIAL_NAME        0
CUSTOMER_NUMBER      0
NET_QUANTITY_MT      0
dtype: int64
Checking null values in Material Master DataFrame:
MATERIAL_NAME                   0
POLYMER_TYPE                    0
SHELF_LIFE_IN_MONTH             0
DOWNGRADE_VALUE_LOST_PERCENT    0
dtype: int64


In [74]:
print("\nPotential Outliers in Inventory:")
numeric_cols = inventory_df.select_dtypes(include='number').columns

for col in numeric_cols:
    Q1 = inventory_df[col].quantile(0.15)
    Q3 = inventory_df[col].quantile(0.85)
    IQR = Q3 - Q1

    outliers = inventory_df[
        (inventory_df[col] < Q1 - 1.5 * IQR) |
        (inventory_df[col] > Q3 + 1.5 * IQR)
    ]

    print(f"{col}: {len(outliers)} potential outliers")



Potential Outliers in Inventory:
UNRESRICTED_STOCK: 984 potential outliers
STOCK_SELL_VALUE: 2117 potential outliers


There are outliers, however we'll just think of those as genuine warehouse events for now

# Inventory Cleaning

Count number of scraps

In [75]:
scrap_count = (inventory_df['BATCH_NUMBER'] == 'SCRAP').sum()
print(f"Number of SCRAPS rows: {scrap_count}")

row_count = inventory_df.shape[0]
print(f"Total number of rows: {row_count}")

print("Percent of SCRAPS rows:", (scrap_count / row_count) * 100)

# just the unique currency codes
print(inventory_df['CURRENCY'].unique())

# or, if you also want counts of each
print(inventory_df['CURRENCY'].value_counts())







Number of SCRAPS rows: 262
Total number of rows: 28327
Percent of SCRAPS rows: 0.9249126275285063
['CNY' 'SGD']
CURRENCY
CNY    15510
SGD    12817
Name: count, dtype: int64


Change all currency to USD and then remove the currency column

In [76]:

fx_rates = {'SGD': 0.74, 'CNY': 0.14}
# 1) Copy your original
inventory_usd = inventory_df.copy()

# 2) Clean and convert on the copy
inventory_usd['STOCK_SELL_VALUE'] = (
    inventory_usd['STOCK_SELL_VALUE']
      .replace(r'[^\d\.]', '', regex=True)
      .astype(float)
)
inventory_usd['STOCK_SELL_USD'] = (
    inventory_usd['STOCK_SELL_VALUE']
    * inventory_usd['CURRENCY'].map(fx_rates)
)

inventory_usd.drop(columns=['CURRENCY', 'STOCK_SELL_VALUE'], inplace=True)
# inventory_df is still the old one; inventory_usd has your changes
inventory_usd



Unnamed: 0,BALANCE_AS_OF_DATE,PLANT_NAME,MATERIAL_NAME,BATCH_NUMBER,UNRESRICTED_STOCK,STOCK_UNIT,STOCK_SELL_USD
0,12/31/2023,CHINA-WAREHOUSE,MAT-0045,SCRAP,164,KG,29.54
1,12/31/2023,CHINA-WAREHOUSE,MAT-0193,6024,0,KG,12273.24
2,12/31/2023,CHINA-WAREHOUSE,MAT-0193,5755,70720,KG,58018.66
3,12/31/2023,CHINA-WAREHOUSE,MAT-0193,3142,12240,KG,10041.64
4,12/31/2023,CHINA-WAREHOUSE,MAT-0193,6734,2720,KG,2231.46
...,...,...,...,...,...,...,...
28323,12/31/2024,SINGAPORE-WAREHOUSE,MAT-0172,M2299A,74250,KG,15384.60
28324,12/31/2024,SINGAPORE-WAREHOUSE,MAT-0172,M3493A,15125,KG,3133.90
28325,12/31/2024,SINGAPORE-WAREHOUSE,MAT-0191,K1286,325,KG,456950.00
28326,12/31/2024,SINGAPORE-WAREHOUSE,MAT-0191,K8761,3000,KG,4218000.00


Inspecting SCRAP

In [77]:
scrap_df = inventory_df[
    inventory_df['BATCH_NUMBER'] == 'SCRAP'
]
scrap_df

Unnamed: 0,BALANCE_AS_OF_DATE,PLANT_NAME,MATERIAL_NAME,BATCH_NUMBER,UNRESRICTED_STOCK,STOCK_UNIT,STOCK_SELL_VALUE,CURRENCY
0,12/31/2023,CHINA-WAREHOUSE,MAT-0045,SCRAP,164,KG,211,CNY
5,12/31/2023,CHINA-WAREHOUSE,MAT-0311,SCRAP,4142,KG,25018,CNY
32,12/31/2023,CHINA-WAREHOUSE,MAT-0194,SCRAP,100,KG,314,CNY
33,12/31/2023,CHINA-WAREHOUSE,MAT-0257,SCRAP,5151,KG,10457,CNY
83,12/31/2023,CHINA-WAREHOUSE,MAT-0258,SCRAP,10048,KG,58879,CNY
...,...,...,...,...,...,...,...,...
14681,12/31/2024,CHINA-WAREHOUSE,MAT-0187,SCRAP,544,KG,506,CNY
14828,12/31/2024,CHINA-WAREHOUSE,MAT-0214,SCRAP,535,KG,3439,CNY
14834,12/31/2024,CHINA-WAREHOUSE,MAT-0300,SCRAP,1300,KG,4082,CNY
15269,12/31/2024,CHINA-WAREHOUSE,MAT-0280,SCRAP,11,KG,72,CNY


In [78]:
total_scrap_usd = inventory_usd[inventory_usd['BATCH_NUMBER'] == 'SCRAP']['STOCK_SELL_USD'].sum()

print(f"Total SCRAP sell value (USD): {total_scrap_usd:,.2f}")

total_not_scrap_usd = inventory_usd[inventory_usd['BATCH_NUMBER'] != 'SCRAP']['STOCK_SELL_USD'].sum()
print(f"Total non-SCRAP sell value (USD): {total_not_scrap_usd:,.2f}")

# Percentage of total inventory value that is SCRAP
scrap_percentage = (total_scrap_usd / (total_scrap_usd + total_not_scrap_usd) * 100) if (total_scrap_usd + total_not_scrap_usd) else 0
print(f"Percentage of total inventory value that is SCRAP: {scrap_percentage:.2f}%")




Total SCRAP sell value (USD): 590,259.18
Total non-SCRAP sell value (USD): 1,902,313,204.10
Percentage of total inventory value that is SCRAP: 0.03%


Since it's only 0.03 percent of the total value we can drop all rows with SCRAP

In [79]:
inventory_cleaned_usd = inventory_usd[
    inventory_usd['BATCH_NUMBER'] != 'SCRAP'
].copy()

inventory_cleaned_usd.reset_index(drop=True, inplace=True)
inventory_cleaned_usd.head()

print("Checking null values in cleaned Inventory DataFrame:")

print("----------------------------")
inventory_cleaned_usd = inventory_cleaned_usd.dropna(
    subset=['BATCH_NUMBER']
).reset_index(drop=True)

print(inventory_cleaned_usd.isnull().sum())


Checking null values in cleaned Inventory DataFrame:
----------------------------
BALANCE_AS_OF_DATE    0
PLANT_NAME            0
MATERIAL_NAME         0
BATCH_NUMBER          0
UNRESRICTED_STOCK     0
STOCK_UNIT            0
STOCK_SELL_USD        0
dtype: int64


## Clean Duplicate Rows

In [80]:
# duplicates = inventory_cleaned_usd.duplicated()
# print(f"Number of duplicate rows: {duplicates.sum()}")

# print(f"Number of rows before removing duplicates: {inventory_cleaned_usd.shape[0]}")

# inventory_cleaned_usd = inventory_cleaned_usd.drop_duplicates().reset_index(drop=True)
# print(f"Number of rows after removing duplicates: {inventory_cleaned_usd.shape[0]}")

In [81]:
inventory_cleaned_usd.head()

# Convert BALANCE_AS_OF_DATE to datetime if not already
inventory_cleaned_usd['BALANCE_AS_OF_DATE'] = pd.to_datetime(
    inventory_cleaned_usd['BALANCE_AS_OF_DATE'], errors='coerce'
)

inventory_cleaned_usd['month'] = (
    inventory_cleaned_usd['BALANCE_AS_OF_DATE']
    .dt.to_period('M')
    .astype(str)
)

# Quick check
inventory_cleaned_usd[['BALANCE_AS_OF_DATE','month']].head()


# 1) Keep only Singapore warehouse
sg_inventory = inventory_cleaned_usd[
    inventory_cleaned_usd['PLANT_NAME'] == 'SINGAPORE-WAREHOUSE'
].copy()

# 2) Aggregate unrestricted stock (in MT) by material and month
monthly_stock = (
    sg_inventory
      .groupby(['month','PLANT_NAME','MATERIAL_NAME'], as_index=False)
      .agg(InitialInv_MT=('UNRESRICTED_STOCK','sum'))
      .sort_values(['month','MATERIAL_NAME'])
)

# 3) Inspect the result
monthly_stock.head()

monthly_stock

Unnamed: 0,month,PLANT_NAME,MATERIAL_NAME,InitialInv_MT
0,2023-12,SINGAPORE-WAREHOUSE,MAT-0005,15000
1,2023-12,SINGAPORE-WAREHOUSE,MAT-0007,13400
2,2023-12,SINGAPORE-WAREHOUSE,MAT-0015,1007500
3,2023-12,SINGAPORE-WAREHOUSE,MAT-0022,6875
4,2023-12,SINGAPORE-WAREHOUSE,MAT-0028,19250
...,...,...,...,...
2230,2024-12,SINGAPORE-WAREHOUSE,MAT-0423,197900
2231,2024-12,SINGAPORE-WAREHOUSE,MAT-0424,630750
2232,2024-12,SINGAPORE-WAREHOUSE,MAT-0426,2397000
2233,2024-12,SINGAPORE-WAREHOUSE,MAT-0428,264950


In [82]:

forcast = pd.read_csv("30June_EM_2025_HackathonData/Monthly_Forecast_Summary.csv")
# 2) Prepare inbound events
sg_inb = (
    inbound_df[inbound_df['PLANT_NAME']=='SINGAPORE-WAREHOUSE']
    .assign(
        INBOUND_DATE=lambda df: pd.to_datetime(
            df['INBOUND_DATE'], format='%Y/%m/%d', errors='coerce'
        )
    )
    .sort_values(['MATERIAL_NAME','INBOUND_DATE'])
)

sg_inb.head()

sg_inb_daily = (
    sg_inb
    .groupby(['INBOUND_DATE','MATERIAL_NAME'], as_index=False)
    .agg(InboundQty_MT=('NET_QUANTITY_MT','sum'))
    .sort_values(['INBOUND_DATE','MATERIAL_NAME'])
)

sg_inb_daily.head()

Unnamed: 0,INBOUND_DATE,MATERIAL_NAME,InboundQty_MT
0,2023-11-07,MAT-0085,23.375
1,2023-11-07,MAT-0116,46.75
2,2023-11-07,MAT-0309,49.5
3,2023-11-13,MAT-0273,24.75
4,2023-11-13,MAT-0354,24.75


In [83]:
months = sg_inventory['month']

mats = sg_inventory['MATERIAL_NAME'].unique()


# cal_frames = []
# for pr in months:
#     period = pd.Period(pr, freq='M')
#     first = period.to_timestamp()
#     last  = period.to_timestamp('M')
#     dates = pd.date_range(first, last, freq='D')
#     for mat in mats:
#         cal_frames.append(pd.DataFrame({'date': dates, 'MATERIAL_NAME': mat}))
# calendar = pd.concat(cal_frames,ignore_index=True)
calendar = pd.read_csv("30June_EM_2025_HackathonData/daily_calendar.csv")

In [84]:
sg_inb_daily = sg_inb_daily.rename(
    columns={'INBOUND_DATE':'date'}
)

# Ensure 'date' column in calendar is datetime for merge
calendar['date'] = pd.to_datetime(calendar['date'], errors='coerce')

# 2) Merge calendar with inbound, filling missing with 0
calendar = (
    calendar
    .merge(
        sg_inb_daily[['date','MATERIAL_NAME','InboundQty_MT']],
        on=['date','MATERIAL_NAME'],
        how='left'
    )
    .fillna({'InboundQty_MT': 0})
)


# 4) Merge `calendar` with shelf life from material_master_df
calendar = calendar.merge(
    material_master_df[['MATERIAL_NAME','SHELF_LIFE_IN_MONTH']],
    on='MATERIAL_NAME',
    how='left'
)

calendar.head() 

Unnamed: 0,date,MATERIAL_NAME,InboundQty_MT_x,InboundQty_MT_y,SHELF_LIFE_IN_MONTH_x,InboundQty_MT,SHELF_LIFE_IN_MONTH_y
0,2023-12-01,MAT-0130,0.0,,8,0.0,8
1,2023-12-02,MAT-0130,0.0,,8,0.0,8
2,2023-12-03,MAT-0130,0.0,,8,0.0,8
3,2023-12-04,MAT-0130,0.0,,8,0.0,8
4,2023-12-05,MAT-0130,0.0,,8,0.0,8


In [85]:
# Convert 'Month' from "YYYY-MM" to month number 1–12
forcast['Month'] = (
    pd.to_datetime(forcast['Month'], format='%Y-%m', errors='coerce')
      .dt.month
)

# Quick check
forcast_sg = forcast[forcast['Warehouse'] == 'SINGAPORE']
forcast_sg.head()

Unnamed: 0,Warehouse,Month,Total_Cap_KT,Predicted_Outbound_KT,Predicted_Inventory_KT
0,SINGAPORE,1,53.5,8.71,39.581
1,SINGAPORE,2,53.5,10.95,36.763
2,SINGAPORE,3,53.5,9.36,35.049
3,SINGAPORE,4,53.5,7.52,35.663
4,SINGAPORE,5,53.5,10.74,36.374


In [86]:
calendar.drop(columns=["InboundQty_MT_x"], inplace=True)
calendar.drop(columns=["InboundQty_MT_y"], inplace=True)

In [87]:
calendar

Unnamed: 0,date,MATERIAL_NAME,SHELF_LIFE_IN_MONTH_x,InboundQty_MT,SHELF_LIFE_IN_MONTH_y
0,2023-12-01,MAT-0130,8,0.0,8
1,2023-12-02,MAT-0130,8,0.0,8
2,2023-12-03,MAT-0130,8,0.0,8
3,2023-12-04,MAT-0130,8,0.0,8
4,2023-12-05,MAT-0130,8,0.0,8
...,...,...,...,...,...
81922281,2024-11-12,MAT-0012,2,0.0,2
81922282,2024-11-13,MAT-0012,2,0.0,2
81922283,2024-11-14,MAT-0012,2,247.5,2
81922284,2024-11-15,MAT-0012,2,0.0,2


In [88]:
calendar['Month'] = calendar['date'].dt.month

In [89]:
calendar

Unnamed: 0,date,MATERIAL_NAME,SHELF_LIFE_IN_MONTH_x,InboundQty_MT,SHELF_LIFE_IN_MONTH_y,Month
0,2023-12-01,MAT-0130,8,0.0,8,12
1,2023-12-02,MAT-0130,8,0.0,8,12
2,2023-12-03,MAT-0130,8,0.0,8,12
3,2023-12-04,MAT-0130,8,0.0,8,12
4,2023-12-05,MAT-0130,8,0.0,8,12
...,...,...,...,...,...,...
81922281,2024-11-12,MAT-0012,2,0.0,2,11
81922282,2024-11-13,MAT-0012,2,0.0,2,11
81922283,2024-11-14,MAT-0012,2,247.5,2,11
81922284,2024-11-15,MAT-0012,2,0.0,2,11


In [90]:
calendar

Unnamed: 0,date,MATERIAL_NAME,SHELF_LIFE_IN_MONTH_x,InboundQty_MT,SHELF_LIFE_IN_MONTH_y,Month
0,2023-12-01,MAT-0130,8,0.0,8,12
1,2023-12-02,MAT-0130,8,0.0,8,12
2,2023-12-03,MAT-0130,8,0.0,8,12
3,2023-12-04,MAT-0130,8,0.0,8,12
4,2023-12-05,MAT-0130,8,0.0,8,12
...,...,...,...,...,...,...
81922281,2024-11-12,MAT-0012,2,0.0,2,11
81922282,2024-11-13,MAT-0012,2,0.0,2,11
81922283,2024-11-14,MAT-0012,2,247.5,2,11
81922284,2024-11-15,MAT-0012,2,0.0,2,11


In [91]:


forcast_sg = (
    pd.read_csv("30June_EM_2025_HackathonData/Monthly_Forecast_Summary.csv")
      .query("Warehouse=='SINGAPORE'")
      .assign(
          Month=lambda df: pd.to_datetime(df['Month'],
                                          format='%Y-%m',
                                          errors='coerce').dt.month,
          Predicted_Outbound_MT=lambda df: df['Predicted_Outbound_KT'] * 1000
      )
      [['Month','Predicted_Outbound_MT']]
)

forcast_sg.head()

# 3) calendar needs the same key
calendar['Month'] = calendar['date'].dt.month

# 4) merge
calendar = calendar.merge(
    forcast_sg,
    on='Month',
    how='left'
)

# calendar

In [92]:
calendar

Unnamed: 0,date,MATERIAL_NAME,SHELF_LIFE_IN_MONTH_x,InboundQty_MT,SHELF_LIFE_IN_MONTH_y,Month,Predicted_Outbound_MT
0,2023-12-01,MAT-0130,8,0.0,8,12,13970.0
1,2023-12-02,MAT-0130,8,0.0,8,12,13970.0
2,2023-12-03,MAT-0130,8,0.0,8,12,13970.0
3,2023-12-04,MAT-0130,8,0.0,8,12,13970.0
4,2023-12-05,MAT-0130,8,0.0,8,12,13970.0
...,...,...,...,...,...,...,...
81922281,2024-11-12,MAT-0012,2,0.0,2,11,16470.0
81922282,2024-11-13,MAT-0012,2,0.0,2,11,16470.0
81922283,2024-11-14,MAT-0012,2,247.5,2,11,16470.0
81922284,2024-11-15,MAT-0012,2,0.0,2,11,16470.0


In [None]:


# # 2) Flag last day of each month
calendar['is_last'] = calendar['date'].dt.is_month_end




In [94]:
calendar

Unnamed: 0,date,MATERIAL_NAME,SHELF_LIFE_IN_MONTH_x,InboundQty_MT,SHELF_LIFE_IN_MONTH_y,Month,Predicted_Outbound_MT,is_last
0,2023-12-01,MAT-0130,8,0.0,8,12,13970.0,False
1,2023-12-02,MAT-0130,8,0.0,8,12,13970.0,False
2,2023-12-03,MAT-0130,8,0.0,8,12,13970.0,False
3,2023-12-04,MAT-0130,8,0.0,8,12,13970.0,False
4,2023-12-05,MAT-0130,8,0.0,8,12,13970.0,False
...,...,...,...,...,...,...,...,...
81922281,2024-11-12,MAT-0012,2,0.0,2,11,16470.0,False
81922282,2024-11-13,MAT-0012,2,0.0,2,11,16470.0,False
81922283,2024-11-14,MAT-0012,2,247.5,2,11,16470.0,False
81922284,2024-11-15,MAT-0012,2,0.0,2,11,16470.0,False


KeyError: 'Predicted_Inventory_MT'

In [96]:
forcast_sg

Unnamed: 0,Month,Predicted_Outbound_MT
0,1,8710.0
1,2,10950.0
2,3,9360.0
3,4,7520.0
4,5,10740.0
5,6,9440.0
6,7,11380.0
7,8,11330.0
8,9,16700.0
9,10,20980.0
