In [2]:
# 3_data_transformation.ipynb

import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [3]:

# Load validated raw data
df = pd.read_csv("supply_chain_dataset.csv", parse_dates=["date"])

In [4]:

# Set index to date
df.set_index("date", inplace=True)

In [5]:

# Optional: resample to weekly (you can change to 'M' for monthly)
df = df.groupby(["SKU", pd.Grouper(freq="W")]).agg({
    "Number of products sold": "sum",
    "Revenue generated": "sum",
    "Price": "mean",
    "Stock levels": "mean",
    "Lead times": "mean",
    "Order quantities": "sum",
    "Shipping times": "mean",
    "Shipping costs": "mean",
    "Production volumes": "mean",
    "Manufacturing costs": "mean",
    "Defect rates": "mean",
    "Costs": "mean",
    "Product type": "first",
    "Availability": "first",
    "Customer demographics": "first",
    "Shipping carriers": "first",
    "Supplier name": "first",
    "Location": "first",
    "Transportation modes": "first",
    "Routes": "first"
}).reset_index()

In [6]:

# Encode categorical features
cat_cols = ["Product type", "Availability", "Customer demographics",
            "Shipping carriers", "Supplier name", "Location", 
            "Transportation modes", "Routes"]

In [7]:

le_dict = {}
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    le_dict[col] = le  # Save for reverse mapping if needed

In [8]:

# Normalize numerical features (except date, SKU, target column)
num_cols = df.select_dtypes(include=['float64', 'int64']).columns.difference(['Number of products sold'])
scaler = StandardScaler()
df[num_cols] = scaler.fit_transform(df[num_cols])

In [10]:

# Save transformed data
df.to_csv("transformed_supply_chain.csv", index=False)
df.head()


Unnamed: 0,SKU,date,Number of products sold,Revenue generated,Price,Stock levels,Lead times,Order quantities,Shipping times,Shipping costs,...,Defect rates,Costs,Product type,Availability,Customer demographics,Shipping carriers,Supplier name,Location,Transportation modes,Routes
0,SKU_001,2022-01-02,21,-2.947519,-0.69059,2.359389,-1.43899,-2.650958,0.974828,-1.176731,...,-3.143542,-0.363953,1.241973,-0.931841,0.016498,-1.1334,1.18772,0.486528,-1.156173,-1.142696
1,SKU_001,2022-01-09,78,0.867567,0.630588,-0.405358,-0.847007,-0.780169,-0.32302,1.801566,...,-1.616511,-0.535918,1.241973,1.073145,-1.22085,0.076021,-0.039747,1.379631,1.311269,0.03824
2,SKU_001,2022-01-16,73,1.346456,1.413351,-0.874779,-0.156359,0.30691,1.840059,0.527652,...,-0.477751,-0.386537,-1.190996,-0.931841,1.253845,-1.1334,-1.267213,0.486528,1.311269,1.219176
3,SKU_001,2022-01-23,78,1.163567,1.105818,1.106451,-0.649679,-0.236629,-1.765072,-0.925504,...,0.305146,-0.484922,1.241973,1.073145,-1.22085,0.076021,1.18772,-1.299677,0.077548,0.03824
4,SKU_001,2022-01-30,66,0.864244,1.100155,-0.294906,0.928945,-0.223989,0.253801,0.591606,...,0.095441,-0.111748,1.241973,-0.931841,-1.22085,0.076021,1.18772,0.486528,-1.156173,-1.142696
