In [None]:
%pip install xlrd openpyxl scikit-learn tqdm torchviz pandas plotly numpy nbformat tqdm torchvision

In [None]:
import os

import numpy as np
import pandas as pd
import plotly.express as px
import sklearn as sk
import torch
import torch.amp as amp
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
import tqdm as tqdm
from torch.profiler import profile, record_function, ProfilerActivity
from torch.utils.data import DataLoader

# Show the installed NumPy version as the cell output (environment sanity check).
np.version.full_version

In [None]:
# Parse the workbook once: passing a list of sheet names makes read_excel
# return a dict {sheet name -> DataFrame} instead of re-opening the file
# for every sheet.
DATA_FILE = 'data.xlsx'
sheets = pd.read_excel(
    DATA_FILE,
    sheet_name=[
        "HOPE PRODUCTION",
        "HOPE STORAGE AFTER COOKING",
        "FAITH PRODUCTION",
        "FAITH STORAGE AFTER COOKING",
        "HOPE-FAITH PACKAGE WEIGHTS",
    ],
)

hope_production_df = sheets["HOPE PRODUCTION"]
hope_storage_df = sheets["HOPE STORAGE AFTER COOKING"]

faith_production_df = sheets["FAITH PRODUCTION"]
faith_storage_df = sheets["FAITH STORAGE AFTER COOKING"]

# Columns without a header come back from pandas as "Unnamed: N"; rename them
# to "Sample 1", "Sample 2", ... in their original left-to-right order.
# str() keeps the check from raising on non-string headers (numbers, dates).
package_weights_raw = sheets["HOPE-FAITH PACKAGE WEIGHTS"]
unnamed_columns = [col for col in package_weights_raw.columns if "Unnamed" in str(col)]
hope_faith_df = package_weights_raw.rename(
    columns={col: f"Sample {i + 1}" for i, col in enumerate(unnamed_columns)}
)

hope_faith_df.columns

In [None]:
# Shelf life in days, added to the storage date to estimate the expiry date.
HOPE_SHELF_LIFE = 28
FAITH_SHELF_LIFE = 30


def _merge_batches(storage_df, production_df, product_code, shelf_life_days):
    """Join one product line's storage and production records.

    Rows are matched (inner join) on batch number and on the storage-in date
    being equal to the production date. The result gets a constant "PRODUCT"
    column and an "ESTIMATED EXPIRY" column equal to "BATCH INTO STORAGE"
    plus ``shelf_life_days`` days.
    """
    batches = storage_df.merge(
        production_df,
        left_on=["BATCH no.", "BATCH INTO STORAGE"],
        right_on=["BATCH no.", "PRODUCTION DATE"],
    )
    batches["PRODUCT"] = product_code
    batches["ESTIMATED EXPIRY"] = batches["BATCH INTO STORAGE"] + pd.Timedelta(days=shelf_life_days)
    return batches


def _attach_package_weights(batches_df):
    """Inner-join batches with the package-weight rows of the same product and expiry date.

    Batches whose estimated expiry has no matching "EXPIRY DATE" row in
    ``hope_faith_df`` are dropped by the inner join.
    """
    return batches_df.merge(
        hope_faith_df,
        left_on=["ESTIMATED EXPIRY", "PRODUCT"],
        right_on=["EXPIRY DATE", "PRODUCT"],
    )


# The numeric codes are the values matched against the "PRODUCT" column of the
# package-weights sheet in the join above.
hope_df = _merge_batches(hope_storage_df, hope_production_df, 5409, HOPE_SHELF_LIFE)
faith_df = _merge_batches(faith_storage_df, faith_production_df, 5030, FAITH_SHELF_LIFE)

hope_pre_df = _attach_package_weights(hope_df)
faith_pre_df = _attach_package_weights(faith_df)

# ignore_index gives the stacked frame a unique 0..n-1 index; without it both
# halves keep their own 0-based labels and label-based operations are ambiguous.
pre_df = pd.concat([hope_pre_df, faith_pre_df], ignore_index=True)
display(pre_df.columns)
# TODO: ONE PACKAGE HAS MORE THAN ONE PRODUCT
# Batch weight is in kg and the average package weight in g, hence the 1000x.
pre_df["PRODUCT AMOUNT EXPECTED"] = (
    1000 * pre_df["BATCH WEIGHT (kg) AFTER COOKING"] / pre_df["AVERAGE WEIGHT (g)"]
)

pre_df

In [5]:
INPUT_SIZE = 5
OUTPUT_SIZE = 1

class Net(nn.Module):
    """Small fully connected network: Linear -> ReLU -> Linear -> LeakyReLU -> Linear.

    Maps inputs with INPUT_SIZE features to OUTPUT_SIZE outputs through two
    hidden layers of width 7.

    NOTE: a later cell redefines ``Net`` together with INPUT_SIZE/OUTPUT_SIZE;
    that later definition is the one instantiated for training.
    """

    def __init__(self):
        # nn.Module.__init__ takes no positional arguments; passing `self`
        # explicitly (as the previous version did) raises TypeError.
        super().__init__()
        hidden = 7
        self.l1 = nn.Linear(INPUT_SIZE, hidden)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden, hidden)
        self.lrelu = nn.LeakyReLU()
        self.l3 = nn.Linear(hidden, OUTPUT_SIZE)

    def forward(self, x):
        """Run ``x`` (last dimension INPUT_SIZE) through the three layers."""
        out = self.relu(self.l1(x))
        out = self.lrelu(self.l2(out))
        return self.l3(out)

In [None]:
# Per-row population standard deviation (ddof=0, same as np.std) of the
# "Sample N" weight columns, ignoring missing samples. Computed in one
# vectorised call instead of a Python loop over every row and column.
sample_cols = [col for col in pre_df.columns if "Sample" in col]
sample_values = pre_df[sample_cols].to_numpy(dtype=float)

# Kept as a plain list so that assigning it to a DataFrame column later is
# positional rather than aligned on index labels. Rows with no samples at all
# yield NaN (with a RuntimeWarning).
deviations = np.nanstd(sample_values, axis=1).tolist()

len(deviations)

In [16]:
from sklearn import preprocessing


def _min_max_scaled(column_name):
    """Return ``pre_df[column_name]`` min-max scaled, as an (n, 1) array."""
    as_column = pre_df[column_name].values.reshape(-1, 1)
    return preprocessing.MinMaxScaler().fit_transform(as_column)


le = preprocessing.LabelEncoder()
df = pd.DataFrame()

# Product codes -> consecutive integer labels.
df['product'] = le.fit_transform(pre_df['PRODUCT'])

# Storage duration expressed in (fractional) days.
storage_durations = (pre_df["BATCH OUT OF STORAGE"] - pre_df["BATCH INTO STORAGE"]).values
df['time_in_storage'] = [duration / np.timedelta64(1, 'D') for duration in storage_durations]

# Weekday of storage entry, divided by the largest weekday present in the data.
df['weekday'] = [stamp.weekday() for stamp in pre_df["BATCH INTO STORAGE"]]
largest_weekday = max(df['weekday'])
df['weekday'] = df['weekday'] / largest_weekday

# feature name -> source column in pre_df; each is scaled independently.
scaled_features = {
    'input_amount': "BATCH WEIGHT (kg) BEFORE COOKING",
    'cooking_out__storage_in': "BATCH WEIGHT (kg) AFTER COOKING",
    'storage_out__packaging_in': "BATCH WEIGHT LEAVING STORAGE (KG)",
    'product_amount_expected': "PRODUCT AMOUNT EXPECTED",
    'mean': "AVERAGE WEIGHT (g)",
}
for feature_name, source_column in scaled_features.items():
    df[feature_name] = _min_max_scaled(source_column)

# Per-row sample standard deviations, left unscaled.
df['stdev'] = deviations

df

Unnamed: 0,product,time_in_storage,weekday,input_amount,cooking_out__storage_in,storage_out__packaging_in,product_amount_expected,mean,stdev
0,1,0.0,0.500000,0.653935,0.610111,0.610111,0.752764,0.031817,3.952847
1,1,0.0,0.500000,0.653935,0.610111,0.610111,0.759485,0.013522,3.160888
2,1,0.0,0.500000,0.655252,0.609163,0.609163,0.751597,0.031817,3.952847
3,1,0.0,0.500000,0.655252,0.609163,0.609163,0.758308,0.013522,3.160888
4,1,0.0,0.500000,0.656569,0.606951,0.606951,0.748874,0.031817,3.952847
...,...,...,...,...,...,...,...,...,...
34714,0,3.0,0.666667,0.828449,0.812954,0.812954,0.695072,0.945912,3.673703
34715,0,3.0,0.666667,0.818900,0.807899,0.807899,0.699840,0.907254,6.415238
34716,0,3.0,0.666667,0.818900,0.807899,0.807899,0.690750,0.945912,3.673703
34717,0,2.0,0.666667,0.540665,0.527330,0.527330,0.456813,0.907254,6.415238


In [8]:
class Data(torch.utils.data.Dataset):
    """In-memory dataset that splits a feature frame into input/target tensors.

    Columns named in ``target`` become the target tensor (in frame column
    order); every other column becomes an input feature. Both tensors are
    float32 and are created directly on ``device``.

    Args:
        frame: DataFrame with numeric columns. Defaults to the notebook-level
            ``df`` so that ``Data()`` keeps working.
        target: Names of the target columns. Defaults to the three batch-weight
            columns.
        device: Device on which the tensors are allocated (default ``'cuda'``).
    """

    def __init__(self, frame=None, target=None, device='cuda'):
        self.data = df if frame is None else frame
        if target is None:
            target = ["input_amount", "cooking_out__storage_in", "storage_out__packaging_in"]
        self.target = list(target)

        # Convert the frame once and slice by a boolean column mask instead of
        # walking every cell in Python; column order is preserved.
        values = self.data.to_numpy(dtype="float32")
        is_target = self.data.columns.isin(self.target)

        self.x_tensor = torch.tensor(values[:, ~is_target], dtype=torch.float32, device=device)
        self.y_tensor = torch.tensor(values[:, is_target], dtype=torch.float32, device=device)

    def __len__(self) -> int:
        """Number of rows in the underlying frame."""
        return len(self.data)

    def __getitem__(self, index) -> tuple[torch.Tensor, torch.Tensor]:
        """Return the ``(inputs, targets)`` pair at ``index``."""
        return self.x_tensor[index], self.y_tensor[index]
        
# Enable the cuDNN autotuner flag.
# NOTE(review): the networks in this notebook are built from nn.Linear layers
# only, so this flag may have no practical effect here - confirm.
torch.backends.cudnn.benchmark = True

# Build the dataset from the notebook-level feature frame `df`.
data = Data()

def loader(data, batch_size=64, shuffle=True):
    """Wrap a dataset in a ``DataLoader``.

    Args:
        data: Any map-style dataset (or sequence) accepted by ``DataLoader``.
        batch_size: Number of samples per batch (default 64).
        shuffle: Whether to reshuffle the samples every epoch (default True).

    Returns:
        A ``DataLoader`` over ``data``.
    """
    return DataLoader(
        data,
        batch_size=batch_size,
        shuffle=shuffle,
    )

In [None]:
# Layer sizes are taken from the first sample of the dataset.
sample_x, sample_y = data[0]
INPUT_SIZE = len(sample_x)
OUTPUT_SIZE = len(sample_y)
INTERNAL = 16

class Net(nn.Module):
    """Five-layer fully connected regressor.

    Layout: Linear -> LeakyReLU -> Linear -> ReLU -> Linear -> SiLU ->
    Linear -> ReLU -> Linear, with every hidden layer INTERNAL units wide.
    """

    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(INPUT_SIZE, INTERNAL)
        self.lrelu = nn.LeakyReLU()
        self.l2 = nn.Linear(INTERNAL, INTERNAL)
        self.relu = nn.ReLU()
        self.l3 = nn.Linear(INTERNAL, INTERNAL)
        self.silu = nn.SiLU()
        self.l4 = nn.Linear(INTERNAL, INTERNAL)
        self.relu2 = nn.ReLU()
        self.l5 = nn.Linear(INTERNAL, OUTPUT_SIZE)

    def forward(self, x):
        """Apply the layers in order and return the un-activated output of ``l5``."""
        hidden = self.lrelu(self.l1(x))
        hidden = self.relu(self.l2(hidden))
        hidden = self.silu(self.l3(hidden))
        hidden = self.relu2(self.l4(hidden))
        return self.l5(hidden)

# Cell output: the width of every layer boundary, input to output.
INPUT_SIZE, INTERNAL, INTERNAL, INTERNAL, OUTPUT_SIZE

In [None]:
# Seed the split so the same rows land in train/test on every run; an unseeded
# random_split produces a different split (and different test losses) each time.
SPLIT_SEED = 42
train_dataset, test_dataset = torch.utils.data.random_split(
    data,
    [.8, .2],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
# Only the training subset is wrapped in a DataLoader (batched and shuffled);
# `test_dataset` stays a plain subset, so iterating it yields single samples.
train_dataset = loader(train_dataset)
display(train_dataset)
display(test_dataset)

In [None]:
# Device used for the model; the dataset tensors are also created on 'cuda',
# so the notebook as written requires a CUDA-capable GPU.
device = torch.device('cuda')
device.type

In [12]:
# Training configuration: number of passes over the training loader.
epochs = 22

# Model, loss and optimiser.
model = Net().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Gradient scaler used together with autocast in the training loop.
scaler = amp.GradScaler(device.type)

In [None]:
# torch.save does not create missing directories, so make sure the checkpoint
# folder exists before the first periodic save (which happens at epoch 0).
os.makedirs("out2", exist_ok=True)

for i in tqdm.tqdm(range(epochs)):
    # Batches are not moved to a device here: the dataset already stores its
    # tensors on the GPU.
    for x_train, y_train in train_dataset:
        # Forward pass and loss under mixed precision.
        with torch.amp.autocast(device_type='cuda'):
            y_pred = model(x_train)
            cost = criterion(y_pred, y_train)

        # Scaled backward pass; the scaler unscales before the optimizer step
        # and then adjusts its scale factor.
        optimizer.zero_grad(set_to_none=True)
        scaler.scale(cost).backward()
        scaler.step(optimizer)
        scaler.update()
    # Checkpoint the whole model every 10th epoch (0, 10, 20, ...).
    if i % 10 == 0:
        torch.save(model, f"out2/model-{i}.pt")

# Final model, pickled as a full module (not just a state_dict).
torch.save(model, "model.pt")

In [None]:
# 'model.pt' holds a fully pickled nn.Module (written with torch.save(model, ...)),
# so it must be loaded with weights_only=False; recent PyTorch versions default
# to weights_only=True and refuse such files.
# SECURITY: unpickling executes arbitrary code - only load files you created.
model = torch.load('model.pt', map_location=device, weights_only=False)
model.eval()

losses = []
with torch.no_grad():
    # `test_dataset` is not wrapped in a DataLoader, so each iteration handles
    # one sample and `losses` holds one MSE value per test sample.
    for x, y in test_dataset:
        x = x.to(device)
        y = y.to(device)
        y_pred = model(x)
        loss = criterion(y_pred, y)
        # Store plain Python floats so the DataFrame column is numeric rather
        # than a column of 0-dim tensors.
        losses.append(loss.item())

loss_df = pd.DataFrame(dict(losses=losses))

fig = px.scatter(loss_df, x="losses")
fig.show()

In [None]:
# Fraction of test losses that exceed 0.1.
sum(1 for loss_value in losses if loss_value > 0.1) / len(losses)