In [3]:
import os, sys
sys.path.append("../../../")

from src.core.module import Module, Linear, LayerNorm
from src.core.losses import CrossEntropy, BCE
from src.core.optim import Standard, AdamW
from src.core.tensor import Tensor
from src.utils.lr_scheduler import LRScheduler
import numpy as np
import time
from typing import List
# from src.tokenizer.tokenizer import Tokenizer
import pandas as pd

In [4]:
class Net(Module):
    """Small fully connected network: three hidden linear layers plus a linear output layer.

    Every hidden layer is followed by GELU, dropout with p=0.1 and layer
    normalisation over the last axis. One layer-norm module (``self.ln``) is
    reused after all three hidden layers.
    """

    def __init__(self, in_features=7, hidden=28, out_features=1):
        """Create the layers.

        Args:
            in_features: Input width of the first linear layer. Defaults to 7.
            hidden: Width of the three hidden layers. Defaults to 28.
            out_features: Output width of the last linear layer. Defaults to 1.

        The defaults reproduce the previously hard-coded 7 -> 28 -> 28 -> 28 -> 1
        layout, so ``Net()`` builds the same layers as before.
        """
        super().__init__()
        self.fc1 = self.linear(in_features, hidden, name="fc1")
        self.fc2 = self.linear(hidden, hidden, name="fc2")
        self.fc3 = self.linear(hidden, hidden, name="fc3")
        self.fc4 = self.linear(hidden, out_features, name="fc4")
        self.ln = self.layer_norm(axis=-1)

    def forward(self, x):
        """Run ``x`` through the hidden stack and return the output of ``fc4``.

        No activation is applied after ``fc4``; the raw output of the last
        linear layer is returned.
        """
        # Same order for every hidden layer: linear -> GELU -> dropout -> layer norm.
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = hidden_layer(x)
            x = self.gelu(x)
            x = self.dropout(x, p=0.1)
            x = self.ln(x)
        return self.fc4(x)

    def train(self, x: Tensor, y: Tensor, optimizer, num_epochs=100, log_every=10):
        """Optimise the network on ``(x, y)`` for ``num_epochs`` iterations.

        Each iteration runs a forward pass on the whole of ``x``, computes
        ``BCE(y_hat, y)``, back-propagates, steps the optimizer and then clears
        the gradients.

        Args:
            x: Input tensor passed to ``forward``.
            y: Target tensor passed to ``BCE`` together with the prediction.
            optimizer: Object exposing ``step()`` and ``zero_grad()``.
            num_epochs: Number of iterations to run.
            log_every: Print the loss whenever ``epoch % log_every == 0``.
                Defaults to 10 (the previous fixed interval); a falsy value
                such as 0 or None disables printing.

        NOTE(review): if ``Module`` defines its own ``train`` method, this one
        overrides it - confirm that is intended.
        """
        for epoch in range(num_epochs):
            y_hat = self.forward(x)
            loss = BCE(y_hat, y)
            loss.backward()

            optimizer.step()
            optimizer.zero_grad()

            # The falsy check also protects against a modulo-by-zero when log_every is 0.
            if log_every and epoch % log_every == 0:
                print(f"Epoch {epoch}, Loss: {loss.data}")





# Rows [0, TRAIN_SIZE) go to X / y; all remaining rows go to X_test / y_test.
TRAIN_SIZE = 128

df = pd.read_csv("../../../src/experiments/data.csv")
# Encode the label column in place: "Good" -> 1, any other value -> 0.
df['Quality'] = df['Quality'].apply(lambda x: 1 if x == "Good" else 0)

# Build the feature matrix and the (n, 1) label column once, then slice both
# splits from them instead of recomputing the arrays for each split.
features = np.array(df.drop('Quality', axis=1).values)
labels = np.array(df['Quality'].values).reshape((-1, 1))

X = Tensor(features)[:TRAIN_SIZE]
y = Tensor(labels)[:TRAIN_SIZE]

X_test = Tensor(features)[TRAIN_SIZE:]
y_test = Tensor(labels)[TRAIN_SIZE:]

In [16]:
# Instantiate the model and build it from the shape of the training inputs
# before asking it for its parameters.
net = Net()
net._build(X.shape)

# The scheduler object is passed to AdamW as its `lr` argument.
scheduler = LRScheduler(
    warmup_steps=1000,
    total_steps=10000,
    min_lr=1e-5,
    max_lr=3e-4,
    final_lr=1e-6,
)
optimizer = AdamW(
    net.parameters(),
    lr=scheduler,
    clip_norm=100.0,
)

# Train on (X, y); Net.train prints the loss every 10th epoch.
net.train(X, y, optimizer, num_epochs=1000)
# Dump the trained parameters.
print(net.parameters())

# Write the checkpoint; the optimizer is handed over together with the target path.
net.save_checkpoint(optimizer, "../../../checkpoints/simple_linear_model")

Epoch 0, Loss: 1.7659166828109965
Epoch 10, Loss: 1.6682216533710454
Epoch 20, Loss: 1.8326242163770514
Epoch 30, Loss: 1.7823999999996616
Epoch 40, Loss: 1.6287984907379416
Epoch 50, Loss: 1.5960633291754411
Epoch 60, Loss: 1.5847815133471612
Epoch 70, Loss: 1.5636408686151333
Epoch 80, Loss: 1.5888730730459724
Epoch 90, Loss: 1.4473616535424678
Epoch 100, Loss: 1.4458688576050571
Epoch 110, Loss: 1.387400520458249
Epoch 120, Loss: 1.3768331507310074
Epoch 130, Loss: 1.3332856234426433
Epoch 140, Loss: 1.2501292896116216
Epoch 150, Loss: 1.2925819962075695
Epoch 160, Loss: 1.1966336484401991
Epoch 170, Loss: 1.1765257637465465
Epoch 180, Loss: 1.166709367547036
Epoch 190, Loss: 1.087282584390387
Epoch 200, Loss: 1.0744981210188995
Epoch 210, Loss: 1.0294025282068908
Epoch 220, Loss: 0.8896441554509364
Epoch 230, Loss: 0.9252740108073634
Epoch 240, Loss: 0.8031220966447461
Epoch 250, Loss: 0.6691644509776359
Epoch 260, Loss: 0.6139324723886659
Epoch 270, Loss: 0.6249288939766477
Epoch 

In [5]:
# Second, independent model instance; print it, then build it from the
# training-input shape.
new_net = Net()
print(new_net)
new_net._build(X.shape)

# Same schedule and optimizer settings as the original training run.
scheduler = LRScheduler(
    warmup_steps=1000,
    total_steps=10000,
    min_lr=1e-5,
    max_lr=3e-4,
    final_lr=1e-6,
)
optimizer = AdamW(
    new_net.parameters(),
    lr=scheduler,
    clip_norm=100.0,
)

# load_checkpoint receives the optimizer as well as the path - presumably so
# optimizer state is restored along with the weights; confirm against Module.
new_net.load_checkpoint(optimizer, "../../../checkpoints/simple_linear_model")
# Dump the parameters after loading.
print(new_net.parameters())

# Continue training on the same data for another 1000 epochs.
new_net.train(X, y, optimizer, num_epochs=1000)


Architecture:
  linear_0 (linear):
    fc1 (linear):
      linear_1_linear_1_fc1_weight: shape=(7, 28), dtype=float64
      linear_1_linear_1_fc1_bias: shape=(28,), dtype=float64
  linear_1 (linear):
    fc2 (linear):
      linear_2_linear_1_fc2_weight: shape=(28, 28), dtype=float64
      linear_2_linear_1_fc2_bias: shape=(28,), dtype=float64
  linear_2 (linear):
    fc3 (linear):
      linear_3_linear_1_fc3_weight: shape=(28, 28), dtype=float64
      linear_3_linear_1_fc3_bias: shape=(28,), dtype=float64
  linear_3 (linear):
    fc4 (linear):
      linear_4_linear_1_fc4_weight: shape=(28, 1), dtype=float64
      linear_4_linear_1_fc4_bias: shape=(1,), dtype=float64
  layer_norm_0 (layer_norm):
    None (layernorm):
{'linear_1_linear_1_fc1_weight': Tensor(data=[[-1.09860756e-01 -5.80649495e-01  2.44288594e-01  3.54645520e-01
  -3.57530385e-01  1.01892769e-01  2.11457267e-01  2.18879506e-01
   6.91494793e-02  3.49628985e-01  5.99101037e-02 -4.92275476e-01
   2.08297689e-02  1.12714246e-