In [2]:
import os, sys
sys.path.append("../../../")

from src.core.module import Module, Linear, LayerNorm
from src.core.losses import CrossEntropy, BCE
from src.core.optim import Standard, AdamW
from src.core.tensor import Tensor
from src.utils.lr_scheduler import LRScheduler
import numpy as np
import time
from typing import List
from src.tokenizer.tokenizer import Tokenizer
import pandas as pd

In [3]:
class Net(Module):
    def __init__(self):
        super().__init__()
        self.fc1 = self.linear(7, 1, name="fc1")
        self.ln = self.layer_norm(axis=-1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.gelu(x)
        x = self.dropout(x, p=0.1)
        x = self.ln(x)
        x = self.sigmoid(x)
        return x
    
    def train(self, x: Tensor, y: Tensor, optimizer, num_epochs=100):
        for epoch in range(num_epochs):
            y_hat = self.forward(x)
            
            loss = BCE(y_hat, y)
            
            loss.backward()
            
            optimizer.step()
            optimizer.zero_grad()
            # if epoch % 10 == 0:

            #     print(f"Epoch {epoch}, Loss: {loss.data}")





df = pd.read_csv("../../../src/experiments/data.csv")
df['Quality'] = df['Quality'].apply(lambda x: 1 if x == "Good" else 0)
X = Tensor(np.array(df.drop('Quality', axis=1).values))[:128]
y = Tensor(np.array(df['Quality'].values).reshape((-1, 1)))[:128]

X_test = Tensor(np.array(df.drop('Quality', axis=1).values))[128:]
y_test = Tensor(np.array(df['Quality'].values).reshape((-1, 1)))[128:]

In [3]:
net = Net()

net._build(X.shape)
scheduler = LRScheduler(warmup_steps=1000, total_steps=10000, min_lr=1e-5, max_lr=3e-4, final_lr=1e-6)
optimizer = AdamW(net.parameters(), lr=scheduler, clip_norm=100.0)

net.train(X, y, optimizer, num_epochs=1000)
print(net.parameters())

net.save_checkpoint(optimizer, "../../../checkpoints/simple_linear_model")

{'linear_1_linear_1_fc1_weight': Tensor(data=[[-0.3298]
 [ 0.3357]
 [ 0.3323]
 [-0.3281]
 [-0.3408]
 [ 0.3281]
 [ 0.3303]], shape=(7, 1), dtype=float16), 'linear_1_linear_1_fc1_bias': Tensor(data=[0.00462], shape=(1,), dtype=float16), 'layer_norm_1_layernorm_1_gamma': Tensor(data=[1.], shape=(1,), dtype=float16), 'layer_norm_1_layernorm_1_beta': Tensor(data=[0.3027], shape=(1,), dtype=float16)}


In [4]:
new_net = Net()
print(new_net)
new_net._build(X.shape)
scheduler = LRScheduler(warmup_steps=1000, total_steps=10000, min_lr=1e-5, max_lr=3e-4, final_lr=1e-6)
optimizer = AdamW(new_net.parameters(), lr=scheduler, clip_norm=100.0)
new_net.load_checkpoint(optimizer, "../../../checkpoints/simple_linear_model")
print(new_net.parameters())
# new_net.train(X, y, optimizer, 1000)


Architecture:
  linear_0 (linear):
    fc1 (linear):
      linear_1_linear_1_fc1_weight: shape=(7, 1), dtype=float16
      linear_1_linear_1_fc1_bias: shape=(1,), dtype=float16
  layer_norm_0 (layer_norm):
    None (layernorm):
{'linear_1_linear_1_fc1_weight': Tensor(data=[[-0.3298]
 [ 0.3357]
 [ 0.3323]
 [-0.3281]
 [-0.3408]
 [ 0.3281]
 [ 0.3303]], shape=(7, 1), dtype=float16), 'linear_1_linear_1_fc1_bias': Tensor(data=[0.00462], shape=(1,), dtype=float16), 'layer_norm_1_layernorm_1_gamma': Tensor(data=[1.], shape=(1,), dtype=float16), 'layer_norm_1_layernorm_1_beta': Tensor(data=[0.3027], shape=(1,), dtype=float16)}
