In [1]:
import os, sys
sys.path.append("../../../")

from src.core.module import Module, Linear, LayerNorm
from src.core.losses import CrossEntropy, BCE
from src.core.optim import Standard, AdamW
from src.core.tensor import Tensor
from src.utils.lr_scheduler import LRScheduler
import numpy as np
import time
from typing import List
from src.tokenizer.tokenizer import Tokenizer
import pandas as pd

In [3]:
class Net(Module):
    """Small binary classifier: linear -> GELU -> dropout -> layer norm -> linear -> sigmoid.

    Layers and activations come from helpers inherited from ``Module``
    (``self.linear``, ``self.layer_norm``, ``self.gelu``, ``self.dropout``,
    ``self.sigmoid``); their implementations live outside this notebook.
    """

    def __init__(self):
        """Create the two linear layers (7 -> 10 -> 1) and the layer norm."""
        super().__init__()
        self.fc1 = self.linear(7, 10, name="fc1")
        self.fc2 = self.linear(10, 1, name="fc2")
        # Normalizes over the last axis of the hidden activations.
        self.ln = self.layer_norm(axis=-1)

    def forward(self, x):
        """Map an input batch to the sigmoid output of the second linear layer.

        Args:
            x: Input tensor; presumably (batch, 7) to match ``fc1`` -- TODO confirm.

        Returns:
            The result of ``self.sigmoid`` applied to the ``fc2`` output.
        """
        hidden = self.fc1(x)
        hidden = self.gelu(hidden)
        hidden = self.dropout(hidden, p=0.1)
        hidden = self.ln(hidden)
        logits = self.fc2(hidden)
        return self.sigmoid(logits)

    def train(self, x: Tensor, y: Tensor, optimizer, num_epochs=100, log_every=10):
        """Run ``num_epochs`` full-batch optimization steps on ``(x, y)``.

        Each iteration does a forward pass, computes ``BCE(y_hat, y)``, calls
        ``loss.backward()``, then ``optimizer.step()`` and
        ``optimizer.zero_grad()``.

        Args:
            x: Input tensor passed to ``forward``.
            y: Target tensor passed to ``BCE`` together with the prediction.
            optimizer: Object exposing ``step()`` and ``zero_grad()``.
            num_epochs: Number of iterations to run.
            log_every: Print the loss whenever ``epoch % log_every == 0``.
                Defaults to 10 (the previously hard-coded cadence). Pass ``0``
                or ``None`` to silence logging, e.g. for very long runs.

        Returns:
            None.
        """
        # NOTE(review): if ``Module`` also defines a ``train`` method (e.g. a
        # mode toggle), this override hides it -- confirm against src.core.module.
        for epoch in range(num_epochs):
            y_hat = self.forward(x)
            loss = BCE(y_hat, y)

            loss.backward()

            # Gradients are cleared right after the update so the next
            # iteration's backward pass starts from a clean state.
            optimizer.step()
            optimizer.zero_grad()

            if log_every and epoch % log_every == 0:
                print(f"Epoch {epoch}, Loss: {loss.data}")

if __name__ == "__main__":
    # Paths are relative to the notebook's working directory.
    DATA_PATH = "../../../src/experiments/data.csv"
    CHECKPOINT_PATH = "../../../checkpoints/simple_linear_model"
    # Rows [0, TRAIN_SIZE) are used for training; the remainder is held out.
    TRAIN_SIZE = 128

    df = pd.read_csv(DATA_PATH)
    # Vectorized label encoding: "Good" -> 1, every other value -> 0.
    df['Quality'] = (df['Quality'] == "Good").astype("int64")

    # Extract features/labels from the frame once instead of once per split.
    feature_values = df.drop('Quality', axis=1).values
    label_values = df['Quality'].values.reshape((-1, 1))

    # Each split still wraps its own array copy in its own Tensor and slices
    # the Tensor, so the objects handed to the model are built the same way
    # as before.
    X = Tensor(np.array(feature_values))[:TRAIN_SIZE]
    y = Tensor(np.array(label_values))[:TRAIN_SIZE]

    X_test = Tensor(np.array(feature_values))[TRAIN_SIZE:]
    y_test = Tensor(np.array(label_values))[TRAIN_SIZE:]

    net = Net()

    net._build(X.shape)
    # The scheduler object is handed to the optimizer as its learning rate.
    scheduler = LRScheduler(
        warmup_steps=1000,
        total_steps=10000,
        min_lr=1e-5,
        max_lr=3e-4,
        final_lr=1e-6,
    )
    optimizer = AdamW(net.parameters(), lr=scheduler, clip_norm=100.0)

    net.train(X, y, optimizer)

    # Saved under CHECKPOINT_PATH; the optimizer is passed along with the model.
    net.save_checkpoint(optimizer, CHECKPOINT_PATH)

    print(optimizer.params)




Epoch 0, Loss: 0.68994140625
Epoch 10, Loss: 0.68896484375
Epoch 20, Loss: 0.6884765625
Epoch 30, Loss: 0.68603515625
Epoch 40, Loss: 0.68359375
Epoch 50, Loss: 0.68115234375
Epoch 60, Loss: 0.677734375
Epoch 70, Loss: 0.673828125
Epoch 80, Loss: 0.6689453125
Epoch 90, Loss: 0.66357421875
{'linear_0_linear_weight': {'param': Tensor(data=[[ 0.002892]
 [-0.005096]
 [ 0.004   ]
 [-0.004284]
 [ 0.00989 ]
 [-0.02658 ]
 [ 0.0144  ]
 [-0.00733 ]
 [ 0.01363 ]
 [-0.00877 ]], shape=(10, 1), dtype=float16), 'm_t': array([[ 0.186  ],
       [-0.195  ],
       [-0.4458 ],
       [ 0.08704],
       [-0.06714],
       [ 0.4558 ],
       [-0.581  ],
       [ 0.2827 ],
       [-0.4736 ],
       [ 0.5054 ]], dtype=float16), 'v_t': array([[0.00582  ],
       [0.004368 ],
       [0.02089  ],
       [0.0008345],
       [0.0002098],
       [0.02014  ],
       [0.02596  ],
       [0.007084 ],
       [0.01639  ],
       [0.02171  ]], dtype=float16)}, 'linear_0_linear_bias': {'param': Tensor(data=[0.000373], s

In [5]:
# Resume run: build a fresh network/optimizer pair with the same configuration
# as the training cell, load the saved checkpoint into them, print the
# optimizer parameters, then keep training.
resume_from = "../../../checkpoints/simple_linear_model"

new_net = Net()
new_net._build(X.shape)

scheduler = LRScheduler(
    warmup_steps=1000,
    total_steps=10000,
    min_lr=1e-5,
    max_lr=3e-4,
    final_lr=1e-6,
)
optimizer = AdamW(
    new_net.parameters(),
    lr=scheduler,
    clip_norm=100.0,
)

new_net.load_checkpoint(optimizer, resume_from)
print(optimizer.params)

# Same data as the first run, for 10000 iterations.
new_net.train(X, y, optimizer, num_epochs=10000)


{'linear_0_linear_weight': {'param': Tensor(data=[[ 0.002892]
 [-0.005096]
 [ 0.004   ]
 [-0.004284]
 [ 0.00989 ]
 [-0.02658 ]
 [ 0.0144  ]
 [-0.00733 ]
 [ 0.01363 ]
 [-0.00877 ]], shape=(10, 1), dtype=float16), 'm_t': array([[ 0.186  ],
       [-0.195  ],
       [-0.4458 ],
       [ 0.08704],
       [-0.06714],
       [ 0.4558 ],
       [-0.581  ],
       [ 0.2827 ],
       [-0.4736 ],
       [ 0.5054 ]], dtype=float16), 'v_t': array([[0.00582  ],
       [0.004368 ],
       [0.02089  ],
       [0.0008345],
       [0.0002098],
       [0.02014  ],
       [0.02596  ],
       [0.007084 ],
       [0.01639  ],
       [0.02171  ]], dtype=float16)}, 'linear_0_linear_bias': {'param': Tensor(data=[0.000373], shape=(1,), dtype=float16), 'm_t': array([0.], dtype=float16), 'v_t': array([0.], dtype=float16)}, 'linear_0_layer_norm_gamma': {'param': Tensor(data=[0.9185 0.888  0.9805 1.     1.     1.     1.146  1.034  1.156  1.082 ], shape=(10,), dtype=float16), 'm_t': array([ 0.001157 ,  0.001412 , -