***Test trained PyTorch weights on my network***

The goal of this notebook was to see if my model was set up correctly by training a PyTorch model and then applying those weights to my model to see if I achieved the same result.

Spoiler: It did

In [None]:
import numpy as np
from Model import NeuralNet
PATH = "classification2.txt"  # Data format: feature_1,feature_2,label (one comma-separated sample per line)

# Parse the whole file in a single call instead of splitting and converting
# every line by hand. ndmin=2 keeps the result two-dimensional
# (samples x columns) even if the file holds only one sample.
data_set = np.loadtxt(PATH, delimiter=",", ndmin=2)
data_set.shape

(118, 3)

In [2]:
np.random.seed(42)

# Shuffle into a NEW array instead of rebinding `data_set`: the cell then
# produces the same split every time it is re-run, rather than permuting an
# array that an earlier run of this cell had already permuted.
shuffled = np.random.permutation(data_set)

# Every column except the last is a feature; the last column is the label,
# kept as a column vector of shape (m, 1).
X, y = shuffled[:, :-1], shuffled[:, -1].reshape(-1, 1)

train_test_split = 0.8  # fraction of the samples used for training
m_train = int(len(shuffled) * train_test_split)
X_train, y_train = X[:m_train], y[:m_train]
X_test, y_test = X[m_train:], y[m_train:]

X_train.shape, y_train.shape, X_test.shape, y_test.shape, data_set.shape

((94, 2), (94, 1), (24, 2), (24, 1), (118, 3))

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset

# Naming: Xtr_t -> X_train tensor, ytr_t -> y_train tensor,
#         Xte_t -> X_test tensor,  yte_t -> y_test tensor.
# Every split is cast to float32 before being wrapped as a tensor.
Xtr_t, ytr_t, Xte_t, yte_t = (
    torch.from_numpy(split.astype(np.float32))
    for split in (X_train, y_train, X_test, y_test)
)
# Labels are kept as column vectors of shape (m, 1).
ytr_t = ytr_t.view(-1, 1)
yte_t = yte_t.view(-1, 1)

# Mini-batches of 32 training samples, drawn in shuffled order.
train_loader = DataLoader(
    dataset=TensorDataset(Xtr_t, ytr_t),
    batch_size=32,
    shuffle=True,
)


In [4]:
class Net(nn.Module):
    """Fully connected binary classifier: 2 -> 8 -> 5 -> 1.

    The two hidden layers use sigmoid activations. The output layer has no
    activation, so the network returns raw logits.
    """

    def __init__(self):
        super().__init__()
        widths = (2, 8, 5, 1)
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.Sigmoid())
        stack.pop()  # drop the trailing sigmoid: the output layer emits logits
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Return the logits computed for the input batch ``x``."""
        logits = self.net(x)
        return logits

def acc_from_logits(logits, targets):
    """Return the mean accuracy (a 0-d tensor) of logits thresholded at probability 0.5.

    ``logits`` are passed through a sigmoid; a probability of at least 0.5
    counts as class 1. The result is the fraction of predictions equal to
    ``targets``.
    """
    probabilities = torch.sigmoid(logits)
    hard_labels = probabilities.ge(0.5).float()
    hits = torch.eq(hard_labels, targets)
    return hits.float().mean()



In [5]:
torch.manual_seed(42)

model = Net()
criterion = nn.BCEWithLogitsLoss()  # applied directly to the raw logits returned by the model
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

epochs = 1000
for epoch in range(epochs):
    # --- one pass over the training mini-batches ---
    model.train()
    for xb, yb in train_loader:
        optimizer.zero_grad()
        logits = model(xb)
        loss = criterion(logits, yb)
        loss.backward()
        optimizer.step()

    # Metrics are reported on every 50th epoch only.
    if epoch % 50 != 0:
        continue

    model.eval()
    report = []
    with torch.inference_mode():
        # Full-batch loss and accuracy, first on the training split, then on the test split.
        for features, targets in ((Xtr_t, ytr_t), (Xte_t, yte_t)):
            split_logits = model(features)
            report.append(criterion(split_logits, targets).item())
            report.append(acc_from_logits(split_logits, targets).item())
    tr_loss, tr_acc, te_loss, te_acc = report

    print(f"Epoch {epoch:4d} | train loss {tr_loss:.4f} acc {tr_acc:.3f} | "
          f"test loss {te_loss:.4f} acc {te_acc:.3f}")

Epoch    0 | train loss 0.6945 acc 0.489 | test loss 0.6891 acc 0.583
Epoch   50 | train loss 0.6919 acc 0.511 | test loss 0.6981 acc 0.417
Epoch  100 | train loss 0.6826 acc 0.574 | test loss 0.6981 acc 0.625
Epoch  150 | train loss 0.6551 acc 0.585 | test loss 0.6821 acc 0.625
Epoch  200 | train loss 0.6019 acc 0.745 | test loss 0.6472 acc 0.583
Epoch  250 | train loss 0.5029 acc 0.809 | test loss 0.6101 acc 0.583
Epoch  300 | train loss 0.3681 acc 0.862 | test loss 0.5239 acc 0.833
Epoch  350 | train loss 0.3246 acc 0.872 | test loss 0.5442 acc 0.750
Epoch  400 | train loss 0.3050 acc 0.894 | test loss 0.5765 acc 0.750
Epoch  450 | train loss 0.2944 acc 0.883 | test loss 0.6112 acc 0.750
Epoch  500 | train loss 0.2861 acc 0.883 | test loss 0.6298 acc 0.750
Epoch  550 | train loss 0.2804 acc 0.883 | test loss 0.6501 acc 0.750
Epoch  600 | train loss 0.2755 acc 0.883 | test loss 0.6644 acc 0.750
Epoch  650 | train loss 0.2712 acc 0.883 | test loss 0.6731 acc 0.750
Epoch  700 | train l

In [None]:
# From-scratch network with the same layer sizes as the PyTorch `Net` (2 -> 8 -> 5 -> 1).
NN_scratch = NeuralNet(
    n_inputs=2,
    hidden_layers=[8, 5],
    n_output=1,
)

def copy_torch_to_scratch(torch_model, scratch_model):
    """Copy the parameters of every ``nn.Linear`` in ``torch_model`` into ``scratch_model.W``.

    For each linear layer, the bias is placed in front of the weight matrix as
    an extra first column, giving a float64 array of shape (out, in + 1) laid
    out as [bias | weights]. That array is written in place into the matching
    entry of ``scratch_model.W``.

    Raises
    ------
    AssertionError
        If the number of linear layers differs from ``len(scratch_model.W)``,
        or a packed array does not have the shape of its target entry.
    """
    dense_layers = []
    for module in torch_model.modules():
        if isinstance(module, nn.Linear):
            dense_layers.append(module)
    assert len(dense_layers) == len(scratch_model.W), "Architectures don't match."

    for idx, dense in enumerate(dense_layers):
        weights = dense.weight.detach().cpu().numpy().astype(np.float64)  # (out, in)
        bias = dense.bias.detach().cpu().numpy().astype(np.float64)       # (out,)
        # Bias becomes column 0, followed by the weight columns.
        packed = np.hstack([bias.reshape(-1, 1), weights])                # (out, in+1)
        assert packed.shape == scratch_model.W[idx].shape, f"Shape mismatch at layer {idx}"
        scratch_model.W[idx][:] = packed

In [7]:
def sigmoid(z):
    """Logistic activation function: 1 / (1 + exp(-z)), applied element-wise."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def sigmoid_derivative(activation_l):
    """Return ``a * (1 - a)`` for the given activation ``a``.

    The argument is the activation itself (the sigmoid's output), not the
    pre-activation input.
    """
    complement = 1 - activation_l
    return activation_l * complement

In [8]:
def accuracy(pred, labels):
    """Return the share of entries where ``pred`` equals ``labels``.

    The comparison is element-wise and the number of matches is divided by
    ``len(labels)``, so both arguments should have the same shape.
    """
    n_correct = np.count_nonzero(pred == labels)
    n_total = len(labels)
    return n_correct / n_total

In [9]:
def test(NN: NeuralNet, X_test, y_test):
    loss = 0
    predictions = []
    for i in range(len(X_test)):
        pred = NN.forward_prop(X_test[i])[-1]
        loss += 1/len(X_test) * pred

        predictions.append(1 if pred >=0.5 else 0)
    acc = accuracy(np.array([predictions]).T,y_test)

    return loss, acc

In [10]:
# Load the trained PyTorch parameters into the from-scratch network,
# then evaluate that network on the held-out test split.
copy_torch_to_scratch(torch_model=model, scratch_model=NN_scratch)
test_loss, test_acc = test(NN=NN_scratch, X_test=X_test, y_test=y_test)
print(test_loss, test_acc)

[[0.4689703]] 0.75
