Last time, linear regression was good enough to figure out where the dividing line runs. Now, what if there are multiple lines, or lines with wild curves? Time for gradient descent!

In [9]:
from torch.utils.data import Dataset, DataLoader
import torch as tc

class GradientDescender:
    """Holder for the dataset that gradient descent will be run against.

    Only stores the dataset it is given; no training logic lives here.
    """

    def __init__(self, initial_dataset: Dataset):
        # Keep a reference to the caller's dataset (no copy is made).
        self.dataset = initial_dataset

# Feature-count constant.
# NOTE(review): nothing in the visible cells reads this name, and the sample
# data / model further down use 2 features per row rather than 4 — confirm the
# intended value (or remove the constant if it is a leftover).
NUMBER_OF_FEATURES = 4

class GradientDataset(Dataset):
    """Map-style dataset pairing feature rows with their labels.

    Both inputs are copied into ``float32`` tensors. Item ``i`` is the pair
    ``(features[i], labels[i])`` and the dataset length is the number of labels.

    Args:
        data: Array-like of samples; the first dimension indexes samples.
        labels: Array-like of labels, one per sample.

    Raises:
        ValueError: If either input is a scalar, or if the number of samples
            differs from the number of labels.
    """

    def __init__(self,data,labels):
        super().__init__()
        self.features:tc.Tensor = tc.tensor(data,dtype=tc.float32)
        self.labels:tc.Tensor = tc.tensor(labels,dtype=tc.float32)

        # Fail early: a scalar has no sample dimension to index or measure.
        if self.features.dim() == 0 or self.labels.dim() == 0:
            raise ValueError("data and labels must each have at least one dimension")

        # Fail early: __len__ is driven by the labels, so a mismatch would
        # otherwise show up later as an index error or silently ignored rows.
        sample_count = self.features.shape[0]
        label_count = self.labels.shape[0]
        if sample_count != label_count:
            raise ValueError(
                f"data has {sample_count} samples but labels has {label_count} entries"
            )

    def __getitem__(self,index) -> tuple[tc.Tensor, tc.Tensor]:
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.features[index], self.labels[index]

    def __len__(self) -> int:
        """Return the number of labelled samples."""
        return self.labels.shape[0]

In [10]:
from numpy import array
# Twenty hand-entered 2-D sample points; the first ten rows belong to class 0
# and the last ten to class 1 (see `labels`).
data = array([
    [ 0.77, -1.14],
    [-0.33,  1.44],
    [ 0.91, -3.07],
    [-0.37, -1.91],
    [-0.63, -1.53],
    [ 0.39, -1.99],
    [-0.49, -2.74],
    [-0.68, -1.52],
    [-0.1 , -3.43],
    [-0.05, -1.95],
    [ 3.88,  0.65],
    [ 0.73,  2.97],
    [ 0.83,  3.94],
    [ 1.59,  1.25],
    [ 1.14,  3.91],
    [ 1.73,  2.8 ],
    [ 1.31,  1.85],
    [ 1.56,  3.85],
    [ 1.23,  2.54],
    [ 1.33,  2.03],
])

# Ten zeros followed by ten ones, matching the row order of `data`.
labels = array([0] * 10 + [1] * 10)

# Wrap the arrays as tensors so a DataLoader can batch them.
dataset = GradientDataset(data, labels)

# 20 samples in shuffled batches of 10, i.e. two batches per epoch.
train_loader = DataLoader(dataset, batch_size=10, shuffle=True)

In [11]:

class LogisticRegression(tc.nn.Module):
    """Single linear layer followed by a sigmoid.

    Args:
        feature_count: Number of input features per sample.
    """

    def __init__(self,feature_count:int):
        # Module.__init__ must run before sub-modules are assigned, so that
        # the linear layer's parameters get registered on this module.
        super().__init__()
        self.l_linear:tc.nn.Linear = tc.nn.Linear(feature_count, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        return tc.sigmoid(self.l_linear(x))


Time to do the training!

In [12]:
# Seed PyTorch's global RNG before anything random happens, so the initial
# weights and the DataLoader's shuffle order are the same on every
# Restart & Run All (and every re-run of this cell).
TRAINING_SEED = 0
tc.manual_seed(TRAINING_SEED)

model = LogisticRegression(2)  # 2 = features per sample in the data above
optimiser = tc.optim.SGD(model.parameters(),lr = 0.05)
epochs = 25 #Let's split the difference for now~ 
for epoch in range(epochs):
    model.train()
    for batch_id, (features,label) in enumerate(train_loader):
        out:tc.Tensor = model(features)

        # Labels arrive as a flat batch; reshape them to match the model output
        # because binary_cross_entropy needs input and target of equal shape.
        entropy = tc.nn.functional.binary_cross_entropy(out,label.view(out.shape))

        # Clear gradients left over from the previous step, then backprop and update.
        optimiser.zero_grad()
        entropy.backward()
        optimiser.step()

        # "\r" returns to the start of the line, so each batch overwrites the
        # previous one and only the last batch of an epoch stays visible.
        print(f"(e/b) {epoch}/{batch_id}: {entropy.item()}",end="\r")
    print()  # move to a fresh line so the next epoch does not overwrite this one



(e/b) 0/1: 1.2851252555847168
(e/b) 1/1: 0.9563752412796027
(e/b) 2/1: 0.8644943237304688
(e/b) 3/1: 0.5678653717041016
(e/b) 4/1: 0.6574111580848694
(e/b) 5/1: 0.4674594998359685
(e/b) 6/1: 0.36210691928863525
(e/b) 7/1: 0.48643189668655396
(e/b) 8/1: 0.46297580003738403
(e/b) 9/1: 0.3001677095890045
(e/b) 10/1: 0.35435402393341064
(e/b) 11/1: 0.2849898934364319
(e/b) 12/1: 0.21365562081336975
(e/b) 13/1: 0.20167095959186554
(e/b) 14/1: 0.20654626190662384
(e/b) 15/1: 0.23921974003314972
(e/b) 16/1: 0.32767659425735474
(e/b) 17/1: 0.22183279693126678
(e/b) 18/1: 0.17049723863601685
(e/b) 19/1: 0.1409577578306198
(e/b) 20/1: 0.12470986694097519
(e/b) 21/1: 0.17798857390880585
(e/b) 22/1: 0.10546517372131348
(e/b) 23/1: 0.15026587247848512
(e/b) 24/1: 0.12561306357383728


Accuracy function next!

In [None]:
def compute_accuracy(to_test:tc.nn.Module, datasource:DataLoader) -> float:
    """Return the fraction of samples in ``datasource`` classified correctly.

    A sample is predicted as class 1 when the model's output is strictly
    greater than 0.5, otherwise class 0.

    Args:
        to_test: Model whose forward pass returns one probability per sample.
        datasource: Iterable of ``(features, class_labels)`` batches.

    Returns:
        Accuracy between 0.0 and 1.0. Returns 0.0 when ``datasource`` yields no
        samples, instead of dividing by zero.

    Note:
        The model is switched to eval mode and is left in eval mode.
    """
    to_test.eval()
    correct, total = 0, 0
    # No gradients are needed for evaluation.
    with tc.no_grad():
        for features, class_labels in datasource:
            probs = to_test(features)
            prediction = tc.where(probs > 0.5,1,0)
            # Give the labels the predictions' shape and integer dtype so the
            # comparison is element-wise rather than broadcast.
            lab = class_labels.view(prediction.shape).to(prediction.dtype)
            correct += int((prediction == lab).sum().item())
            # Count elements, matching what the sum above counts.
            total += prediction.numel()
    if total == 0:
        return 0.0
    return correct / total

In [13]:
class DunderExample:
    """Demo class that prints a message whenever one of its dunder methods runs."""

    def __init__(self):
        # Runs when the class is instantiated: DunderExample()
        print("Called the _init_ dunder.")

    def __call__(self,value:int):
        # Runs when an instance is called like a function: instance(value)
        print(f"Called _call_ dunder, with value {value}")

    def __getitem__(self,name):
        # Runs on subscripting: instance[name]. A comma-separated subscript
        # such as instance[1,2] arrives here as a single tuple.
        if not isinstance(name,tuple):
            print(f"Called _getitem_ dunder, with index {name}")
            return
        print(f"Called _getitem_ dunder, with coordinates {name}")


# Exercise each dunder in turn.
example = DunderExample()   # __init__
example(1234)               # __call__

example[9]                  # __getitem__ with a plain index
example[1,2]                # __getitem__ with the tuple (1, 2)

Called the _init_ dunder.
Called _call_ dunder, with value 1234
Called _getitem_ dunder, with index 9
Called _getitem_ dunder, with coordinates (1, 2)
