In [5]:
from torch import Tensor
from torch import nn
import torch
from math import sqrt

class Linear:
    """Fully connected layer ``y = x @ W.T + b`` built on raw torch tensors.

    The constructor signature follows ``torch.nn.Linear``, but the parameters
    are stored as plain leaf tensors with ``requires_grad=True`` rather than
    ``nn.Parameter`` objects.

    Attributes:
        in_features: size of the last dimension of the input.
        out_features: size of the last dimension of the output.
        has_bias: the ``bias`` flag passed to the constructor.
        weight: tensor of shape ``(out_features, in_features)``.
        bias: tensor of shape ``(out_features,)``, or ``None`` when the layer
            was created with ``bias=False``.
    """

    def __init__(self, in_features, out_features, bias=True, device=None, dtype=None):
        """Create the layer and initialise its parameters.

        Weight and bias are drawn uniformly from ``[-bound, bound)`` with
        ``bound = sqrt(1 / in_features)``.

        Args:
            in_features: number of input features (must be non-zero, it is
                used as a divisor).
            out_features: number of output features.
            bias: when falsy, no bias is created and ``self.bias`` is ``None``.
            device: device on which the parameters are allocated
                (``None`` = torch default).
            dtype: floating point dtype of the parameters
                (``None`` = torch default dtype).
        """
        self.in_features = in_features
        self.out_features = out_features
        self.has_bias = bias
        bound = sqrt(1 / in_features)
        self.weight = self._uniform((out_features, in_features), bound, device, dtype)
        # Always define the attribute so callers can inspect `layer.bias`
        # without risking an AttributeError on bias-free layers.
        self.bias = self._uniform((out_features,), bound, device, dtype) if bias else None

    @staticmethod
    def _uniform(shape, bound, device, dtype):
        """Return a leaf tensor sampled from U[-bound, bound) that tracks gradients."""
        # torch.rand yields values in [0, 1); shift and scale before enabling
        # autograd so the result stays a leaf tensor (its .grad gets populated).
        sample = (torch.rand(shape, device=device, dtype=dtype) - 0.5) * bound * 2
        return sample.requires_grad_()

    def __call__(self, x):
        """Apply the affine map to ``x``.

        Args:
            x: tensor whose last dimension has size ``in_features``; any
                number of leading dimensions (including none) is accepted.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size ``out_features``.
        """
        # x @ W.T broadcasts over leading dimensions, so 1-D, 2-D and batched
        # inputs all work and the bias broadcasts along the last axis.
        output = x @ self.weight.T
        if self.has_bias:
            output = output + self.bias
        return output

In [24]:
from tensor import Tensor
from math import sqrt

class Linear2:
    """Linear layer with the same layout as ``Linear`` but built on the
    project's own ``Tensor`` type (imported from the local ``tensor`` module).

    Attributes:
        in_features: number of input features.
        out_features: number of output features.
        has_bias: the ``bias`` flag passed to the constructor.
        weight: ``Tensor`` created with shape ``[out_features, in_features]``.
        bias: ``Tensor`` created with shape ``[out_features]``, or ``None``
            when the layer was created with ``bias=False``.
    """

    def __init__(self, in_features, out_features, bias=True, device=None, dtype=None):
        """Create the layer and initialise its parameters.

        Args:
            in_features: number of input features (must be non-zero, it is
                used as a divisor).
            out_features: number of output features.
            bias: when falsy, no bias is created and ``self.bias`` is ``None``.
            device: accepted for signature parity with ``Linear``; currently unused.
            dtype: accepted for signature parity with ``Linear``; currently unused.
        """
        self.in_features = in_features
        self.out_features = out_features
        self.has_bias = bias
        bound = sqrt(1/in_features)
        # NOTE(review): Tensor.rand(shape, -1, 1) presumably samples uniformly
        # between -1 and 1, so scaling by `bound` would match Linear's init
        # range -- confirm against the tensor module.
        self.weight = Tensor.rand([out_features, in_features], -1, 1)*bound
        self.weight.init_grad()
        if bias:
            self.bias = Tensor.rand([out_features], -1, 1)*bound
            self.bias.init_grad()
        else:
            # Always define the attribute so callers can inspect `layer.bias`
            # without risking an AttributeError on bias-free layers.
            self.bias = None

    def __call__(self, x):
        """Apply the layer to ``x``.

        Computes ``weight @ x.t()``, adds the bias as a column when enabled,
        and returns the transpose of the result.

        Args:
            x: input ``Tensor``; assumed to be 2-D with ``in_features``
                columns -- TODO confirm against the Tensor API.
        """
        output = self.weight @ x.t()
        if self.has_bias:
            output += self.bias.unsqueeze(1)
        return output.t()

In [54]:
import torch

def softmax(logits):
    """Row-wise softmax of a 2-D tensor of logits.

    Args:
        logits: tensor of shape (N, M); softmax is taken along dimension 1.

    Returns:
        Tensor of shape (N, M) whose rows are non-negative and sum to 1.
    """
    # Softmax is invariant to adding a constant to every entry of a row, so
    # subtracting the row maximum leaves the result unchanged while keeping
    # exp() from overflowing to inf (which would yield inf / inf = NaN).
    row_max = logits.max(dim=1, keepdim=True).values.detach()
    exp_logits = torch.exp(logits - row_max)
    return exp_logits / exp_logits.sum(dim=1, keepdim=True)

def one_hot(labels, num_classes):
    """One-hot encode a 1-D tensor of class indices.

    Args:
        labels: integer tensor of shape (N,) with values in [0, num_classes).
        num_classes: width of the encoding.

    Returns:
        Tensor of shape (N, num_classes) in the default float dtype, located
        on the same device as ``labels``, with a 1 at column ``labels[i]`` of
        row ``i`` and 0 everywhere else.
    """
    # scatter_ requires an int64 index; converting here lets int32 / uint8
    # label tensors work as well.
    index = labels.long().unsqueeze(1)
    # Allocate on the labels' device so scatter_ does not hit a device mismatch.
    encoded = torch.zeros(len(labels), num_classes, device=labels.device)
    return encoded.scatter_(1, index, 1.)

def cross_entropy_from_scratch(predictions, labels, num_classes):
    """
    Mean cross-entropy loss between logits and integer class labels.

    predictions: Tensor of model predictions (logits), shape (N, M)
    labels: Ground truth labels, shape (N,), values in [0, M)
    num_classes: Number of classes in the dataset; must equal M

    Returns a scalar tensor: the negative log-probability of the true class,
    averaged over the N samples.

    Raises ValueError when ``num_classes`` does not match the number of
    columns in ``predictions``.
    """
    if predictions.shape[1] != num_classes:
        raise ValueError(
            f"num_classes={num_classes} does not match predictions with "
            f"{predictions.shape[1]} columns"
        )

    # log-softmax computed via log-sum-exp: stays finite for arbitrarily large
    # logits, whereas log(softmax(x) + eps) overflows in exp() and caps the
    # per-sample loss at -log(eps).
    log_probs = predictions - torch.logsumexp(predictions, dim=1, keepdim=True)

    # Selecting the true-class entry of each row is equivalent to multiplying
    # by a one-hot mask and summing, without materialising the mask.
    target_log_probs = log_probs.gather(1, labels.long().unsqueeze(1))

    return -target_log_probs.sum() / predictions.shape[0]

# Worked example: a batch of two samples scored over three classes.
num_classes = 3
labels = torch.tensor([0, 2])  # true class is 0 for the first row, 2 for the second
logits = torch.tensor([
    [2.0, 1.5, 0.1],
    [0.5, 2.0, 1.5],
])

# Mean negative log-likelihood of the true classes under softmax(logits).
loss = cross_entropy_from_scratch(logits, labels, num_classes)
print("Cross Entropy Loss:", loss.item())


Cross Entropy Loss: 0.8336127996444702


In [49]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.optim as optim

# Step 1: Load the MNIST dataset
# Each image is converted to a tensor and then normalised with the
# (0.5,), (0.5,) arguments passed to Normalize.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Both splits are stored under ./data and downloaded on first use.
trainset = torchvision.datasets.MNIST(
    root='./data', train=True, download=True, transform=transform
)
testset = torchvision.datasets.MNIST(
    root='./data', train=False, download=True, transform=transform
)

# Only the training batches are shuffled; test batches keep dataset order.
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
testloader = DataLoader(testset, batch_size=64, shuffle=False)

# Step 2: Define a simple neural network model
class Net():
    """Two-layer perceptron (Linear -> ReLU -> Linear) built from the
    notebook's hand-written ``Linear`` layer.

    The default sizes match the original hard-coded configuration: flattened
    28x28 inputs, 10 hidden units and 10 output logits.
    """

    def __init__(self, in_features=28*28, hidden_features=10, num_classes=10):
        """Create the two layers.

        Args:
            in_features: number of values per flattened input sample.
            hidden_features: width of the hidden layer.
            num_classes: number of output logits.
        """
        self.in_features = in_features
        self.fc1 = Linear(in_features, hidden_features)
        self.fc2 = Linear(hidden_features, num_classes)

    def forward(self, x):
        """Return the logits for a batch of inputs.

        Args:
            x: tensor whose elements can be regrouped into rows of
                ``in_features`` values (e.g. a batch of 1x28x28 images).

        Returns:
            Tensor of shape (N, num_classes) holding unnormalised scores.
        """
        # reshape (unlike view) also accepts non-contiguous inputs.
        x = x.reshape(-1, self.in_features)
        # NOTE(review): the original cell carried commented-out conversions
        # to and from the custom Tensor type around these two calls,
        # presumably for running the same network with Linear2.
        x = self.fc1(x).relu()
        return self.fc2(x)

    def parameters(self):
        """Return the trainable tensors: weight and bias of each layer, in layer order."""
        return [self.fc1.weight, self.fc1.bias, self.fc2.weight, self.fc2.bias]

    def __call__(self, x):
        """Make the instance callable; delegates to ``forward``."""
        return self.forward(x)

model = Net()

# Step 3: Define a loss function and optimizer
# The loss (cross_entropy_from_scratch) and the SGD update are written by
# hand below, so the learning rate is the only hyper-parameter needed here.
lr = 0.01

# Library counterparts of the hand-written pieces, kept for reference:
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=lr)

params = model.parameters()

# Step 4: Train the model
for epoch in range(1):
    running_loss = 0.0
    for inputs, labels in trainloader:
        # Reset gradients left over from the previous step; .grad is still
        # None before the first backward pass.
        for p in params:
            if p.grad is not None:
                p.grad.zero_()

        outputs = model(inputs)
        loss = cross_entropy_from_scratch(outputs, labels, 10)
        loss.backward()

        # Plain SGD step. no_grad() permits the in-place update of leaf
        # tensors without toggling requires_grad on every iteration, and the
        # parameters keep requires_grad=True once training is done.
        with torch.no_grad():
            for p in params:
                p -= p.grad * lr

        running_loss += loss.item()
    print(f"Epoch {epoch + 1}, Loss: {running_loss / len(trainloader)}")

print('Finished Training')

# Step 5: Evaluate the model
correct = 0
total = 0
# Gradients are not needed for inference; no_grad() skips graph construction.
with torch.no_grad():
    for images, labels in testloader:
        outputs = model(images)
        # The predicted class is the index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of images actually evaluated rather than a hard-coded
# count, and avoid dividing by zero if the loader yielded nothing.
accuracy = 100 * correct / total if total else float('nan')
print(f'Accuracy of the network on the {total} test images: {accuracy}%')


Epoch 1, Loss: 0.9223239532729456
Finished Training
Accuracy of the network on the 10000 test images: 86.4%
