<a href="https://colab.research.google.com/github/ratulb/pytorch/blob/main/mnist_in_pythor_from_scratch_cleaned_wip3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
import random
import numpy as np
import torch
import time


# Seed the Python, NumPy and PyTorch random number generators so runs are repeatable.
# NOTE(review): NumPy is seeded with 42 while the other two use 24 -- confirm this is intended.
random.seed(24)
np.random.seed(42)
torch.manual_seed(24)

# Transform used to convert each image to a tensor (passed to the dataset below).
to_tensor = transforms.ToTensor()

# The MNIST dataset itself is loaded in the following cell.

In [None]:
# Fetch the MNIST training split (download=True) with the tensor transform attached.
train_dataset = torchvision.datasets.MNIST(
    root='./',
    train=True,
    download=True,
    transform=to_tensor,
)
#test_dataset = torchvision.datasets.MNIST(root='./', train=False, download=True, transform=to_tensor)

# Randomly hold out 1000 samples for validation; the other 59000 are used for training.
train_ds, val_ds = random_split(train_dataset, lengths=[59000, 1000])
len(train_ds), len(val_ds)
#len(train_dataset)

In [None]:
# Mini-batch size shared by the training and validation loaders.
batch_size = 4

# Only the training loader shuffles; the validation loader keeps dataset order.
train_loader = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=batch_size)

In [None]:
def convert_to_list(dataset):
    """Split a dataset of (image, label) pairs into two parallel Python lists.

    Args:
        dataset: Iterable yielding ``(image, label)`` pairs; each ``image``
            must support ``squeeze().tolist()``.

    Returns:
        tuple: ``(images, labels)`` in dataset order, where every image has
        been squeezed and converted to (nested) Python lists and every label
        is passed through unchanged.
    """
    images, labels = [], []
    for sample in dataset:
        tensor, target = sample
        # squeeze() drops size-1 dimensions before the conversion to lists.
        images.append(tensor.squeeze().tolist())
        labels.append(target)
    return images, labels

In [None]:
def one_hot_encode(values, num_classes):
    """
    Build one-hot vectors for a batch of class indices.
    Args:
        values (list of list): One entry per sample; the first element of
            each entry is the class index to mark.
        num_classes (int): Length of every one-hot vector.
    Returns:
        list of list: One vector per entry, all zeros except a 1 at the
        entry's class index.
    """
    encoded_rows = []
    for entry in values:
        row = [0 for _ in range(num_classes)]
        row[entry[0]] = 1
        encoded_rows.append(row)
    return encoded_rows

#values = [[1], [0], [2], [3]]
#num_classes = 4

#one_hot_encoded_values = one_hot_encode(values, num_classes)
#print(one_hot_encoded_values)


In [None]:
def forward_pass(input_batch, all_neurons_weights, biases):
    """Compute the output of one dense (affine) layer for every sample.

    Args:
        input_batch: List of samples, each a flat list of feature values.
        all_neurons_weights: One weight list per neuron.
        biases: One bias per neuron, aligned with ``all_neurons_weights``.

    Returns:
        list of list: For each sample, one value per neuron equal to the
        weighted sum of the sample's features plus that neuron's bias.
    """
    def neuron_activation(sample, neuron_weights, neuron_bias):
        # Weighted sum of the features, then the bias added once.
        return sum(x * w for x, w in zip(sample, neuron_weights)) + neuron_bias

    return [
        [
            neuron_activation(sample, neuron_weights, neuron_bias)
            for neuron_weights, neuron_bias in zip(all_neurons_weights, biases)
        ]
        for sample in input_batch
    ]

def _square_or_inf(value):
    """Return ``value ** 2``, or ``inf`` when squaring overflows a float."""
    try:
        return value ** 2
    except OverflowError:
        return float("inf")


def calculate_errors(batch_output_of_forward_pass, target_batch):
    """Compute per-sample squared and residual errors.

    Args:
        batch_output_of_forward_pass: One list of neuron outputs per sample.
        target_batch: One list of target values per sample, aligned with the
            outputs.

    Returns:
        tuple: ``(squared_errors, residual_errors)``. Both are lists with one
        list per sample; residuals are ``output - target`` and squared errors
        are the residuals squared.
    """
    squared_errors = []
    residual_errors = []
    for output, target in zip(batch_output_of_forward_pass, target_batch):
        sample_residual_errors = [o - t for o, t in zip(output, target)]
        try:
            sample_squared_errors = [r ** 2 for r in sample_residual_errors]
        except OverflowError as e:
            print("Overflow error.", e)
            # Keep the entry a list (callers sum over it): only the values
            # that actually overflow are replaced, and they become inf.
            sample_squared_errors = [_square_or_inf(r) for r in sample_residual_errors]
        squared_errors.append(sample_squared_errors)
        residual_errors.append(sample_residual_errors)
    return squared_errors, residual_errors

def calculate_weight_and_bias_deltas(output_of_calculate_errors, input_batch, learning_rate):
    """Accumulate learning-rate-scaled parameter deltas over a batch.

    Args:
        output_of_calculate_errors: Pair whose second element is the list of
            per-sample residuals (one residual per neuron).
        input_batch: Flat feature list for every sample, aligned with the
            residuals.
        learning_rate: Factor applied to every contribution.

    Returns:
        tuple: ``(weight_deltas, bias_deltas)``. ``weight_deltas[n][f]`` is
        the sum over samples of ``learning_rate * residual[n] * input[f]``;
        ``bias_deltas[n]`` is the sum of ``learning_rate * residual[n]``.
    """
    residuals_per_sample = output_of_calculate_errors[1]
    # Sizes are taken from the first sample of each list.
    num_neurons = len(residuals_per_sample[0])
    num_features = len(input_batch[0])
    weight_deltas = [[0] * num_features for _ in range(num_neurons)]
    bias_deltas = [0] * num_neurons

    for sample_residuals, sample_inputs in zip(residuals_per_sample, input_batch):
        for neuron, residual in enumerate(sample_residuals):
            step = learning_rate * residual
            for feature, value in enumerate(sample_inputs):
                weight_deltas[neuron][feature] += step * value
            bias_deltas[neuron] += step
    return weight_deltas, bias_deltas

def update_weights_biases(output_of_calculate_weight_and_bias_deltas, weights, biases):
    """Subtract the given deltas from the parameters, modifying them in place.

    Args:
        output_of_calculate_weight_and_bias_deltas: ``(weight_deltas,
            bias_deltas)`` pair shaped like ``weights`` and ``biases``.
        weights: One weight list per neuron; updated in place.
        biases: One bias per neuron; updated in place.

    Returns:
        tuple: The same ``weights`` and ``biases`` objects after the update.
    """
    weight_deltas, bias_deltas = output_of_calculate_weight_and_bias_deltas
    for neuron, neuron_weights in enumerate(weights):
        for feature in range(len(neuron_weights)):
            neuron_weights[feature] -= weight_deltas[neuron][feature]
        biases[neuron] -= bias_deltas[neuron]
    return weights, biases

def batch_validation(weights, biases, batch):
    """Score one validation batch with the hand-written layer.

    Args:
        weights: One weight list per neuron.
        biases: One bias per neuron.
        batch: ``(images, labels)`` pair as produced by the data loader.

    Returns:
        The result of ``accuracy(predictions, labels)`` for this batch.
    """
    images, labels = batch
    # Flatten each image to a single list of pixel values.
    images = [image.reshape(-1).tolist() for image in images]
    outputs = forward_pass(images, weights, biases)
    # softmax is monotonic, so the arg-max of the raw outputs is the same
    # class. Skipping it avoids exp() overflow on large outputs and removes
    # the dependency on a softmax() that is only defined in a later cell.
    predictions = torch.tensor([int(np.argmax(output)) for output in outputs])
    return accuracy(predictions, labels)

def evaluate(weights, biases, val_loader, epoch=None):
    """Validate the current parameters on every batch of ``val_loader``.

    Args:
        weights: One weight list per neuron.
        biases: One bias per neuron.
        val_loader: Iterable of validation batches.
        epoch: Epoch label forwarded to the summary line (``None`` if omitted).

    Returns:
        Whatever ``epoch_end_validation`` returns for the collected batches.
    """
    accum_acc_and_counts = [batch_validation(weights, biases, batch) for batch in val_loader]
    # Forward the caller's epoch instead of always reporting "Epoch None".
    return epoch_end_validation(accum_acc_and_counts, epoch=epoch)

def epoch_end_validation(accum_acc_and_counts, epoch):
    """Combine per-batch validation results and print an epoch summary.

    Args:
        accum_acc_and_counts: List of 1-D tensors laid out as
            ``[batch_accuracy, correct_count, batch_size]``.
        epoch: Label printed in the summary line.

    Returns:
        torch.Tensor: ``[accuracy, correct_count, total]`` over all batches.
    """
    accum_acc_and_counts = torch.stack(accum_acc_and_counts)
    count = accum_acc_and_counts[:, 1].sum()
    total = accum_acc_and_counts[:, 2].sum()
    # Weight by sample counts: a plain mean of per-batch accuracies is wrong
    # whenever the batches are not all the same size (e.g. a short last batch).
    acc = count / total
    print(f"Epoch {epoch}, Accuracy: {acc}, Count: {count}, total: {total}")
    return torch.stack([acc, count, total])


def accuracy(outputs, labels):
    """Compare predicted class indices with the true labels.

    Args:
        outputs: Tensor of predicted class indices.
        labels: Tensor of true class indices, same shape as ``outputs``.

    Returns:
        torch.Tensor: ``[fraction_correct, correct_count, number_of_labels]``.
    """
    correct = (outputs == labels).sum().item()
    total = labels.numel()
    return torch.tensor([correct / total, correct, total])


def train(num_epochs, learning_rate):
    """Train a single dense layer (784 inputs, 10 outputs) with mini-batch updates.

    Batches are read from the notebook-level ``train_loader``; after every
    epoch the parameters are validated on the notebook-level ``val_loader``.

    Args:
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: Step size forwarded to the delta computation.

    Returns:
        tuple: ``(weights, biases)`` after the final epoch.
    """
    num_features = 784
    num_neurons = 10

    weights = [[random.random() for _ in range(num_features)] for _ in range(num_neurons)]
    biases = [random.random() for _ in range(num_neurons)]

    for epoch in range(num_epochs):
        errors = None
        start = time.time()
        for input_batch, target_batch in train_loader:
            # Flatten each image but keep the batch dimension explicit;
            # squeeze() would also drop it for a batch of one sample.
            input_batch = input_batch.reshape(input_batch.shape[0], -1).tolist()
            target_batch = [[label] for label in target_batch.tolist()]
            target_batch = one_hot_encode(target_batch, num_neurons)
            batch_output = forward_pass(input_batch, weights, biases)
            errors = calculate_errors(batch_output, target_batch)
            deltas = calculate_weight_and_bias_deltas(errors, input_batch, learning_rate)
            weights, biases = update_weights_biases(deltas, weights, biases)
        end = time.time()

        # The reported error covers only the last batch of the epoch
        # (0 when the loader produced no batches).
        total_error = sum(sum(e) for e in errors[0]) if errors is not None else 0
        print(f"Epoch {epoch+1} completed in {end - start} seconds")
        print(f"Epoch {epoch}, Error: {total_error}, weights: {weights}, Biases: {biases}")
        evaluate(weights, biases, val_loader, epoch)
    return weights, biases

# Hyperparameters for this training run.
num_epochs = 5
learning_rate = 0.00125


# Run training and print the learned parameters in full.
weights, biases = train(num_epochs, learning_rate)
print("Trained weights:", weights)
print("Trained biases:", biases)



In [None]:
import math

def softmax(x):
    """Return the softmax of a sequence of numbers as a list.

    Args:
        x: Iterable of real numbers.

    Returns:
        list: Non-negative values that sum to 1, one per input; an empty
        list for empty input.
    """
    values = list(x)
    if not values:
        return []
    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps every exponent <= 0, so math.exp cannot raise OverflowError.
    peak = max(values)
    exp_x = [math.exp(i - peak) for i in values]
    # Sum all the exponential values
    sum_exp_x = sum(exp_x)
    # Divide each exponential value by the sum of all exponential values
    return [j / sum_exp_x for j in exp_x]

# Example usage: apply softmax to three sample scores and print the result.
input_list = [2.0, 1.0, 0.1]
output_list = softmax(input_list)

print(output_list)


In [None]:
# Classify one validation image by hand with the trained weights and biases.
# The same sample is used for the printed label and for the prediction so
# that the two can be compared.
sample_index = 4
sample_image, sample_label = val_ds[sample_index]
print(sample_image.shape, sample_label)
image_bytes = [sample_image.squeeze().tolist()]
flattened_bytes = [item for sublist1 in image_bytes for sublist2 in sublist1 for item in sublist2]

#weights =
#biases =

# One output per neuron: weighted sum of the pixels plus the bias, which is
# added exactly once (not once per pixel).
outputs = [
    sum(pixel * weight for pixel, weight in zip(flattened_bytes, neuron_weights)) + neuron_bias
    for neuron_weights, neuron_bias in zip(weights, biases)
]

print(outputs)
soft_maxed_outputs = softmax(outputs)
print(soft_maxed_outputs)
print(np.argmax(soft_maxed_outputs))




In [None]:
import random
import math
import time
import torch
import numpy as np

def one_hot_encode(values, num_classes):
    """Return one one-hot list per entry of ``values``.

    Args:
        values: List of entries whose first element is a class index.
        num_classes: Length of each one-hot list.

    Returns:
        list of list: Zeros everywhere except a 1 at each entry's class index.
    """
    def _encode(class_index):
        vector = [0] * num_classes
        vector[class_index] = 1
        return vector

    return [_encode(item[0]) for item in values]

def softmax(x):
    """Return the softmax of a sequence of numbers as a list.

    Args:
        x: Iterable of real numbers.

    Returns:
        list: Non-negative values that sum to 1, one per input; an empty
        list for empty input.
    """
    values = list(x)
    if not values:
        return []
    # Shift by the maximum so every exponent is <= 0: the result is
    # mathematically the same, but math.exp can no longer overflow.
    peak = max(values)
    exp_x = [math.exp(i - peak) for i in values]
    sum_exp_x = sum(exp_x)
    return [j / sum_exp_x for j in exp_x]

def forward_pass(input_batch, all_neurons_weights, biases):
    """Evaluate one dense layer on a batch of flat feature lists.

    Args:
        input_batch: List of samples (each a list of feature values).
        all_neurons_weights: One list of weights per neuron.
        biases: One bias per neuron.

    Returns:
        list of list: Per sample, each neuron's weighted feature sum plus bias.
    """
    outputs = []
    for sample in input_batch:
        outputs.append([
            sum(feature * weight for feature, weight in zip(sample, neuron_weights)) + neuron_bias
            for neuron_weights, neuron_bias in zip(all_neurons_weights, biases)
        ])
    return outputs

def calculate_errors(batch_output_of_forward_pass, target_batch):
    """Return per-sample squared errors and residuals (output minus target).

    Args:
        batch_output_of_forward_pass: One list of outputs per sample.
        target_batch: One list of targets per sample.

    Returns:
        tuple: ``(squared_errors, residual_errors)``, each a list of lists.
    """
    squared_errors, residual_errors = [], []
    for predicted, expected in zip(batch_output_of_forward_pass, target_batch):
        pairs = list(zip(predicted, expected))
        sample_squares = [(p - e) ** 2 for p, e in pairs]
        sample_residuals = [p - e for p, e in pairs]
        squared_errors.append(sample_squares)
        residual_errors.append(sample_residuals)
    return squared_errors, residual_errors

def calculate_weight_and_bias_deltas(output_of_calculate_errors, input_batch, learning_rate):
    """Sum the learning-rate-scaled gradient contributions of a batch.

    Args:
        output_of_calculate_errors: Pair; only its second element (the
            per-sample residual lists) is used.
        input_batch: One flat feature list per sample.
        learning_rate: Scale applied to every contribution.

    Returns:
        tuple: ``(weight_deltas, bias_deltas)`` with one row of weight deltas
        and one bias delta per neuron.
    """
    per_sample_residuals = output_of_calculate_errors[1]
    neuron_count = len(per_sample_residuals[0])
    feature_count = len(input_batch[0])
    weight_deltas = [[0] * feature_count for _ in range(neuron_count)]
    bias_deltas = [0] * neuron_count

    for residuals, features in zip(per_sample_residuals, input_batch):
        for idx in range(len(residuals)):
            scaled_residual = learning_rate * residuals[idx]
            for col, feature in enumerate(features):
                weight_deltas[idx][col] += scaled_residual * feature
            bias_deltas[idx] += scaled_residual
    return weight_deltas, bias_deltas

def update_weights_biases(output_of_calculate_weight_and_bias_deltas, weights, biases):
    """Apply a gradient step in place: parameter minus its delta.

    Args:
        output_of_calculate_weight_and_bias_deltas: ``(weight_deltas,
            bias_deltas)`` pair.
        weights: One weight list per neuron; mutated in place.
        biases: One bias per neuron; mutated in place.

    Returns:
        tuple: The same ``weights`` and ``biases`` objects.
    """
    weight_deltas, bias_deltas = output_of_calculate_weight_and_bias_deltas
    for i, row in enumerate(weights):
        for j, _ in enumerate(row):
            row[j] -= weight_deltas[i][j]
        biases[i] -= bias_deltas[i]
    return weights, biases

def xavier_init(num_features, num_neurons):
    """Draw a weight matrix uniformly from ``[-limit, limit]``.

    ``limit`` is ``sqrt(6 / (num_features + num_neurons))``.

    Args:
        num_features: Number of weights per neuron (row length).
        num_neurons: Number of neurons (row count).

    Returns:
        list of list: ``num_neurons`` rows of ``num_features`` random floats.
    """
    bound = math.sqrt(6 / (num_features + num_neurons))
    matrix = []
    for _ in range(num_neurons):
        matrix.append([random.uniform(-bound, bound) for _ in range(num_features)])
    return matrix

def train(num_epochs, learning_rate):
    """Train a single dense layer (784 inputs, 10 outputs) from a Xavier-style start.

    Batches are read from the notebook-level ``train_loader``.

    Args:
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: Step size forwarded to the delta computation.

    Returns:
        tuple: ``(weights, biases)`` after the final epoch.
    """
    num_features = 784
    num_neurons = 10

    weights = xavier_init(num_features, num_neurons)
    # Biases are drawn from the same uniform range as the weights.
    limit = math.sqrt(6 / (num_features + num_neurons))
    biases = [random.uniform(-limit, limit) for _ in range(num_neurons)]

    for epoch in range(num_epochs):
        errors = None
        start = time.time()
        for input_batch, target_batch in train_loader:
            # Flatten each image but keep the batch dimension explicit;
            # squeeze() would also drop it for a batch of one sample.
            input_batch = input_batch.reshape(input_batch.shape[0], -1).tolist()
            target_batch = [[label] for label in target_batch.tolist()]
            target_batch = one_hot_encode(target_batch, num_neurons)
            batch_output = forward_pass(input_batch, weights, biases)
            errors = calculate_errors(batch_output, target_batch)
            deltas = calculate_weight_and_bias_deltas(errors, input_batch, learning_rate)
            weights, biases = update_weights_biases(deltas, weights, biases)
        end = time.time()

        # The reported error covers only the last batch of the epoch
        # (0 when the loader produced no batches).
        total_error = sum(sum(e) for e in errors[0]) if errors is not None else 0
        print(f"Epoch {epoch+1} completed in {end - start} seconds")
        print(f"Epoch {epoch}, Error: {total_error}, weights: {weights}, Biases: {biases}")

    return weights, biases

# Hyperparameters for this training run.
num_epochs = 3
learning_rate = 0.00125

# Assume train_loader and val_ds are defined and properly set up
# (train() reads train_loader from the notebook's global scope).

weights, biases = train(num_epochs, learning_rate)



In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Show one validation image and classify it by hand with the trained parameters.
bytes_, lab = val_ds[211]
plt.imshow(bytes_[0], cmap='gray')
print("Label: ", lab)
squeezed = bytes_[0].squeeze()
image_bytes = [squeezed.tolist()]
flattened_bytes = [item for sublist1 in image_bytes for sublist2 in sublist1 for item in sublist2]

# One output per neuron: weighted sum of the pixels plus that neuron's bias.
# (The pixel list is used directly instead of rebinding the builtin `input`.)
outputs = [
    sum(x * w for x, w in zip(flattened_bytes, neuron_weights)) + neuron_bias
    for neuron_weights, neuron_bias in zip(weights, biases)
]

#print(outputs)
soft_maxed_outputs = softmax(outputs)
print(soft_maxed_outputs)
print(np.argmax(soft_maxed_outputs))


In [None]:
from torchvision.datasets import MNIST

In [None]:
# Load MNIST from ./ with no transform, downloading it if necessary.
dataset = MNIST(root='./', download=True)
# NOTE: a notebook cell only displays its last expression, so this length is not shown.
len(dataset)
# Same root directory, with train=False.
test_dataset = MNIST(root='./', train=False)
# NOTE: also not displayed, for the same reason.
len(test_dataset)
# First sample of `dataset`; this is the value the cell displays.
dataset[0]

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Take sample 10 of the dataset, draw it in grayscale and print its label.
image, label = dataset[10]
plt.imshow(image, cmap='gray')
print('Label:', label)

In [None]:
# MNIST dataset (images and labels), reloaded with the ToTensor transform.
# This rebinds `dataset`, replacing the transform-less version loaded earlier.
dataset = MNIST(root='./',
                train=True,
                transform=transforms.ToTensor())

In [None]:
# Unpack the first sample and print the image tensor's shape next to its label.
img_tensor, label = dataset[0]
print(img_tensor.shape, label)

In [None]:
# Print a 5x5 patch (rows and columns 10-14) of the first channel,
# then the largest and smallest values in the whole image tensor.
print(img_tensor[0,10:15,10:15])
print(torch.max(img_tensor), torch.min(img_tensor))

In [None]:
# Plot only the 5x5 patch (rows and columns 10-14) of the first channel, not the full image.
# The trailing semicolon suppresses the notebook's display of the call's return value.
plt.imshow(img_tensor[0,10:15,10:15], cmap='gray');

In [None]:
from torch.utils.data import random_split

# Randomly split `dataset` into 50000 training and 10000 validation samples.
# This rebinds train_ds / val_ds from the earlier 59000 / 1000 split.
train_ds, val_ds = random_split(dataset, [50000, 10000])
len(train_ds), len(val_ds)

In [None]:
from torch.utils.data import DataLoader

# Mini-batch size shared by the training and validation loaders.
batch_size = 128

# Only the training loader shuffles; the validation loader keeps dataset order.
train_loader = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_ds, batch_size=batch_size)

In [None]:
import torch.nn as nn

# Layer dimensions: 28*28 input features and 10 output classes.
input_size = 28*28
num_classes = 10

# Logistic regression model: a single linear layer from input_size to num_classes.
model = nn.Linear(input_size, num_classes)
print(model.weight.shape)
#model.weight

In [None]:
# Print the shape of the layer's bias, then display the bias itself as the cell output.
print(model.bias.shape)
model.bias

In [None]:
# Run only the first training batch through the bare linear layer and print the output shape.
for images, labels in train_loader:
    #print(labels)
    #print(images.shape)
    # The layer expects flat rows, so each image is reshaped to 784 values first.
    outputs = model(images.reshape(-1, 784))
    print(outputs.shape)
    break

In [None]:
class MnistModel(nn.Module):
    """Single linear layer applied to flattened images."""

    def __init__(self):
        super().__init__()
        # Layer sizes come from the notebook-level input_size / num_classes.
        self.linear = nn.Linear(input_size, num_classes)

    def forward(self, xb):
        """Flatten a batch and return the linear layer's raw outputs.

        Args:
            xb: Batch tensor whose samples each hold ``in_features`` values.

        Returns:
            Tensor with one row of layer outputs per sample.
        """
        # Use the layer's own input width rather than a hard-coded 784 so the
        # reshape always agrees with the size the layer was built with.
        xb = xb.reshape(-1, self.linear.in_features)
        return self.linear(xb)

In [None]:
# Rebind `model` (previously the bare nn.Linear layer) to an instance of MnistModel.
model = MnistModel()

In [None]:
# Run only the first training batch through the model and print its raw outputs.
# `images`, `labels` and `outputs` stay bound afterwards and are used by later cells.
for images, labels in train_loader:
    #print(labels)
    #print(images.shape)
    outputs = model(images)

    print(outputs)
    break

In [None]:
# Print the shape of the wrapped linear layer's weight.
print(model.linear.weight.shape)
# NOTE(review): this displays whatever object parameters() returns, presumably an
# iterator rather than the values; wrap it in list(...) to see the tensors -- confirm.
model.parameters()

In [None]:
import torch.nn.functional as F

In [None]:
# Sum of the raw model outputs for the first sample of the batch, as a Python number.
torch.sum(outputs[:1].data).item()

In [None]:
# Apply softmax along dimension 1 of the outputs.
probs = F.softmax(outputs, dim=1)

In [None]:
# Sum of the softmax values for the first sample of the batch, as a Python number.
torch.sum(probs[:1].data).item()

In [None]:
# Index of the largest softmax value for the first sample of the batch.
torch.argmax(probs[:1].data)

In [None]:
# Largest value and its index along dim 1 for the first sample.
# NOTE(review): this reads `outputs` (raw model outputs), not `probs`, so `max_prob`
# is not a probability; it also rebinds `label` from the earlier cells -- confirm intent.
max_prob, label = torch.max(outputs[:1].data, dim=1)
max_prob, label

In [None]:
# For every sample in the batch: the largest softmax value and the index where it occurs.
max_probs, preds = torch.max(probs, dim=1)
print(preds)
#print(max_probs)

In [None]:
# Fraction of samples in the batch whose predicted index equals the label.
torch.sum(labels == preds)/labels.shape[0]

In [None]:
def accuracy(outputs, labels):
    """Return the fraction of rows whose arg-max along dim 1 equals the label.

    Args:
        outputs: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of true class indices.

    Returns:
        torch.Tensor: Scalar tensor holding the fraction of correct rows.
    """
    preds = torch.max(outputs, dim=1).indices
    correct = (preds == labels).sum().item()
    return torch.tensor(correct / len(preds))

In [None]:
# Fraction of correct predictions, divided by the actual number of labels in the
# batch rather than a hard-coded 128 (a batch can be shorter than batch_size).
torch.where(preds == labels)[0].shape[0] / labels.shape[0]

In [None]:
#batch_validation(weights, biases, next(iter(val_loader)))
# Toy check: the fraction of positions at which two small tensors agree.
x = torch.tensor([3, 3, 3, 9])
y = torch.tensor([3, 3, 4, 9])
sum(torch.eq(x, y)).item() / x.numel()

In [None]:
 #torch.tensor([np.argmax(softmax(output)) for output in [[1,2], [4,3]]])
# Stack three 4-element tensors as rows, then sum column 3 across the rows.
stacked_up = torch.stack([torch.tensor([1.0,5,6,8]), torch.tensor([1.0,2,4,8]), torch.tensor([1,2,3,8])])
stacked_up[:, 3].sum(dim=0)

In [None]:
def xyz(x=None):
    """Return 1 when no value (or None) is given, otherwise the value plus 100."""
    return 1 if x is None else x + 100

xyz(9)