## Define dataset

## Imports

In [None]:
!pip install gguf
!pip install safetensors

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

from gguf import GGUFWriter

from safetensors.torch import save_file, load_file
import torch

from collections import OrderedDict

import matplotlib.pyplot as plt

In [None]:
# Define the PyTorch model
class MNIST_MLP_Named(nn.Module):
    """Single-hidden-layer MLP classifier whose layers carry explicit names.

    The layers are stored in an ``nn.Sequential`` built from an
    ``OrderedDict``, so parameters are exposed under stable names such as
    ``model.hidden1.weight`` and ``model.output.bias``.

    Args:
        input_dim: Number of features per sample once the input is flattened.
            Defaults to ``28 * 28``.
        hidden_dim: Width of the hidden layer. Defaults to 128.
        num_classes: Number of output logits. Defaults to 10.
        dropout: Dropout probability applied after the hidden ReLU.
            Defaults to 0.2.
    """

    def __init__(
        self,
        input_dim: int = 28 * 28,
        hidden_dim: int = 128,
        num_classes: int = 10,
        dropout: float = 0.2,
    ) -> None:
        super().__init__()
        # Layer names are kept fixed because they become the parameter names
        # returned by named_parameters() / state_dict().
        self.model = nn.Sequential(
            OrderedDict([
                ("flatten", nn.Flatten()),
                ("hidden1", nn.Linear(input_dim, hidden_dim)),
                ("relu1", nn.ReLU()),
                ("dropout1", nn.Dropout(dropout)),
                ("output", nn.Linear(hidden_dim, num_classes)),
            ])
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw (un-softmaxed) class logits for the batch ``x``."""
        return self.model(x)



## Prepare dataset

In [None]:
# Input pipeline: convert each image to a tensor, then normalize it.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST pixel mean / std - confirm.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
)

# Build the training split first, then the test split; both are stored under ./data.
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Only the training batches are shuffled; the test loader uses larger, ordered batches.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=1000)


## Training setup

In [None]:
# Training setup: pick the device, then build the model, loss and optimizer.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = MNIST_MLP_Named()
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


### Train

In [None]:
# Training loop with an evaluation pass on the test split after every epoch.
# Per-epoch metrics are appended to the four history lists below.
num_epochs = 5
train_losses, test_losses = [], []
train_accuracies, test_accuracies = [], []

for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    total_loss, correct = 0.0, 0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        # The criterion yields a per-batch mean. Weight it by the batch size so
        # the epoch figure is a true per-sample mean even when the final batch
        # is smaller than the others.
        total_loss += loss.item() * data.size(0)
        # Training accuracy is taken from the same forward pass used for the
        # update, i.e. while the model is still in train() mode.
        pred = output.argmax(dim=1)
        correct += pred.eq(target).sum().item()

    avg_loss = total_loss / len(train_loader.dataset)
    train_accuracy = correct / len(train_loader.dataset)
    train_losses.append(avg_loss)
    train_accuracies.append(train_accuracy)

    # ---- evaluation pass ----
    model.eval()
    test_loss, correct = 0.0, 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Same sample weighting as in the training pass.
            test_loss += criterion(output, target).item() * data.size(0)
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()

    test_loss /= len(test_loader.dataset)
    test_accuracy = correct / len(test_loader.dataset)
    test_losses.append(test_loss)
    test_accuracies.append(test_accuracy)

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_loss:.4f}, Train Acc: {train_accuracy:.4f}, Test Loss: {test_loss:.4f}, Test Acc: {test_accuracy:.4f}")


## Plot

In [None]:
# Plotting the results
def plot_resuls(train_losses, test_losses, train_accuracies, test_accuracies):
    """Plot loss and accuracy histories as two stacked line charts.

    The x-axis of every curve is derived from the length of the series itself
    (epoch 1 .. len(series)), so the function does not depend on any global
    epoch counter.

    Args:
        train_losses: Per-epoch training loss values.
        test_losses: Per-epoch test loss values.
        train_accuracies: Per-epoch training accuracy values.
        test_accuracies: Per-epoch test accuracy values.
    """
    # A single subplots() call creates the figure; no separate plt.figure()
    # is needed (it would only leave an empty extra figure behind).
    _, (loss_ax, acc_ax) = plt.subplots(2, 1, figsize=(10, 8))

    panels = (
        (loss_ax, 'Loss', ((train_losses, 'Train Loss'), (test_losses, 'Test Loss'))),
        (acc_ax, 'Accuracy', ((train_accuracies, 'Train Accuracy'), (test_accuracies, 'Test Accuracy'))),
    )
    for ax, quantity, curves in panels:
        for values, label in curves:
            ax.plot(range(1, len(values) + 1), values, label=label)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(quantity)
        ax.set_title(f'{quantity} over Epochs')
        ax.legend()

    plt.tight_layout()
    plt.show()


## GGUF Export

In [None]:
def export_to_gguf(model, filename):
    """Write every named parameter of ``model`` to a GGUF file.

    Args:
        model: Module whose ``named_parameters()`` are exported; each tensor
            is stored under its parameter name.
        filename: Destination path handed to ``GGUFWriter``.
    """
    writer = GGUFWriter(filename, "generic")
    try:
        for name, param in model.named_parameters():
            # Move to host memory first: Tensor.numpy() raises for tensors
            # that live on a CUDA device.
            writer.add_tensor(name, param.detach().cpu().numpy())

        writer.write_header_to_file()
        writer.write_kv_data_to_file()
        writer.write_tensors_to_file()
    finally:
        # Release the writer even when one of the write steps fails.
        writer.close()

    print(f"Model saved as {filename}")


### Safetensors export

In [None]:
def save_safetensors(model, filename):
    """Store the named parameters of ``model`` in ``filename`` via ``save_file``.

    Each parameter is detached from the autograd graph and keyed by its
    parameter name.
    """
    state = {key: weight.detach() for key, weight in model.named_parameters()}
    save_file(state, filename)
    print(f"Model saved as {filename}")

# Collect the trained parameters (detached from autograd), keyed by name,
# and write them to disk.
tensors = {name: param.detach() for name, param in model.named_parameters()}
save_file(tensors, "./mnist_mlp.safetensors")

# Read the file back into memory.
loaded = load_file("./mnist_mlp.safetensors")

In [None]:
# prompt: Add code for inference with the trained model, showing a randomly loaded picture

import matplotlib.pyplot as plt
import random
from torchvision.utils import make_grid

# Put the model in evaluation mode before predicting.
model.eval()

# Pick one test sample at random and fetch its image tensor and label.
random_index = random.randint(0, len(test_dataset) - 1)
image, label = test_dataset[random_index]

# The model is called on batches, so prepend a batch axis of size 1.
image_batch = image.unsqueeze(0)
image_batch = image_batch.to(device)

# Forward pass without gradient tracking; the arg-max logit is the prediction.
with torch.no_grad():
    output = model(image_batch)
predicted_class = output.argmax(dim=1).item()

# Invert the (x - 0.1307) / 0.3081 normalization so the picture is displayed
# with its pre-normalization intensities.
denormalized_image = image * 0.3081 + 0.1307

# Show the picture with the true and predicted labels in the title.
plt.imshow(denormalized_image.squeeze().cpu().numpy(), cmap='gray')
plt.axis('off')
plt.title(f"True Label: {label}, Predicted: {predicted_class}")
plt.show()