In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 10)  # Input layer to Hidden layer
        self.fc2 = nn.Linear(10, 10)   # Hidden layer to Output layer

    def forward(self, x):
        x = F.relu(self.fc1(x.view(-1, 784)))
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x

In [3]:
# Data preprocessing
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

In [4]:
# Dataset loading
train_dataset = datasets.MNIST('data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST('data', train=False, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)

In [5]:
# Training and testing functions
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

In [7]:
# Setup device, model, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

# Training loop
for epoch in range(1, 10):  # Adjust the number of epochs as needed
    train(model, device, train_loader, optimizer, epoch)
    #test(model, device, test_loader)



In [70]:
import numpy as np

# Function to save model weights and biases as binary files
def save_weights_and_biases(model):
    for name, parameter in model.named_parameters():
        #parameter.data.cpu().numpy()
        param_data = parameter.data.cpu().numpy().flatten()  # Move the tensor to CPU and convert to NumPy
        file_name = f"{name.replace('.', '_')}.bin"  # Replace dots in names with underscores for filenames
        param_data.tofile(file_name)
        print(f"Saved {file_name}")

In [21]:
save_weights_and_biases(model)

Saved fc1_weight.bin
Saved fc1_bias.bin
Saved fc2_weight.bin
Saved fc2_bias.bin


In [107]:
# Modify the dataset loading to not use DataLoader since we want to access a single image directly
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

test_dataset = datasets.MNIST('data', train=True, download=True, transform=transform)

# Access the first image and its label
image, label = test_dataset[0]

# The image is already in tensor form, but we need to add a batch dimension at the beginning
image = image.unsqueeze(0)

# Assuming your model is named 'model' and is already loaded and trained
# and 'device' is defined as in your code

# Move image to the same device as the model
image = image.to(device)

# Perform the classification
model.eval()  # Set the model to evaluation mode
with torch.no_grad():  # No need to track gradients for validation/testing
    output = model(image)
    predicted_label = output.argmax(dim=1, keepdim=True)  # Get the index of the max log-probability

print(f"Predicted Label: {predicted_label.item()}, Actual Label: {label}")

Predicted Label: 5, Actual Label: 5


In [62]:
def print_layer_output(module, input, output):
    print(output)

# Assuming 'model' is your instance of Net
handle = model.fc1.register_forward_hook(print_layer_output)

# Now, when you perform a forward pass, the output of fc1 will be printed:
# For example, using the first image from the MNIST dataset as shown in the previous code snippet.
model.eval()
with torch.no_grad():
    _ = model(image)

# Don't forget to remove the hook when you're done to avoid altering the behavior unintentionally
handle.remove()

tensor([[-3.1762,  4.4226,  0.6379,  6.9450,  8.1929,  3.7955,  0.8442,  1.4294,
         -4.7221,  9.6552]])


In [64]:
for name, parameter in model.named_parameters():
    if name == 'fc1.weight':
        param_data = parameter.data.cpu().numpy()  # Move the tensor to CPU and convert to NumPy
        print(param_data.shape)
    if name == 'fc1.bias':
        param_bias = parameter.data.cpu().numpy()  # Move the tensor to CPU and convert to NumPy
        print(param_bias.shape)

(10, 784)
(10,)


In [109]:
image.shape

torch.Size([1, 1, 28, 28])

In [110]:
flattened_tensor = image.view(-1)

print(flattened_tensor.size())  # Should print torch.Size([784])

torch.Size([784])


In [111]:
# Convert tensor to numpy array
flattened_array = flattened_tensor.numpy()

initial_index = 9*16
final_index = 10*16
# Perform matrix multiplication
result = param_data.flatten() @ flattened_array[:28]
#result = param_data[:,initial_index:final_index] @ flattened_array[initial_index:final_index] #+ param_bias
print(result)

[ 0.23367587  0.29422194  0.02999098 -0.07587945  0.01631195  0.15406579
  0.35718498  0.05255867  0.26541394 -0.24861467]


In [103]:
param_data[:,initial_index:final_index]

array([[-0.00917219, -0.01012454,  0.00575211, -0.03331763, -0.00510417,
        -0.03795844, -0.01233142, -0.10264165, -0.04232003, -0.05728726,
        -0.0186581 ,  0.00743315, -0.0058446 ,  0.01992852,  0.05828208,
         0.07711641],
       [-0.04916483, -0.03351561, -0.06335033, -0.01391312,  0.00356992,
        -0.01903719, -0.02954159, -0.05425746, -0.09002895, -0.02478559,
        -0.04109292, -0.02898013,  0.02363174,  0.03888603,  0.02864696,
        -0.00698481],
       [ 0.02705055,  0.01471456,  0.0073227 , -0.00647504,  0.01133923,
         0.0080873 , -0.00570219, -0.04784397, -0.03196152, -0.0430212 ,
         0.01950537, -0.01283287,  0.02113139, -0.01420124,  0.00347038,
        -0.01650197],
       [-0.00240677,  0.04210202,  0.04077978,  0.01077317,  0.04211942,
         0.02006197,  0.03079542,  0.02601452,  0.06147995,  0.05401466,
         0.00661837,  0.01165761,  0.06208581, -0.01045334, -0.00507818,
        -0.00781497],
       [-0.00411734, -0.02221525, -0

In [106]:
flattened_array[initial_index:final_index]

array([-0.42421296, -0.42421296, -0.42421296, -0.42421296, -0.42421296,
       -0.42421296, -0.42421296, -0.42421296, -0.42421296, -0.42421296,
       -0.42421296, -0.42421296, -0.42421296, -0.42421296, -0.42421296,
       -0.42421296], dtype=float32)

In [79]:
param_data.flatten()[:140]

array([-0.0458191 ,  0.01698243,  0.01067879, -0.03650709, -0.04565034,
        0.00868056, -0.01806587, -0.00648912, -0.00593405,  0.02437885,
       -0.0340474 ,  0.01143396, -0.02999866, -0.03985174, -0.01715232,
       -0.03415582,  0.000643  , -0.03140581, -0.04144616,  0.02499945,
       -0.00829009, -0.01180074, -0.01798108, -0.02656857,  0.00809063,
        0.00032076, -0.04492859,  0.00624798,  0.0193358 , -0.03380025,
       -0.009534  ,  0.00291923, -0.0245493 , -0.01703419, -0.0416005 ,
        0.00504714,  0.00399562,  0.01194636,  0.02461998, -0.02956784,
       -0.01487653,  0.02863905,  0.00414832,  0.01333327, -0.01529653,
        0.01579291,  0.02066944, -0.01047433,  0.02820223, -0.0325526 ,
        0.0129239 , -0.03062511, -0.03449157, -0.01063934,  0.01013319,
        0.00465741, -0.01517251,  0.01529183, -0.00821066, -0.04391089,
        0.01755815,  0.0156984 ,  0.02072206,  0.0350907 , -0.00157402,
        0.01737464,  0.0420107 ,  0.02197047,  0.01803929,  0.04

In [None]:
param_data