In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np

In [2]:
# Returns data tensors for images and labels (binary values 0 and 1)
def load_data(filepath='train.csv'):
    data = pd.read_csv(filepath)
    labels = data['label'].values
    pixels = data.drop('label', axis=1).values
    
    # Convert to binary (0 or 1)
    pixels = (pixels > 127).astype(np.float32)
    
    return torch.FloatTensor(pixels), torch.LongTensor(labels)

In [3]:
# Define the ScalableLinear layer without bias
class ScalableLinear(nn.Module):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(out_features, in_features, dtype=torch.float32))  # Initialize weights

    def forward(self, x):
        return torch.mm(x, self.weight.t())

    def scale_weights(self, target_min, target_max):
        """Scale the weights of the layer to a desired integer range."""
        with torch.no_grad():
            # Get the min and max values of the layer's weights
            weight_min = self.weight.min()
            weight_max = self.weight.max()

            # Compute scaling factor
            scale = (target_max - target_min) / (weight_max - weight_min)
            zero_point = target_min - weight_min * scale

            # Apply scaling to weights
            quantized_weights = torch.round(self.weight * scale + zero_point)

            # Clip to the target range (make sure no value goes outside the desired range)
            quantized_weights = torch.clamp(quantized_weights, target_min, target_max)

            # Update weights with quantized values
            self.weight.data = quantized_weights


 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
# Define the neural network with scalable layers
class ScalableNet(nn.Module):
    def __init__(self, input_size=784):
        super().__init__()
        self.layer1 = ScalableLinear(input_size, 64)
        self.layer2 = ScalableLinear(64, 64)
        self.layer3 = ScalableLinear(64, 32)
        self.layer4 = ScalableLinear(32, 10)

    def forward(self, x):
        x = self.layer1(x)
        x = torch.relu(x)
        x = self.layer2(x)
        x = torch.relu(x)
        x = self.layer3(x)
        x = torch.relu(x)
        x = self.layer4(x)
        return x

    # Helper function that scales weights directly
    def scale_weights(self, target_min, target_max):
        """Scale weights for all layers."""
        self.layer1.scale_weights(target_min, target_max)
        self.layer2.scale_weights(target_min, target_max)
        self.layer3.scale_weights(target_min, target_max)
        self.layer4.scale_weights(target_min, target_max)

In [4]:
# Entry point in training loop that scales our weights
def gradual_scale_weights(model, initial_target_min, initial_target_max, final_target_min, final_target_max, step_size, epoch, max_epochs):
    """
    Gradually scale the weights of each layer after each epoch.
    """
    # Compute the scaling range for this epoch based on the progress in training
    scale_min = initial_target_min + (final_target_min - initial_target_min) * (epoch / max_epochs)
    scale_max = initial_target_max + (final_target_max - initial_target_max) * (epoch / max_epochs)

    # Apply gradual scaling to each layer
    model.scale_weights(target_min=int(scale_min), target_max=int(scale_max))

In [21]:
# Load your data and preprocess it here
X_train, y_train = load_data()

# Train the model
model = ScalableNet()

In [22]:
epochs=30
batch_size = 128
initial_target_min= -32
initial_target_max= 31
final_target_min= -128
final_target_max= 127
step_size= 0.1
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
n_samples = X_train.shape[0]
n_batches = n_samples // batch_size

In [23]:
# Training loop
for epoch in range(epochs):
    total_loss = 0
    correct = 0
    
    for i in range(n_batches):
        start_idx = i * batch_size
        end_idx = start_idx + batch_size
        batch_X = X_train[start_idx:end_idx]
        batch_y = y_train[start_idx:end_idx]
        
        # Forward pass
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        
        # Backward pas
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        correct += (predicted == batch_y).sum().item()
    
    avg_loss = total_loss / n_samples
    accuracy = correct / n_samples
    print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}')

    # Scale the weights using 
    # Gradual weight scaling after each epoch
    gradual_scale_weights(model, initial_target_min, initial_target_max, final_target_min, final_target_max, step_size, epoch, epochs)

Epoch [1/30], Loss: 0.4942, Accuracy: 0.3877
Epoch [2/30], Loss: 62686.3156, Accuracy: 0.6056
Epoch [3/30], Loss: 19606.2193, Accuracy: 0.7675
Epoch [4/30], Loss: 19917.6780, Accuracy: 0.8119
Epoch [5/30], Loss: 21908.1348, Accuracy: 0.8352
Epoch [6/30], Loss: 24577.3792, Accuracy: 0.8507
Epoch [7/30], Loss: 29386.0641, Accuracy: 0.8621
Epoch [8/30], Loss: 33226.9711, Accuracy: 0.8699
Epoch [9/30], Loss: 37997.2656, Accuracy: 0.8757
Epoch [10/30], Loss: 43004.6519, Accuracy: 0.8816
Epoch [11/30], Loss: 48877.4919, Accuracy: 0.8848
Epoch [12/30], Loss: 56584.4659, Accuracy: 0.8916
Epoch [13/30], Loss: 63764.6206, Accuracy: 0.8958
Epoch [14/30], Loss: 71814.4586, Accuracy: 0.8972
Epoch [15/30], Loss: 81569.3629, Accuracy: 0.8991
Epoch [16/30], Loss: 93376.0606, Accuracy: 0.9001
Epoch [17/30], Loss: 104841.4994, Accuracy: 0.9051
Epoch [18/30], Loss: 116829.9022, Accuracy: 0.9057
Epoch [19/30], Loss: 129064.4479, Accuracy: 0.9065
Epoch [20/30], Loss: 141041.0925, Accuracy: 0.9086
Epoch [21

In [8]:
# # Check min and max values before and after scaling
# print("Before scaling:")
# for layer in model.children():
#     if isinstance(layer, ScalableLinear):
#         print(f"Layer weights min: {layer.weight.min().item()}, max: {layer.weight.max().item()}")

# # # Apply scaling
# # model.scale_weights(target_min=-128, target_max=127)

# print("\nAfter scaling:")
# for layer in model.children():
#     if isinstance(layer, ScalableLinear):
#         print(f"Layer weights min: {layer.weight.min().item()}, max: {layer.weight.max().item()}")


In [25]:
# Test the model after scaling the weights
def test_model(model, X_test, y_test):
    model.eval()  # Set the model to evaluation mode
    criterion = nn.CrossEntropyLoss()
    
    # Evaluate on the test set
    with torch.no_grad():
        outputs = model(X_test)
        loss = criterion(outputs, y_test)
        
        _, predicted = torch.max(outputs.data, 1)
        correct = (predicted == y_test).sum().item()
        accuracy = correct / y_test.size(0)
        
    print(f"Test Loss: {loss.item():.4f}, Test Accuracy: {accuracy:.4f}")
    
# Assuming you have the test set X_test and y_test available
# Run the evaluation after scaling the weights
# model.scale_weights(target_min=0, target_max=255)

# Test the model after scaling
test_model(model, X_train, y_train)


Test Loss: 62792664.0000, Test Accuracy: 0.9035


In [24]:
for i, layer in enumerate(model.children()):
        if isinstance(layer, ScalableLinear):  # Ensure that the layer is of type ScalableLinear
            print(f"Layer {i+1} Weights:\n", layer.weight.data)

Layer 1 Weights:
 tensor([[  1.,  38.,  61.,  ..., -40.,   7.,  32.],
        [ 40.,   6.,   3.,  ...,   4.,   7.,   4.],
        [ 40.,   1.,  37.,  ...,  -1.,  -0.,   4.],
        ...,
        [ 38.,  79.,  -1.,  ..., -30.,   7.,  -0.],
        [  7.,  38.,  39.,  ...,  38., -34.,  32.],
        [ -2.,  38.,  40.,  ...,   3.,   6.,  -2.]])
Layer 2 Weights:
 tensor([[-41.,   1., -40.,  ..., -39.,  48.,  30.],
        [ 81.,  38., -37.,  ...,   5.,  -9.,  84.],
        [ -5.,  -1.,  -3.,  ...,  77., -11.,  36.],
        ...,
        [ 80., -75.,  -5.,  ..., 121.,  79.,  40.],
        [-50., -33., -49.,  ...,  27.,  72., -40.],
        [ 44.,  31.,  13.,  ...,  37.,   4., -46.]])
Layer 3 Weights:
 tensor([[ 37., -39.,   1.,  ...,  30.,  40.,  34.],
        [-40.,  36.,  78.,  ...,   7.,  48.,   6.],
        [  6.,  66., -29.,  ...,   3.,  40.,   3.],
        ...,
        [ 37.,  -1.,  -0.,  ...,  34., -40.,  -3.],
        [ 50.,   2.,   8.,  ...,  24.,  29.,   5.],
        [ 85.,   3., 

In [27]:
import csv

In [44]:
def save_weights_as_hex(model):
    weight_matrices = [model.layer1.weight.data, model.layer2.weight.data, model.layer3.weight.data, model.layer4.weight.data]

    for idx, weight_matrix in enumerate(weight_matrices, start=1):
        # Flatten weight matrix
        flattened_weights = weight_matrix.flatten().cpu().numpy()
        
        # Open corresponding file for saving weights
        with open(f'matrix{idx}.mif', 'w') as file:
            for weight in flattened_weights:
                # Convert directly to integer
                int_weight = int(weight.item())
                # Format as 8-digit unsigned hexadecimal
                hex_weight = f"{int_weight & 0xFFFFFFFF:08X}"
                # Write only the value
                file.write(f"{hex_weight}\n")

def save_random_image(X_train, y_train):
    # Randomly select an image index
    idx = random.randint(0, X_train.size(0) - 1)
    print(y_train[idx])
    
    # Get the corresponding image data
    image_data = X_train[idx].numpy()  # Convert to numpy array
    
    # Open file to save the image data
    with open(f'random_image.txt', 'w') as file:
        for pixel in image_data:
            # Map binary pixel directly to integer values (0 or 1)
            int_pixel = int(pixel)
            # Format as 8-digit unsigned hexadecimal
            hex_pixel = f"{int_pixel & 0xFFFFFFFF:08X}"
            # Write only the value
            file.write(f"{hex_pixel}\n")


In [42]:
import random
# Save the weights as signed 8 hexadecimal digits in index: value pairs
save_weights_as_hex(model)

In [45]:
save_random_image(X_train, y_train)

tensor(4)
