<a href="https://colab.research.google.com/github/kiplangatkorir/Hierarchical-Compression-of-LLM-Weights-using-Kolmogorov-Arnold-Networks/blob/main/simplified_KAN_compressor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

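This project implements a memory-efficient compression technique for neural-network weights, aimed at Large Language Models (LLMs), using Principal Component Analysis (PCA). The goal is to significantly reduce the model size while maintaining as much performance as possible. In this notebook the technique is demonstrated on a small fully connected MNIST classifier.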



## Results
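In our tests using the MNIST dataset:

- Original model accuracy: 97.96%
- Compressed model accuracy: 93.38%
- Accuracy difference: 4.58 percentage points
- Compression ratio: 0.18 (82% reduction) when counting only the PCA-projected coefficients and the uncompressed biases (41,404 of 235,146 values). Reconstructing the weights also requires the PCA components and means; including them, the stored state is 159,948 values, i.e. about 0.68 of the original size.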

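These results show the trade-off: the projected coefficients are much smaller than the original weight matrices, while the compressed model still reaches over 93% test accuracy (a drop of about 4.6 percentage points).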

In [None]:
# Install the notebook's dependencies into the running kernel's environment.
# %pip (rather than a bare `pip`) targets the active kernel; -q keeps the log short.
# torchvision and scikit-learn are listed because later cells import them.
%pip install -q torch torchvision scikit-learn



In [None]:
import torch
import torch.nn as nn
import numpy as np
from sklearn.decomposition import PCA

In [None]:
class MemoryEfficientCompression:
    """PCA-based low-rank compression of a model's weight tensors.

    Every parameter with more than one dimension is reshaped to a 2-D matrix
    with ``shape[1]`` columns and projected onto its leading principal
    components. Parameters with one dimension or fewer (e.g. biases) are
    stored unchanged. The compressed state keeps the projected coefficients
    together with the PCA mean and components, so it can be decompressed
    without the fitted scikit-learn objects.

    Args:
        model: ``torch.nn.Module`` whose parameters are compressed.
        compression_ratio: Fraction of ``min(param.shape)`` used as the number
            of principal components (at least one component is always kept).
    """

    def __init__(self, model, compression_ratio=0.5):
        self.model = model
        self.compression_ratio = compression_ratio
        # Filled by compress(): parameter name -> tensor, or dict of tensors.
        self.compressed_state = None
        # Fitted sklearn PCA objects, kept for inspection / backward compatibility.
        self.pca_models = {}

    def compress(self):
        """Fit one PCA per multi-dimensional parameter and store the result.

        Returns:
            dict mapping parameter name to either a CPU copy of the tensor
            (parameters with ``dim() <= 1``) or a dict with the keys
            ``'compressed'``, ``'shape'``, ``'mean'`` and ``'components'``.
            The same dict is also stored in ``self.compressed_state``.
        """
        compressed_state = {}
        for name, param in self.model.named_parameters():
            # detach + cpu so parameters living on an accelerator can be
            # converted to NumPy as well.
            weights = param.detach().cpu()
            if weights.dim() > 1:
                shape = weights.shape
                matrix = weights.reshape(-1, shape[1]).numpy()
                # Calculate n_components based on the minimum of shape dimensions
                n_components = max(1, int(min(shape) * self.compression_ratio))
                # PCA cannot return more components than min(n_samples, n_features),
                # so cap the request (matters when compression_ratio > 1).
                n_components = min(n_components, min(matrix.shape))

                pca = PCA(n_components=n_components)
                compressed = pca.fit_transform(matrix)

                compressed_state[name] = {
                    'compressed': torch.from_numpy(compressed).float(),
                    'shape': shape,
                    'mean': torch.from_numpy(pca.mean_).float(),
                    'components': torch.from_numpy(pca.components_).float()
                }
                self.pca_models[name] = pca
            else:
                # Clone so the stored state is a snapshot, not an alias of the
                # live parameter that would change with further training.
                compressed_state[name] = weights.clone()

        self.compressed_state = compressed_state
        return compressed_state

    def decompress(self):
        """Rebuild full-size tensors from ``self.compressed_state``.

        Compressed entries are reconstructed as
        ``compressed @ components + mean`` (the inverse of a non-whitened PCA
        projection) and reshaped to the recorded original shape; uncompressed
        entries are passed through as stored.

        Returns:
            dict mapping parameter name to a tensor.

        Raises:
            ValueError: if ``compress()`` has not been called yet.
        """
        if self.compressed_state is None:
            raise ValueError("Model hasn't been compressed yet.")

        decompressed_state = {}
        for name, compressed_data in self.compressed_state.items():
            if isinstance(compressed_data, dict):  # Compressed tensor
                # Use only the stored tensors so a saved/reloaded state can be
                # decompressed without the sklearn PCA objects.
                restored = (
                    compressed_data['compressed'] @ compressed_data['components']
                    + compressed_data['mean']
                )
                decompressed_state[name] = restored.float().reshape(compressed_data['shape'])
            else:  # Uncompressed tensor
                decompressed_state[name] = compressed_data

        return decompressed_state

    def apply_compressed_weights(self):
        """Overwrite the model's parameters in place with the decompressed tensors.

        Raises:
            ValueError: if ``compress()`` has not been called yet.
        """
        decompressed_state = self.decompress()
        with torch.no_grad():
            for name, param in self.model.named_parameters():
                param.copy_(decompressed_state[name])

def compress_model(model, compression_ratio=0.5):
    """Compress ``model`` with PCA and print a size report.

    Args:
        model: ``torch.nn.Module`` to compress.
        compression_ratio: Passed through to ``MemoryEfficientCompression``.

    Returns:
        The ``MemoryEfficientCompression`` instance holding the compressed
        state; call ``apply_compressed_weights()`` on it to load the
        reconstructed weights back into ``model``.
    """
    compressor = MemoryEfficientCompression(model, compression_ratio)
    compressed_state = compressor.compress()

    total_params = sum(p.numel() for p in model.parameters())

    # Count every tensor that has to be stored to rebuild the weights: the
    # projected coefficients AND the PCA components and mean. Counting only
    # the coefficients understates the stored size.
    compressed_params = 0
    for entry in compressed_state.values():
        if isinstance(entry, dict):
            compressed_params += sum(
                entry[key].numel() for key in ('compressed', 'mean', 'components')
            )
        else:
            compressed_params += entry.numel()

    # A model without parameters has no meaningful ratio; avoid dividing by zero.
    ratio = compressed_params / total_params if total_params else float('nan')

    print(f"Original parameters: {total_params}")
    print(f"Compressed parameters: {compressed_params}")
    print(f"Compression ratio: {ratio:.2f}")

    return compressor

In [None]:
# Example usage: build an untrained 784-256-128-10 MLP, compress it, and load
# the reconstructed weights back into the same model object.
model = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, 10)
)

# compress_model prints the parameter counts and returns the compressor.
compressor = compress_model(model, compression_ratio=0.5)

# To use the compressed model: this overwrites `model`'s parameters in place
# with their decompressed values.
compressor.apply_compressed_weights()
print("Compression and decompression complete.")

Original parameters: 235146
Compressed parameters: 41404
Compression ratio: 0.18
Compression and decompression complete.


**Testing Phase**

In [None]:
# Import necessary libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split

In [None]:
# Load MNIST dataset
def load_mnist_data(root='./data', train_batch_size=64, test_batch_size=1000):
    """Build DataLoaders for the MNIST train and test splits.

    The dataset is downloaded into ``root`` if it is not already there.
    Images are converted to tensors and normalised with the constants
    (0.1307, 0.3081).

    Args:
        root: Directory where the MNIST files are stored or downloaded to.
        train_batch_size: Batch size of the (shuffled) training loader.
        test_batch_size: Batch size of the (unshuffled) test loader.

    Returns:
        Tuple ``(train_loader, test_loader)``.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        # NOTE(review): these look like the usual MNIST mean/std constants.
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    train_dataset = datasets.MNIST(root=root, train=True, download=True, transform=transform)
    test_dataset = datasets.MNIST(root=root, train=False, download=True, transform=transform)

    train_loader = DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False)

    return train_loader, test_loader

In [None]:
def train_model(model, train_loader, epochs=10, lr=0.001):
    """Train ``model`` with Adam and cross-entropy loss.

    Each batch is flattened to ``(batch, -1)`` before the forward pass, and a
    progress line is printed on every 100th batch of each epoch.

    Args:
        model: Network to optimise; it is updated in place.
        train_loader: Iterable of ``(data, target)`` batches; must expose
            ``__len__`` and a ``dataset`` attribute for the progress line.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.

    Returns:
        The same ``model`` object that was passed in.
    """
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        model.train()
        for step, (inputs, labels) in enumerate(train_loader):
            adam.zero_grad()
            flat_inputs = inputs.view(inputs.size(0), -1)
            batch_loss = loss_fn(model(flat_inputs), labels)
            batch_loss.backward()
            adam.step()

            # Only report on every 100th batch.
            if step % 100 != 0:
                continue

            seen = step * len(inputs)
            total = len(train_loader.dataset)
            percent = 100. * step / len(train_loader)
            print(f'Train Epoch: {epoch} [{seen}/{total} '
                  f'({percent:.0f}%)]\tLoss: {batch_loss.item():.6f}')

    return model

In [None]:
def evaluate_model(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print a one-line summary.

    The model is put in eval mode and run without gradient tracking. Each
    batch is flattened to ``(batch, -1)`` before the forward pass.

    Args:
        model: Network to evaluate.
        test_loader: Iterable of ``(data, target)`` batches exposing a
            ``dataset`` attribute whose length is the number of samples.

    Returns:
        Accuracy as a fraction: correct predictions divided by
        ``len(test_loader.dataset)``.
    """
    model.eval()
    summed_loss = 0
    num_correct = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            logits = model(inputs.view(inputs.size(0), -1))
            # Sum (not mean) per batch so the final division gives a per-sample average.
            summed_loss += nn.functional.cross_entropy(logits, labels, reduction='sum').item()
            predictions = logits.argmax(dim=1)
            num_correct += (predictions == labels).sum().item()

    num_samples = len(test_loader.dataset)
    mean_loss = summed_loss / num_samples
    accuracy = num_correct / num_samples

    print(f'\nTest set: Average loss: {mean_loss:.4f}, Accuracy: {num_correct}/{num_samples} ({accuracy:.2f})')
    return accuracy


In [None]:
# Load MNIST data: build the train/test DataLoaders (the dataset is downloaded
# on first use, which produces the download log below).
train_loader, test_loader = load_mnist_data()

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 56013234.45it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 1868801.20it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 12859525.51it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 2629472.57it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [None]:
# Define the model
# Seed PyTorch's global RNG first so the weight initialisation (and later draws
# from the global RNG, such as DataLoader shuffling) repeat across runs.
SEED = 42
torch.manual_seed(SEED)

# 784 -> 256 -> 128 -> 10 fully connected classifier for flattened 28x28 images.
model = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Linear(128, 10)
)

In [None]:
# Train the original model with train_model's default epochs and learning rate.
# train_model returns the model it was given, so rebinding `model` keeps the same object.
print("Training original model...")
model = train_model(model, train_loader)


Training original model...


In [None]:
# Evaluate the original model; evaluate_model returns accuracy as a fraction
# in [0, 1], kept here as the baseline for the comparison at the end.
print("\nEvaluating original model...")
original_accuracy = evaluate_model(model, test_loader)


Evaluating original model...

Test set: Average loss: 0.0913, Accuracy: 9796/10000 (0.98)


In [None]:
# Compress the model
# NOTE: apply_compressed_weights() overwrites `model`'s parameters in place, so
# from this cell on `model` holds the PCA-reconstructed weights and the trained
# originals are no longer available unless they were copied beforehand.
print("\nCompressing model...")
compressor = compress_model(model, compression_ratio=0.5)
compressor.apply_compressed_weights()



Compressing model...
Original parameters: 235146
Compressed parameters: 41404
Compression ratio: 0.18


In [None]:
# Evaluate the compressed model: `model` is the same object as before, now
# carrying the decompressed weights written by apply_compressed_weights().
print("\nEvaluating compressed model...")
compressed_accuracy = evaluate_model(model, test_loader)


Evaluating compressed model...

Test set: Average loss: 0.2274, Accuracy: 9338/10000 (0.93)


In [None]:
# Summarise the comparison; accuracies are fractions in [0, 1], so the
# difference printed here is also a fraction (0.0458 == 4.58 percentage points).
print(f"\nOriginal Model Accuracy: {original_accuracy:.4f}")
print(f"Compressed Model Accuracy: {compressed_accuracy:.4f}")
print(f"Accuracy difference: {original_accuracy - compressed_accuracy:.4f}")


Original Model Accuracy: 0.9796
Compressed Model Accuracy: 0.9338
Accuracy difference: 0.0458
