In [2]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torchvision
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import math
import optuna
import detectors
import timm
from torchvision import transforms
import sys
# %matplotlib notebook
%matplotlib inline

# Append local modules to 'sys.path':
proj_path = "/home/ohada/DeepProject/ProjectPath"
if proj_path not in sys.path:
    sys.path.append(proj_path)

In [2]:
# show the names all available GPU devices:
[torch.cuda.device(i) for i in range(torch.cuda.device_count())]
torch.cuda.device_count()
torch.cuda.get_device_name(0)

'NVIDIA RTX A5000'

In [6]:
# Get the CIFAR10 dataset from 'torch':
from torchvision import datasets
from torchvision.transforms import ToTensor

# Download the training data from open datasets.
training_data = datasets.CIFAR10(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.CIFAR10(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

# Create data loaders:
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


In [7]:
model = timm.create_model("resnet18_cifar10", pretrained=True)

In [6]:
# calculate the model's accuracy on the test data:
model.eval()
correct = 0
total = 0

# CIFAR10 mean and standard deviation:
mean = [       0.4914,      0.4822,      0.4465    ]
std = [      0.2023,      0.1994,      0.201 ]
#mean = [       0.5,      0.5,      0.5    ]
#std = [      0.5,      0.5,      0.5 ]

normalize = transforms.Normalize(mean, std)

with torch.no_grad():
    for i, data in enumerate(test_dataloader):
        images, labels = data
        # Normalize the images batch:
        images = normalize(images)
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        if i % 10 == 0:
            print(f'For step {i} the accuracy is {100 * correct / total}%')

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total}%')

For step 0 the accuracy is 95.3125%
For step 10 the accuracy is 94.88636363636364%
For step 20 the accuracy is 95.38690476190476%
For step 30 the accuracy is 95.2116935483871%
For step 40 the accuracy is 94.74085365853658%
For step 50 the accuracy is 94.79166666666667%
For step 60 the accuracy is 94.82581967213115%
For step 70 the accuracy is 94.93838028169014%
For step 80 the accuracy is 94.86882716049382%
For step 90 the accuracy is 94.86607142857143%
For step 100 the accuracy is 94.87933168316832%
For step 110 the accuracy is 94.80574324324324%
For step 120 the accuracy is 94.86053719008264%
For step 130 the accuracy is 95.00238549618321%
For step 140 the accuracy is 95.06870567375887%
For step 150 the accuracy is 94.98137417218543%
Accuracy of the network on the 10000 test images: 94.98%


In [10]:
def quant_tensor(tensor, size=4*4):
    quant_model_flatten = tensor
    if tensor.numel() < size:
        size = tensor.numel()

    orig_shape = quant_model_flatten.shape
    quant_model_flatten = quant_model_flatten.flatten()
    # For each 'size' subsection, keep the maximum value, and set the rest to zero:
    for i in range(0, len(quant_model_flatten), size):
        # Get the maximum value in the subsection:
        max_val = quant_model_flatten[i:i+size].max()
        max_index = quant_model_flatten[i:i+size].argmax()
        # Set the subsection to zero:
        quant_model_flatten[i:i+size] = 0
        # Restore only 'max_val' to 'max_index' in the subsection:
        quant_model_flatten[i+max_index] = max_val

    quant_model_flatten = quant_model_flatten.view(orig_shape)

    return quant_model_flatten

def quantize_linear_layers(model, size=4*4):
    for name, module in model.named_modules():
        if isinstance(module, nn.Linear):
            # Extract weights and biases
            weight = module.weight
            bias = module.bias

            # Quantize weights
            quantized_weight = quant_tensor(weight, size)

            # Replace original weights with quantized weights
            module.weight = nn.Parameter(quantized_weight, requires_grad=False)

            # Optional: Quantize biases if they exist
            quantized_bias = quant_tensor(bias)
            module.bias = nn.Parameter(quantized_bias, requires_grad=False)
    return model

quant_model = quantize_linear_layers(model)

In [11]:
# calculate the model's accuracy on the test data:
quant_model.eval()
correct = 0
total = 0

# CIFAR10 mean and standard deviation:
mean = [       0.4914,      0.4822,      0.4465    ]
std = [      0.2023,      0.1994,      0.201 ]
#mean = [       0.5,      0.5,      0.5    ]
#std = [      0.5,      0.5,      0.5 ]

normalize = transforms.Normalize(mean, std)

with torch.no_grad():
    for i, data in enumerate(test_dataloader):
        images, labels = data
        # Normalize the images batch:
        images = normalize(images)
        outputs = quant_model(images)
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        if i % 10 == 0:
            print(f'For step {i} the accuracy is {100 * correct / total}%')

print(f'Accuracy of the quantized network on the 10000 test images: {100 * correct / total}%')

For step 0 the accuracy is 34.375%
For step 10 the accuracy is 25.568181818181817%
For step 20 the accuracy is 26.785714285714285%
For step 30 the accuracy is 27.52016129032258%
For step 40 the accuracy is 28.201219512195124%
For step 50 the accuracy is 28.61519607843137%
For step 60 the accuracy is 28.50922131147541%
For step 70 the accuracy is 28.763204225352112%
For step 80 the accuracy is 28.53009259259259%
For step 90 the accuracy is 28.537087912087912%
For step 100 the accuracy is 28.836633663366335%
For step 110 the accuracy is 28.95551801801802%
For step 120 the accuracy is 28.873966942148762%
For step 130 the accuracy is 28.924141221374047%
For step 140 the accuracy is 28.97828014184397%
For step 150 the accuracy is 28.756208609271525%
Accuracy of the quantized network on the 10000 test images: 28.74%


LoRA implementation and tests over MLP module and VGG pretrained

In [None]:
vgg16_model = torchvision.models.vgg16(pretrained=True)

In [111]:
class LoRALayer(nn.Module):
    def __init__(self, in_dim, out_dim, rank, alpha):
        super().__init__()
        std_dev = 1 / torch.sqrt(torch.tensor(rank).float())
        self.A = nn.Parameter(torch.randn(in_dim, rank) * std_dev)
        self.B = nn.Parameter(torch.zeros(rank, out_dim))
        self.alpha = alpha

    def forward(self, x):
        x = self.alpha * (x @ self.A @ self.B)
        return x

class LinearWithLoRA(nn.Module):
    def __init__(self, linear, rank, alpha):
        super().__init__()
        self.linear = linear
        self.lora = LoRALayer(
            linear.in_features, linear.out_features, rank, alpha
        )

    def forward(self, x):
        return self.linear(x) + self.lora(x)

def ReplaceLinearToLoRA(model, rank, alpha):
    for name, module in model.named_children():
        if isinstance(module, nn.Linear):
            setattr(model, name, LinearWithLoRA(module, rank=rank, alpha=alpha))
        else:
            ReplaceLinearToLoRA(module, rank=rank, alpha=alpha)

def FreeazeModel(model):
    for param in model.parameters():
        param.requires_grad = False
    
def UnfreezeLoRA(model):
    for child in model.children():
        if isinstance(child, LoRALayer):
            for param in child.parameters():
                param.requires_grad = True
        else:
            # Recursively freeze linear layers in children modules
            UnfreezeLoRA(child)

In [112]:
class MultilayerPerceptron(nn.Module):
    def __init__(self, num_features, 
        num_hidden_1, num_hidden_2, num_classes):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(num_features, num_hidden_1),
            nn.ReLU(),
            nn.Linear(num_hidden_1, num_hidden_2),
            nn.ReLU(),

            nn.Linear(num_hidden_2, num_classes)
        )

    def forward(self, x):
        x = self.layers(x)
        return x


model = MultilayerPerceptron(
    num_features=100,
    num_hidden_1=1000,
    num_hidden_2=1000, 
    num_classes=10
)

print(model)

MultilayerPerceptron(
  (layers): Sequential(
    (0): Linear(in_features=100, out_features=1000, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1000, out_features=1000, bias=True)
    (3): ReLU()
    (4): Linear(in_features=1000, out_features=10, bias=True)
  )
)


In [109]:
ReplaceLinearToLoRA(model, 4, 8)
print(model)

MultilayerPerceptron(
  (layers): Sequential(
    (0): LinearWithLoRA(
      (linear): Linear(in_features=100, out_features=1000, bias=True)
      (lora): LoRALayer()
    )
    (1): ReLU()
    (2): LinearWithLoRA(
      (linear): Linear(in_features=1000, out_features=1000, bias=True)
      (lora): LoRALayer()
    )
    (3): ReLU()
    (4): LinearWithLoRA(
      (linear): Linear(in_features=1000, out_features=10, bias=True)
      (lora): LoRALayer()
    )
  )
)


In [None]:
FreeazeModel(model)
UnfreezeLoRA(model)

for name, param in model.named_parameters():
    print(f"{name}: {param.requires_grad}")

In [None]:
print(vgg16_model)

In [None]:
ReplaceLinearToLoRA(vgg16_model, 4, 8)
print(vgg16_model)

In [None]:
FreeazeModel(vgg16_model)
UnfreezeLoRA(vgg16_model)
for name, param in vgg16_model.named_parameters():
    print(f"{name}: {param.requires_grad}")

In [28]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
from train_evaluate.train_model import *
study = optuna_trials()

Files already downloaded and verified
Files already downloaded and verified
Epoch [1/4], Step [5000/5000], Loss: nan, Accuracy: 10.05%
Val Loss: nan, Val Accuracy: 9.88%
Epoch [2/4], Step [5000/5000], Loss: nan, Accuracy: 10.03%
Val Loss: nan, Val Accuracy: 9.88%
Epoch [3/4], Step [5000/5000], Loss: nan, Accuracy: 10.03%
Val Loss: nan, Val Accuracy: 9.88%
Epoch [4/4], Step [5000/5000], Loss: nan, Accuracy: 10.03%


[W 2024-03-17 15:05:09,003] Trial 0 failed with parameters: {'lora_alpha': 40, 'optimizer': 'SGD', 'lr': 0.021811288119580597, 'batch_size': 8, 'num_epochs': 4} because of the following error: The value nan is not acceptable.
[W 2024-03-17 15:05:09,004] Trial 0 failed with value nan.


Val Loss: nan, Val Accuracy: 9.88%
Files already downloaded and verified
Files already downloaded and verified
Epoch [1/3], Step [625/625], Loss: nan, Accuracy: 10.27%
Val Loss: nan, Val Accuracy: 9.67%
Epoch [2/3], Step [625/625], Loss: nan, Accuracy: 10.08%
Val Loss: nan, Val Accuracy: 9.67%
Epoch [3/3], Step [625/625], Loss: nan, Accuracy: 10.08%


[W 2024-03-17 15:05:33,624] Trial 1 failed with parameters: {'lora_alpha': 24, 'optimizer': 'SGD', 'lr': 0.19912349713389108, 'batch_size': 64, 'num_epochs': 3} because of the following error: The value nan is not acceptable.
[W 2024-03-17 15:05:33,625] Trial 1 failed with value nan.


Val Loss: nan, Val Accuracy: 9.67%
Files already downloaded and verified
Files already downloaded and verified
Epoch [1/6], Step [625/625], Loss: nan, Accuracy: 10.62%
Val Loss: nan, Val Accuracy: 10.00%
Epoch [2/6], Step [625/625], Loss: nan, Accuracy: 10.00%
Val Loss: nan, Val Accuracy: 10.00%
Epoch [3/6], Step [625/625], Loss: nan, Accuracy: 10.00%
Val Loss: nan, Val Accuracy: 10.00%
Epoch [4/6], Step [625/625], Loss: nan, Accuracy: 10.00%
Val Loss: nan, Val Accuracy: 10.00%
Epoch [5/6], Step [625/625], Loss: nan, Accuracy: 10.00%
Val Loss: nan, Val Accuracy: 10.00%
Epoch [6/6], Step [625/625], Loss: nan, Accuracy: 10.00%


[W 2024-03-17 15:06:19,338] Trial 2 failed with parameters: {'lora_alpha': 48, 'optimizer': 'SGD', 'lr': 0.015138820653668743, 'batch_size': 64, 'num_epochs': 6} because of the following error: The value nan is not acceptable.
[W 2024-03-17 15:06:19,338] Trial 2 failed with value nan.


Val Loss: nan, Val Accuracy: 10.00%
Files already downloaded and verified
Files already downloaded and verified
Epoch [1/5], Step [625/625], Loss: 0.1714, Accuracy: 96.17%
Val Loss: 0.1014, Val Accuracy: 98.30%
Epoch [2/5], Step [625/625], Loss: 0.1740, Accuracy: 96.13%
Val Loss: 0.6072, Val Accuracy: 83.27%
Epoch [3/5], Step [625/625], Loss: 0.1705, Accuracy: 96.27%
Val Loss: 0.0746, Val Accuracy: 98.60%
Epoch [4/5], Step [625/625], Loss: 0.1159, Accuracy: 97.70%
Val Loss: 0.1325, Val Accuracy: 97.53%
Epoch [5/5], Step [625/625], Loss: 0.2009, Accuracy: 95.79%
Val Loss: 1.3934, Val Accuracy: 69.71%
Files already downloaded and verified
Files already downloaded and verified
Epoch [1/4], Step [5000/5000], Loss: 2.2882, Accuracy: 73.38%
Val Loss: 5.2116, Val Accuracy: 46.69%
Epoch [2/4], Step [5000/5000], Loss: 3.0742, Accuracy: 74.94%
Val Loss: 0.9900, Val Accuracy: 79.88%
Epoch [3/4], Step [5000/5000], Loss: 4.1397, Accuracy: 74.81%
Val Loss: 0.4905, Val Accuracy: 86.07%
Epoch [4/4], S

[W 2024-03-17 15:11:41,656] Trial 8 failed with parameters: {'lora_alpha': 48, 'optimizer': 'SGD', 'lr': 0.0634383447468684, 'batch_size': 32, 'num_epochs': 7} because of the following error: The value nan is not acceptable.
[W 2024-03-17 15:11:41,657] Trial 8 failed with value nan.


Val Loss: nan, Val Accuracy: 10.71%
Files already downloaded and verified
Files already downloaded and verified
Epoch [1/5], Step [625/625], Loss: 13.6492, Accuracy: 53.09%
Val Loss: 2.0995, Val Accuracy: 76.05%
Epoch [2/5], Step [625/625], Loss: 5.9190, Accuracy: 60.47%
Val Loss: 0.3330, Val Accuracy: 92.08%
Epoch [3/5], Step [625/625], Loss: 0.1708, Accuracy: 96.05%
Val Loss: 0.0850, Val Accuracy: 98.36%
Epoch [4/5], Step [625/625], Loss: 0.1408, Accuracy: 96.97%
Val Loss: 0.0874, Val Accuracy: 98.60%
Epoch [5/5], Step [625/625], Loss: 58.5076, Accuracy: 82.99%
Val Loss: 41.9884, Val Accuracy: 65.85%
Study statistics: 
 Number of finished trials:  10
 Number of pruned trials:  0
 Number of complete trials:  6
Best trial:
 Value:  41.98835474974031
 Params: 
 lora_alpha: 48
 optimizer: Adam
 lr: 0.16628497525202182
 batch_size: 64
 num_epochs: 5


For 'mean' quant, 'rank'=10:

Study statistics:
 Number of finished trials:  10
 Number of pruned trials:  0
 Number of complete trials:  9
Best trial:
 Value:  29.649906891629023
 Params:
 lora_alpha: 32
 optimizer: Adam
 lr: 0.08913593530369923
 batch_size: 32
 num_epochs: 5

In [8]:
study.best_trial

9

In [12]:
[i for i in timm.list_models(pretrained=True) if 'vgg' in i and 'cifar' in i]

['vgg16_bn_cifar10', 'vgg16_bn_cifar100']

In [13]:
m_vgg = timm.create_model("vgg16_bn_cifar10", pretrained=True)

Downloading: "https://huggingface.co/edadaltocg/vgg16_bn_cifar10/resolve/main/pytorch_model.bin" to /home/ohada/.cache/torch/hub/checkpoints/vgg16_bn_cifar10.pth
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 56.2M/56.2M [00:02<00:00, 29.1MB/s]


In [22]:
[i for i in timm.list_models(pretrained=True) if 'vgg' in i]

['repvgg_a0.rvgg_in1k',
 'repvgg_a1.rvgg_in1k',
 'repvgg_a2.rvgg_in1k',
 'repvgg_b0.rvgg_in1k',
 'repvgg_b1.rvgg_in1k',
 'repvgg_b1g4.rvgg_in1k',
 'repvgg_b2.rvgg_in1k',
 'repvgg_b2g4.rvgg_in1k',
 'repvgg_b3.rvgg_in1k',
 'repvgg_b3g4.rvgg_in1k',
 'repvgg_d2se.rvgg_in1k',
 'vgg11.tv_in1k',
 'vgg11_bn.tv_in1k',
 'vgg13.tv_in1k',
 'vgg13_bn.tv_in1k',
 'vgg16.tv_in1k',
 'vgg16_bn.tv_in1k',
 'vgg16_bn_cifar10',
 'vgg16_bn_cifar100',
 'vgg16_bn_svhn',
 'vgg19.tv_in1k',
 'vgg19_bn.tv_in1k']

In [16]:
m_dense = timm.create_model("densenet121_cifar10", pretrained=True)

Downloading: "https://huggingface.co/edadaltocg/densenet121_cifar10/resolve/main/pytorch_model.bin" to /home/ohada/.cache/torch/hub/checkpoints/densenet121_cifar10.pth
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.19M/4.19M [00:00<00:00, 6.34MB/s]


In [23]:
# count the number of 'Linear' layers in the model:
def count_linear_layers(model):
    count = 0
    for name, module in model.named_modules():
        if isinstance(module, nn.Linear):
            count += 1
    return count

count_linear_layers(timm.create_model("vgg16.tv_in1k", pretrained=True))

model.safetensors:   0%|          | 0.00/553M [00:00<?, ?B/s]

1

In [24]:
m_vgg_1k = timm.create_model("vgg16.tv_in1k", pretrained=True)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1