In [1]:
import copy
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
import torchvision
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import math
import optuna
import detectors
import timm
from torchvision import transforms
import sys
from torchvision import datasets
# %matplotlib notebook
%matplotlib inline

# Append local modules to 'sys.path'.
# The project root can be overridden with the DEEP_PROJECT_PATH environment
# variable so the notebook is not tied to one user's home directory; the
# original location is kept as the default for backward compatibility.
proj_path = os.environ.get("DEEP_PROJECT_PATH", "/home/ohada/DeepProject/ProjectPath")
if proj_path not in sys.path:
    sys.path.append(proj_path)

from train_evaluate.quant_func import *
from helper_functions.quant_lora import *
from train_evaluate.train_model import *

In [2]:
# Show the names of all available GPU devices.
# `device_count()` is 0 when CUDA is unavailable, so this evaluates to an
# empty list instead of raising on a CPU-only machine.
[torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]

'NVIDIA RTX A5000'

In [3]:
m_resnet = timm.create_model("resnet18.tv_in1k", pretrained=True)

In [4]:
from train_evaluate.train_model import *
# Keep only the second value returned by `get_imagenet_data` as the test
# loader; the first returned value is discarded.
_, test_loader = get_imagenet_data(batch_size=64, loader=True)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model to the selected device, then evaluate it on the test loader.
m_resnet = m_resnet.to(device)
evaluate_test(m_resnet, test_loader, device)

Accuracy of the network on 10000 test images: 66.46%


66.46

In [None]:
# `pretrained=True` is deprecated in torchvision >= 0.13; the explicit weights
# enum below selects the same ImageNet-1k checkpoint without the warning.
vgg16_model = torchvision.models.vgg16(
    weights=torchvision.models.VGG16_Weights.IMAGENET1K_V1
)

In [111]:
class LoRALayer(nn.Module):
    """Trainable low-rank update computing ``alpha * (x @ A @ B)``.

    ``A`` has shape ``(in_dim, rank)`` and is initialised from a standard
    normal scaled by ``1 / sqrt(rank)``. ``B`` has shape ``(rank, out_dim)``
    and starts at zero, so a freshly constructed layer outputs zeros.
    """

    def __init__(self, in_dim, out_dim, rank, alpha):
        super().__init__()
        self.alpha = alpha
        # Scale of the random initialisation for A: 1 / sqrt(rank).
        init_scale = 1 / torch.sqrt(torch.tensor(rank).float())
        down_proj = torch.randn(in_dim, rank) * init_scale
        up_proj = torch.zeros(rank, out_dim)
        self.A = nn.Parameter(down_proj)
        self.B = nn.Parameter(up_proj)

    def forward(self, x):
        """Project ``x`` through ``A`` then ``B`` and scale by ``alpha``."""
        projected = (x @ self.A) @ self.B
        return self.alpha * projected

class LinearWithLoRA(nn.Module):
    """Wrap a linear layer and add the output of a LoRA branch to its output.

    The LoRA branch is sized from the wrapped layer's ``in_features`` and
    ``out_features``.
    """

    def __init__(self, linear, rank, alpha):
        super().__init__()
        self.linear = linear
        in_dim, out_dim = linear.in_features, linear.out_features
        self.lora = LoRALayer(in_dim, out_dim, rank, alpha)

    def forward(self, x):
        """Return ``linear(x) + lora(x)``."""
        base_out = self.linear(x)
        lora_out = self.lora(x)
        return base_out + lora_out

def ReplaceLinearToLoRA(model, rank, alpha):
    """Recursively swap every ``nn.Linear`` below ``model`` for ``LinearWithLoRA``.

    The replacement is done in place on ``model``; nothing is returned.
    Only descendants are replaced: if ``model`` itself is an ``nn.Linear``
    it is left untouched, because there is no parent to reassign it on.

    Layers that are already wrapped in ``LinearWithLoRA`` are skipped, so
    calling this function again (for example by re-running a notebook cell)
    does not stack a second LoRA branch on the inner linear layer.

    Args:
        model: module whose descendants are inspected.
        rank: rank passed to each new ``LinearWithLoRA``.
        alpha: scaling factor passed to each new ``LinearWithLoRA``.
    """
    # Snapshot the children first: attributes are reassigned while walking.
    for name, module in list(model.named_children()):
        if isinstance(module, LinearWithLoRA):
            # Already adapted; descending would re-wrap its inner `linear`.
            continue
        if isinstance(module, nn.Linear):
            setattr(model, name, LinearWithLoRA(module, rank=rank, alpha=alpha))
        else:
            ReplaceLinearToLoRA(module, rank=rank, alpha=alpha)

def FreeazeModel(model):
    """Turn off gradient tracking for every parameter of ``model``."""
    for weight in model.parameters():
        weight.requires_grad_(False)
    
def UnfreezeLoRA(model):
    """Re-enable gradients on the parameters of every ``LoRALayer`` in ``model``.

    All other parameters keep their current ``requires_grad`` setting.
    ``model.modules()`` yields the module itself as well as every
    descendant, so a ``LoRALayer`` passed in directly is handled too.
    """
    for module in model.modules():
        if isinstance(module, LoRALayer):
            for param in module.parameters():
                param.requires_grad = True

In [112]:
class MultilayerPerceptron(nn.Module):
    """Fully connected network: two ReLU hidden layers and a linear output layer.

    Args:
        num_features: size of each input vector.
        num_hidden_1: width of the first hidden layer.
        num_hidden_2: width of the second hidden layer.
        num_classes: size of the output vector.
    """

    def __init__(self, num_features, num_hidden_1, num_hidden_2, num_classes):
        super().__init__()
        stack = [
            nn.Linear(num_features, num_hidden_1),
            nn.ReLU(),
            nn.Linear(num_hidden_1, num_hidden_2),
            nn.ReLU(),
            nn.Linear(num_hidden_2, num_classes),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        return self.layers(x)


# Build a small MLP instance; later cells pass it to ReplaceLinearToLoRA,
# FreeazeModel and UnfreezeLoRA.
model = MultilayerPerceptron(
    num_features=100,
    num_hidden_1=1000,
    num_hidden_2=1000, 
    num_classes=10
)

# Print the module tree to inspect the layers.
print(model)

MultilayerPerceptron(
  (layers): Sequential(
    (0): Linear(in_features=100, out_features=1000, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1000, out_features=1000, bias=True)
    (3): ReLU()
    (4): Linear(in_features=1000, out_features=10, bias=True)
  )
)


In [109]:
ReplaceLinearToLoRA(model, 4, 8)
print(model)

MultilayerPerceptron(
  (layers): Sequential(
    (0): LinearWithLoRA(
      (linear): Linear(in_features=100, out_features=1000, bias=True)
      (lora): LoRALayer()
    )
    (1): ReLU()
    (2): LinearWithLoRA(
      (linear): Linear(in_features=1000, out_features=1000, bias=True)
      (lora): LoRALayer()
    )
    (3): ReLU()
    (4): LinearWithLoRA(
      (linear): Linear(in_features=1000, out_features=10, bias=True)
      (lora): LoRALayer()
    )
  )
)


In [None]:
FreeazeModel(model)
UnfreezeLoRA(model)

for name, param in model.named_parameters():
    print(f"{name}: {param.requires_grad}")

In [None]:
print(vgg16_model)

In [None]:
ReplaceLinearToLoRA(vgg16_model, 4, 8)
print(vgg16_model)

In [None]:
FreeazeModel(vgg16_model)
UnfreezeLoRA(vgg16_model)
for name, param in vgg16_model.named_parameters():
    print(f"{name}: {param.requires_grad}")

In [28]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [5]:
optuna_trials()

ModuleNotFoundError: No module named 'pydev_jupyter_utils'

For 'mean' quant, 'rank'=10:

Study statistics:
 Number of finished trials:  10
 Number of pruned trials:  0
 Number of complete trials:  9
Best trial:
 Value:  29.649906891629023
 Params:
 lora_alpha: 32
 optimizer: Adam
 lr: 0.08913593530369923
 batch_size: 32
 num_epochs: 5

In [8]:
# NOTE(review): `study` is not assigned in any visible cell — presumably it
# comes from a star import or is set as a global by `optuna_trials()`;
# confirm, since this cell would fail on a fresh kernel otherwise.
study.best_trial

9

In [12]:
[i for i in timm.list_models(pretrained=True) if 'vgg' in i and 'cifar' in i]

['vgg16_bn_cifar10', 'vgg16_bn_cifar100']

In [13]:
m_vgg = timm.create_model("vgg16_bn_cifar10", pretrained=True)

Downloading: "https://huggingface.co/edadaltocg/vgg16_bn_cifar10/resolve/main/pytorch_model.bin" to /home/ohada/.cache/torch/hub/checkpoints/vgg16_bn_cifar10.pth
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 56.2M/56.2M [00:02<00:00, 29.1MB/s]


In [22]:
[i for i in timm.list_models(pretrained=True) if 'vgg' in i]

['repvgg_a0.rvgg_in1k',
 'repvgg_a1.rvgg_in1k',
 'repvgg_a2.rvgg_in1k',
 'repvgg_b0.rvgg_in1k',
 'repvgg_b1.rvgg_in1k',
 'repvgg_b1g4.rvgg_in1k',
 'repvgg_b2.rvgg_in1k',
 'repvgg_b2g4.rvgg_in1k',
 'repvgg_b3.rvgg_in1k',
 'repvgg_b3g4.rvgg_in1k',
 'repvgg_d2se.rvgg_in1k',
 'vgg11.tv_in1k',
 'vgg11_bn.tv_in1k',
 'vgg13.tv_in1k',
 'vgg13_bn.tv_in1k',
 'vgg16.tv_in1k',
 'vgg16_bn.tv_in1k',
 'vgg16_bn_cifar10',
 'vgg16_bn_cifar100',
 'vgg16_bn_svhn',
 'vgg19.tv_in1k',
 'vgg19_bn.tv_in1k']

In [16]:
m_dense = timm.create_model("densenet121_cifar10", pretrained=True)

Downloading: "https://huggingface.co/edadaltocg/densenet121_cifar10/resolve/main/pytorch_model.bin" to /home/ohada/.cache/torch/hub/checkpoints/densenet121_cifar10.pth
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.19M/4.19M [00:00<00:00, 6.34MB/s]


In [23]:
def count_linear_layers(model):
    """Count the ``nn.Linear`` modules in ``model``.

    ``named_modules()`` includes ``model`` itself, so a bare ``nn.Linear``
    counts as one.
    """
    return sum(
        1 for _, layer in model.named_modules() if isinstance(layer, nn.Linear)
    )

count_linear_layers(timm.create_model("vgg16.tv_in1k", pretrained=True))

model.safetensors:   0%|          | 0.00/553M [00:00<?, ?B/s]

1

In [24]:
m_vgg_1k = timm.create_model("vgg16.tv_in1k", pretrained=True)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1