<a href="https://colab.research.google.com/github/shira-chesler/Thesis_tries/blob/main/shira_model_function_composition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tqdm



In [None]:
from google.colab import drive
drive.mount('/drive')

Drive already mounted at /drive; to attempt to forcibly remount, call drive.mount("/drive", force_remount=True).


In [None]:
import torch
import torchvision
import torchvision.datasets as datasets
from torchvision import transforms

from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
from tqdm import tqdm

In [None]:
print(torch.cuda.is_available())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

if torch.cuda.is_available():
  torch.cuda.set_device(0)

True
Using device: cuda


In [None]:
transform = transforms.Compose([
    transforms.ToTensor()
])

training_set = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
validation_set = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

training_loader = torch.utils.data.DataLoader(training_set, batch_size=20, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=20, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


-------------------------2222dddd222222igggjdd222222First model-------------------------

In [None]:
import math
class MyLinearLayer(torch.nn.Module):
    """ Custom Linear layer but mimics a standard linear layer """
    def __init__(self, size_in, size_out):
        super().__init__()
        self.size_in, self.size_out = size_in, size_out
        weights = torch.Tensor(size_in, size_out)
        self.weights = torch.nn.Parameter(weights)  # nn.Parameter is a Tensor that's a module parameter.
        bias = torch.Tensor(1, size_out)
        self.bias = torch.nn.Parameter(bias)

        # initialize weights and biases
        torch.nn.init.kaiming_uniform_(self.weights, a=math.sqrt(5)) # weight init
        fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weights)
        bound = 1 / math.sqrt(fan_in)
        torch.nn.init.uniform_(self.bias, -bound, bound)  # bias init

        self.weights = torch.nn.Parameter(torch.cat((self.bias.t(), self.weights.t()), axis = 1))
        self.bias = torch.nn.Parameter(torch.zeros(1, size_out))

    def forward(self, x):
        #add 1 in vector x
        # print(x.shape, torch.ones(1, x.shape[1]).shape)
        x = torch.cat((torch.ones(x.shape[0], 1), x), axis = 1)
        # print("bias size:", self.bias.t().shape)
        # print("weight size", self.weights.t().shape)
        # weight_with_bias = torch.cat((self.bias.t().reshape(1, self.bias.t().shape[0]), self.weights.t()), axis = 0)
        # print(x.shape)
        # print(self.weights.t().shape)
        # print("x: ", x.shape, "w: ", self.weights.t().shape)
        w_times_x = torch.mm(x, self.weights.t())
        return w_times_x

In [None]:
class SimpleModel(torch.nn.Module):

    def __init__(self):
      super(SimpleModel, self).__init__()
      self.flatten = torch.nn.Flatten()
      self.linear1 = torch.nn.Linear(3072, 4)
      self.activation1 = torch.nn.ReLU()
      self.linear2 = MyLinearLayer(4, 8)
      self.activation2 = torch.nn.ReLU()
      self.linear3 = MyLinearLayer(8, 10)
      self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
      x = self.flatten(x)
      x = self.linear1(x)
      x = self.activation1(x)
      x = self.linear2(x)
      x = self.activation2(x)
      x = self.linear3(x)
      x = self.softmax(x)
      return x

first_model = SimpleModel()

In [None]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(first_model.parameters(), lr=0.001, momentum=0.9)

In [None]:
def train_one_epoch(epoch_index, tb_writer):
    running_loss = 0.

    for i, data in enumerate(training_loader):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = first_model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        if i % 1000 == 999:
            current_loss = running_loss / 1000  # loss per batch
            print('  batch {} loss: {}'.format(i + 1, current_loss))
            tb_x = epoch_index * len(training_loader) + i + 1
            tb_writer.add_scalar('Loss/train_batch', current_loss, tb_x)
            running_loss = 0.

    avg_loss = running_loss / len(training_loader)
    tb_writer.add_scalar('Loss/train_epoch', avg_loss, epoch_index + 1)
    print('Epoch {} average loss: {}'.format(epoch_index, avg_loss))

    return avg_loss


In [None]:
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
writer = SummaryWriter('runs/fashion_trainer_{}'.format(timestamp))
epoch_number = 0

EPOCHS = 5

best_vloss = 1_000_000.

for epoch in range(EPOCHS):
    print('EPOCH {}:'.format(epoch + 1))

    # Make sure gradient tracking is on, and do a pass over the data
    first_model.train(True)
    avg_loss = train_one_epoch(epoch + 1, writer)


    running_vloss = 0.0
    # Set the model to evaluation mode, disabling dropout and using population
    # statistics for batch normalization.
    first_model.eval()

    # Disable gradient computation and reduce memory consumption.
    with torch.no_grad():
          for i, vdata in tqdm(enumerate(validation_loader), total=len(validation_loader)):
            vinputs, vlabels = vdata
            voutputs = first_model(vinputs)
            vloss = loss_fn(voutputs, vlabels)
            running_vloss += vloss

            avg_vloss = running_vloss / (i + 1)
            # print('LOSS train {} valid {}'.format(avg_loss, avg_vloss))

            # Log the running loss averaged per batch
            # for both training and validation
            writer.add_scalars('Training vs. Validation Loss',
                            { 'Training' : avg_loss, 'Validation' : avg_vloss },
                            epoch_number + 1)
            writer.flush()

            # Track best performance, and save the model's state
            if avg_vloss < best_vloss:
                best_vloss = avg_vloss
                model_path = 'model_{}_{}'.format(timestamp, epoch_number)
                torch.save(first_model.state_dict(), model_path)

            epoch_number += 1

EPOCH 1:
  batch 1000 loss: 2.30267742562294
  batch 2000 loss: 2.302058845758438
Epoch 1 average loss: 0.4603355540275574


100%|██████████| 500/500 [00:02<00:00, 245.61it/s]


EPOCH 2:
  batch 1000 loss: 2.3011601157188415
  batch 2000 loss: 2.300096718788147
Epoch 2 average loss: 0.459809529209137


100%|██████████| 500/500 [00:01<00:00, 266.11it/s]


EPOCH 3:
  batch 1000 loss: 2.2939949424266817
  batch 2000 loss: 2.2768335988521575
Epoch 3 average loss: 0.45203308391571045


100%|██████████| 500/500 [00:02<00:00, 218.44it/s]


EPOCH 4:
  batch 1000 loss: 2.2450287343263624
  batch 2000 loss: 2.2339828469753265
Epoch 4 average loss: 0.4448083688735962


100%|██████████| 500/500 [00:02<00:00, 200.07it/s]


EPOCH 5:
  batch 1000 loss: 2.213556687116623
  batch 2000 loss: 2.2079639376401903
Epoch 5 average loss: 0.44033006067276004


100%|██████████| 500/500 [00:01<00:00, 261.17it/s]


In [None]:
# Assuming you have a pretrained model
pretrained_model = first_model

# Create empty dictionaries to store weights and biases
weights_dict = {}
biases_dict = {}

# Loop through named parameters and separate weights and biases
for name, param in pretrained_model.named_parameters():
    if 'weight' in name:
        weights_dict[name] = {'values': param.data}
    elif 'bias' in name:
        biases_dict[name] = {'values': param.data}

# Print or use the dictionaries as needed
print("Weights:")
for name, weight_info in weights_dict.items():
    print(f"{name}: {weight_info['values']}")

print("\nBiases:")
for name, bias_info in biases_dict.items():
    print(f"{name}: {bias_info['values']}")

Weights:
linear1.weight: tensor([[ 0.0051, -0.0109, -0.0134,  ..., -0.0040,  0.0027,  0.0017],
        [ 0.0269,  0.0241,  0.0337,  ..., -0.0123,  0.0015, -0.0125],
        [-0.0224, -0.0223, -0.0128,  ..., -0.0117, -0.0044, -0.0146],
        [-0.0102, -0.0151, -0.0137,  ...,  0.0135,  0.0161,  0.0035]])
linear2.weights: tensor([[ 0.0837, -0.0039, -0.2401, -0.0642, -0.1319],
        [ 0.0380,  0.4827, -0.0205,  0.0570, -0.1938],
        [ 0.4209, -0.4782,  0.7862, -0.1284,  0.2963],
        [ 0.0903,  0.3750, -0.0516,  0.3940,  0.4866],
        [ 0.2186,  0.5517, -0.0586, -0.2068, -0.0029],
        [ 0.2117,  0.5269,  0.0459, -0.3604, -0.3269],
        [ 0.1510,  0.1431,  0.3781,  0.4818, -0.5797],
        [-0.2690,  0.4615,  0.0696, -0.5976, -0.0938]])
linear3.weights: tensor([[-0.0618,  0.1792,  0.2288, -0.5460,  0.2095,  0.3731,  0.3982, -0.5379,
          0.3945],
        [-0.1426, -0.2694,  0.1873,  0.3319, -0.2772,  0.1929,  0.1849, -0.0798,
          0.1765],
        [ 0.1317, -

-------------------------Second model-------------------------

In [None]:
class SimpleModel2(torch.nn.Module):

    def __init__(self):
      super(SimpleModel2, self).__init__()
      self.flatten = torch.nn.Flatten()
      self.linear1 = MyLinearLayer(3072, 4)
      self.activation1 = torch.nn.ReLU()
      self.linear2 = MyLinearLayer(4, 10)
      self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
      x = self.flatten(x)
      x = self.linear1(x)
      x = self.activation1(x)
      x = self.linear2(x)
      x = self.softmax(x)
      return x

second_model = SimpleModel2()

------------Linear algebric Operations---------------

In [None]:
for name, param in first_model.named_parameters():
  if name == 'linear2.weights':
    print(param.shape)
    print(param)

torch.Size([8, 5])
Parameter containing:
tensor([[ 0.0837, -0.0039, -0.2401, -0.0642, -0.1319],
        [ 0.0380,  0.4827, -0.0205,  0.0570, -0.1938],
        [ 0.4209, -0.4782,  0.7862, -0.1284,  0.2963],
        [ 0.0903,  0.3750, -0.0516,  0.3940,  0.4866],
        [ 0.2186,  0.5517, -0.0586, -0.2068, -0.0029],
        [ 0.2117,  0.5269,  0.0459, -0.3604, -0.3269],
        [ 0.1510,  0.1431,  0.3781,  0.4818, -0.5797],
        [-0.2690,  0.4615,  0.0696, -0.5976, -0.0938]], requires_grad=True)


In [None]:
import numpy as np
print(weights_dict['linear2.weights']['values'].shape)
print(weights_dict['linear3.weights']['values'].shape)
relu_of_first = torch.relu(weights_dict['linear2.weights']['values'])
new_first = torch.cat(torch.ones(1),axis=1)
new_weights = torch.mm(weights_dict['linear3.weights']['values'], new_first)
print(new_weights.shape)

torch.Size([8, 5])
torch.Size([10, 9])


TypeError: cat() received an invalid combination of arguments - got (Tensor, axis=int), but expected one of:
 * (tuple of Tensors tensors, int dim, *, Tensor out)
 * (tuple of Tensors tensors, name dim, *, Tensor out)
