In [41]:
import random

import numpy as np
import pandas as pd
import torchvision.models as models
import torchvision
import torch.nn as nn
import torch
import tensorflow as tf
from torch.optim.lr_scheduler import MultiStepLR

from data_loader import *

Collecting tf-nightly
  Downloading tf_nightly-2.10.0.dev20220521-cp38-cp38-win_amd64.whl (359.4 MB)
Collecting gast<=0.4.0,>=0.2.1
  Using cached gast-0.4.0-py3-none-any.whl (9.8 kB)
Collecting astunparse>=1.6.0
  Using cached astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting google-pasta>=0.1.1
  Using cached google_pasta-0.2.0-py3-none-any.whl (57 kB)
Collecting keras-nightly~=2.10.0.dev
  Downloading keras_nightly-2.10.0-py2.py3-none-any.whl (1.7 MB)
Collecting flatbuffers<2,>=1.12
  Using cached flatbuffers-1.12-py2.py3-none-any.whl (15 kB)
Collecting protobuf<3.20,>=3.9.2
  Downloading protobuf-3.19.4-cp38-cp38-win_amd64.whl (895 kB)
Collecting tf-estimator-nightly~=2.10.0.dev
  Downloading tf_estimator_nightly-2.10.0.dev2022052308-py2.py3-none-any.whl (438 kB)
Collecting h5py>=2.9.0
  Using cached h5py-3.6.0-cp38-cp38-win_amd64.whl (2.8 MB)
Collecting tb-nightly~=2.10.0.a
  Downloading tb_nightly-2.10.0a20220522-py3-none-any.whl (5.8 MB)
Installing collected packages: pro

You should consider upgrading via the 'c:\users\macch\desktop\venv_transfer_learning\scripts\python.exe -m pip install --upgrade pip' command.




In [64]:
# All computation in this notebook runs on the CPU; batches are moved to
# this device before each forward pass.
device = torch.device("cpu")

In [65]:
def hybrid_blocks(student, teacher):
    '''
    Pair every student block with the group of teacher blocks it stands in for.

    Both models must expose the attributes ``layer1`` .. ``layer4``, each an
    iterable container of blocks with a length (as in the ResNet class model).
    Within each stage the teacher blocks are divided, in order, into as many
    consecutive groups as the student has blocks in that stage. Groups are as
    even as possible; the leading groups receive the extra block when the
    division is not exact (the same rule ``np.array_split`` uses).

    Parameters:
        student: model providing ``layer1`` .. ``layer4``
        teacher: model providing ``layer1`` .. ``layer4``

    Returns:
        (student_blocks, teacher_blocks): two lists of equal length, where
        ``teacher_blocks[i]`` is the list of teacher blocks that corresponds
        to ``student_blocks[i]``.

    Raises:
        ValueError: if a student stage contains no blocks.
    '''
    def _split_evenly(blocks, n_groups):
        # Plain-Python equivalent of np.array_split, so the modules are never
        # coerced into a NumPy object array.
        if n_groups <= 0:
            raise ValueError('number sections must be larger than 0.')
        base, extra = divmod(len(blocks), n_groups)
        groups = []
        start = 0
        for g in range(n_groups):
            stop = start + base + (1 if g < extra else 0)
            groups.append(blocks[start:stop])
            start = stop
        return groups

    student_blocks = []
    teacher_blocks = []

    for stage in ('layer1', 'layer2', 'layer3', 'layer4'):
        student_stage = list(getattr(student, stage))
        teacher_stage = list(getattr(teacher, stage))
        teacher_blocks += _split_evenly(teacher_stage, len(student_stage))
        student_blocks += student_stage

    return student_blocks, teacher_blocks

In [66]:
def forward(x, student, teacher, a_all):
    '''
    Forward function for hybrid ResNet.

    The stem (conv1, bn1, relu, maxpool) and the head (avgpool, fc) always
    come from the student. In between, block position ``i`` is executed by the
    student block when ``a_all[i] == 1`` and by the matching group of teacher
    blocks when ``a_all[i] == 0``.

    Parameters:
        x: input batch fed to ``student.conv1``
        student: student model (provides stem, head and student blocks)
        teacher: teacher model (provides the teacher blocks)
        a_all: sequence of 0/1 gates, one per student block

    Returns:
        Softmax (dim=1) of the student's ``fc`` output, i.e. probabilities,
        not logits. Do not feed this directly into ``nn.CrossEntropyLoss``,
        which applies its own log-softmax.

    Raises:
        IndexError: if ``a_all`` has fewer entries than there are blocks.
        ValueError: if a gate is neither 0 nor 1.
    '''
    def _forward_blocks(x, student_blocks, teacher_blocks, a_all):
        '''
        Forward function containing only hybrid blocks predictions
        '''
        n_blocks = len(student_blocks)
        assert len(teacher_blocks) == n_blocks   # check if size of blocks is the same
        if len(a_all) < n_blocks:
            raise IndexError(
                f"a_all has {len(a_all)} entries but {n_blocks} hybrid blocks need a gate"
            )
        tmp_x = x
        for i in range(n_blocks): # hybrid block
            gate = a_all[i]
            if gate == 1: # student path
                # Call the module itself (not .forward) so registered hooks run.
                tmp_x = student_blocks[i](tmp_x)
            elif gate == 0: # teacher path
                for teacher_block in teacher_blocks[i]:
                    tmp_x = teacher_block(tmp_x)
            else:
                # Previously such a block was skipped silently.
                raise ValueError(f"a_all[{i}] must be 0 or 1, got {gate!r}")

        return tmp_x

    student_blocks, teacher_blocks = hybrid_blocks(student, teacher)

    softmax = nn.Softmax(dim=1)

    tmp_x = x     # forward pipeline
    tmp_x = student.conv1(tmp_x)
    tmp_x = student.bn1(tmp_x)
    tmp_x = student.relu(tmp_x)
    tmp_x = student.maxpool(tmp_x)
    tmp_x = _forward_blocks(tmp_x, student_blocks, teacher_blocks, a_all)
    tmp_x = student.avgpool(tmp_x)
    tmp_x = torch.flatten(tmp_x, 1)
    tmp_x = student.fc(tmp_x)
    output = softmax(tmp_x)

    return output

In [67]:
def training(data, student, teacher, p, epochs = 200,intervals=200):
    '''
    Train the student inside the hybrid student/teacher network.

    At the start of every epoch each block position draws a Bernoulli gate
    with the scheduled probability: 1 selects the student block, 0 selects
    the teacher group. That gate pattern is used for all batches of the epoch.

    The ``intervals`` parameter selects the probability schedule:
      * intervals == epochs      -> Uniform schedule (probability stays at ``p``)
      * intervals == 1           -> Linear growth schedule (``p`` rises to 1)
      * 1 < intervals < epochs   -> Review schedule, where ``intervals`` is the
                                    number of "repetitions" of the p -> 1 ramp

    Parameters:
        data: iterable of (image, label) batches
        student: model being trained (its parameters go to the optimizer)
        teacher: model supplying the teacher blocks
        p: starting probability of choosing the student block
        epochs: number of epochs
        intervals: number of schedule repetitions (see above)

    Returns:
        (train_loss, train_score): per-epoch lists of the mean batch loss and
        of the accuracy (correct predictions / samples seen), as floats.

    Raises:
        ValueError: if ``intervals`` is not positive.
    '''
    if intervals <= 0:
        raise ValueError("intervals must be a positive integer")

    # forward() already returns softmax probabilities. NLLLoss on their log is
    # the cross-entropy of the logits; CrossEntropyLoss here would apply a
    # second softmax.
    loss_function = nn.NLLLoss()
    # Optimizer (SGD) and learning-rate schedule (MultiStepLR) taken from the paper.
    optimizer = torch.optim.SGD(student.parameters(), lr=0.1, weight_decay=0.0001, momentum=0.9)
    scheduler = MultiStepLR(optimizer, milestones=[100,150], gamma=0.1)
    train_loss = []
    train_score = []

    # One ramp from p to 1, repeated enough times to cover every epoch. The
    # slice keeps the schedule exactly `epochs` long even when epochs is not
    # a multiple of intervals (this used to raise IndexError).
    steps_per_interval = max(1, epochs // intervals)
    x = np.linspace(p, 1, steps_per_interval)
    print(f"x = {x}")
    repeats = -(-epochs // steps_per_interval)   # ceiling division
    p_all = np.tile(x, repeats)[:epochs]
    print(f"p_all = {p_all}")

    for e in range(epochs):
        print(f"\nEpoch no. {e}")
        correct = 0
        running_loss = 0.0
        n_samples = 0
        n_batches = 0
        student_blocks, teacher_blocks = hybrid_blocks(student, teacher)
        a_all = [np.random.binomial(1, p_all[e]) for i in range(len(student_blocks))]   # hybrid block building schema
        print(f"p_all[e] = {p_all[e]}")
        print(f"a_all = {a_all}")

        # Student blocks replaced by the teacher this epoch are frozen.
        for block, a in zip(student_blocks, a_all):
            trainable = (a != 0)
            for param in block.parameters():
                param.requires_grad = trainable

        for image, label in data:
            image = image.to(device)
            label = label.to(device)
            # set_to_none makes SGD skip parameters that get no gradient, so
            # frozen blocks are not moved by weight decay or stale momentum.
            optimizer.zero_grad(set_to_none=True)
            y_pred = forward(image, student, teacher, a_all)
            # Clamp before the log so a probability of exactly 0 cannot give -inf.
            batch_loss = loss_function(torch.log(y_pred.clamp(min=1e-12)), label)
            batch_loss.backward()
            optimizer.step()
            index_ = torch.argmax(y_pred, dim=1)
            correct += torch.sum(index_ == label).item()
            running_loss += batch_loss.item()
            n_samples += label.size(0)
            n_batches += 1
        scheduler.step()

        epoch_score = correct / n_samples if n_samples else float("nan")
        epoch_loss = running_loss / n_batches if n_batches else float("nan")
        train_loss.append(epoch_loss)
        train_score.append(epoch_score)
        print("Training loss: {}, accuracy: {}".format(epoch_loss, epoch_score))

    # Do not leave the student partially frozen by the last epoch's gates.
    student_blocks, _ = hybrid_blocks(student, teacher)
    for block in student_blocks:
        for param in block.parameters():
            param.requires_grad = True

    return train_loss, train_score

In [48]:
# Teacher (ResNet-34) and student (ResNet-18), both loaded with pretrained weights.
resnet34 = models.resnet34(pretrained=True)
resnet18 = models.resnet18(pretrained=True)

# Replace each classifier head with a fresh 2-class linear layer; the input
# width is read from the head being replaced.
resnet18.fc = nn.Linear(resnet18.fc.in_features, 2)
resnet34.fc = nn.Linear(resnet34.fc.in_features, 2)

In [None]:
# Short run: ResNet-18 as student, ResNet-34 as teacher, constant student
# probability of 0.8 (intervals == epochs selects the uniform schedule).
print("Training:\n")
training(trainloader, resnet18, resnet34, p=0.8, epochs=6, intervals=6)

Training:

x = [0.8]
p_all = [0.8 0.8 0.8 0.8 0.8 0.8]

Epoch no. 0
p_all[e] = 0.8
a_all = [0, 1, 0, 0, 1, 1, 1, 1]
