### **Models**

#### **Model_VGG**

In [None]:
"""
For training CSRNet teacher
"""

import torch.nn as nn
import torch
from torchvision import models
# from utils import save_net,load_net
import time


class CSRNet(nn.Module):
    """CSRNet regressor: a VGG-16 style frontend followed by a dilated backend.

    The frontend is built from ``frontend_feat`` (3x3 convolutions and three
    max-pool stages), the backend from ``backend_feat`` with dilation enabled,
    and a final 1x1 convolution reduces the 64 backend channels to one map.

    Args:
        pretrained: When true, every conv is first initialised from
            N(0, 0.01) and the frontend is then overwritten with the leading
            tensors of torchvision's pretrained VGG-16. When false, all convs
            use Kaiming-normal initialisation instead.
    """

    def __init__(self, pretrained=True):
        super(CSRNet, self).__init__()
        # Counter exposed to the training/dataset code; this class only sets it to 0.
        self.seen = 0
        self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512]
        self.backend_feat = [512, 512, 512, 256, 128, 64]
        self.frontend = make_layers(self.frontend_feat)
        self.backend = make_layers(self.backend_feat, in_channels=512, dilation=True)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
        if pretrained:
            self._initialize_weights(mode='normal')
            mod = models.vgg16(pretrained=True)
            self._copy_frontend_weights(mod)
        else:
            self._initialize_weights(mode='kaiming')

    def _copy_frontend_weights(self, source):
        """Copy the leading tensors of ``source`` into the frontend, by position.

        ``state_dict()`` returns detached tensors that share storage with the
        real parameters. Re-binding ``.data`` on such a temporary (what the
        previous code did) leaves the parameter untouched, so the values are
        written in place with ``copy_`` instead.
        """
        frontend_tensors = self.frontend.state_dict().values()
        source_tensors = source.state_dict().values()
        with torch.no_grad():
            # zip() stops at the shorter sequence, i.e. after the frontend's
            # own weight/bias tensors.
            for target, pretrained_tensor in zip(frontend_tensors, source_tensors):
                target.copy_(pretrained_tensor)

    def forward(self, x):
        """Run frontend, backend and the 1x1 output conv on ``x``."""
        # front relates to VGG
        x = self.frontend(x)
        # backend relates to dilated convolution
        x = self.backend(x)
        x = self.output_layer(x)
        return x

    def _initialize_weights(self, mode):
        """Initialise every Conv2d / BatchNorm2d of the model.

        Args:
            mode: ``'normal'`` draws conv weights from N(0, 0.01);
                ``'kaiming'`` uses Kaiming-normal (fan_out, relu). Any other
                value leaves conv weights at their default initialisation.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                if mode == 'normal':
                    nn.init.normal_(m.weight, std=0.01)
                elif mode == 'kaiming':
                    nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            
                
def make_layers(cfg, in_channels=3, batch_norm=False, dilation=False):
    """Assemble an ``nn.Sequential`` from a VGG-style layer specification.

    Args:
        cfg: Sequence whose entries are either ``'M'`` (a 2x2, stride-2
            max-pool with ``ceil_mode=True``) or an output channel count for
            a 3x3 convolution followed by ReLU.
        in_channels: Channel count of the tensor fed to the first conv.
        batch_norm: Insert ``BatchNorm2d`` between each conv and its ReLU.
        dilation: When truthy, convs use dilation 2 and padding 2; otherwise
            dilation 1 and padding 1.

    Returns:
        The stacked layers as a single ``nn.Sequential``.
    """
    d_rate = 2 if dilation else 1
    modules = []
    channels = in_channels
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True))
            continue
        modules.append(
            nn.Conv2d(channels, spec, kernel_size=3, padding=d_rate, dilation=d_rate)
        )
        if batch_norm:
            modules.append(nn.BatchNorm2d(spec))
        modules.append(nn.ReLU(inplace=True))
        # The next conv consumes what this one produced.
        channels = spec
    return nn.Sequential(*modules)

#### **Model_Teacher_VGG**

In [None]:
"""
Teacher model in SKT
"""
import torch.nn as nn
import torch
from torchvision import models
#from utils import save_net, load_net, cal_para


class CSRNet_teacher(nn.Module):
    """CSRNet used as the teacher during distillation.

    Same layer layout as the plain CSRNet (frontend from ``frontend_feat``,
    dilated backend from ``backend_feat``, 1x1 output conv). Intermediate
    activations can be collected in ``self.features`` by calling
    ``regist_hook()`` once after construction.

    Args:
        pretrained: When true, the frontend is overwritten with the leading
            tensors of torchvision's pretrained VGG-16 after the default
            N(0, 0.01) initialisation.
    """

    def __init__(self, pretrained=False):
        super(CSRNet_teacher, self).__init__()
        # Counter exposed to external code; this class only sets it to 0.
        self.seen = 0
        self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512]
        self.backend_feat = [512, 512, 512, 256, 128, 64]
        self.frontend = make_layers(self.frontend_feat)
        self.backend = make_layers(self.backend_feat, in_channels=512, dilation=True)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)
        self._initialize_weights()
        self.features = []
        if pretrained:
            print ('load vgg pretrained model')
            mod = models.vgg16(pretrained=True)
            # dict views are not subscriptable on Python 3, so walk both state
            # dicts in lockstep. The returned tensors share storage with the
            # parameters, hence the in-place copy_ updates the real weights.
            with torch.no_grad():
                for target, source in zip(self.frontend.state_dict().values(),
                                          mod.state_dict().values()):
                    target.copy_(source)

    def forward(self, x):
        """Run the network; resets ``self.features`` before the hooks refill it."""
        self.features = []
        # frontend: VGG
        x = self.frontend(x)
        # backend: dilated convolution
        x = self.backend(x)
        x = self.output_layer(x)
        return x

    def _initialize_weights(self):
        """Initialise convs from N(0, 0.01) with zero bias; BatchNorm to (1, 0)."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def regist_hook(self):
        """Attach forward hooks that record selected activations.

        Hooks are placed on frontend children named '1', '4', '9', '16' and
        backend children named '1', '7'. Each hook appends the detached output
        of its module to ``self.features`` during ``forward``.

        NOTE(review): every call registers a fresh set of hooks and none are
        removed, so calling this more than once duplicates the recorded
        features.
        """
        self.features = []

        def get(module, inputs, output):
            # Called automatically after the hooked module's forward pass.
            self.features.append(output.detach())

        for name, module in self.frontend.named_children():
            if name in ('1', '4', '9', '16'):
                module.register_forward_hook(get)
        for name, module in self.backend.named_children():
            if name in ('1', '7'):
                module.register_forward_hook(get)


def make_layers(cfg, in_channels=3, batch_norm=False, dilation=False):
    """Turn a layer specification into a sequential conv/pool stack.

    Each integer in ``cfg`` becomes a 3x3 convolution with that many output
    channels (optionally followed by ``BatchNorm2d``) and an in-place ReLU;
    each ``'M'`` becomes a 2x2 stride-2 max-pool with ``ceil_mode=True``.
    A truthy ``dilation`` makes every conv use dilation and padding of 2
    instead of 1. ``in_channels`` is the input width of the first conv.
    """
    rate = 1
    if dilation:
        rate = 2

    stack = []
    prev_width = in_channels
    for entry in cfg:
        if entry != 'M':
            group = [nn.Conv2d(prev_width, entry, kernel_size=3, padding=rate, dilation=rate)]
            if batch_norm:
                group.append(nn.BatchNorm2d(entry))
            group.append(nn.ReLU(inplace=True))
            stack.extend(group)
            prev_width = entry
        else:
            stack.append(nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True))
    return nn.Sequential(*stack)

In [None]:
# JSON split files for ShanghaiTech part A.
train_json = "/content/drive/MyDrive/ShanghaiTech_Crowd_Counting_Dataset/part_A_final/json/part_A_train.json"
val_json = "/content/drive/MyDrive/ShanghaiTech_Crowd_Counting_Dataset/part_A_final/json/part_A_val.json"
test_json = "/content/drive/MyDrive/ShanghaiTech_Crowd_Counting_Dataset/part_A_final/json/part_A_test.json"
task = "A"

# Checkpoint locations; the teacher path is defined before use so the load
# below does not repeat the literal.
teacher_ckpt ="/content/drive/MyDrive/ShanghaiTech_Crowd_Counting_Dataset/checkpoint.pth.tar"
student_ckpt = None

# Build the teacher and restore its weights on CPU. The state dict is loaded
# once; `pre` keeps the value returned by load_state_dict.
teacher_model = CSRNet_teacher()
weight = torch.load(teacher_ckpt, map_location=torch.device('cpu'))['state_dict']
pre = teacher_model.load_state_dict(weight)

# Best validation metrics seen so far (start from a large sentinel).
mae_best_prec1 = 1e6
mse_best_prec1 = 1e6
batch_size = 6
momentum = 0.95
decay = 1 * 1e-4  # 5 * 1e-4
start_epoch = 0
epochs = 1000
workers = 0  #4
# Wall-clock based seed, so it differs from run to run.
seed = time.time()
print_freq = 400   #30


original_lr = 1e-4  #1e-4
lr = 1e-7       #1e-4
steps = [-1,1,100,150]
scales = [1,1,1,1]

# Weights of the two distillation loss terms.
lamb_fsp = 0.5
lamb_cos = 0.5
# NOTE(review): the file name ends in ".pth.t" — possibly a truncated
# ".pth.tar"; confirm before relying on it.
out = "/content/drive/MyDrive/ShanghaiTech_Crowd_Counting_Dataset/CSRNet_models_weights/partA_student.pth.t"

In [None]:
# parser = argparse.ArgumentParser(description='CSRNet-SKT distillation')
# parser.add_argument('train_json', metavar='TRAIN',
#                     help='path to train json')
# parser.add_argument('val_json', metavar='VAL',
#                     help='path to val json')
# parser.add_argument('test_json', metavar='TEST',
#                     help='path to test json')
# parser.add_argument('--lr', default=None, type=float,
#                     help='learning rate')
# # parser.add_argument('--teacher', '-t', default=None, type=str,
# #                     help='teacher net version')
# parser.add_argument('--teacher_ckpt', '-tc', default=None, type=str,
#                     help='teacher checkpoint')
# # parser.add_argument('--student', '-s', default=None, type=str,
# #                     help='student net version')
# parser.add_argument('--student_ckpt', '-sc', default=None, type=str,
#                     help='student checkpoint')
# parser.add_argument('--lamb_fsp', '-laf', type=float, default=None,
#                     help='weight of dense fsp loss')
# parser.add_argument('--lamb_cos', '-lac', type=float, default=None,
#                     help='weight of cos loss')
# parser.add_argument('--gpu', metavar='GPU', type=str, default='0',
#                     help='GPU id to use')
# parser.add_argument('--out', metavar='OUTPUT', type=str,
#                     help='path to output')

#### **Model_Student_VGG**

In [None]:
"""
Student model (1/n-CSRNet) in SKT.
"""
import torch.nn as nn
import torch
from torchvision import models

# Channel widths for the student network, one row per compression ratio.
# CSRNet_student picks a row with channel_nums[ratio-2], so row 0 is ratio 2.
channel_nums = [[32, 64, 128, 256],  # half
                [21, 43, 85, 171],  # third
                [16, 32, 64, 128],  # quarter
                [13, 26, 51, 102],  # fifth
                ]

class CSRNet_student(nn.Module):
    """Reduced-width CSRNet used as the student during distillation.

    Args:
        ratio: Width reduction factor; selects row ``ratio - 2`` of
            ``channel_nums`` (so 2..5 with the table defined in this file).
        transform: When true, 1x1 "transform" convs are created that project
            selected intermediate activations to 64/64/128/256/512/256
            channels; their outputs are collected in ``self.features``.

    Raises:
        ValueError: If ``ratio`` does not address a row of ``channel_nums``.
            Without this check a ratio below 2 would silently wrap around to
            another row through negative indexing.

    In training mode ``forward`` returns the list ``self.features`` (the
    transformed activations, if enabled, followed by the final output); in
    eval mode it returns only the final output tensor.
    """

    def __init__(self, ratio=4, transform=True):
        super(CSRNet_student, self).__init__()
        # Counter exposed to external code; this class only sets it to 0.
        self.seen = 0
        self.transform = transform
        if not 2 <= ratio <= len(channel_nums) + 1:
            raise ValueError(
                'ratio must be between 2 and %d, got %r' % (len(channel_nums) + 1, ratio)
            )
        channel = channel_nums[ratio-2]
        self.conv0_0 = conv_layers(3, channel[0])
        if self.transform:
            self.transform0_0 = feature_transform(channel[0], 64)
        self.conv0_1 = conv_layers(channel[0], channel[0])

        self.pool0 = pool_layers()
        if self.transform:
            self.transform1_0 = feature_transform(channel[0], 64)
        self.conv1_0 = conv_layers(channel[0], channel[1])
        self.conv1_1 = conv_layers(channel[1], channel[1])

        self.pool1 = pool_layers()
        if self.transform:
            self.transform2_0 = feature_transform(channel[1], 128)
        self.conv2_0 = conv_layers(channel[1], channel[2])
        self.conv2_1 = conv_layers(channel[2], channel[2])
        self.conv2_2 = conv_layers(channel[2], channel[2])

        self.pool2 = pool_layers()
        if self.transform:
            self.transform3_0 = feature_transform(channel[2], 256)
        self.conv3_0 = conv_layers(channel[2], channel[3])
        self.conv3_1 = conv_layers(channel[3], channel[3])
        self.conv3_2 = conv_layers(channel[3], channel[3])

        # Dilated block; conv_layers treats any truthy `dilation` as rate 2.
        self.conv4_0 = conv_layers(channel[3], channel[3], dilation=2)
        if self.transform:
            self.transform4_0 = feature_transform(channel[3], 512)
        self.conv4_1 = conv_layers(channel[3], channel[3], dilation=2)
        self.conv4_2 = conv_layers(channel[3], channel[3], dilation=2)
        self.conv4_3 = conv_layers(channel[3], channel[2], dilation=2)
        if self.transform:
            self.transform4_3 = feature_transform(channel[2], 256)
        self.conv4_4 = conv_layers(channel[2], channel[1], dilation=2)
        self.conv4_5 = conv_layers(channel[1], channel[0], dilation=2)

        self.conv5_0 = nn.Conv2d(channel[0], 1, kernel_size=1)

        self._initialize_weights()
        self.features = []

    def forward(self, x):
        """Run the network, collecting transformed features along the way."""
        self.features = []

        x = self.conv0_0(x)
        if self.transform:
            self.features.append(self.transform0_0(x))
        x = self.conv0_1(x)

        x = self.pool0(x)
        if self.transform:
            self.features.append(self.transform1_0(x))
        x = self.conv1_0(x)
        x = self.conv1_1(x)

        x = self.pool1(x)
        if self.transform:
            self.features.append(self.transform2_0(x))
        x = self.conv2_0(x)
        x = self.conv2_1(x)
        x = self.conv2_2(x)

        x = self.pool2(x)
        if self.transform:
            self.features.append(self.transform3_0(x))
        x = self.conv3_0(x)
        x = self.conv3_1(x)
        x = self.conv3_2(x)

        x = self.conv4_0(x)
        if self.transform:
            self.features.append(self.transform4_0(x))
        x = self.conv4_1(x)
        x = self.conv4_2(x)
        x = self.conv4_3(x)
        if self.transform:
            self.features.append(self.transform4_3(x))
        x = self.conv4_4(x)
        x = self.conv4_5(x)

        x = self.conv5_0(x)

        # The final output is always the last entry of the feature list.
        self.features.append(x)

        if self.training:
            return self.features
        return x

    def _initialize_weights(self):
        """Kaiming-normal init for convs (zero bias); BatchNorm to (1, 0).

        Initialisation happens in place on whatever device the parameters
        live on; no device transfer is performed here, so the model can be
        built on CPU-only machines and moved later with ``.to(device)``.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # nn.init.xavier_normal_(m.weight)
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                # nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)


def conv_layers(inp, oup, dilation=False):
    """Return a 3x3 convolution followed by an in-place ReLU.

    Args:
        inp: Number of input channels.
        oup: Number of output channels.
        dilation: Any truthy value selects dilation 2 with padding 2;
            otherwise dilation 1 with padding 1.
    """
    d_rate = 2 if dilation else 1
    conv = nn.Conv2d(inp, oup, kernel_size=3, padding=d_rate, dilation=d_rate)
    activation = nn.ReLU(inplace=True)
    return nn.Sequential(conv, activation)


def feature_transform(inp, oup):
    """Return a 1x1 convolution (no padding) followed by an in-place ReLU.

    Maps a feature map with ``inp`` channels to one with ``oup`` channels
    without changing its spatial size.
    """
    return nn.Sequential(
        nn.Conv2d(inp, oup, kernel_size=1),
        nn.ReLU(inplace=True),
    )


def pool_layers(ceil_mode=True):
    """Return a 2x2, stride-2 max-pool; ``ceil_mode`` is forwarded unchanged."""
    window = 2
    pooling = nn.MaxPool2d(kernel_size=window, stride=window, ceil_mode=ceil_mode)
    return pooling

In [None]:
from torchvision import datasets, transforms
import random
# NOTE(review): train_list is empty here, so the dataset below is built from
# no entries — presumably it is meant to be filled from train_json; confirm.
train_list=[]
# CSRNet() defaults to pretrained=True, so constructing it fetches VGG-16
# weights; only model.seen is used in this cell.
model = CSRNet()

# listDataset is not defined in this part of the notebook.
# Image pipeline: ToTensor -> Resize(768) -> Normalize with the mean/std below.
dataset = listDataset(train_list,
                      shuffle=True,
                       transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Resize(768),
                        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                      std=[0.229, 0.224, 0.225]),
                       ]),
                      # target_transform=transforms.Compose([                                 
                      #   transforms.Resize(768),
                      #  ]),
                       train=True, 
                       seen=model.seen,
                       batch_size=batch_size,
                       num_workers=workers)

  f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


In [None]:
# Second dataset over train_list. Here Resize(768) runs before ToTensor, the
# opposite order from the `dataset` cell above.
ds = listDataset(train_list,
                            transform=transforms.Compose([
                            transforms.Resize(768),
                            transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                   std=[0.229, 0.224, 0.225]),
                            ]),
                            #target_transform=transforms.Compose([                                 
                             # transforms.Resize(768),
                             # ]),
                             train=True,
                             seen=0
                             )
# Loader over ds in fixed order (shuffle=False), using the batch_size and
# workers values set in the configuration cell.
dl = torch.utils.data.DataLoader(
         ds,
         num_workers=workers,
         shuffle=False,
         batch_size=batch_size)

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
# Instantiate the student with default arguments so the notebook prints its
# module tree as the cell result.
CSRNet_student()

CSRNet_student(
  (conv0_0): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
  )
  (transform0_0): Sequential(
    (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(inplace=True)
  )
  (conv0_1): Sequential(
    (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
  )
  (pool0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
  (transform1_0): Sequential(
    (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(inplace=True)
  )
  (conv1_0): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
  )
  (conv1_1): Sequential(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
  )
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=True)
  (transform2_0): Sequential(
    (0): Conv2d

In [None]:
from torchsummary import summary
# NOTE(review): `input` shadows the Python builtin of the same name for the
# rest of the session. The width 786 may be a typo for 768 (the dataset cells
# resize to 768) — confirm.
input=(3,768,786)
# Print a layer summary for a freshly built student model.
summary(CSRNet_student(),input)

AttributeError: ignored

#### **Distillation**

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import time
from torchvision import models
from torchsummary import summary

In [None]:
def cosine_similarity(stu_map, tea_map):
    """Return the summed cosine distance between two feature maps.

    Cosine similarity is taken along dim 1 of ``stu_map`` and ``tea_map``;
    ``1 - similarity`` is then summed over all remaining positions into a
    single scalar tensor.
    """
    channel_cosine = nn.CosineSimilarity(dim=1)
    distance = 1 - channel_cosine(stu_map, tea_map)
    return distance.sum()


def cal_dense_fsp(features):
    """Compute FSP matrices for every ordered pair inside each feature group.

    Args:
        features: Iterable of groups; each group is an indexable sequence of
            feature maps.

    Returns:
        A flat list holding, for each group in order and each pair ``(i, j)``
        with ``i < j``, the result of ``gram`` applied to the two
        instance-normalised maps.
    """
    matrices = []
    for group in features:
        size = len(group)
        for first in range(size):
            for second in range(first + 1, size):
                left = group[first]
                right = group[second]
                # Fresh InstanceNorm2d modules, sized from each map's dim 1.
                left_norm = nn.InstanceNorm2d(left.shape[1])
                right_norm = nn.InstanceNorm2d(right.shape[1])
                matrices.append(gram(left_norm(left), right_norm(right)))
    return matrices


def gram(x, y):
    """Return the cross-channel product of ``x`` and ``y`` for sample 0.

    Both inputs are viewed as ``(n, channels, -1, 1)``; only index 0 of the
    first dimension is used. The flattened maps are multiplied
    (``x`` against ``y`` transposed) and the result is divided by the product
    of ``x``'s last two dimensions, giving a ``(c1, c2)`` matrix.
    """
    batch = x.shape[0]
    x_channels, y_channels = x.shape[1], y.shape[1]
    height, width = x.shape[2], x.shape[3]
    # Flatten the spatial dims and keep only the first sample of the batch.
    x_flat = x.view(batch, x_channels, -1, 1)[0, :, :, 0]
    y_flat = y.view(batch, y_channels, -1, 1)[0, :, :, 0]
    product = torch.mm(x_flat, y_flat.transpose(0, 1))
    return product / (width * height)


def scale_process(features, scale=[3, 2, 1], ceil_mode=True):
    """Down-sample the leading features for multi-scale dense FSP.

    Feature ``i`` (for ``i < len(scale)``) is max-pooled with kernel and
    stride ``2 ** scale[i]``; any feature beyond ``len(scale)`` is passed
    through unchanged. ``ceil_mode`` is forwarded to the pooling layers.

    Returns:
        A new list with one entry per input feature.
    """
    pooled_count = len(scale)
    processed = []
    for idx, feature in enumerate(features):
        if idx < pooled_count:
            factor = pow(2, scale[idx])
            downsample = nn.MaxPool2d(kernel_size=factor, stride=factor, ceil_mode=ceil_mode)
            processed.append(downsample(feature))
        else:
            processed.append(feature)
    return processed

### **writer**

In [None]:
from torch.utils.tensorboard import SummaryWriter

In [None]:
# TensorBoard writer that logs into the given run directory.
writer = SummaryWriter('/content/drive/MyDrive/ShanghaiTech_Crowd_Counting_Dataset/CSRNet_models_weights/epoch=1000_3')

In [None]:
# Device chosen as a plain string here; an earlier cell assigns `device` as a
# torch.device object for the same choice, and this assignment replaces it.
device = "cuda:0" if torch.cuda.is_available() else "cpu"