In [None]:
#| include: false

import warnings
warnings.filterwarnings('ignore')

from fastai.vision.all import *
from fasterbench.benchmark import evaluate_cpu_speed, get_model_size, get_num_parameters

In [None]:
import torch.nn as nn
import torch
class dfus_block(nn.Module):
    """Fusion block that appends 32 freshly computed channels to its input.

    A 1x1 convolution first maps the ``dim`` input channels to 128. Two
    parallel branches ("up" and "down") then each apply a 3x3 convolution
    (128 -> 32) followed by a 1x1 convolution (32 -> 16). The four branch
    outputs (32 + 16 + 32 + 16 = 96 channels) are concatenated and reduced to
    32 channels by a last 1x1 convolution. Every convolution is bias-free and
    followed by an in-place ReLU.
    """

    def __init__(self, dim):
        """Create the layers for an input carrying ``dim`` channels."""
        super().__init__()
        # Attribute names (including the "fution" spelling) and creation order
        # are left untouched: state_dict keys are derived from them.
        self.conv1 = nn.Conv2d(dim, 128, kernel_size=1, stride=1, padding=0, bias=False)

        self.conv_up1 = nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv_up2 = nn.Conv2d(32, 16, kernel_size=1, stride=1, padding=0, bias=False)

        self.conv_down1 = nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv_down2 = nn.Conv2d(32, 16, kernel_size=1, stride=1, padding=0, bias=False)

        self.conv_fution = nn.Conv2d(96, 32, kernel_size=1, stride=1, padding=0, bias=False)

        # Shared activation, applied after every convolution.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """
        x: [b,dim,h,w]
        return out: [b,dim+32,h,w] -- ``x`` followed by the 32 fused channels
        """
        squeezed = self.relu(self.conv1(x))

        up_a = self.relu(self.conv_up1(squeezed))
        up_b = self.relu(self.conv_up2(up_a))

        down_a = self.relu(self.conv_down1(squeezed))
        down_b = self.relu(self.conv_down2(down_a))

        branches = torch.cat((up_a, up_b, down_a, down_b), dim=1)
        fused = self.relu(self.conv_fution(branches))

        # Dense connection: the untouched input is passed through next to the new features.
        return torch.cat((x, fused), dim=1)

class ddfn(nn.Module):
    """Two-branch convolutional stem followed by a chain of ``dfus_block`` modules.

    The stem runs two parallel 3x3 -> 1x1 branches of 32 channels each and
    concatenates all four intermediate maps into 128 channels, which are then
    fed through ``num_blocks`` fusion blocks in sequence.
    """

    def __init__(self, dim, num_blocks=78):
        """Create the stem for ``dim`` input channels and ``num_blocks`` fusion blocks."""
        super().__init__()

        self.conv_up1 = nn.Conv2d(dim, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv_up2 = nn.Conv2d(32, 32, kernel_size=1, stride=1, padding=0, bias=False)

        self.conv_down1 = nn.Conv2d(dim, 32, kernel_size=3, stride=1, padding=1, bias=False)
        self.conv_down2 = nn.Conv2d(32, 32, kernel_size=1, stride=1, padding=0, bias=False)

        # The stem emits 4 * 32 = 128 channels and block number idx is built
        # for 128 + 32 * idx input channels.
        self.dfus_blocks = nn.Sequential(
            *(dfus_block(dim=128 + 32 * idx) for idx in range(num_blocks))
        )

        # Shared activation, applied after every stem convolution.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """
        x: [b,dim,h,w]
        return out: [b,c_out,h,w], where c_out is whatever the block chain
        emits for its 128-channel input (128 when num_blocks is 0)
        """
        up_a = self.relu(self.conv_up1(x))
        up_b = self.relu(self.conv_up2(up_a))

        down_a = self.relu(self.conv_down1(x))
        down_b = self.relu(self.conv_down2(down_a))

        stem = torch.cat((up_a, up_b, down_a, down_b), dim=1)
        return self.dfus_blocks(stem)

class HSCNN_Plus(nn.Module):
    """``ddfn`` backbone followed by a bias-free 1x1 convolution to ``out_channels``."""

    def __init__(self, in_channels=3, out_channels=31, num_blocks=30):
        """Create the backbone and the 1x1 output projection.

        in_channels: channels of the input image.
        out_channels: channels produced by the final 1x1 convolution.
        num_blocks: number of fusion blocks inside the backbone.
        """
        super().__init__()

        self.ddfn = ddfn(dim=in_channels, num_blocks=num_blocks)

        # Width the projection expects from the backbone: 128 + 32 per block.
        backbone_channels = 128 + 32 * num_blocks
        self.conv_out = nn.Conv2d(
            backbone_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False
        )

    def forward(self, x):
        """
        x: [b,in_channels,h,w]
        return out: [b,out_channels,h,w]
        """
        return self.conv_out(self.ddfn(x))

In [None]:
# #| include: false

# def get_dls(size, bs):
#     path = URLs.IMAGENETTE_160
#     source = untar_data(path)
#     blocks=(ImageBlock, CategoryBlock)
#     tfms = [RandomResizedCrop(size, min_scale=0.35), FlipItem(0.5)]
#     batch_tfms = [Normalize.from_stats(*imagenet_stats)]

#     csv_file = 'noisy_imagenette.csv'
#     inp = pd.read_csv(source/csv_file)
#     dblock = DataBlock(blocks=blocks,
#                splitter=ColSplitter(),
#                get_x=ColReader('path', pref=source),
#                get_y=ColReader(f'noisy_labels_0'),
#                item_tfms=tfms,
#                batch_tfms=batch_tfms)

#     return dblock.dataloaders(inp, path=source, bs=bs)

In [None]:
# size, bs = 128, 32
# dls = get_dls(size, bs)

In [None]:
# Root of the local MST-plus-plus checkout. The default is the original
# machine-specific location; set the MST_ROOT environment variable to run the
# notebook elsewhere without editing this cell.
import os
project_root = Path(os.environ.get('MST_ROOT', '/root/Ninjalabo/HSI/MST-plus-plus/MST-plus-plus'))

# Pretrained HSCNN+ checkpoint restored in the next cell.
model_path = project_root/'test_challenge_code'/'model_zoo'/'hscnn_plus.pth'

# Image folder used to build the dataloaders; kept as a str with a trailing slash, as before.
path = f'{project_root}/dataset/Train_RGB/'

In [None]:
# Build the network with its default configuration and restore the pretrained weights.
model = HSCNN_Plus()
# map_location='cpu' lets a checkpoint saved from a GPU process be restored on a
# machine without CUDA. The freshly built model lives on the CPU anyway, so the
# loaded weights end up in the same place as before.
checkpoint = torch.load(model_path, map_location='cpu')
# The file holds either a bare state_dict or a dict wrapping one under 'state_dict'.
if 'state_dict' in checkpoint:
    model.load_state_dict(checkpoint['state_dict'])
else:
    model.load_state_dict(checkpoint)
# Switch to inference mode; as the last expression this also displays the module tree.
model.eval()


HSCNN_Plus(
  (ddfn): ddfn(
    (conv_up1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv_up2): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (conv_down1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (conv_down2): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (dfus_blocks): Sequential(
      (0): dfus_block(
        (conv1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (conv_up1): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (conv_up2): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (conv_down1): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (conv_down2): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (conv_fution): Conv2d(96, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (relu): ReLU(inplace=True)
     

In [None]:
# Collect the image files found under `path`.
files = get_image_files(path)

def label_func(f):
    """Return True when the first character of ``f`` is uppercase, False otherwise."""
    first_char = f[0]
    return first_char.isupper()

# Build dataloaders labelled by `label_func`, with items resized to 32 and a batch size of 5.
# NOTE(review): label_func yields one boolean per image, while the model defined
# above ends in a convolution and emits a feature map per image — confirm that
# these targets are what the later training cells are meant to use.
dls = ImageDataLoaders.from_name_func(path, files, label_func, item_tfms=Resize(32),bs=5)



In [None]:
# Wrap the dataloaders and the pretrained model in a fastai Learner; the
# benchmark cells below read the network back through `learn.model`.
learn = Learner(dls, model, metrics=[accuracy])

In [None]:
# Baseline footprint of the unmodified model, as reported by the fasterbench helpers.
num_parameters = get_num_parameters(learn.model)
disk_size = get_model_size(learn.model)
# disk_size is divided by 1e6 and labelled MB, i.e. it is treated as a byte count.
print(f"Model Size: {disk_size / 1e6:.2f} MB (disk), {num_parameters} parameters")

Model Size: 18.65 MB (disk), 4645504 parameters


In [None]:
# Put the learner's model in eval mode and move it to the CPU for benchmarking.
model = learn.model.eval().to('cpu')
# Draw one batch; `x` supplies the sample input for the speed measurements below.
x,y = dls.one_batch()

In [None]:
# Benchmark the baseline model on one image: x[0][None] takes the first sample
# of the batch and re-adds the batch axis. The first element of the helper's
# result is printed as milliseconds.
print(f'Inference Speed: {evaluate_cpu_speed(learn.model, x[0][None])[0]:.2f}ms')

Inference Speed: 67.45ms


In [None]:
# Draw a batch (this rebinds `x` and `y`) and show the input and target shapes.
x, y = dls.one_batch()
print("Input Shape:", x.shape)
print("Target Shape:", y.shape)


Input Shape: torch.Size([5, 3, 32, 32])
Target Shape: torch.Size([5])


---

<br>

## **Knowledge Distillation**

<br>

<blockquote>
<pre><b><i> KnowledgeDistillationCallback(teacher.model, loss) </i></b></pre>
<p style="font-size: 15px"><i>
You only need to pass your teacher's model and a distillation loss to the callback. Behind the scenes, FasterAI will take care of making your model train using knowledge distillation.
</i></p>
</blockquote>

<br>

In [None]:
from fasterai.distill.all import *

In [None]:
# NOTE(review): this cell fails with the TypeError recorded below. `vision_learner`
# is given an already-instantiated module here, whereas fastai documents its
# second argument as an architecture function; a plain `Learner` (as in the
# next cell) is presumably what was intended — confirm.
teacher = vision_learner(dls, model, metrics=[accuracy])
teacher.fit_one_cycle(3, 1e-4)

TypeError: forward() got an unexpected keyword argument 'pretrained'

In [None]:
# Build the teacher from the pretrained model with an explicit cross-entropy
# loss and train it for 3 epochs with the one-cycle policy.
# NOTE(review): the run recorded below stops with a RuntimeError because the
# targets have size [5] (one label per image) while the loss expects spatial
# targets for this model's output — the dataloaders and the model do not
# describe the same task; confirm the intended targets.
teacher = Learner(dls, model, loss_func=nn.CrossEntropyLoss(), metrics=[accuracy])
teacher.fit_one_cycle(3, 1e-4)


epoch,train_loss,valid_loss,accuracy,time


RuntimeError: only batches of spatial targets supported (3D tensors) but got targets of size: : [5]

In [None]:
# Train a VGG16-BN student for 10 epochs while distilling from the teacher's
# model through KnowledgeDistillationCallback with the SoftTarget loss.
# NOTE(review): the student is created with num_classes=10 while label_func
# produces boolean labels — confirm the intended number of classes.
student = Learner(dls, models.vgg16_bn(num_classes=10), metrics=[accuracy])
kd_cb = KnowledgeDistillationCallback(teacher.model, SoftTarget)
student.fit_one_cycle(10, 1e-4, cbs=kd_cb)

---

<br>

## **Sparsifying**

<br>

**But let's come back to our example!**

In [None]:
#| include: false
from fasterai.sparse.all import *

In [None]:
# Rebind `student` to a Learner around the pretrained model and train it for
# 10 epochs at a learning rate of 1e-5.
# NOTE(review): the run recorded below aborts with a batch-size ValueError
# (4960 vs 5), presumably because the model output and the per-image labels
# have incompatible shapes — confirm before relying on later cells.
student = Learner(dls, model, metrics=[accuracy])
student.fit(10, 1e-5)

epoch,train_loss,valid_loss,accuracy,time


ValueError: Expected input batch_size (4960) to match target batch_size (5).

In [None]:
# Train for 10 epochs with SparsifyCallback: sparsity=50 (presumably a
# percentage — confirm), filter granularity, global context, the large_final
# criteria and the cos schedule.
sp_cb = SparsifyCallback(sparsity=50, granularity='filter', context='global', criteria=large_final, schedule=cos)
student.fit(10, 1e-5, cbs=sp_cb)

In [None]:
# Benchmark the student's model on the same single-image input as the baseline.
print(f'Inference Speed: {evaluate_cpu_speed(student.model, x[0][None])[0]:.2f}ms')

Inference Speed: 68.80ms


In [None]:
# Footprint of the student's model after the sparsification step.
num_parameters = get_num_parameters(student.model)
disk_size = get_model_size(student.model)
print(f"Model Size: {disk_size / 1e6:.2f} MB (disk), {num_parameters} parameters")

Model Size: 37.30 MB (disk), 4645504 parameters


---

<br>

## **Pruning**

<blockquote>
    <pre><b><i>PruneCallback(learn, sparsity, context, criteria, schedule)</i></b></pre>

<ul><i>
<li style="font-size:15px"><b>sparsity</b>: the percentage of sparsity that you want in your network </li>
<li style="font-size:15px"><b>context</b>: either <code>local</code> or <code>global</code>; determines whether the parameters to remove are chosen in each layer independently (<code>local</code>) or across the whole network (<code>global</code>).</li>
<li style="font-size:15px"><b>criteria</b>: the criteria used to select which parameters to remove (currently supported: <code>l1</code>, <code>taylor</code>)</li>
<li style="font-size:15px"><b>schedule</b>: which schedule you want to follow for the sparsification (currently supported: <a href="https://docs.fast.ai/callback.html#Annealing-functions">any scheduling function of fastai</a>, e.g. <code>linear</code>, <code>cosine</code>, ... and <code>gradual</code>, common schedules such as One-Shot, Iterative or <a href="https://openreview.net/pdf?id=Sy1iIDkPM">Automated Gradual</a>)</li>
</i></ul>
</blockquote>

In [None]:
from fasterai.prune.all import *

In [None]:
# Train for 5 epochs with PruneCallback restricted to nn.Conv2d layers:
# sparsity=50, global context, the large_final criteria and the cos schedule.
pr_cb = PruneCallback(sparsity=50, context='global', criteria=large_final, schedule=cos, layer_type=[nn.Conv2d])
student.fit(5, 1e-5, cbs=pr_cb)

In [None]:
# Footprint of the student's model after the pruning step.
num_parameters = get_num_parameters(student.model)
disk_size = get_model_size(student.model)
print(f"Model Size: {disk_size / 1e6:.2f} MB (disk), {num_parameters} parameters")

In [None]:
# Benchmark the pruned student's model on the single-image input.
print(f'Inference Speed: {evaluate_cpu_speed(student.model, x[0][None])[0]:.2f}ms')

---

<br>

## **Batch Normalization Folding**

<blockquote>
<pre><b><i>bn_folder = BN_Folder()
bn_folder.fold(learn.model)</i></b></pre>
<p style="font-size: 15px"><i>
Again, you only need to pass your model and FasterAI takes care of the rest. For models built using <code>nn.Sequential</code>, you don't need to change anything. For others, if you want to see speedup and compression, you actually need to subclass your model to remove the batch norm from the parameters and from the <code>forward</code> method of your network.
</i></p>
</blockquote>

In [None]:
from fasterai.misc.bn_folding import *

In [None]:
# Run BN_Folder over the student's model and keep the result as `folded_model`.
# NOTE(review): `student` was last rebound to a Learner around the HSCNN_Plus
# model, which is defined above with only Conv2d and ReLU layers, so there may
# be no batch-norm layers to fold here — confirm.
bn_f = BN_Folder()
folded_model = bn_f.fold(student.model)

In [None]:
# Footprint of the model returned by the batch-norm folding step.
num_parameters = get_num_parameters(folded_model)
disk_size = get_model_size(folded_model)
print(f"Model Size: {disk_size / 1e6:.2f} MB (disk), {num_parameters} parameters")

In [None]:
# Benchmark the folded model on the single-image input.
print(f'Inference Speed: {evaluate_cpu_speed(folded_model, x[0][None])[0]:.2f}ms')

In [None]:
# Wrap the folded model in a new Learner and evaluate it on the validation set.
folded_learner = Learner(dls, folded_model, metrics=[accuracy])
folded_learner.validate()

---

<br>

## **FC Layers Factorization**

<blockquote>
<pre><b><i>FCD = FC_Decomposer()
decomposed_model = FCD.decompose(model, percent_removed)</i></b></pre>
<p style="font-size: 15px"><i>
    The <code>percent_removed</code> corresponds to the percentage of singular values removed (<i>k</i> value above).
</i></p>
</blockquote>

In [None]:
# Size of the model before FC decomposition, for comparison with the figure
# reported after it. This cell originally read `decomposed_model`, which is not
# assigned until the decomposition cell further down, so it raised NameError
# on a fresh Restart & Run All.
get_model_size(folded_learner.model)

In [None]:
from fasterai.misc.fc_decomposer import *

In [None]:
# Decompose the folded learner's model with FC_Decomposer, using percent_removed=0.5.
# NOTE(review): if this is still the HSCNN_Plus model defined above, it has no
# fully connected layers (only Conv2d and ReLU), so this step may leave it
# unchanged — confirm.
fc_decomposer = FC_Decomposer()
decomposed_model = fc_decomposer.decompose(folded_learner.model, percent_removed=0.5)

In [None]:
# Footprint of the model returned by the FC decomposition step.
num_parameters = get_num_parameters(decomposed_model)
disk_size = get_model_size(decomposed_model)
print(f"Model Size: {disk_size / 1e6:.2f} MB (disk), {num_parameters} parameters")

In [None]:
# Benchmark the decomposed model on the single-image input.
print(f'Inference Speed: {evaluate_cpu_speed(decomposed_model, x[0][None])[0]:.2f}ms')

In [None]:
# Fine-tune the decomposed model for 5 epochs with the one-cycle policy at lr 1e-5.
final_learner = Learner(dls, decomposed_model, metrics=[accuracy])
final_learner.fit_one_cycle(5, 1e-5)

---

## Quantization

In [None]:
from fasterai.quantize.quantize_callback import *

In [None]:
# Train for 5 more epochs with QuantizeCallback attached, using its default arguments.
final_learner.fit_one_cycle(5, 1e-5, cbs=QuantizeCallback())

In [None]:
# Benchmark the final learner's model after the quantization run.
print(f'Inference Speed: {evaluate_cpu_speed(final_learner.model, x[0][None])[0]:.2f}ms')

In [None]:
#| include: false
def count_parameters_quantized(model):
    """Count weight and bias elements of every Conv2d/Linear layer in ``model``.

    Both the float layers (``torch.nn``) and their quantized counterparts
    (``torch.ao.nn.quantized``) are counted. Float layers store ``weight`` and
    ``bias`` as attributes (a Parameter, or None for a missing bias), whereas
    the quantized layers expose them as methods; both forms are handled, so
    the function works on float, quantized and mixed models.

    model: the module whose sub-modules are inspected via ``model.modules()``.
    Returns: the total number of weight and bias elements as an int.
    """
    counted_types = (
        torch.nn.modules.conv.Conv2d,
        torch.nn.Linear,
        torch.ao.nn.quantized.modules.conv.Conv2d,
        torch.ao.nn.quantized.modules.linear.Linear,
    )

    def _as_tensor(attr):
        # Attribute form (float layers): already a tensor, or None for "no bias".
        if attr is None or isinstance(attr, torch.Tensor):
            return attr
        # Method form (quantized layers): call it to obtain the tensor.
        return attr()

    total_params = 0
    for module in model.modules():
        if not isinstance(module, counted_types):
            continue

        total_params += _as_tensor(module.weight).numel()

        bias = _as_tensor(module.bias)
        if bias is not None:
            total_params += bias.numel()
    return total_params

In [None]:
# Footprint after quantization; parameters are counted with the notebook's own
# count_parameters_quantized helper instead of get_num_parameters.
num_parameters = count_parameters_quantized(final_learner.model)
disk_size = get_model_size(final_learner.model)
print(f"Model Size: {disk_size / 1e6:.2f} MB (disk), {num_parameters:,} parameters")

---

## Extra Acceleration

In [None]:
from fasterai.misc.cpu_optimizer import accelerate_model_for_cpu

In [None]:
# Pass the final model and a single-image example input to fasterai's CPU optimizer.
final_model = accelerate_model_for_cpu(final_learner.model, x[0][None])

In [None]:
# Benchmark the CPU-optimized model on the single-image input.
print(f'Inference Speed: {evaluate_cpu_speed(final_model, x[0][None])[0]:.2f}ms')

In [None]:
# Footprint of the CPU-optimized model.
# NOTE(review): this uses get_num_parameters again rather than
# count_parameters_quantized — confirm it reports correctly for this model.
num_parameters = get_num_parameters(final_model)
disk_size = get_model_size(final_model)
print(f"Model Size: {disk_size / 1e6:.2f} MB (disk), {num_parameters:,} parameters")

---