In [None]:
import copy
import math
import random
import time
from collections import OrderedDict, defaultdict
from typing import Union, List
from utils import *

import numpy as np
import torch
import torch.nn as nn
from torch.optim import *
from torch.utils.data import DataLoader
from torchprofile import profile_macs
from tqdm.auto import tqdm

# Abort early when PyTorch cannot see a CUDA device.
assert torch.cuda.is_available(), \
"CUDA support is not available."

import pickle

import LiveTune as lt
import timm

In [None]:
# GPU index this notebook prefers. The index is validated against the number of
# visible GPUs so that hosts with fewer than three devices fall back to GPU 0
# instead of failing later on the first `.to(device)` call.
PREFERRED_GPU_INDEX = 2

if torch.cuda.is_available():
    if PREFERRED_GPU_INDEX < torch.cuda.device_count():
        gpu_index = PREFERRED_GPU_INDEX
    else:
        gpu_index = 0
        print(f"cuda:{PREFERRED_GPU_INDEX} is not available; using cuda:{gpu_index} instead.")
    device = torch.device(f"cuda:{gpu_index}")
else:
    device = torch.device("cpu")

In [None]:
# Built once and shared by every evaluation cell; some cells index it with 'val'.
dataloader = get_dataloader(
    "imagenet",
    256,  # presumably the batch size — TODO confirm against utils.get_dataloader
)

# Base ViT

In [None]:
# Pretrained ViT-Base from timm, moved to the selected device.
base_vit = timm.create_model(
    "vit_base_patch16_224.orig_in21k_ft_in1k", pretrained=True
).to(device)

# Wrap it with get_collapsible_model (fraction 0.1) and restore the archived
# fine-tuned weights into the wrapper.
collapsible_vit = get_collapsible_model(base_vit, fraction=.1, device=device)
finetuned_ckpt = "./models_archive/vit/base/new_finetuned_5epoch_frac01_lc.pth"
collapsible_vit.load_state_dict(torch.load(finetuned_ckpt, map_location=device))

In [None]:
# Baseline numbers for the unmodified timm model; evaluate_model receives the
# whole `dataloader` object rather than a single split.
evaluate_model(
    base_vit,
    dataloader=dataloader,
    device=device,
)

In [None]:
# Collapse the fine-tuned wrapper. The result is not reassigned, so the call
# presumably modifies `collapsible_vit` in place — TODO confirm in utils.
collapse_model(
    collapsible_vit,
    fraction=.1,
    device=device,
    threshold=0.05,
)

In [None]:
# Check on the 'val' entry of the dataloader object after collapsing.
evaluate(
    collapsible_vit,
    dataloader=dataloader['val'],
    device=device,
)

In [None]:
# Same evaluation call as for the baseline, now on the collapsed ViT-Base.
evaluate_model(
    collapsible_vit,
    dataloader=dataloader,
    device=device,
)

In [None]:
# Two-stage ViT-Base pipeline: each stage wraps the current model, restores the
# checkpoint saved for that stage, and collapses it.
base_vit = timm.create_model(
    "vit_base_patch16_224.orig_in21k_ft_in1k", pretrained=True
).to(device)

# Stage 1: fraction 0.1.
stage1_ckpt = "./models_archive/vit/base/new_finetuned_5epoch_frac01_lc.pth"
collapsible_vit = get_collapsible_model(base_vit, fraction=.1, device=device)
collapsible_vit.load_state_dict(torch.load(stage1_ckpt, map_location=device))
collapse_model(collapsible_vit, threshold=0.05)

# Stage 2: wrap the already-collapsed model again at fraction 1/6.
stage2_ckpt = "./models_archive/vit/base/new_finetuned_5epoch_frac01_lc_collapsing2.pth"
collapsible_vit = get_collapsible_model(collapsible_vit, fraction=1/6, device=device)
collapsible_vit.load_state_dict(torch.load(stage2_ckpt, map_location=device))
collapse_model(collapsible_vit, threshold=0.05)

In [None]:
# Evaluate ViT-Base after the second collapse stage.
evaluate_model(
    collapsible_vit,
    dataloader=dataloader,
    device=device,
)

# ViT Large

In [None]:
# Pretrained ViT-Large from timm; rebinding `base_vit` replaces the ViT-Base above.
base_vit = timm.create_model(
    "vit_large_patch16_224.augreg_in21k_ft_in1k",
    pretrained=True,
).to(device)

In [None]:
# Evaluate the untouched ViT-Large on the 'val' entry of the dataloader object.
evaluate(
    base_vit,
    dataloader=dataloader['val'],
    device=device,
)

In [None]:
# Left as a bare expression so the notebook displays the returned value.
get_num_parameters(
    base_vit,
)

In [None]:
# Wrap ViT-Large; note that this section uses the name `collapse_vit`, not
# `collapsible_vit` as in the other sections.
collapse_vit = get_collapsible_model(
    base_vit,
    fraction=.1,
    device=device,
)

In [None]:
# Restore the 2-epoch "nolc" checkpoint into the wrapped ViT-Large.
nolc_ckpt = "./models_archive/vit/large/finetuned_2epoch_frac01_nolc.pth"
collapse_vit.load_state_dict(torch.load(nolc_ckpt, map_location=device))

In [None]:
# Evaluate with whichever weights the preceding cell loaded.
evaluate(
    collapse_vit,
    dataloader=dataloader['val'],
    device=device,
)

In [None]:
# Overwrite the wrapper's weights with the 17-epoch "lc" checkpoint.
lc_ckpt = "./models_archive/vit/large/finetuned_17epoch_frac01_lc.pth"
collapse_vit.load_state_dict(torch.load(lc_ckpt, map_location=device))

In [None]:
# Re-run the same evaluation after the second checkpoint was loaded.
evaluate(
    collapse_vit,
    dataloader=dataloader['val'],
    device=device,
)

In [None]:
# Inspect the slopes reported for the wrapped ViT-Large before collapsing it.
get_model_collapsible_slopes(
    collapse_vit,
)

In [None]:
# Collapse ViT-Large with the same fraction (0.1) that was used to wrap it.
collapse_model(
    collapse_vit,
    fraction=.1,
    threshold=0.05,
    device=device,
)

In [None]:
# NOTE(review): assumes get_collapsible_model/collapse_model left `base_vit`
# untouched, so this is still the original ViT-Large — confirm in utils.
evaluate_model(
    base_vit,
    dataloader=dataloader,
    device=device,
)

In [None]:
# Counterpart of the baseline evaluation, on the collapsed ViT-Large.
evaluate_model(
    collapse_vit,
    dataloader=dataloader,
    device=device,
)

# ViT Small
## vit_small_patch16_224.augreg_in21k_ft_in1k

In [None]:
# ViT-Small, single stage: create -> wrap (fraction 0.1) -> restore -> collapse.
base_vit = timm.create_model(
    "vit_small_patch16_224.augreg_in21k_ft_in1k", pretrained=True
).to(device)

collapsible_vit = get_collapsible_model(base_vit, fraction=.1, device=device)
small_ckpt = "./models_archive/vit/small/new_finetuned_5epoch_frac01_lc.pth"
collapsible_vit.load_state_dict(torch.load(small_ckpt, map_location=device))
collapse_model(
    collapsible_vit,
    fraction=.1,
    device=device,
    threshold=0.05,
)

In [None]:
# NOTE(review): run after the collapse above; assumes `base_vit` itself was not
# modified by wrapping/collapsing — confirm in utils.
evaluate_model(
    base_vit,
    dataloader=dataloader,
    device=device,
)

In [None]:
# Evaluate ViT-Small after the single collapse stage.
evaluate_model(
    collapsible_vit,
    dataloader=dataloader,
    device=device,
)

In [None]:
# Two-stage ViT-Small pipeline, restarted from the pretrained timm weights.
base_vit = timm.create_model(
    "vit_small_patch16_224.augreg_in21k_ft_in1k", pretrained=True
).to(device)

# Stage 1: fraction 0.1.
stage1_ckpt = "./models_archive/vit/small/new_finetuned_5epoch_frac01_lc.pth"
collapsible_vit = get_collapsible_model(base_vit, fraction=.1, device=device)
collapsible_vit.load_state_dict(torch.load(stage1_ckpt, map_location=device))
collapse_model(collapsible_vit, fraction=.1, device=device, threshold=0.05)

# Stage 2: wrap the collapsed model again at fraction 1/6.
stage2_ckpt = "./models_archive/vit/small/new_finetuned_5epoch_collapsing2.pth"
collapsible_vit = get_collapsible_model(collapsible_vit, fraction=1/6, device=device)
collapsible_vit.load_state_dict(torch.load(stage2_ckpt, map_location=device))
collapse_model(collapsible_vit, fraction=1/6, device=device, threshold=0.05)

In [None]:
# Evaluate ViT-Small after the second collapse stage.
evaluate_model(
    collapsible_vit,
    dataloader=dataloader,
    device=device,
)

# Sensitivity Analysis + Tiny ViT results

In [None]:
# Pretrained ViT-Tiny from timm; the 1- to 4-layer cells below reuse this object.
base_vit = timm.create_model(
    "vit_tiny_patch16_224.augreg_in21k_ft_in1k",
    pretrained=True,
).to(device)

# No layers collapsed (baseline)

In [None]:
# Reference point for the sensitivity analysis: the unmodified ViT-Tiny.
evaluate_model(
    base_vit,
    dataloader=dataloader,
    device=device,
)

# 1 layer

In [None]:
# Wrap ViT-Tiny at fraction 1/12 and restore the matching checkpoint.
collapsible_vit = get_collapsible_model(base_vit, fraction=1/12, device=device)
collapsing1_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_collapsing1.pth"
collapsible_vit.load_state_dict(torch.load(collapsing1_ckpt, map_location=device))

In [None]:
# Show the slopes of the 1/12 wrapper before it is collapsed.
get_model_collapsible_slopes(
    collapsible_vit,
)

In [None]:
# Collapse with the same fraction (1/12) that was used to wrap the model.
collapse_model(
    collapsible_vit,
    fraction=1/12,
    threshold=0.05,
    device=device,
)

In [None]:
# Result for the 1/12 configuration.
evaluate_model(
    collapsible_vit,
    dataloader=dataloader,
    device=device,
)

# 2 layers

In [None]:
# Wrap ViT-Tiny at fraction 2/12, restore the "lc2" checkpoint and collapse once.
lc2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_10epoch_frac0.166_lc2.pth"
collapsible_vit = get_collapsible_model(base_vit, fraction=2/12, device=device)
collapsible_vit.load_state_dict(torch.load(lc2_ckpt, map_location=device))
collapse_model(collapsible_vit, threshold=0.05)

# NOTE(review): the "collapsing2" checkpoint is loaded into the same object with
# no get_collapsible_model call in between, unlike the later stages — confirm
# this is intended.
collapsing2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_collapsing2.pth"
collapsible_vit.load_state_dict(torch.load(collapsing2_ckpt, map_location=device))

In [None]:
# Show the slopes after the "collapsing2" checkpoint was loaded.
get_model_collapsible_slopes(
    collapsible_vit,
)

In [None]:
# Final collapse for the 2/12 configuration.
collapse_model(
    collapsible_vit,
    fraction=2/12,
    device=device,
    threshold=0.05,
)

In [None]:
# Result for the 2/12 configuration.
evaluate_model(
    collapsible_vit,
    dataloader=dataloader,
    device=device,
)

In [None]:
# Persist the collapsed 2/12 weights next to the other tiny checkpoints.
collapsed2_path = "./models_archive/vit/tiny/sensitivity_analysis_collapsed2.pth"
torch.save(collapsible_vit.state_dict(), collapsed2_path)

# 3 layers

In [None]:
# Replay the 2/12 stage from `base_vit` (wrap -> lc2 -> collapse -> collapsing2 ->
# collapse), then wrap the result at 3/12 so the next cell can load its checkpoint.
lc2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_10epoch_frac0.166_lc2.pth"
collapsing2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_collapsing2.pth"

collapsible_vit = get_collapsible_model(base_vit, fraction=2/12, device=device)
collapsible_vit.load_state_dict(torch.load(lc2_ckpt, map_location=device))
collapse_model(collapsible_vit, threshold=0.05)
collapsible_vit.load_state_dict(torch.load(collapsing2_ckpt, map_location=device))
collapse_model(collapsible_vit, fraction=2/12, device=device, threshold=0.05)

collapsible_vit = get_collapsible_model(collapsible_vit, fraction=3/12, device=device)

In [None]:
# Restore the 3/12-stage checkpoint into the freshly wrapped model.
collapsing3_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_collapsing3.pth"
collapsible_vit.load_state_dict(torch.load(collapsing3_ckpt, map_location=device))

In [None]:
# Show the slopes for the 3/12 stage before collapsing.
get_model_collapsible_slopes(
    collapsible_vit,
)

In [None]:
# Final collapse for the 3/12 configuration.
collapse_model(
    collapsible_vit,
    fraction=3/12,
    device=device,
    threshold=0.05,
)

In [None]:
# Result for the 3/12 configuration.
evaluate_model(
    collapsible_vit,
    dataloader=dataloader,
    device=device,
)

In [None]:
# Persist the collapsed 3/12 weights.
collapsed3_path = "./models_archive/vit/tiny/sensitivity_analysis_collapsed3.pth"
torch.save(collapsible_vit.state_dict(), collapsed3_path)

# 4 layers

In [None]:
# Replay stages 2/12 and 3/12 from `base_vit`, then wrap at 4/12 and restore the
# 4/12 checkpoint. The 4/12 collapse itself is issued by a later cell.
tiny_ckpt_dir_note = "./models_archive/vit/tiny/"  # directory shared by the paths below
lc2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_10epoch_frac0.166_lc2.pth"
collapsing2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_collapsing2.pth"
collapsing3_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_collapsing3.pth"
collapsing4_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_collapsing4.pth"

# Stage 2/12: two checkpoints are loaded into the same wrapper, each followed by a collapse.
collapsible_vit = get_collapsible_model(base_vit, fraction=2/12, device=device)
collapsible_vit.load_state_dict(torch.load(lc2_ckpt, map_location=device))
collapse_model(collapsible_vit, threshold=0.05)
collapsible_vit.load_state_dict(torch.load(collapsing2_ckpt, map_location=device))
collapse_model(collapsible_vit, fraction=2/12, device=device, threshold=0.05)

# Stage 3/12: this cell passes fraction/device explicitly to collapse_model.
collapsible_vit = get_collapsible_model(collapsible_vit, fraction=3/12, device=device)
collapsible_vit.load_state_dict(torch.load(collapsing3_ckpt, map_location=device))
collapse_model(collapsible_vit, fraction=3/12, device=device, threshold=0.05)

# Stage 4/12: wrap and restore only.
collapsible_vit = get_collapsible_model(collapsible_vit, fraction=4/12, device=device)
collapsible_vit.load_state_dict(torch.load(collapsing4_ckpt, map_location=device))

In [None]:
# Show the slopes for the 4/12 stage before collapsing.
get_model_collapsible_slopes(
    collapsible_vit,
)

In [None]:
# Final collapse for the 4/12 configuration.
collapse_model(
    collapsible_vit,
    fraction=4/12,
    device=device,
    threshold=0.05,
)

In [None]:
# Result for the 4/12 configuration.
evaluate_model(
    collapsible_vit,
    dataloader=dataloader,
    device=device,
)

In [None]:
# Persist the collapsed 4/12 weights.
collapsed4_path = "./models_archive/vit/tiny/sensitivity_analysis_collapsed4.pth"
torch.save(collapsible_vit.state_dict(), collapsed4_path)

# 5 layers

In [None]:
# Start again from a freshly created pretrained ViT-Tiny.
base_vit = timm.create_model(
    "vit_tiny_patch16_224.augreg_in21k_ft_in1k", pretrained=True
).to(device)

# Stage 2/12: wrap, restore "lc2" and collapse, then restore "collapsing2" into the
# same wrapper and collapse again.
lc2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_10epoch_frac0.166_lc2.pth"
collapsing2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_collapsing2.pth"
collapsible_vit = get_collapsible_model(base_vit, fraction=2/12, device=device)
collapsible_vit.load_state_dict(torch.load(lc2_ckpt, map_location=device))
collapse_model(collapsible_vit, threshold=0.05)
collapsible_vit.load_state_dict(torch.load(collapsing2_ckpt, map_location=device))
collapse_model(collapsible_vit, fraction=2/12, device=device, threshold=0.05)

# Stages 3/12 and 4/12: re-wrap, restore that stage's checkpoint, collapse.
replayed_stages = (
    (3/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing3.pth"),
    (4/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing4.pth"),
)
for stage_fraction, stage_ckpt in replayed_stages:
    collapsible_vit = get_collapsible_model(collapsible_vit, fraction=stage_fraction, device=device)
    collapsible_vit.load_state_dict(torch.load(stage_ckpt, map_location=device))
    collapse_model(collapsible_vit, threshold=0.05)

# Stage 5/12: wrap and restore only. Kept outside the loop so the load result
# remains the cell's last expression.
collapsible_vit = get_collapsible_model(collapsible_vit, fraction=5/12, device=device)
collapsible_vit.load_state_dict(
    torch.load("./models_archive/vit/tiny/sensitivity_analysis_collapsing5.pth", map_location=device)
)

In [None]:
# Show the slopes for the 5/12 stage before collapsing.
get_model_collapsible_slopes(
    collapsible_vit,
)

In [None]:
# Final collapse for the 5/12 configuration.
collapse_model(
    collapsible_vit,
    fraction=5/12,
    device=device,
    threshold=0.05,
)

In [None]:
# Result for the 5/12 configuration.
evaluate_model(
    collapsible_vit,
    dataloader=dataloader,
    device=device,
)

# 6 layers

In [None]:
# Start again from a freshly created pretrained ViT-Tiny.
base_vit = timm.create_model(
    "vit_tiny_patch16_224.augreg_in21k_ft_in1k", pretrained=True
).to(device)

# Stage 2/12: wrap, restore "lc2" and collapse, then restore "collapsing2" into the
# same wrapper and collapse again.
lc2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_10epoch_frac0.166_lc2.pth"
collapsing2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_collapsing2.pth"
collapsible_vit = get_collapsible_model(base_vit, fraction=2/12, device=device)
collapsible_vit.load_state_dict(torch.load(lc2_ckpt, map_location=device))
collapse_model(collapsible_vit, threshold=0.05)
collapsible_vit.load_state_dict(torch.load(collapsing2_ckpt, map_location=device))
collapse_model(collapsible_vit, fraction=2/12, device=device, threshold=0.05)

# Stages 3/12 to 5/12: re-wrap, restore that stage's checkpoint, collapse.
replayed_stages = (
    (3/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing3.pth"),
    (4/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing4.pth"),
    (5/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing5.pth"),
)
for stage_fraction, stage_ckpt in replayed_stages:
    collapsible_vit = get_collapsible_model(collapsible_vit, fraction=stage_fraction, device=device)
    collapsible_vit.load_state_dict(torch.load(stage_ckpt, map_location=device))
    collapse_model(collapsible_vit, threshold=0.05)

# Stage 6/12: unlike the neighbouring cells, the final collapse is done here too.
# It stays outside the loop so it remains the cell's last expression.
collapsing6_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_collapsing6.pth"
collapsible_vit = get_collapsible_model(collapsible_vit, fraction=6/12, device=device)
collapsible_vit.load_state_dict(torch.load(collapsing6_ckpt, map_location=device))
collapse_model(collapsible_vit, fraction=6/12, device=device, threshold=0.05)

In [None]:
# NOTE(review): here the slopes are read after the 6/12 collapse already ran in
# the previous cell, whereas other sections read them before collapsing.
get_model_collapsible_slopes(
    collapsible_vit,
)

In [None]:
# Result for the 6/12 configuration.
evaluate_model(
    collapsible_vit,
    dataloader=dataloader,
    device=device,
)

# 7 layers

In [None]:
# Start again from a freshly created pretrained ViT-Tiny.
base_vit = timm.create_model(
    "vit_tiny_patch16_224.augreg_in21k_ft_in1k", pretrained=True
).to(device)

# Stage 2/12: wrap, restore "lc2" and collapse, then restore "collapsing2" into the
# same wrapper and collapse again.
lc2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_10epoch_frac0.166_lc2.pth"
collapsing2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_collapsing2.pth"
collapsible_vit = get_collapsible_model(base_vit, fraction=2/12, device=device)
collapsible_vit.load_state_dict(torch.load(lc2_ckpt, map_location=device))
collapse_model(collapsible_vit, threshold=0.05)
collapsible_vit.load_state_dict(torch.load(collapsing2_ckpt, map_location=device))
collapse_model(collapsible_vit, fraction=2/12, device=device, threshold=0.05)

# Stages 3/12 to 6/12: re-wrap, restore that stage's checkpoint, collapse.
replayed_stages = (
    (3/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing3.pth"),
    (4/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing4.pth"),
    (5/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing5.pth"),
    (6/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing6.pth"),
)
for stage_fraction, stage_ckpt in replayed_stages:
    collapsible_vit = get_collapsible_model(collapsible_vit, fraction=stage_fraction, device=device)
    collapsible_vit.load_state_dict(torch.load(stage_ckpt, map_location=device))
    collapse_model(collapsible_vit, threshold=0.05)

# Stage 7/12: wrap and restore only. Kept outside the loop so the load result
# remains the cell's last expression.
collapsible_vit = get_collapsible_model(collapsible_vit, fraction=7/12, device=device)
collapsible_vit.load_state_dict(
    torch.load("./models_archive/vit/tiny/sensitivity_analysis_collapsing7.pth", map_location=device)
)

In [None]:
# Final collapse for the 7/12 configuration.
collapse_model(
    collapsible_vit,
    fraction=7/12,
    device=device,
    threshold=0.05,
)

In [None]:
# Result for the 7/12 configuration.
evaluate_model(
    collapsible_vit,
    dataloader=dataloader,
    device=device,
)

# 8 layers

In [None]:
# Start again from a freshly created pretrained ViT-Tiny.
base_vit = timm.create_model(
    "vit_tiny_patch16_224.augreg_in21k_ft_in1k", pretrained=True
).to(device)

# Stage 2/12: wrap, restore "lc2" and collapse, then restore "collapsing2" into the
# same wrapper and collapse again.
lc2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_10epoch_frac0.166_lc2.pth"
collapsing2_ckpt = "./models_archive/vit/tiny/sensitivity_analysis_collapsing2.pth"
collapsible_vit = get_collapsible_model(base_vit, fraction=2/12, device=device)
collapsible_vit.load_state_dict(torch.load(lc2_ckpt, map_location=device))
collapse_model(collapsible_vit, threshold=0.05)
collapsible_vit.load_state_dict(torch.load(collapsing2_ckpt, map_location=device))
collapse_model(collapsible_vit, fraction=2/12, device=device, threshold=0.05)

# Stages 3/12 to 7/12: re-wrap, restore that stage's checkpoint, collapse.
replayed_stages = (
    (3/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing3.pth"),
    (4/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing4.pth"),
    (5/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing5.pth"),
    (6/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing6.pth"),
    (7/12, "./models_archive/vit/tiny/sensitivity_analysis_collapsing7.pth"),
)
for stage_fraction, stage_ckpt in replayed_stages:
    collapsible_vit = get_collapsible_model(collapsible_vit, fraction=stage_fraction, device=device)
    collapsible_vit.load_state_dict(torch.load(stage_ckpt, map_location=device))
    collapse_model(collapsible_vit, threshold=0.05)

# Stage 8/12: wrap and restore only. Kept outside the loop so the load result
# remains the cell's last expression.
collapsible_vit = get_collapsible_model(collapsible_vit, fraction=8/12, device=device)
collapsible_vit.load_state_dict(
    torch.load("./models_archive/vit/tiny/sensitivity_analysis_collapsing8.pth", map_location=device)
)

In [None]:
# Show the slopes for the 8/12 stage before collapsing.
get_model_collapsible_slopes(
    collapsible_vit,
)

In [None]:
# Final collapse for the 8/12 configuration.
collapse_model(
    collapsible_vit,
    fraction=8/12,
    device=device,
    threshold=0.05,
)

In [None]:
# Result for the 8/12 configuration.
evaluate_model(
    collapsible_vit,
    dataloader=dataloader,
    device=device,
)