In [6]:
import argparse
import os
import random
import sys
import time
from collections import OrderedDict
from datetime import datetime

sys.path.append(os.path.abspath("/ocean/projects/asc170022p/shg121/PhD/Project_Pruning"))
import numpy as np
from torch.utils.data import DataLoader
from tqdm import tqdm

from model_factory.model_meta import Model_Meta
from model_factory.models import Classifier
from run_manager import RunManager


import utils
import yaml
import pickle
import torch
from dataset.dataset_mnist import Dataset_mnist
from dataset.dataset_utils import get_dataset, get_transforms

In [18]:
# ---------------------------------------------------------------------------
# Notebook configuration: everything a reader might want to tune lives here.
# ---------------------------------------------------------------------------
device = utils.get_device()
print(f"Device: {device}")

# Single project root; the data / json / log locations are derived from it so
# they cannot drift apart when the project is moved to another machine.
project_root = "/ocean/projects/asc170022p/shg121/PhD/Project_Pruning"
data_root = os.path.join(project_root, "data", "MNIST_EVEN_ODD")
json_root = os.path.join(project_root, "scripts_data")
logs = os.path.join(project_root, "output")

# Black-box (BB) classifier settings, forwarded to `Classifier(...)` below.
model_arch = "Resnet_18"
dataset_name = "mnist"
pretrained = True
transfer_learning = False
chk_pt_path = "seq_epoch_20.pth.tar"  # checkpoint file name (joined onto `logs` later)
num_classes = 1  # forwarded to Classifier as its output size

# Layer whose activations are captured and projected onto the concept vectors.
bb_layer = "layer3"  # layer3
concept_names = ["Zero", "One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine"]
cav_vector_file = "max_pooled_train_cavs.pkl"  # pickle holding the concept vectors per layer

# Data loading / optimisation settings.
img_size = 224
batch_size = 3
epochs = 50
num_workers = 4
class_list = [0, 1]
num_labels = len(class_list)

# Per-layer size lookup; not referenced by the cells visible in this notebook.
# NOTE(review): presumably the spatial side length of each layer's activation
# map for a 224x224 input (layer3 activations print as 14x14 below) - confirm.
kernel_size = {
    "layer3": 14,
    "layer4": 7
}

Device: cuda


In [8]:
def get_concept_vectors(logs, bb_layer, cav_vector_file, model_arch, dataset_name):
    """Load the concept activation vectors (CAVs) of one layer and unit-normalize them.

    The pickle is read from
    ``<logs>/activations/BB/<model_arch>/<dataset_name>/<cav_vector_file>`` and is
    expected to be a mapping from layer name to a NumPy array whose first axis
    enumerates the concepts.

    Args:
        logs: Root output directory.
        bb_layer: Key of the layer whose CAVs should be returned.
        cav_vector_file: File name of the pickle inside the CAV directory.
        model_arch: Architecture name used as a path component.
        dataset_name: Dataset name used as a path component.

    Returns:
        The array stored under ``bb_layer`` with every entry along the first axis
        scaled to unit L2 norm. All-zero entries are left untouched instead of
        being turned into NaNs by a division by zero.

    Raises:
        OSError: If the pickle file cannot be opened.
        KeyError: If ``bb_layer`` is not present in the pickle.
    """
    start = time.time()
    cav_file_path = os.path.join(
        logs, "activations", "BB", model_arch, dataset_name, cav_vector_file
    )
    # The context manager closes the handle even if unpickling fails.
    # SECURITY: pickle.load can execute arbitrary code - only load trusted files.
    with open(cav_file_path, "rb") as cav_file:
        cavs = pickle.load(cav_file)[bb_layer]

    # Normalize in place, one concept at a time (the array was freshly loaded,
    # so nobody else holds a reference to the un-normalized values).
    for i in range(cavs.shape[0]):
        norm = np.linalg.norm(cavs[i])
        if norm > 0:
            cavs[i] /= norm

    done = time.time()
    elapsed = done - start
    print("Time to load the concepts from disk: " + str(elapsed) + " secs")
    return cavs


def get_dataloader(data_root, json_root, dataset_name, img_size, batch_size, num_workers=4):
    """Build the train and validation ``DataLoader`` objects.

    Both splits are fetched with ``get_dataset`` (modes ``"train"`` and ``"val"``),
    wrapped in ``Dataset_mnist`` and share the transform returned by
    ``get_transforms(size=img_size)``.

    Args:
        data_root: Directory holding the dataset.
        json_root: Directory passed through to ``get_dataset`` as ``json_root``.
        dataset_name: Dataset identifier understood by ``get_dataset``.
        img_size: Target size forwarded to ``get_transforms``.
        batch_size: Batch size used by both loaders.
        num_workers: Worker processes per loader. Defaults to 4, the value that
            was previously hard-coded, so existing callers are unaffected.

    Returns:
        Tuple ``(train_loader, val_loader)``; the train loader shuffles, the
        validation loader keeps dataset order.
    """
    train_set = get_dataset(
        data_root=data_root,
        json_root=json_root,
        dataset_name=dataset_name,
        mode="train"
    )

    val_set = get_dataset(
        data_root=data_root,
        json_root=json_root,
        dataset_name=dataset_name,
        mode="val"
    )

    transform = get_transforms(size=img_size)
    train_dataset = Dataset_mnist(train_set, transform)
    train_loader = DataLoader(
        train_dataset,
        num_workers=num_workers,
        batch_size=batch_size,
        shuffle=True
    )

    val_dataset = Dataset_mnist(val_set, transform)
    val_loader = DataLoader(
        val_dataset,
        num_workers=num_workers,
        batch_size=batch_size,
        shuffle=False
    )

    return train_loader, val_loader

In [9]:
# --- Data -------------------------------------------------------------------
start = time.time()
train_loader, val_loader = get_dataloader(
    data_root,
    json_root,
    dataset_name,
    img_size,
    batch_size
)
done = time.time()
elapsed = done - start
print("Time to load the dataset from disk: " + str(elapsed) + " secs")

# --- Concept vectors and run bookkeeping --------------------------------------
concept_vectors = get_concept_vectors(logs, bb_layer, cav_vector_file, model_arch, dataset_name)
final_parameters = OrderedDict(
    epoch=[epochs],
    layer=[bb_layer],
    dataset=[dataset_name],
    now=[datetime.today().strftime('%Y-%m-%d-%HH-%MM-%SS')]
)

# Every parameter list has a single entry, so only the first run is used.
run_id = utils.get_runs(final_parameters)[0]
device = utils.get_device()
print(f"Device: {device}")

# --- Black-box model and output locations -------------------------------------
bb_model = Classifier(
    model_arch,
    num_classes,
    pretrained,
    transfer_learning
).to(device)
bb_checkpoint_path = os.path.join(
    logs,
    "chk_pt",
    "BB",
    model_arch,
    dataset_name,
    chk_pt_path
)
g_model_checkpoint_path = os.path.join(
    logs,
    "chk_pt",
    bb_layer,
    "G",
    model_arch,
    dataset_name,
    chk_pt_path
)
tb_path = os.path.join(
    logs,
    "predictions",
    "G",
    model_arch,
    dataset_name
)
# map_location lets a checkpoint saved on a GPU be restored on whatever device
# is available now (e.g. a CPU-only node) instead of failing during load.
model_chk_pt = torch.load(bb_checkpoint_path, map_location=device)
bb_model.load_state_dict(model_chk_pt)
bb_model.eval()

# Model_Meta wraps the model together with the list of layers of interest; later
# cells read `bb_model_meta.model_activations_store[bb_layer]` after a forward pass.
# NOTE(review): if bb_layer is not a string this stays None and those cells fail.
bb_model_meta = None
if isinstance(bb_layer, str):
    bb_model_meta = Model_Meta(bb_model, [bb_layer])

utils.create_dir(
    path_dict={
        "path_name": g_model_checkpoint_path,
        "path_type": "checkpoint"
    })
utils.create_dir(
    path_dict={
        "path_name": tb_path,
        "path_type": "tensorboard"
    })

run_manager = RunManager(g_model_checkpoint_path, tb_path, train_loader, val_loader)
run_manager.begin_run(run_id)

Length of the [train] dataset: 48000
Length of the [val] dataset: 12000
Time to load the dataset from disk: 25.314823150634766 secs
Time to load the concepts from disk: 0.0015592575073242188 secs
Device: cuda
checkpoint directory is created successfully at:
/ocean/projects/asc170022p/shg121/PhD/Project_Pruning/output/chk_pt/layer3/G/Resnet_18/mnist/seq_epoch_20.pth.tar
tensorboard directory is created successfully at:
/ocean/projects/asc170022p/shg121/PhD/Project_Pruning/output/predictions/G/Resnet_18/mnist


In [10]:
# Inspect the concept vectors returned by get_concept_vectors: first the values,
# then (as the cell's displayed result) the array shape.
print(concept_vectors)
concept_vectors.shape

[[ 0.01122835  0.02308633 -0.00960688 ... -0.01438665 -0.11421703
  -0.03689688]
 [ 0.01557193 -0.0045657   0.00506165 ...  0.04537175  0.04174709
  -0.04709763]
 [-0.00168662  0.00429404 -0.00033164 ... -0.00236819 -0.06412712
   0.01187114]
 ...
 [ 0.00633227  0.01924887 -0.08749532 ... -0.01259112 -0.20416246
   0.02583854]
 [ 0.00554564  0.02790749 -0.14038941 ...  0.00997716 -0.0129579
  -0.00388584]
 [-0.00988955 -0.01329956  0.05259284 ... -0.00647321  0.00483356
   0.01804576]]


(10, 256)

In [213]:
class G(torch.nn.Module):
    """Two-layer fully connected map from a concept-score vector to flat activations.

    The network is ``Linear(g_model_ip_size -> 500)`` followed directly by
    ``Linear(500 -> g_model_op_size)``. There is no non-linearity between the
    two layers, so the composite map is affine with a 500-wide bottleneck.

    Args:
        g_model_ip_size: Number of input features.
        g_model_op_size: Number of output features.
    """

    def __init__(
            self,
            g_model_ip_size,
            g_model_op_size
    ):
        super().__init__()
        hidden_width = 500
        # Construction order matters for seeded initialisation: first the
        # input layer, then the output layer.
        to_hidden = torch.nn.Linear(g_model_ip_size, hidden_width, bias=True)
        to_output = torch.nn.Linear(hidden_width, g_model_op_size, bias=True)
        self.model = torch.nn.Sequential(to_hidden, to_output)

    def forward(self, x):
        """Apply both linear layers to ``x`` (last dim = ``g_model_ip_size``)."""
        return self.model(x)


In [201]:
# Push one validation batch through the black-box model; only the activations
# recorded for `bb_layer` are of interest, so the prediction itself is dropped.
images, labels = next(iter(val_loader))
_ = bb_model(images.to(device))
activations = bb_model_meta.model_activations_store[bb_layer]
print(activations.size())

# g maps (spatial positions x concepts) scores back to (spatial positions x
# channels) activations, both flattened per sample.
act_height, act_width = activations.size(-2), activations.size(-1)
n_concepts, cav_dim = concept_vectors.shape[0], concept_vectors.shape[1]
g_model_ip_size = act_width * act_height * n_concepts
g_model_op_size = act_width * act_height * cav_dim
print(g_model_ip_size)
print(g_model_op_size)

g = G(g_model_ip_size, g_model_op_size)
g = g.to(device)

optimizer = torch.optim.Adam(g.parameters(), lr=1e-3)

torch.Size([3, 256, 14, 14])
1960
50176


In [210]:
# Train g: concept scores -> reconstructed `bb_layer` activations -> rest of the
# black-box network -> prediction, optimised with BCE against the labels.
criterion = torch.nn.BCELoss()
th = 0
val_after_th = 0
# torch.nn.Threshold keeps x where x > th and writes `val_after_th` elsewhere,
# i.e. with th = 0 every non-positive concept score is clamped to 0.
th_fn = torch.nn.Threshold(threshold=th, value=val_after_th)
resnet = list(list(bb_model.children())[0].children())
# Children 7..8 of the wrapped network run on the captured activations; child 9
# is unpacked into the final stage that consumes the flattened features.
mid = torch.nn.Sequential(*resnet[7:9])
tail = torch.nn.Sequential(*resnet[9])

# Built here so the cell does not depend on another cell having been executed
# first (it was previously only defined in a later scratch cell).
torch_concept_vector = torch.from_numpy(concept_vectors).to(device, dtype=torch.float32)

# NOTE(review): the loop iterates `val_loader` although the loss is called
# `train_loss` - confirm whether `train_loader` was intended.
# NOTE(review): the optimizer only holds g's parameters, but backward() also
# accumulates gradients into bb_model's parameters; consider freezing bb_model.
for epoch in range(epochs):
    run_manager.begin_epoch()

    with tqdm(total=len(val_loader)) as t:
        for batch_id, (images, labels) in enumerate(val_loader):
            images = images.to(device)
            labels = labels.to(torch.float32)
            labels = labels.to(device)
            labels = labels.reshape((labels.shape[0], 1))
            # Forward pass of the black box; the activations of `bb_layer` are
            # read back from the Model_Meta store right after it.
            y_hat = bb_model(images)
            activations = bb_model_meta.model_activations_store[bb_layer]
            bs, ch = activations.size(0), activations.size(1)
            # (bs, ch, H*W) -> (bs, H*W, ch) @ (ch, n_concepts) -> (bs, H*W*n_concepts):
            # the score of every concept at every spatial position.
            vc = torch.matmul(
                activations.reshape((bs, ch, -1)).permute((0, 2, 1)),
                torch_concept_vector.T
            ).reshape((bs, -1))
            th_vc = th_fn(vc)
            # Scale each sample's score vector to unit L2 norm.
            norm_vc = torch.nn.functional.normalize(th_vc, p=2, dim=1)

            print(norm_vc)
            print(norm_vc.size())
            print(np.linalg.norm(norm_vc[0].cpu().detach().numpy()))

            print("Shantanu")
            concept_to_act = g(norm_vc)
            print(concept_to_act.size())
            # Height (dim -2) precedes width (dim -1) in the activation layout;
            # the previous (-1, -2) order was only correct for square maps.
            concept_to_act = concept_to_act.reshape(
                bs,
                concept_vectors.shape[1],
                activations.size(-2), activations.size(-1)
            )
            print(concept_to_act.size())
            print("Shantanu")
            prob_mid = mid(concept_to_act)
            bs, ch, h, w = prob_mid.size()
            print(prob_mid.size())
            prob_mid = prob_mid.reshape(bs, ch * h * w)
            y_pred = tail(prob_mid)
            # Report the tensor computed in this iteration (this used to print
            # `prob_tail`, a leftover variable from a different cell).
            print(y_pred.size())
            print(y_pred)
            print(y_hat)
            optimizer.zero_grad()
            print(f"y_hat: {y_hat.size()}")
            print(f"y_pred: {y_pred.size()}")
            print(f"labels: {labels.size()}")
            train_loss = criterion(y_pred, labels)
            print(f"train_loss: {train_loss.item()}")
            train_loss.backward()
            optimizer.step()
            t.update()

            # Debug: stop after the first batch.
            break

    # Debug: stop after the first epoch.
    break



  0%|          | 0/4000 [00:00<?, ?it/s]

tensor([[0.0000, 0.0167, 0.0000,  ..., 0.0078, 0.0313, 0.0205],
        [0.0000, 0.0153, 0.0000,  ..., 0.0071, 0.0287, 0.0187],
        [0.0000, 0.0158, 0.0000,  ..., 0.0063, 0.0301, 0.0173]],
       device='cuda:0', grad_fn=<DivBackward0>)
torch.Size([3, 1960])
1.0
Shantanu
torch.Size([3, 50176])
torch.Size([3, 256, 14, 14])
Shantanu
torch.Size([3, 512, 1, 1])
torch.Size([3, 1])
tensor([[0.9187],
        [0.9167],
        [0.9142]], device='cuda:0', grad_fn=<SigmoidBackward>)
tensor([[1.4465e-12],
        [5.2319e-10],
        [2.5952e-10]], device='cuda:0', grad_fn=<SigmoidBackward>)
y_hat: torch.Size([3, 1])
y_pred: torch.Size([3, 1])
labels: torch.Size([3, 1])
train_loss: 2.4832472801208496





In [63]:
# Step-by-step check of the concept projection used in the training loop:
# project -> threshold at 0 -> L2-normalize per sample.
print(activations.size())
torch_concept_vector = torch.from_numpy(concept_vectors).to(device, dtype=torch.float32)
print(torch_concept_vector.dtype)
print(torch_concept_vector.size())

bs, ch = activations.size(0), activations.size(1)

# (bs, ch, H*W) -> (bs, H*W, ch) @ (ch, n_concepts) -> flattened to (bs, H*W*n_concepts)
prod = torch.matmul(
    activations.reshape((bs, ch, -1)).permute((0, 2, 1)),
    torch_concept_vector.T
).reshape((bs, -1))

print(prod)
print(prod.size())

# Clamp non-positive scores to 0. The input must be `prod`; the previous
# `m(prod_f)` referenced `prod_f` before it was ever assigned.
m = torch.nn.Threshold(threshold=0, value=0)
prod_f = m(prod)
print(prod_f)
print(prod_f.size())

norm_prod = torch.nn.functional.normalize(prod_f, p=2, dim=1)
print(norm_prod)
print(norm_prod.size())

# Each row should have unit norm; detach() keeps the NumPy conversion valid
# even when the activations are attached to an autograd graph.
print(np.linalg.norm(norm_prod[0].detach().cpu().numpy()))
print(np.linalg.norm(norm_prod[1].detach().cpu().numpy()))
print(np.linalg.norm(norm_prod[2].detach().cpu().numpy()))

# Spot check: the score of concept 0 at spatial position (0, 0) of sample 1 is a
# plain dot product of two vectors (no transpose needed for 1-D tensors).
print(activations[1, :, 0, 0].size())
print(torch_concept_vector[0].size())
print(torch.matmul(activations[1, :, 0, 0], torch_concept_vector[0]))

torch.Size([3, 256, 14, 14])
torch.float32
torch.Size([10, 256])
tensor([[-0.6977,  0.3953, -0.4591,  ...,  0.1850,  0.7387,  0.4840],
        [-0.6977,  0.3953, -0.4591,  ...,  0.1835,  0.7400,  0.4816],
        [-0.6976,  0.3954, -0.4592,  ...,  0.1583,  0.7545,  0.4330]],
       device='cuda:0')
torch.Size([3, 1960])
tensor([[0.0000, 0.3953, 0.0000,  ..., 0.1850, 0.7387, 0.4840],
        [0.0000, 0.3953, 0.0000,  ..., 0.1835, 0.7400, 0.4816],
        [0.0000, 0.3954, 0.0000,  ..., 0.1583, 0.7545, 0.4330]],
       device='cuda:0')
torch.Size([3, 1960])
tensor([[0.0000, 0.0167, 0.0000,  ..., 0.0078, 0.0313, 0.0205],
        [0.0000, 0.0153, 0.0000,  ..., 0.0071, 0.0287, 0.0187],
        [0.0000, 0.0158, 0.0000,  ..., 0.0063, 0.0301, 0.0173]],
       device='cuda:0')
torch.Size([3, 1960])
1.0
0.99999994
1.0
torch.Size([256])
torch.Size([256])
tensor(-0.6977, device='cuda:0')


In [104]:
# Dump the module tree of the black-box classifier (used to look up layer names
# and child indices when splitting the network in other cells).
print(bb_model)

Classifier(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_ru

In [168]:
# Stand-alone 512 -> 1 linear layer followed by a sigmoid, placed on `device`.
layer = torch.nn.Sequential(
    torch.nn.Linear(512, 1, bias=True),
    torch.nn.Sigmoid(),
)
layer = layer.to(device)

In [178]:
# print(bb_model)
resnet = list(list(bb_model.children())[0].children())
print(len(resnet))

# for i in range(len(resnet)):
#     print(i, resnet[i])

x = torch.rand(3, 3, 224, 224).to(device)
print(bb_model(x).size())
head = torch.nn.Sequential(*resnet[0:7])
mid = torch.nn.Sequential(*resnet[7:9])
tail = torch.nn.Sequential(*resnet[9])
# print(head)
# print("-------")
# print(mid)
# print("-------")
# print(tail)
# prob = head(x)
# print(prob.size())
prob_mid = mid(activations)
bs, ch, h, w = prob_mid.size()
print(prob_mid.size())
prob_mid = prob_mid.reshape(bs, ch * h * w)
prob_tail = tail(prob_mid)
print(prob_tail.size())

10
torch.Size([3, 1])
torch.Size([3, 512, 1, 1])
torch.Size([3, 1])


In [195]:
# Tiny demo of how reshape lays out a flat tensor: the last axis is filled first.
flat_values = np.array([1, 2, 4, 5, 6, 7, 8, 3])
x = torch.from_numpy(flat_values)
print(x.size())
print(x.reshape((2, 2, 2)))

torch.Size([8])
tensor([[[1, 2],
         [4, 5]],

        [[6, 7],
         [8, 3]]])
