In [67]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.transforms as transforms
from torch.autograd import Variable
import numpy as np
import os
import random
import sys
import torchvision.models as models
import torch.optim as optim
import logging
import time
import warnings
from tensorboardX import SummaryWriter
from torch.utils.data import Dataset,DataLoader
#import fine_grained_dataset_linear
#from fine_grained_dataset_linear import fine_grained_linear_dataset
#import image_aug
#from image_aug import Flip
#from image_aug import Rotate
#from image_aug import Translate
#import contrastive_loss
#from contrastive_loss import contrastive_loss
#import network
#from network import CNN
from torch.nn import functional as F
from torch import topk
import numpy as np
import skimage.transform
import copy
import skimage
from skimage import io,transform
import kornia
#from utils import getImagesInFolder, params_grad_norm, params_norm
from tqdm import tqdm
#from cam_regress_models import UNet
import argparse
import glob
import cam_regress_models as cm_mods
import disc_models as disc_mods
import custom_losses as cm_l

In [5]:
def set_all_seeds(seed=42):
    """Seed every random number generator this notebook relies on.

    Puts cuDNN into deterministic mode (and disables its autotuner) and then
    seeds Python's ``random`` module, NumPy's legacy global generator and
    torch's CPU and CUDA generators with the same value.

    Args:
        seed: Integer seed shared by all generators. Defaults to 42, the
            value that was previously hard-coded.
    """
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Python's own generator was previously left unseeded, so anything drawing
    # from `random` differed from run to run.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # The CUDA seeding calls are silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
set_all_seeds()

In [7]:
# Preferred CUDA device index for this notebook.
GPU_INDEX = 3
# Only touch the CUDA runtime when it exists: an unconditional set_device()
# raises on CPU-only hosts, which would make the CPU fallback below unreachable.
if torch.cuda.is_available():
    if GPU_INDEX >= torch.cuda.device_count():
        warnings.warn(
            "GPU %d requested but only %d CUDA device(s) visible; using GPU 0"
            % (GPU_INDEX, torch.cuda.device_count())
        )
        GPU_INDEX = 0
    torch.cuda.set_device(GPU_INDEX)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [8]:
# Build the project's UNet with 3 input channels and 1 output channel, then
# move it to DEVICE. The bare `.to(DEVICE)` as the last expression also makes
# the notebook print the module structure.
# NOTE(review): the variable name suggests the single output channel is a mask;
# confirm against cam_regress_models.UNet.
mask_model = cm_mods.UNet(in_channels=3,
                          out_channels=1,
                          depth=4,
                          padding=True,
                          batch_norm=True)
mask_model.to(DEVICE)


UNet(
  (down_path): ModuleList(
    (0): UNetConvBlock(
      (block): Sequential(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU()
        (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): ReLU()
        (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): UNetConvBlock(
      (block): Sequential(
        (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU()
        (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (3): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (4): ReLU()
        (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): UNetConvBlock(
      (block): Sequential(
        (0): C

In [10]:
# Build the project's ResNet50 wrapper with pretrained=True and move it to
# DEVICE; the trailing `.to(DEVICE)` expression prints the module structure.
# NOTE(review): lin_dim_in / lin_dim_out are both 1000 — presumably the size of
# an extra linear layer in disc_models.ResNet50; confirm it matches the number
# of classes in the dataset.
class_model = disc_mods.ResNet50(pretrained=True,
                                 lin_dim_in=1000,
                                 lin_dim_out=1000)
class_model.to(DEVICE)

ResNet50(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace)
        (downsample): Sequential(
          (0): Conv2d(64, 256, kernel_siz

In [34]:
# Training-time preprocessing: array -> PIL image, random horizontal and
# vertical flips (each applied with probability 0.5), back to a tensor, then
# per-channel normalisation. The mean/std values match the commonly used
# ImageNet channel statistics.
train_transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

In [35]:
# Evaluation-time preprocessing: tensor conversion followed by the same
# per-channel normalisation as training, with no random augmentation.
test_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

In [52]:
class fine_grained_linear_dataset(Dataset):
    """Image dataset driven by a '|'-delimited listing file.

    Every non-blank line of the listing is split on '|'; the second-to-last
    field is taken as the image path. The label is the second-to-last
    '_'-separated token of that path and is expected to be a 1-based integer.

    Each item is a dict with:
        "img":      the image resized to 224x224 and passed through the transform
        "img_unet": the same image resized to 128x128 and passed through the
                    transform
        "label":    the zero-based integer label
    """

    def __init__(self,txt_path,  train, img_transform1):
        """Parse the listing file.

        Args:
            txt_path: Path of the listing file described in the class docstring.
            train: Stored as ``self.train``. It no longer gates ``__getitem__``;
                the caller selects behaviour through the transform it passes.
            img_transform1: Callable applied to each uint8 HxWx3 array.
        """
        self.text_path = txt_path
        self.label_list = []
        self.image_list=[]
        self.train = train
        self.img_transform = img_transform1
        with open(self.text_path) as f:
            for raw_line in f:
                line = raw_line.rstrip()
                if not line:
                    # Blank lines (e.g. a trailing newline at the end of the
                    # file) used to raise IndexError on the '|' split.
                    continue
                field = line.split('|')[-2]
                self.image_list.append(field.rstrip())
                # Labels stay strings here and are converted in __getitem__.
                self.label_list.append(field.split('_')[-2])

    def to_uint8(self, img):
        """Scale a float array in [0, 1] to uint8 in [0, 255]."""
        return (img*255.).round().astype(np.uint8)

    def _transform_pair(self, first, second):
        """Apply ``self.img_transform`` to two arrays with identical random draws.

        The two crops are views of the same picture, so a random flip must hit
        both or neither. The state of Python's ``random`` and of torch's global
        CPU generator is captured before the first call and restored before the
        second; both are rewound because torchvision's random transforms have
        drawn from one or the other depending on the version.
        """
        py_state = random.getstate()
        torch_state = torch.get_rng_state()
        out_first = self.img_transform(first)
        random.setstate(py_state)
        torch.set_rng_state(torch_state)
        out_second = self.img_transform(second)
        return out_first, out_second

    def __getitem__(self,index):
        """Load, resize and transform the image at ``index``.

        Returns:
            dict with keys "img", "label" and "img_unet" (see class docstring).
            A sample is returned regardless of ``self.train``; previously a
            falsy flag made this method fall through and return None.

        Raises:
            ValueError: if the label token of the path is not an integer.
        """
        img_path = self.image_list[index].rstrip()
        img = io.imread(img_path, plugin="pil")
        if img.ndim == 2:
            # Greyscale: replicate the single channel so Normalize sees 3.
            img = np.repeat(img[:, :, np.newaxis], 3, axis=2)
        elif img.shape[2] > 3:
            # Drop the alpha channel; the 3-channel Normalize would fail on it.
            img = img[:, :, :3]

        # skimage's resize returns floats in [0, 1]; convert back to uint8 so
        # ToPILImage / ToTensor handle the arrays as ordinary 8-bit images.
        img_out = self.to_uint8(transform.resize(img, (224,224)))
        img_unet = self.to_uint8(transform.resize(img, (128,128)))
        img_out, img_unet = self._transform_pair(img_out, img_unet)

        label = int(self.label_list[index]) - 1
        return {"img": img_out, "label": label, 'img_unet':img_unet}

    def __len__(self):
        """Number of samples listed in the file."""
        return len(self.label_list)

In [53]:
# Training dataset: samples listed in ./actualimagefile.txt, train flag set,
# random-flip augmentation applied through train_transform.
train_set = fine_grained_linear_dataset('./actualimagefile.txt',True,train_transform)

In [56]:
# Smoke test: fetch one sample (this exercises image loading and the
# transform) and report how many samples the dataset holds.
data = train_set[1]
print(len(train_set))

1000


In [55]:
# Shuffled mini-batches of 8 training samples, loaded by one worker process.
train_loader = DataLoader(train_set,batch_size = 8,shuffle = True, num_workers =1)

In [58]:
# Validation must be deterministic, so it uses test_transform (no random
# flips) rather than train_transform. ToTensor accepts the uint8 HxWxC arrays
# the dataset produces, so no ToPILImage step is needed.
# NOTE(review): this is the same listing file as the training set, so the
# "validation" metrics are measured on training images — point this at a
# held-out listing when one is available.
val_set = fine_grained_linear_dataset('./actualimagefile.txt',True,test_transform)

val_loader = DataLoader(val_set, batch_size=8, shuffle=False, num_workers=0)

In [59]:
# Upper bound on the number of training epochs.
# NOTE(review): the LR schedulers' last milestone is 60, far below this bound.
MAX_EPOCHS = 1000

In [60]:
# Plain SGD (no momentum argument is passed) over the classifier's parameters,
# with L2 weight decay.
model_opt = optim.SGD(class_model.parameters(), lr=0.001,
                           weight_decay=0.005)

In [61]:
# Multiply the classifier optimizer's learning rate by 0.1 when the scheduler's
# step count reaches 45 and again at 60.
# NOTE(review): assumes scheduler.step() is called once per epoch — the
# training loop is not visible here.
scheduler = optim.lr_scheduler.MultiStepLR(model_opt, milestones=[45,60],
                                               gamma=0.1)

In [64]:
# Plain SGD (no momentum argument is passed) over the UNet's parameters, with
# the same learning rate and weight decay as the classifier optimizer.
mask_model_opt = optim.SGD(mask_model.parameters(), lr=0.001,
                           weight_decay=0.005)

In [65]:
# Step-decay schedule for the mask network's optimizer: its learning rate is
# multiplied by 0.1 at milestones 45 and 60.
# This must wrap mask_model_opt. It previously wrapped model_opt, which attached
# a second scheduler to the classifier optimizer and left the mask optimizer's
# learning rate constant.
mask_scheduler = optim.lr_scheduler.MultiStepLR(mask_model_opt, milestones=[45,60],
                                               gamma=0.1)

In [68]:
# Loss modules used by the training loop.
CE_LOSS = nn.CrossEntropyLoss()  # classification loss on logits vs. integer labels
# Project-defined loss from custom_losses.
# NOTE(review): presumably a total-variation penalty on the mask — confirm in custom_losses.
TV_LOSS = cm_l.TotalVaryLoss()

In [None]:
# Initial training-loop state and loss hyper-parameters.
start_iter = 0
global_iter = 0
# NOTE(review): presumably the best validation accuracy seen so far — confirm in the training loop.
val_acc_max = 0.0
# Gaussian blur with a 31x31 kernel and sigma 5.5 along each axis.
gauss_blur = kornia.filters.GaussianBlur2d((31,31), (5.5,5.5))
# NOTE(review): the two names below look like weights for a mask regulariser
# and for TV_LOSS respectively; their use is not visible in this chunk.
mask_reg_wt = 0.1
mask_reg_tv = 1e-6