In [None]:
import argparse
import time
import shutil
import os
import os.path as osp
import csv
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR
import torchvision.models as models
from resnext_specialist import VA
from data_cnn60 import NTUDataLoaders, AverageMeter, make_dir, get_cases, get_num_classes
from sklearn.metrics import confusion_matrix
from collections import OrderedDict
import torch.nn.functional as F

# parser = argparse.ArgumentParser(description='View adaptive')
# parser.add_argument('--ss', type=int, help="split size")
# parser.add_argument('--st', type=str, help="split type")
# parser.add_argument('--dataset', type=str, help="dataset path")
# parser.add_argument('--wdir', type=str, help="directory to save weights path")
# parser.add_argument('--le', type=str, help="language embedding model")
# parser.add_argument('--ve', type=str, help="visual embedding model")
# parser.add_argument('--phase', type=str, help="train or val")
# parser.add_argument('--gpu', type=str, help="gpu device number")
# args = parser.parse_args()

# ---- Run configuration (hard-coded stand-ins for the commented-out CLI flags) ----
gpu = '0'                   # --gpu: value exported as CUDA_VISIBLE_DEVICES
ss = 5                      # --ss: split size
st = 'r'                    # --st: split type
dataset_path = 'ntu_results/shift_val_5_r'   # --dataset: dataset path
wdir = 'pos_aware_cada_vae_concatenated_latent_space_shift_5_r_val'  # --wdir: directory to save weights
le = 'bert_large'           # --le: language embedding model
ve = 'shift'                # --ve: visual embedding model
phase = 'val'               # --phase: train or val
num_classes = 60

# Restrict the visible GPUs before any CUDA call is made below.
os.environ["CUDA_VISIBLE_DEVICES"] = gpu

# Seed every random number generator used in this notebook.
seed = 5
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)  # documented as silently ignored when CUDA is unavailable
np.random.seed(seed)

# Fall back to the CPU instead of failing on the first `.to(device)` when no GPU is present.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(torch.cuda.device_count())

# Mean-squared-error criterion, placed on the selected device.
criterion2 = nn.MSELoss().to(device)

# Input width of the visual features for each supported visual embedding model.
_VIS_EMB_INPUT_SIZES = {'vacnn': 2048, 'shift': 256}
if ve not in _VIS_EMB_INPUT_SIZES:
    # Fail here, next to the configuration, rather than with a NameError later on.
    raise ValueError('unknown visual embedding model %r; expected one of %s'
                     % (ve, sorted(_VIS_EMB_INPUT_SIZES)))
vis_emb_input_size = _VIS_EMB_INPUT_SIZES[ve]

text_hidden_size = 100
vis_hidden_size = 512
output_size = 50

# Input width of the noun/verb vectors for each supported language embedding model
# (nouns and verbs share the same width for both models).
_LANG_EMB_INPUT_SIZES = {'bert_large': 1024, 'w2v': 300}
if le not in _LANG_EMB_INPUT_SIZES:
    raise ValueError('unknown language embedding model %r; expected one of %s'
                     % (le, sorted(_LANG_EMB_INPUT_SIZES)))
noun_emb_input_size = _LANG_EMB_INPUT_SIZES[le]
verb_emb_input_size = _LANG_EMB_INPUT_SIZES[le]

# Build the data loaders for the configured dataset directory.
# NOTE(review): the meaning of the 'max' and 1 arguments is defined by
# data_cnn60.NTUDataLoaders, which is not visible here.
ntu_loaders = NTUDataLoaders(dataset_path, 'max', 1)
# Every loader is requested with the same two arguments (1024, 8) -- presumably
# batch size and number of worker processes; TODO confirm against NTUDataLoaders.
train_loader = ntu_loaders.get_train_loader(1024, 8)
# Naming caveat: the "zsl" names are fed by the loaders' *val* accessors and the
# "val" names by the *test* accessors.
zsl_loader = ntu_loaders.get_val_loader(1024, 8)
val_loader = ntu_loaders.get_test_loader(1024, 8)
zsl_out_loader = ntu_loaders.get_val_out_loader(1024, 8)
val_out_loader = ntu_loaders.get_test_out_loader(1024, 8)
train_size = ntu_loaders.get_train_size()
zsl_size = ntu_loaders.get_val_size()
val_size = ntu_loaders.get_test_size()
# The "validate on" count printed here is zsl_size (from get_val_size); val_size is not printed.
print('Train on %d samples, validate on %d samples' % (train_size, zsl_size))


# Vocabulary arrays and per-class word assignments, read from the working directory.
nouns_vocab = np.load('nouns_vocab.npy')
verbs_vocab = np.load('verbs_vocab.npy')
# Index of the largest entry along the last axis of the *_ohe matrix selects one
# vocabulary entry per row; only the first num_classes rows are kept.
nouns = nouns_vocab[np.argmax(np.load('nouns_ohe.npy'), -1)][:num_classes]
# nouns[nouns == 'object'] = '#'
# nouns[51] = 'someone'
# nouns[55] = 'object'
# nouns = np.load('nouns.npy')
verbs = verbs_vocab[np.argmax(np.load('verbs_ohe.npy'), -1)][:num_classes]
# nouns = np.load('nouns.npy')
# verbs = np.load('verbs.npy')
# prps = np.load('prepositions.npy')
# Array indexed below by class index to obtain the labels of each split.
labels = np.load('labels.npy')

# Choose the class-index splits for the configured phase.
#   gzsl_inds   - classes whose nouns/verbs form the corpora built further down
#   unseen_inds - held-out classes, sorted ascending
#   seen_inds   - classes treated as seen
if phase == 'val':
    # Validation carves a second held-out set of `ss` classes out of the
    # (num_classes - ss) classes listed in the first split file.
    gzsl_inds = np.load('./label_splits/'+ st + 's' + str(num_classes - ss) +'.npy')
    unseen_inds = np.sort(np.load('./label_splits/' + st + 'v' + str(ss) + '_0.npy'))
    seen_inds = np.load('./label_splits/'+ st + 's' + str(num_classes - ss -ss) + '_0.npy')
else:
    # Any other phase uses every class; derive the range from num_classes rather
    # than a literal 60 so it stays consistent with the rest of the configuration.
    gzsl_inds = np.arange(num_classes)
    unseen_inds = np.sort(np.load('./label_splits/' + st + 'u' + str(ss) + '.npy'))
    seen_inds = np.load('./label_splits/'+ st + 's' + str(num_classes - ss) + '.npy')

# Labels of the unseen and seen classes, selected by class index.
unseen_labels = labels[unseen_inds]
seen_labels = labels[seen_inds]

# Per-class noun and verb strings for each split.
unseen_nouns = nouns[unseen_inds]
unseen_verbs = verbs[unseen_inds]
# unseen_prps = prps[unseen_inds]
seen_nouns = nouns[seen_inds]
seen_verbs = verbs[seen_inds]
# seen_prps = prps[seen_inds]
# Sorted, de-duplicated verbs / nouns over the classes in gzsl_inds.
verb_corp = np.unique(verbs[gzsl_inds])
noun_corp = np.unique(nouns[gzsl_inds])
# prp_corp = np.unique(prps[gzsl_inds])

# import gensim
# model = gensim.models.KeyedVectors.load_word2vec_format('/ssd_scratch/cvit/pranay.gupta/GoogleNews-vectors-negative300.bin', binary=True)


# def get_w2v(model, words):
#     emb = np.zeros([300])
#     for word in words.split():
#         emb += model[word]
#     emb /= len(words.split())
    
#     return emb


def _load_unit_rows(npy_path, width):
    """Load the first `num_classes` rows of `npy_path` as a (num_classes, width)
    tensor and divide every row by its own L2 norm."""
    table = torch.from_numpy(np.load(npy_path)[:num_classes, :]).view([num_classes, width])
    # keepdim=True leaves the norms as a (num_classes, 1) column that broadcasts across the row.
    return table / torch.norm(table, dim=1, keepdim=True)


# Row-normalised verb and noun embeddings of the selected language model, one row per class.
verb_emb = _load_unit_rows(le + '_verb.npy', verb_emb_input_size)
noun_emb = _load_unit_rows(le + '_noun.npy', noun_emb_input_size)
# prp_w2v = torch.from_numpy(np.array([get_w2v(model, i) for i in prps])).view([60, 300])
# prp_w2v = noun_emb/torch.norm(prp_w2v, dim = 1).view([60, 1]).repeat([1, 300])

# Embedding rows of the unseen classes.
unseen_verb_emb, unseen_noun_emb = verb_emb[unseen_inds, :], noun_emb[unseen_inds, :]
# unseen_prp_w2v = prp_w2v[unseen_inds, :]

# Embedding rows of the seen classes.
seen_verb_emb, seen_noun_emb = verb_emb[seen_inds, :], noun_emb[seen_inds, :]
# seen_prp_w2v = prp_w2v[seen_inds, :]
print("loaded language embeddings")


def get_text_data(target, verb_emb, noun_emb):
    """Look up the verb and noun embeddings for a batch of class indices.

    Args:
        target: index tensor whose first dimension is the batch size.
        verb_emb: 2-D tensor of verb embeddings, one row per class.
        noun_emb: 2-D tensor of noun embeddings, one row per class.

    Returns:
        Tuple ``(verb_batch, noun_batch)`` of float32 tensors shaped
        ``(batch, verb_width)`` and ``(batch, noun_width)``.
    """
    batch = target.shape[0]
    # Each table is reshaped with its own row width. The previous version used the
    # verb width for the noun table too, which only worked while both were equal.
    verb_batch = verb_emb[target].view(batch, verb_emb.shape[-1]).float()
    noun_batch = noun_emb[target].view(batch, noun_emb.shape[-1]).float()
    return verb_batch, noun_batch


def save_checkpoint(state, filename='checkpoint.pth.tar', is_best=False):
    """Serialise `state` to `filename` with torch.save.

    When `is_best` is true, the file just written is additionally copied to
    'model_best.pth.tar' (a path relative to the current working directory).
    """
    torch.save(state, filename)
    if not is_best:
        return
    shutil.copyfile(filename, 'model_best.pth.tar')
        
def accuracy(class_embedding, vis_trans_out, target, inds):
    """Score samples against class embeddings by dot product and measure accuracy.

    Args:
        class_embedding: tensor of shape (num_candidate_classes, dim).
        vis_trans_out: tensor of shape (batch, dim).
        target: tensor of shape (batch,) holding the true class label of each sample.
        inds: NumPy array (or tensor) mapping a candidate position to its class label.

    Returns:
        Tuple ``(acc, scores)``. ``acc`` is the fraction of samples whose
        highest-scoring candidate maps to the target label. ``scores`` is the
        (batch, num_candidate_classes) matrix of dot products.
    """
    # Keep the label lookup on the same device as the scores instead of relying on
    # a module-level `device`, so the function also works for CPU tensors.
    inds = torch.as_tensor(inds, device=vis_trans_out.device)
    # Broadcasting (batch, 1, dim) * (1, classes, dim), summed over dim, gives every
    # sample/class dot product. It is computed once and reused for the return value.
    scores = torch.sum(vis_trans_out.unsqueeze(1) * class_embedding.unsqueeze(0), dim=2)
    preds = torch.argmax(scores, dim=1)
    acc = torch.sum(inds[preds] == target).item() / preds.shape[0]
    return acc, scores


In [1]:
import numpy as np
# Score arrays consumed by the gating experiment below, loaded from hard-coded
# relative paths.
# NOTE(review): the file names carry a "12_12_r" / "shift_val_12_r" tag -- presumably
# a 12-class random split; confirm it matches the split the experiment is meant for.
unseen_zs = np.load('../../tf_vaegan_test/12_12_r_unseen_zs.npy')
seen_zs = np.load('../../tf_vaegan_test/12_12_r_seen_zs.npy')
# The "*_train" names are filled from ztest_out.npy / val_out.npy respectively.
# The experiment cell indexes the *_zs and *_train arrays with the same row indices,
# so each pair presumably describes the same samples in the same order -- TODO confirm.
unseen_train = np.load('../../synse_resources/ntu_results/shift_val_12_r/ztest_out.npy')
seen_train = np.load('../../synse_resources/ntu_results/shift_val_12_r/val_out.npy')

In [2]:
from sklearn.linear_model import LogisticRegression

## Temperature Scaling

In [3]:
def temp_scale(seen_features, T):
    """Temperature-scaled softmax of a score matrix.

    Args:
        seen_features: array of scores, normally 2-D with one row per sample.
        T: temperature the scores are divided by before the softmax.

    Returns:
        Array of the same shape in which every first-axis slice (every row of a
        2-D input) is non-negative and sums to 1.
    """
    logits = np.asarray(seen_features) / T
    # Normalise over every axis except the first, which matches the previous
    # slice-by-slice loop for any input rank.
    reduce_axes = tuple(range(1, logits.ndim))
    # Subtracting the per-slice maximum leaves the softmax unchanged but stops
    # exp() from overflowing to inf (and the ratio from becoming NaN).
    shifted = logits - np.max(logits, axis=reduce_axes, keepdims=True)
    weights = np.exp(shifted)
    return weights / np.sum(weights, axis=reduce_axes, keepdims=True)

In [8]:
# Gating experiment: fit a binary classifier (label 0 = rows of the "unseen"
# arrays, label 1 = rows of the "seen" arrays) on the top-f sorted scores of two
# score sources, then sweep its decision threshold on a held-out subset.
N_RUNS = 1           # random train/validation resamplings averaged in the report
N_VAL_UNSEEN = 300   # rows of the "unseen" arrays held out for validation
N_VAL_SEEN = 400     # rows of the "seen" arrays held out for validation


def _top_k_desc(scores, k):
    """Return the k largest values of every row of `scores`, in descending order."""
    return np.sort(scores, 1)[:, ::-1][:, :k]


def _paired_features(zs_feat, cls_feat, rows):
    """Concatenate, column-wise, the selected rows of the two feature matrices."""
    return np.concatenate([zs_feat[rows, :], cls_feat[rows, :]], 1)


# The *_zs and *_train matrices are indexed with the same row indices below, so
# they must describe the same number of samples.
assert unseen_zs.shape[0] == unseen_train.shape[0], "unseen score matrices differ in row count"
assert seen_zs.shape[0] == seen_train.shape[0], "seen score matrices differ in row count"

for f in [12]:          # number of top scores kept per source
    print(f)
    for t in [5]:       # softmax temperature applied to the *_train scores
        print(t)
        fin_val_acc = 0
        fin_train_acc = 0
        for run in range(N_RUNS):
            # The *_zs arrays are used as loaded; only the *_train arrays go
            # through the temperature-scaled softmax.
            feat_unseen_zs = _top_k_desc(unseen_zs, f)
            feat_unseen_train = _top_k_desc(temp_scale(unseen_train, t), f)
            feat_seen_zs = _top_k_desc(seen_zs, f)
            feat_seen_train = _top_k_desc(temp_scale(seen_train, t), f)

            # Random hold-out; everything not held out is used for training.
            n_unseen = feat_unseen_train.shape[0]
            n_seen = feat_seen_train.shape[0]
            val_unseen_inds = np.random.choice(np.arange(n_unseen), N_VAL_UNSEEN, replace=False)
            val_seen_inds = np.random.choice(np.arange(n_seen), N_VAL_SEEN, replace=False)
            # setdiff1d returns a sorted integer array even when nothing is left over.
            train_unseen_inds = np.setdiff1d(np.arange(n_unseen), val_unseen_inds)
            train_seen_inds = np.setdiff1d(np.arange(n_seen), val_seen_inds)

            gating_train_x = np.concatenate([
                _paired_features(feat_unseen_zs, feat_unseen_train, train_unseen_inds),
                _paired_features(feat_seen_zs, feat_seen_train, train_seen_inds),
            ], 0)
            gating_train_y = np.array([0]*len(train_unseen_inds) + [1]*len(train_seen_inds))
            gating_val_x = np.concatenate([
                _paired_features(feat_unseen_zs, feat_unseen_train, val_unseen_inds),
                _paired_features(feat_seen_zs, feat_seen_train, val_seen_inds),
            ], 0)
            gating_val_y = np.array([0]*len(val_unseen_inds) + [1]*len(val_seen_inds))

            # Shuffle the training rows before fitting.
            train_inds = np.arange(gating_train_x.shape[0])
            np.random.shuffle(train_inds)
            # NOTE(review): `multi_class` is deprecated in recent scikit-learn releases;
            # it is kept because dropping it may change the fitted model -- confirm
            # against the installed version.
            model = LogisticRegression(random_state=0, C=1, solver='lbfgs', n_jobs=-1,
                                         multi_class='multinomial', verbose=1, max_iter=5000,
                                         ).fit(gating_train_x[train_inds, :], gating_train_y[train_inds])
            prob = model.predict_proba(gating_val_x)

            # Sweep the threshold on P(label 0) from 0.25 to 0.74 and keep the best
            # validation accuracy. The loop variable is deliberately NOT called `t`:
            # reusing that name overwrote the temperature for every later run.
            best = 0
            bestT = 0
            for thresh_pct in range(25, 75, 1):
                y = prob[:, 0] > thresh_pct/100   # True -> predicted label 0
                acc = np.sum((1 - y) == gating_val_y)/len(gating_val_y)
                if acc > best:
                    best = acc
                    bestT = thresh_pct/100
            fin_val_acc += best
            pred_train = model.predict(gating_train_x)
            train_acc = np.sum(pred_train == gating_train_y)/len(gating_train_y)
            fin_train_acc += train_acc
        # bestT comes from the last run only; the two accuracies are means over all runs.
        print('thresh', bestT)
        print(fin_val_acc/N_RUNS)
        print(fin_train_acc/N_RUNS)
        

12
5


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


thresh 0.73
0.8085714285714286
0.8238071570576541


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.3s finished


In [9]:
import pickle as pkl
# Persist the fitted gating classifier with pickle.
# NOTE(review): the file name encodes "thresh0.74", while the recorded output of the
# tuning cell prints "thresh 0.73" -- confirm which threshold this model belongs to.
# NOTE: `f` here rebinds the name the experiment cell uses as its loop variable.
with open('../../tf_vaegan_test/gating_model_t5_thresh0.74_seen.pkl', 'wb') as f:
    pkl.dump(model, f)

In [None]:
# Recompute the gating model's class-probability estimates on the held-out
# gating features; relies on `model` and `gating_val_x` left by the experiment cell.
prob = model.predict_proba(gating_val_x)

In [None]:
# Stand-alone copy of the threshold sweep from the experiment cell, printing the
# accuracy at every threshold.
best = 0
bestT = 0
for t in range(25, 75, 1):
    # Threshold t/100 on the first probability column; True marks rows predicted as label 0.
    y = prob[:, 0] > t/100
    # (1 - y) turns the boolean mask into 0/1 labels before comparing with gating_val_y.
    acc = np.sum((1 - y) == gating_val_y)/len(gating_val_y)
    print(acc)
    # Strict '>' keeps the lowest threshold among ties.
    if acc > best:
        best = acc
        bestT = t/100

In [None]:
# Display the threshold that gave the highest accuracy in the sweep.
bestT

In [None]:
# Display the highest accuracy reached in the sweep.
best