In [1]:
%load_ext autoreload
%autoreload 2

import torch
torch.__version__

'1.6.0'

In [2]:
import numpy as np
import torch
import random
import pickle
import sys
import os
import glob
import pdb
sys.path.insert(0, '/home/jupyter/VLP/pythia')
sys.path.insert(0, '/home/jupyter/VLP/')

from pytorch_pretrained_bert.tokenization import BertTokenizer
from pytorch_pretrained_bert.modeling import BertForPreTrainingLossMask
from pytorch_pretrained_bert.optimization import BertAdam
from pathlib import Path
import pandas as pd
from vlp.loader_utils import batch_list_to_batch_tensors
import vlp.seq2seq_loader as seq2seq_loader
import PIL
from vlp.lang_utils import language_eval

from fastai.vision.all import *

from vlp_processor import PreprocessVLP
import pythia.tasks.processors as pythia_proc

from util import *

In [3]:
class ArgDummy(dict):
    def __getattr__(self, attr):
        return self[attr]
args = ArgDummy()
DATA_ROOT = '/mnt/ssd/data'
HATE_FEAT_PATH = Path('/home/jupyter/hateful_features/region_feat_gvd_wo_bgd')

args['bert_model'] = 'bert-base-cased' #Bert pre-trained model selected
args['seed'] = 123 #random seed for initialization
args['len_vis_input'] = 100
args['max_tgt_length'] = 100#20 #maximum length of target sequence
args['region_det_file_prefix'] = 'feat_cls_1000/coco_detection_vg_100dets_gvd_checkpoint_trainval'
args['output_dir'] ='tmp'
args['drop_prob'] = 0.1
args['model_recover_path'] = './checkpoints/vqa2_g2_lr2e-5_batch512_ft_from_s0.75_b0.25/model.19.bin'
args['image_root'] = f'{DATA_ROOT}/flickr30k/region_feat_gvd_wo_bgd/'
args['region_bbox_file'] =f'{DATA_ROOT}/flickr30k/region_feat_gvd_wo_bgd/flickr30k_detection_vg_thresh0.2_feat_gvd_checkpoint_trainvaltest.h5'
args['do_lower_case'] = True
args.region_bbox_file = os.path.join(args.image_root, args.region_bbox_file)
args.region_det_file_prefix = os.path.join(args.image_root, args.region_det_file_prefix)
args.max_seq_length = args.max_tgt_length + args.len_vis_input + 3 # +3 for 2x[SEP] and [CLS]

In [4]:
device = torch.device('cuda')
# fix random seed
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
tokenizer.max_len = args.max_seq_length

In [5]:
class HateStem(torch.nn.Module):
    
    def __init__(self, vlp):
        super(HateStem, self).__init__()
        self.vis_embed = vlp.vis_embed #Linear->ReLU->Linear->ReLU->dropout
        self.vis_pe_embed = vlp.vis_pe_embed #Linear->ReLU->dropout
        self.bert = vlp.bert # pytorch_pretrained_bert.modeling.BertModel
        self.len_vis_input = vlp.len_vis_input
        
    
    def forward(self, vis_feats, vis_pe, input_ids, token_type_ids=None, attention_mask=None):
        vis_feats = self.vis_embed(vis_feats) # image region features
        vis_pe = self.vis_pe_embed(vis_pe) # image region positional encodings

        sequence_output, pooled_output = self.bert(vis_feats, vis_pe, input_ids, token_type_ids,
            attention_mask, output_all_encoded_layers=False, len_vis_input=self.len_vis_input)
        #print(sequence_output.shape, pooled_output.shape)
        vqa2_embed = sequence_output[:, 0]*sequence_output[:, self.len_vis_input+1]
        return vqa2_embed
        #return sequence_output


In [6]:
def create_head(nf, n_out, lin_ftrs=None, ps=0.5, bn_final=False, lin_first=False, ):
    "Model head that takes `nf` features, runs through `lin_ftrs`, and out `n_out` classes."
    lin_ftrs = [nf, 512, n_out] if lin_ftrs is None else [nf] + lin_ftrs + [n_out]
    ps = L(ps)
    if len(ps) == 1: ps = [ps[0]/2] * (len(lin_ftrs)-2) + ps
    actns = [nn.ReLU(inplace=True)] * (len(lin_ftrs)-2) + [None]
    layers = []
    layers = [Flatten()]
    if lin_first: layers.append(nn.Dropout(ps.pop(0)))
    for ni,no,p,actn in zip(lin_ftrs[:-1], lin_ftrs[1:], ps, actns):
        layers += LinBnDrop(ni, no, bn=True, p=p, act=actn, lin_first=lin_first)
    if lin_first: layers.append(nn.Linear(lin_ftrs[-2], n_out))
    if bn_final: layers.append(nn.BatchNorm1d(lin_ftrs[-1], momentum=0.01))
    return nn.Sequential(*layers)

In [7]:
class HateClassifier(torch.nn.Module):
     def __init__(self, stem):
        super(HateClassifier, self).__init__()
        self.stem = stem
        self.classifier = create_head(768,2, ps=.5)
     #def forward(self, id, vis_feats, vis_pe, input_ids, token_type_ids, attention_mask):  
     def forward(self, params):  
        id, vis_feats, vis_pe, input_ids, token_type_ids, attention_mask = params
        embs = self.stem(vis_feats, vis_pe, input_ids, token_type_ids, attention_mask)
        return self.classifier(embs)

In [8]:
def new_model():
    hate_stem = torch.load('checkpoints/lm_stem20drop.pth')
    
    return  HateClassifier(hate_stem).cuda()


In [9]:
path =  Path('/home/jupyter/VLP')
data =pd.read_csv(path/'captioned.csv')
data =data.drop(['caption', 'tex_cap'], axis=1)
data['label'] = 1
data[:3]

Unnamed: 0,id,img,label,text,is_valid
0,42953,img/42953.png,1,its their character not their color that matters,False
1,23058,img/23058.png,1,don't be afraid to love again everyone is not like your ex,False
2,13894,img/13894.png,1,putting bows on your pet,False


In [10]:
df_train = pd.DataFrame(data[:8000])
df_valid = pd.DataFrame(data[8000:])

In [11]:
df_train['is_valid'] = False
df_valid['is_valid'] = True

In [12]:
valid_ids = df_valid.index.tolist()
valid_wrong = np.random.choice(valid_ids,len(valid_ids)//2)
valid_wrong[:5]

array([9534, 9389, 9406, 9346, 9122])

In [13]:
shuffled = np.random.permutation(valid_wrong)
shuffled[:5]

array([9980, 8885, 9719, 9071, 9125])

In [14]:
shuf_texts = df_valid.loc[shuffled]['text'].tolist()
shuf_texts[:5]

['"my name is osama bin laden and this is jackass"',
 'and then i asked mom, can we go play?',
 'all was well in new york until the fire nation attacked',
 'when you got a warrant out for your arrest & you on the run.....',
 'that look when you high as fuck on dem chromosomes']

In [15]:
df_valid.loc[valid_wrong, 'text']= shuf_texts
df_valid.loc[valid_wrong, 'label'] =0

In [16]:
df_valid

Unnamed: 0,id,img,label,text,is_valid
8000,43895,img/43895.png,1,"from here i will see muslims fuck up europe, attack innocent people & europeans waking up",True
8001,62537,img/62537.png,0,what do they have in common? you enjoy pushing them down the stairs,True
8002,76950,img/76950.png,0,if you ever feel down just remember there are people that are down their whole life,True
8003,16374,img/16374.png,0,"the islamic sacred book, believed to be the word of god as dictated to muhammad",True
8004,58732,img/58732.png,1,meanwhile at the isis strip club,True
...,...,...,...,...,...
9995,3869,img/03869.png,0,she represents the new wave of democrats,True
9996,23817,img/23817.png,0,back in my day there were only two genders male and not male,True
9997,56280,img/56280.png,1,little miss muffet sat on her tuffet,True
9998,29384,img/29384.png,0,i'm deporting all mexicans!!! except the ones in baldwin park you fuckers are crazy!!!,True


In [17]:
data = pd.concat([df_train, df_valid])
data

Unnamed: 0,id,img,label,text,is_valid
0,42953,img/42953.png,1,its their character not their color that matters,False
1,23058,img/23058.png,1,don't be afraid to love again everyone is not like your ex,False
2,13894,img/13894.png,1,putting bows on your pet,False
3,37408,img/37408.png,1,i love everything and everybody! except for squirrels i hate squirrels,False
4,82403,img/82403.png,1,"everybody loves chocolate chip cookies, even hitler",False
...,...,...,...,...,...
9995,3869,img/03869.png,0,she represents the new wave of democrats,True
9996,23817,img/23817.png,0,back in my day there were only two genders male and not male,True
9997,56280,img/56280.png,1,little miss muffet sat on her tuffet,True
9998,29384,img/29384.png,0,i'm deporting all mexicans!!! except the ones in baldwin park you fuckers are crazy!!!,True


In [18]:
def load(idx, proc, q=None):
    return load_from_row(data.iloc[idx],proc, tokenizer, q)

In [19]:
region_pref = HATE_FEAT_PATH / 'feat_cls_1000/hateful_vlp_checkpoint_trainval'
bbox_pref = HATE_FEAT_PATH / 'raw_bbox/hateful_vlp_checkpoint_trainval'
id_digits=2

truncate_config={
    'max_len_b': args.max_tgt_length, 'trunc_seg': 'b', 'always_truncate_tail': True}

max_masked = 10
mask_prob = .20
mask_img=True
vis_mask_prob = .20
train_proc = PreprocessVLP(max_masked, mask_prob,
    list(tokenizer.vocab.keys()), tokenizer.convert_tokens_to_ids, args.max_seq_length,
    truncate_config=truncate_config,mask_image_regions=mask_img, vis_mask_prob=vis_mask_prob,
    mode="bi", len_vis_input=args.len_vis_input, 
    region_bbox_prefix=str(bbox_pref), region_det_file_prefix=str(region_pref), id_digits=id_digits,
    load_vqa_ann=True)

val_proc = PreprocessVLP(0, 0,
    list(tokenizer.vocab.keys()), tokenizer.convert_tokens_to_ids, args.max_seq_length,
    truncate_config=truncate_config,mask_image_regions=False, vis_mask_prob=0,
    mode="bi", len_vis_input=args.len_vis_input, 
    region_bbox_prefix=str(bbox_pref), region_det_file_prefix=str(region_pref), id_digits=id_digits,
    load_vqa_ann=True)

In [20]:
@typedispatch
def show_batch(x:VLPInput, y, samples, ctxs=None, max_n=10, nrows=None, ncols=None, figsize=None, **kwargs):
    if ctxs is None: ctxs = get_grid(min(len(samples), max_n), nrows=nrows, ncols=ncols, figsize=figsize)
    ctxs = show_batch[object](x, y, samples, ctxs=ctxs, max_n=max_n, **kwargs)
    return ctxs

@typedispatch
def show_results(x:VLPInput, y:TensorCategory, samples, outs, ctxs=None, max_n=10, nrows=None, ncols=None, figsize=None, **kwargs):
    if ctxs is None: ctxs = get_grid(min(len(samples), max_n), nrows=nrows, ncols=ncols, add_vert=1, figsize=figsize)
    for i in range(2):
        ctxs = [b.show(ctx=c, **kwargs) for b,c,_ in zip(samples.itemgot(i),ctxs,range(max_n))]
    ctxs = [r.show(ctx=c, color='green' if b==r else 'red', **kwargs)
            for b,r,c,_ in zip(samples.itemgot(1),outs.itemgot(0),ctxs,range(max_n))]
    return ctxs


@typedispatch
def plot_top_losses(x: VLPInput, y:TensorCategory, samples, outs, raws, losses, nrows=None, ncols=None, figsize=None, **kwargs):
    axs = get_grid(len(samples), nrows=nrows, ncols=ncols, add_vert=1, figsize=figsize, title='Prediction/Actual/Loss/Probability')
    for ax,s,o,r,l in zip(axs, samples, outs, raws, losses):
        s[0].show(ctx=ax, **kwargs)
        ax.set_title(f'{o}/{s[1]} / {l.item():.2f} / {r.max().item():.2f}')

In [21]:
class LoadRow(Transform):
    
    def __init__(self,processor, tokenizer, random_text=False, data = None):
        self.proc = processor
        self.tokenizer = tokenizer
        self.random_text = random_text
        self.data = data
    def encodes(self, x):
        if self.random_text and random.choice((True, False)):
            x = pd.Series(x)
            altloc = random.randint(0, len(self.data)-1)
            alttext = self.data.iloc[altloc].text
            x['text'] = alttext
            x['label'] = 0
        return load_from_row(x, self.proc, self.tokenizer), x.label

In [22]:
train_load = LoadRow(train_proc, tokenizer, True, df_train)
valid_load = LoadRow(val_proc, tokenizer)

In [23]:
train_tl = TfmdLists(df_train, train_load)
valid_tl = TfmdLists(df_valid, valid_load)

In [24]:
dls = DataLoaders.from_dsets(train_tl, valid_tl,bs=40, device=device)

In [25]:


@patch_to(VLPInput)
def show(self, ctx, **kwargs):
    id = self[0].item()
    tit = id_to_text(id, data)
    ctx.text(0,0,tit,ha='left', wrap=True)
    ctx = show_image(PILImage.create(id_to_img_path(id)), ctx=ctx)
    return ctx


In [26]:
#dls.show_batch(dls.valid.one_batch(), 9)

In [27]:
def vlp_splitter(model):
    return L(params(model.stem.vis_embed) + params(model.stem.vis_pe_embed), 
            params(model.stem.bert),
            params(model.classifier))

In [28]:
model = new_model()
learn = Learner(dls, model,metrics=[accuracy, RocAucBinary()], splitter=vlp_splitter, loss_func = nn.CrossEntropyLoss())

In [29]:
learn.fine_tune(10, 1e-3)

epoch,train_loss,valid_loss,accuracy,roc_auc_score,time
0,0.771432,0.685338,0.55,0.564231,02:23


epoch,train_loss,valid_loss,accuracy,roc_auc_score,time
0,0.708316,0.642906,0.619,0.647331,03:10
1,0.669607,0.635611,0.64,0.703256,03:10
2,0.595504,0.569623,0.7015,0.746513,03:10
3,0.550967,0.626213,0.6955,0.77527,03:10
4,0.539172,0.948392,0.5925,0.736549,03:10
5,0.50967,0.71709,0.7015,0.805613,03:10
6,0.472479,0.671963,0.661,0.794498,03:10
7,0.462133,0.819404,0.637,0.773235,03:10
8,0.449377,0.825591,0.6395,0.780354,03:10
9,0.431898,0.832285,0.6395,0.779826,03:10


In [32]:
learn.fit_one_cycle(10, lr_max = 1e-5)

epoch,train_loss,valid_loss,accuracy,roc_auc_score,time
0,0.38872,0.948845,0.6285,0.784084,03:10
1,0.387256,0.855126,0.6625,0.809998,03:10
2,0.406096,0.731548,0.702,0.832674,03:10
3,0.388391,0.754718,0.709,0.825888,03:10
4,0.377293,0.965443,0.6315,0.78038,03:10
5,0.379696,0.852561,0.673,0.808499,03:10
6,0.371715,0.936434,0.6575,0.811397,03:10
7,0.363982,0.978866,0.62,0.790271,03:10
8,0.355349,0.910945,0.649,0.808527,03:10
9,0.34961,0.922885,0.643,0.807917,03:10


In [33]:
learn.save('sim85roc')

Path('models/sim85roc.pth')

In [35]:
preds_t = learn.get_preds(ds_idx=0)

In [38]:
import sklearn
def get_roc(preds):
    preds, y = preds
    probs = F.softmax(preds, dim=1)[:,1]
    
    return sklearn.metrics.roc_auc_score(y, probs)

In [44]:
import PIL
def show_by_idxs(img_idxs):
    n,m = len(img_idxs)//2, 2
    
    _,axs = plt.subplots(n,m, figsize=(10*m,10*n))
    for ax, idx in zip(axs.flatten(), img_idxs.view(-1)):
        row = data.iloc[idx.item()]
        img_path = HATE_IMAGES / row['img']
        ax.imshow(PIL.Image.open(img_path))
        ax.axis('off')
        clr = 'red' if row['label']==1 else 'green'
        txt = f'{row["id"]}: {row["text"][:20]}'
        ax.set_title(txt, color=clr) 

In [47]:
model = learn.model.eval()

In [61]:
bs = 50

In [62]:
first_row = data.iloc[0]

In [63]:
batch = []
for i in range(bs):
    txt = data.text.iloc[i]
    row_data = load_from_row(first_row, val_proc, tokenizer, txt)
    batch.append(row_data)

In [57]:
[x.shape for x in batch[0]]

[torch.Size([]),
 torch.Size([100, 2048]),
 torch.Size([100, 1607]),
 torch.Size([203]),
 torch.Size([203]),
 torch.Size([203, 203])]

In [77]:
b = fa_collate(batch)
[x.shape for x in b]

[torch.Size([50]),
 torch.Size([50, 100, 2048]),
 torch.Size([50, 100, 1607]),
 torch.Size([50, 203]),
 torch.Size([50, 203]),
 torch.Size([50, 203, 203])]

In [78]:
b = tuple([x.cuda() for x in b])

RuntimeError: CUDA error: an illegal memory access was encountered

In [74]:
model(b)

ValueError: not enough values to unpack (expected 6, got 0)