In [1]:
#import docker

In [1]:
import torch, torchvision
import matplotlib.pyplot as plt
import json
import cv2
import numpy as np
from copy import deepcopy

In [2]:
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.structures.image_list import ImageList
from detectron2.data import transforms as T
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.roi_heads.fast_rcnn import FastRCNNOutputs
from detectron2.structures.boxes import Boxes
from detectron2.layers import nms
from detectron2 import model_zoo
from detectron2.config import get_cfg

In [3]:
# if torch.cuda.is_available():
#     device = torch.device("cuda")

In [4]:
# x = torch.rand(10000,256,device=device)
# y = x.to(device)
# print(x[0:5,0:5])

In [5]:
cfg_path = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"

def load_config_and_model_weights(cfg_path):
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(cfg_path))

    # ROI HEADS SCORE THRESHOLD
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

    # Comment the next line if you're using 'cuda'
    cfg['MODEL']['DEVICE']='cpu'

    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(cfg_path)

    return cfg

cfg = load_config_and_model_weights(cfg_path)

In [6]:
def get_model(cfg):
    # build model
    model = build_model(cfg)

    # load weights
    checkpointer = DetectionCheckpointer(model)
    checkpointer.load(cfg.MODEL.WEIGHTS)

    # eval mode
    model.eval()
    return model

model = get_model(cfg)

The checkpoint state_dict contains keys that are not used by the model:
  [35mproposal_generator.anchor_generator.cell_anchors.{0, 1, 2, 3, 4}[0m


In [7]:
import glob
import cv2
import os


# image1 = []
# image2 = []
# image3 = []
# image4 = []

# img_dir1 = '/home/btech/nityanand.mathur/hateful_memes/dataset/train/hateful'
# img_dir2= '/home/btech/nityanand.mathur/hateful_memes/dataset/train/non_hateful'
# img_dir3= '/home/btech/nityanand.mathur/hateful_memes/dataset/val/hateful'
# img_dir4= '/home/btech/nityanand.mathur/hateful_memes/dataset/val/non_hateful'
# image = '/home/btech/nityanand.mathur/hateful_memes/dataset/train/hateful/01235.png'
# img = cv2.imread(image)

# data_path1 = os.path.join(img_dir1,'*g')
# files = glob.glob(data_path1)
# for f1 in files:
#     img1 = cv2.imread(f1)
#     image1.append(img1)



# data_path2 = os.path.join(img_dir2,'*g')
# files = glob.glob(data_path2)
# for f1 in files:
#     img2 = cv2.imread(f1)
#     image2.append(img2)


# data_path3 = os.path.join(img_dir3,'*g')
# files = glob.glob(data_path3)
# for f1 in files:
#     img3= cv2.imread(f1)
#     image3.append(img3)


# data_path4 = os.path.join(img_dir4,'*g')
# files = glob.glob(data_path4)
# for f1 in files:
#     img4 = cv2.imread(f1)
#     image4.append(img4)

# image_bgr1 = []
# for i in image1:
#     img_bgr = cv2.cvtColor(i,cv2.COLOR_RGB2BGR)
#     image_bgr1.append(img_bgr)

# image_bgr2 = []
# for i in image2:
#     img_bgr = cv2.cvtColor(i,cv2.COLOR_RGB2BGR)
#     image_bgr2.append(img_bgr)

# image_bgr3 = []
# for i in image3:
#     img_bgr = cv2.cvtColor(i,cv2.COLOR_RGB2BGR)
#     image_bgr3.append(img_bgr)

# image_bgr4 = []
# for i in image4:
#     img_bgr = cv2.cvtColor(i,cv2.COLOR_RGB2BGR)
#     image_bgr4.append(img_bgr)

# del(image1)
# del(image2)
# del(image3)
# del(image4)

In [8]:
# print(img.shape)

In [9]:
def prepare_image_inputs(cfg, img_list):
    # Resizing the image according to the configuration
    transform_gen = T.ResizeShortestEdge(
                [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
            )
    img_list = [transform_gen.get_transform(img).apply_image(img) for img in img_list]

    # Convert to C,H,W format
    convert_to_tensor = lambda x: torch.Tensor(x.astype("float32").transpose(2, 0, 1))#.to(device=device)

    batched_inputs = [{"image":convert_to_tensor(img), "height": img.shape[0], "width": img.shape[1]} for img in img_list]

    # Normalizing the image
    num_channels = len(cfg.MODEL.PIXEL_MEAN)
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).view(num_channels, 1, 1)#.to(device=device)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).view(num_channels, 1, 1)#.to(device=device)
    normalizer = lambda x: (x - pixel_mean) / pixel_std
    images = [normalizer(x["image"]) for x in batched_inputs]

    # Convert to ImageList
    images =  ImageList.from_tensors(images,model.backbone.size_divisibility)
    
    return images, batched_inputs

def get_features(model, images):
    features = model.backbone(images.tensor)
    return features

    
def get_proposals(model, images, features):
    proposals, _ = model.proposal_generator(images, features)
    return proposals


def get_box_features(model, features, proposals):
    features_list = [features[f] for f in ['p2', 'p3', 'p4', 'p5']]
    box_features = model.roi_heads.box_pooler(features_list, [x.proposal_boxes for x in proposals])
    box_features = model.roi_heads.box_head.flatten(box_features)
    box_features = model.roi_heads.box_head.fc1(box_features)
    box_features = model.roi_heads.box_head.fc_relu1(box_features)
    box_features = model.roi_heads.box_head.fc2(box_features)

    box_features = box_features.reshape(1, 1000, 1024) # depends on your config and batch size
    return box_features, features_list


def get_prediction_logits(model, features_list, proposals):
    cls_features = model.roi_heads.box_pooler(features_list, [x.proposal_boxes for x in proposals])
    cls_features = model.roi_heads.box_head(cls_features)
    pred_class_logits, pred_proposal_deltas = model.roi_heads.box_predictor(cls_features)
    return pred_class_logits, pred_proposal_deltas


def get_box_scores(cfg, pred_class_logits, pred_proposal_deltas):
    box2box_transform = Box2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS)
    smooth_l1_beta = cfg.MODEL.ROI_BOX_HEAD.SMOOTH_L1_BETA

    outputs = FastRCNNOutputs(
        box2box_transform,
        pred_class_logits,
        pred_proposal_deltas,
        proposals,
        smooth_l1_beta,
    )

    boxes = outputs.predict_boxes()
    scores = outputs.predict_probs()
    image_shapes = outputs.image_shapes

    return boxes, scores, image_shapes


def get_output_boxes(boxes, batched_inputs, image_size):
    proposal_boxes = boxes.reshape(-1, 4).clone()
    scale_x, scale_y = (batched_inputs["width"] / image_size[1], batched_inputs["height"] / image_size[0])
    output_boxes = Boxes(proposal_boxes)

    output_boxes.scale(scale_x, scale_y)
    output_boxes.clip(image_size)

    return output_boxes


def select_boxes(cfg, output_boxes, scores):
    test_score_thresh = cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST
    test_nms_thresh = cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST
    cls_prob = scores.detach().cpu() #added CPU
    cls_boxes = output_boxes.tensor.detach().cpu().reshape(1000,80,4) # added CPU 
    max_conf = torch.zeros((cls_boxes.shape[0]))
    for cls_ind in range(0, cls_prob.shape[1]-1):
        cls_scores = cls_prob[:, cls_ind+1]
        det_boxes = cls_boxes[:,cls_ind,:]
        keep = np.array(nms(det_boxes, cls_scores, test_nms_thresh))
        max_conf[keep] = torch.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep])
    keep_boxes = torch.where(max_conf >= test_score_thresh)[0]
    return keep_boxes, max_conf

def filter_boxes(keep_boxes, max_conf, min_boxes, max_boxes):
    if len(keep_boxes) < min_boxes:
        keep_boxes = np.argsort(max_conf).numpy()[::-1][:min_boxes]
    elif len(keep_boxes) > max_boxes:
        keep_boxes = np.argsort(max_conf).numpy()[::-1][:max_boxes]
    return keep_boxes


def get_visual_embeds(box_features, keep_boxes):
    return box_features[keep_boxes.copy()]


In [None]:
#output4 = []
#f = open("output2.txt", "a")
import json

# Opening JSON file
f = open('hateful_memes/train.jsonl', 'r')
data = json.load(f)

for i in range(1):    
    #reading from json file and converting from RGB to BGR
    img_path = 'hateful_memes/' + data[i]['img']
    print(img_path)
    img = cv2.imread(img_path)
    img_bgr = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    #loading the image and converting to tensor
    images, batched_inputs = prepare_image_inputs(cfg, [img_bgr])
    features = get_features(model, images)
    features.keys()
    proposals = get_proposals(model, images, features)
    box_features, features_list = get_box_features(model, features, proposals)
    pred_class_logits, pred_proposal_deltas = get_prediction_logits(model, features_list, proposals)
    boxes, scores, image_shapes = get_box_scores(cfg, pred_class_logits, pred_proposal_deltas)
    output_boxes = [get_output_boxes(boxes[i], batched_inputs[i], proposals[i].image_size) for i in range(len(proposals))]
    temp = [select_boxes(cfg, output_boxes[i], scores[i]) for i in range(len(scores))]
    keep_boxes, max_conf = [],[]
    for keep_box, mx_conf in temp:
        keep_boxes.append(keep_box)
        max_conf.append(mx_conf)
    MIN_BOXES=10
    MAX_BOXES=100
    keep_boxes = [filter_boxes(keep_box, mx_conf, MIN_BOXES, MAX_BOXES) for keep_box, mx_conf in zip(keep_boxes, max_conf)]

    from transformers import BertTokenizer, VisualBertModel
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    model1 = VisualBertModel.from_pretrained("uclanlp/visualbert-nlvr2-coco-pre")
    #tokenizing text inputs
    inputs = tokenizer(data[i]['text'], return_tensors="pt") 
    #finding visual embeddings
    visual_embeds = [get_visual_embeds(box_feature, keep_box) for box_feature, keep_box in zip(box_features, keep_boxes)]
    visual_embeds = torch.stack(visual_embeds)
    visual_token_type_ids = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
    visual_attention_mask = torch.ones(visual_embeds.shape[:-1], dtype=torch.long)
    #adding both embeddings
    inputs.update(
        {
            "visual_embeds": visual_embeds,
            "visual_token_type_ids": visual_token_type_ids,
            "visual_attention_mask": visual_attention_mask,
        }
    )
    outputs = model1(**inputs)  
    # f.write(str(outputs))
    #print(outputs)
    last_hidden_states = outputs.last_hidden_state
    #hs.write(str(last_hidden_states))
# print(last_hidden_states)
f.close()

In [None]:
import json
f = open('hateful_memes/test_unseen.jsonl', 'r')
data = json.load(f)


In [11]:
last_hidden_states.shape

torch.Size([1, 110, 768])

In [14]:
print(last_hidden_states[0][0].detach().numpy())

[ 1.70977890e-01  8.93255249e-02  4.29117411e-01  7.05169961e-02
  6.70925975e-02  2.35964090e-01  1.08436227e-01  3.09289359e-02
 -1.10481910e-01 -1.46518881e-02  1.15664676e-01 -1.90246850e-03
 -6.09312728e-02  2.71173596e-01  1.50237873e-01  2.80442625e-01
 -3.19435000e-02  3.48602623e-01 -1.44309357e-01 -2.43468776e-01
  2.34100968e-01 -3.70606519e-02  9.49920192e-02 -1.81320652e-01
  1.03148855e-01 -6.84947371e-02  1.05372965e-01  2.54553169e-01
  2.07433283e-01  1.23621553e-01  4.13727701e-01  1.03580318e-01
  2.03460291e-01 -1.31554246e-01 -7.31589347e-02 -1.80084437e-01
 -8.03028867e-02  6.24556188e-03  2.37772554e-01 -1.38537258e-01
 -7.65928328e-02 -1.48852482e-01 -1.54305063e-02  1.21296272e-02
  4.01995778e-01  1.83955953e-01 -1.37234533e+00 -3.98194697e-03
 -6.11493349e-01  7.13877100e-03  1.44077390e-01  8.97097141e-02
  9.59546641e-02  2.74929821e-01 -9.71964225e-02 -6.48421720e-02
  1.21408574e-01  1.99563876e-01  1.16951875e-01 -6.64323047e-02
  5.73199987e-01 -2.17649

In [2]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

df= pd.read_csv('vbOutput_copy.csv')
features = df.iloc[:, :-1]
labels = df['labels']


In [4]:
lr = LogisticRegression(solver='lbfgs',class_weight='balanced', max_iter=10000)
train_features, test_features, train_labels, test_labels = train_test_split(features, labels)
lr.fit(train_features, train_labels)
lr.score(test_features, test_labels)
pred = lr.predict(test_features)


In [5]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier()
rf.fit(train_features,train_labels)
rf.score(test_features, test_labels)
haterh
from sklearn.metrics import confusion_matrix
print(confusion_matrix(test_labels,pred))


[[223 139]
 [ 68  92]]
