### Imports

In [30]:
!pip install -U sentence-transformers



In [31]:
import pandas as pd
import torch.nn as nn
import torch
from sentence_transformers import SentenceTransformer
from torch import optim
from torchvision import models, transforms
from PIL import Image
import numpy as np
from torchvision import transforms
import math

### Connecting to drive and reading data

In [32]:
#Connecting to mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [33]:
#Read data from TSV files
training_data = pd.read_csv("/content/drive/My Drive/IITG/SMDM/Dataset/all_train.tsv", sep = '\t')
validate_data = pd.read_csv("/content/drive/My Drive/IITG/SMDM/Dataset/all_validate.tsv", sep = '\t')
testing_data = pd.read_csv("/content/drive/My Drive/IITG/SMDM/all_test_public.tsv", sep = '\t')
print(testing_data.columns)

Index(['Unnamed: 0', 'Unnamed: 0.1', 'Unnamed: 0.1.1', 'Unnamed: 0.1.1.1',
       'author', 'clean_title', 'created_utc', 'domain', 'hasImage', 'id',
       'image_url', 'linked_submission_id', 'num_comments', 'score',
       'subreddit', 'title', 'upvote_ratio', '2_way_label', '3_way_label',
       '6_way_label'],
      dtype='object')


### Preprocessing data

In [34]:
#Cleaning data
# ----- Training
training_data.drop(training_data.columns.difference(['title','image_url','hasImage', 'clean_title','2_way_label','3_way_label','6_way_label']), 1, inplace=True)
training_data = training_data[training_data['hasImage'] == True]
training_data = training_data.dropna()

validate_data.drop(training_data.columns.difference(['title','image_url','hasImage', 'clean_title','2_way_label','3_way_label','6_way_label']), 1, inplace=True)
validate_data = training_data[training_data['hasImage'] == True]
validate_data = training_data.dropna()

testing_data.drop(training_data.columns.difference(['title','image_url','hasImage', 'clean_title','2_way_label','3_way_label','6_way_label']), 1, inplace=True)
testing_data = training_data[training_data['hasImage'] == True]
testing_data = training_data.dropna()

  This is separate from the ipykernel package so we can avoid doing imports until
  import sys
  # This is added back by InteractiveShellApp.init_path()


In [35]:
#Get list of text
text = training_data['clean_title'].tolist()

print(text[:6])

['my walgreens offbrand mucinex was engraved with the letters mucinex but in a different order', 'this concerned sink with a tiny hat', 'hackers leak emails from uae ambassador to us', 'puppy taking in the view', 'i found a face in my sheet music too', 'bride and groom exchange vows after fatal shooting at their wedding']


### Text Encoding

In [36]:
class TextEncoding(nn.Module):
    def __init__(self):
        super().__init__()
        self.dense_1 = nn.Linear(384, 512)
        self.sig = nn.Sigmoid()
        self.model =  SentenceTransformer('paraphrase-MiniLM-L6-v2')

    def forward(self,text):
        embeddings = self.model.encode(text)
        k = self.dense_1(torch.as_tensor(embeddings))
        k = self.sig(k)
        return k

### Trying Text input

In [37]:
t = TextEncoding()
text = ['hello How are you']

dense_layout_output = t(text)
print(dense_layout_output)
dense_layout_output.shape



tensor([[0.4979, 0.4677, 0.4011, 0.5140, 0.4900, 0.5424, 0.4844, 0.5645, 0.4595,
         0.4993, 0.5136, 0.4640, 0.4441, 0.5909, 0.4272, 0.5654, 0.4965, 0.4363,
         0.5656, 0.4211, 0.3966, 0.5014, 0.4847, 0.5266, 0.4632, 0.5142, 0.5552,
         0.4266, 0.6011, 0.5327, 0.4863, 0.4790, 0.4919, 0.5262, 0.4649, 0.5501,
         0.5131, 0.4569, 0.4724, 0.4484, 0.5942, 0.4412, 0.3970, 0.5881, 0.5475,
         0.5542, 0.4652, 0.5276, 0.4892, 0.4856, 0.4813, 0.5117, 0.5163, 0.4719,
         0.5222, 0.5191, 0.4970, 0.5659, 0.4303, 0.4694, 0.4377, 0.4874, 0.5510,
         0.5442, 0.5418, 0.4503, 0.4825, 0.4648, 0.5520, 0.4859, 0.5863, 0.4112,
         0.5207, 0.5173, 0.4692, 0.5028, 0.4906, 0.4595, 0.4946, 0.5176, 0.5374,
         0.4553, 0.5537, 0.5571, 0.5042, 0.5039, 0.4176, 0.4814, 0.4605, 0.4670,
         0.4865, 0.5625, 0.5183, 0.4363, 0.5449, 0.4881, 0.4220, 0.5424, 0.5869,
         0.3837, 0.5025, 0.4202, 0.4857, 0.4862, 0.4957, 0.4883, 0.4382, 0.5057,
         0.5616, 0.5000, 0.4

torch.Size([1, 512])

### Image Encoding

In [38]:
imodel = torch.hub.load('pytorch/vision:v0.10.0', 'vgg19', pretrained=True)
#imodel.eval()

Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.10.0


In [39]:
class FeatureExtractor(nn.Module):
  def __init__(self, model):
    super(FeatureExtractor, self).__init__()
		# Extract VGG-19 Feature Layers
    self.features = list(model.features)
    self.features = nn.Sequential(*self.features)
		# Extract VGG-19 Average Pooling Layer
    self.pooling = model.avgpool
  
  def forward(self, x):
    out = self.features(x)
    out = self.pooling(out)
    print(out.shape)
    return out 
 
new_model = FeatureExtractor(imodel)

In [40]:
# Change the device to GPU
device = torch.device('cuda:0' if torch.cuda.is_available() else "cpu")
new_model = new_model.to(device)

In [41]:
#getting the feature vector of an Image
filename = "/content/drive/MyDrive/1000rows/images/1.jpg"
print(filename)
filename = "/content/drive/MyDrive/IITG/SMDM/Dataset/Images/images/"+str(1)+".jpg"
print(filename)
#feature_vec = Img_feature(filename)
#feature_vec.shape

/content/drive/MyDrive/1000rows/images/1.jpg
/content/drive/MyDrive/IITG/SMDM/Dataset/Images/images/1.jpg


In [42]:
class ImageModel(nn.Module):
    def __init__(self):
      super().__init__()

    def forward(self, filename):
      return self.Img_feature(filename)  
    
    def Img_feature(self,filename):
      img = Image.open(filename)
      width,height = img.size
      dim = min(width,height)
      img.size
      preprocess = transforms.Compose([   
        transforms.CenterCrop(dim),
        transforms.Resize(224),
        transforms.ToTensor()  
      ])
      img = preprocess(img)
      img = img.reshape(1, 3, 224, 224)
      img = img.to(device)
    
      feature = new_model(img)
      return feature

In [43]:
image_model = ImageModel()

vgg_output = image_model(filename)
print(vgg_output)


torch.Size([1, 512, 7, 7])
tensor([[[[0.2694, 1.1837, 0.5741,  ..., 0.0000, 0.0000, 0.0000],
          [0.2834, 0.7161, 0.0000,  ..., 0.0000, 0.0000, 0.2228],
          [0.0095, 0.0000, 0.0000,  ..., 0.2375, 2.6152, 2.3209],
          ...,
          [0.2354, 0.0000, 0.0000,  ..., 1.5213, 0.2500, 0.0000],
          [0.7577, 0.0000, 0.0000,  ..., 0.6125, 0.0000, 0.0000],
          [0.9607, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          ...,
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]],

         [[0.1993, 0.9513, 0.2258,  ..., 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
          [0.00

### Final Model

In [44]:
class Model(nn.Module):
    def __init__(self,in_channels = 3, n_filters = 3, filter_sizes = [3,4,5], embedding_dim = 384):
       super().__init__()
       # Input: Image - 224 x 224 x 3 
       self.imageMap = ImageModel() 
       # Output:  128 x 128 x 256

       # Input: Text  
       self.embedding = TextEncoding()
       # Output: 1 x 384
         
       self.flatten = nn.Flatten()
       self.dense1 = nn.Linear(49, 128)
       self.tanh = nn.Tanh()
       self.dense2 = nn.Linear(128,64)
       self.dense3 = nn.Linear(64,2)
       self.conv = nn.Conv2d(in_channels=512, out_channels=1, kernel_size=1, stride=1, padding=0)     
    
    def forward(self, text, filename):
       I = self.imageMap(filename)
       #print('Image map',I.shape)
       k  = self.embedding(text)
       #print('Text Encoding:',k.shape)
       z = self.tiConv(I,k)
       #print('Conv:',z.shape)
       m = self.softmax2D(z)
       #print('softmax:', m.shape)
       I1 = self.elementWiseDot(m,I)
       print('Dot:',I1.shape)
       Ir = self.conv(I1)
       print('Reduced:', Ir.shape)
       flat = self.flatten(Ir)
       #print('flatten:', flat.shape)
       h = 1.7159 * self.tanh(self.dense2(self.dense1(flat)))
       p = 1.7159 * self.tanh(self.dense3(h))
       #return torch.argmax(p)
       return p
    
    def elementWiseDot(self, m , I):
       I1 = torch.zeros((1,512,7,7))
       for i in range(len(I[0])):
          I1[0, i, : , :] = m.mul(I[0, i, : , :])
        
       return I1


    def tiConv(self, I, k):
      output = torch.zeros((7,7))
      for i in range(7):
        for j in range(7):
          output[i,j] = torch.dot(k[0], I[0,:,i,j])
      
      return output

    def softmax2D(self, m):
      sum = 0
      for i in range(len(m)):
        for j in range(len(m[0])):
            sum = sum + math.e ** m[i,j]

      v = torch.div(m, sum)
      return v


In [45]:
model = Model()
print(model(text, filename))
print(model)


torch.Size([1, 512, 7, 7])
Dot: torch.Size([1, 512, 7, 7])
Reduced: torch.Size([1, 1, 7, 7])
tensor([[0.1057, 0.1116]], grad_fn=<MulBackward0>)
Model(
  (imageMap): ImageModel()
  (embedding): TextEncoding(
    (dense_1): Linear(in_features=384, out_features=512, bias=True)
    (sig): Sigmoid()
    (model): SentenceTransformer(
      (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
      (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
    )
  )
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (dense1): Linear(in_features=49, out_features=128, bias=True)
  (tanh): Tanh()
  (dense2): Linear(in_features=128, out_features=64, bias=True)
  (dense3): Linear(in_features=64, out_features=2, bias=True)
  (conv): Conv2d(512, 1, kernel_size=(1, 1), stride=(1, 1))
)


In [46]:
loss_fn = nn.CrossEntropyLoss()
#optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.001)
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)


In [47]:
# Training Function 
def train(num_epochs): 
    best_accuracy = 0.0 
     
    print("Begin training...") 
    for epoch in range(num_epochs): 
        running_train_loss = 0.0 
        running_accuracy = 0.0 
        running_vall_loss = 0.0 
        total = 0 
        fullText = training_data['clean_title'].tolist()
        # Training Loop 
        for i in range(len(training_data[0:10])):
            text = []
            text.append(fullText[i]) 
            #print('text', text)
            imagefile = "/content/drive/MyDrive/IITG/SMDM/Dataset/Images/images/"+str(i+1)+".jpg"
            output = training_data['2_way_label'].tolist()[i]
            optimizer.zero_grad()   # zero the parameter gradients  

            optimizer.zero_grad()
            predicted_outputs = model(text, imagefile)
            print('predicted :', predicted_outputs)
            o = []
            o.append(output)
            print('Output', torch.as_tensor(o))
            train_loss = loss_fn(predicted_outputs, torch.as_tensor(o))
            train_loss.backward()
            print(model.dense3.weight.grad)
            optimizer.step()
            running_train_loss +=train_loss.item()
 
        # Calculate training loss value 
        train_loss_value = running_train_loss/len(training_data) 
        
        # Validation Loop 
        with torch.no_grad(): 
            model.eval() 
            for i in range(len(validate_data[0:10])): 
               text = []
               text.append(validate_data['clean_title'].tolist()[i])   # get the input and real species as outputs; data is a list of [inputs, outputs] 
               imagefile =  "/content/drive/MyDrive/IITG/SMDM/Dataset/Images/images/"+str(i+1)+".jpg"  #images of validation data
               output = validate_data['2_way_label'].tolist()[i]
               o = []
               o.append(output)
               predicted_outputs = model(text, imagefile)
               val_loss = loss_fn(predicted_outputs, torch.as_tensor(o)) 
               
               # The label with the highest value will be our prediction 
               _, predicted = torch.max(predicted_outputs, 1) 
               running_vall_loss += val_loss.item()  
               total += 1
               running_accuracy += (predicted == output).sum().item() 
 
        # Calculate validation loss value 
        val_loss_value = running_vall_loss/len(validate_data) 
        
        # Calculate accuracy as the number of correct predictions in the validation batch divided by the total number of predictions done.  
        accuracy = (100 * running_accuracy / total)     
          
        # Print the statistics of the epoch 
        print('Completed training batch', epoch, 'Training Loss is: %.4f' %train_loss_value, 'Validation Loss is: %.4f' %val_loss_value, 'Accuracy is %d %%' % (accuracy))

In [50]:
torch.autograd.set_detect_anomaly(True)
train(2)

Begin training...
torch.Size([1, 512, 7, 7])
Dot: torch.Size([1, 512, 7, 7])
Reduced: torch.Size([1, 1, 7, 7])
predicted : tensor([[nan, nan]], grad_fn=<MulBackward0>)
Output tensor([1])


  File "/usr/lib/python3.7/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.7/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.7/dist-packages/traitlets/config/application.py", line 846, in launch_instance
    app.start()
  File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelapp.py", line 499, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.7/dist-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
    self._run_once()
  File "/usr/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
    handle._run()
  File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "/usr

RuntimeError: ignored