In [9]:
import torch
# If there's a GPU available...
if torch.cuda.is_available():    
    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")


There are 1 GPU(s) available.
We will use the GPU: Tesla T4


In [10]:
!pip install transformers
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device




device(type='cuda')

In [11]:
# !pip install wget

In [12]:
from transformers import BertModel, BertTokenizer
import torch.nn.functional as F
from torch import nn
import torch
from transformers import BertForSequenceClassification


class BERTContrastive(nn.Module):
    def __init__(self, train=True, dropout=0.1):
        super(BERTContrastive, self).__init__()
        # use pretrained BERT
        self.bert = BertModel.from_pretrained('bert-base-cased')
        self.dropout = nn.Dropout(dropout)
        self.linear1 = nn.Linear(768, 512)
        self.linear2 = nn.Linear(512, 256)
        print("Done loading model")
        # if train:
        #     self.bert.train()
        # else:
        #     self.bert.eval()
        #     for param in self.bert.parameters():
        #         param.requires_grad = False
        # self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    def forward(self, input_ids, masks=None):
        # input_ids = torch.tensor(self.tokenizer.encode(inputs)).unsqueeze(0)  # Batch size 1
        print(input_ids.size(), masks.size())
        _, pooled_output = self.bert(input_ids, attention_mask=masks)
        dropout_output = self.dropout(pooled_output)
        linear_output = F.relu(self.linear1(dropout_output))
        linear_output = F.relu(self.linear2(linear_output))
        # print(pooled_output)
        # last_hidden_states = outputs[0]
        # cls = last_hidden_states[0]
        return linear_output

class BERTClassification(nn.Module):
    def __init__(self, dropout=0.1):
        super(BERTClassification, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, 1)
        self.sigmoid = nn.Sigmoid()
    def forward(self, tokens, masks=None):
        _, pooled_output = self.bert(tokens, attention_mask=masks)
        print(tokens.size(), masks.size(), pooled_output.size())
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        proba = self.sigmoid(linear_output)
        return proba

In [13]:
import numpy as np

import pandas as pd
import torch
from transformers import BertModel, BertTokenizer
from transformers import AutoTokenizer

from torch.utils.data import Dataset, TensorDataset
import pickle


class Preprocessing:
    def __init__(self, file, taskname):
        self.file = file
        self.tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')

        self.taskname = taskname


    def preprocess(self):
        df = pd.read_csv(self.file, error_bad_lines=False, encoding='latin-1')
        df.dropna(inplace=True)

        abstract1 = list(df['paperAbstract1'])
        abstract2 = list(df['paperAbstract2'])
        labels = torch.tensor(list(df['label'])).unsqueeze(dim=1).float()

        if self.taskname == "Classification":
            encoded_abstract = self.tokenizer(abstract1, abstract2, padding=True, truncation=True, return_tensors="pt")
            # print(encoded_abstract)
            # print(labels.size())
            pickle.dump(encoded_abstract, open("BERTClassificationEncodings.pkl", 'wb'))
            pickle.dump(labels, open("BERTClassificationLabels.pkl", 'wb'))

        else:
            encoded_abstract1 = self.tokenizer(abstract1, padding=True, truncation=True, return_tensors="pt")
            encoded_abstract2 = self.tokenizer(abstract2, padding=True, truncation=True, return_tensors="pt")
            # print(encoded_abstract1, encoded_abstract2)

            # print(encoded_abstract2, encoded_abstract1, labels.size())

            pickle.dump(encoded_abstract1, open("BERTContrastiveEncodings.pkl", 'wb'))
            pickle.dump(encoded_abstract2, open("BERTContrastiveEncodings1.pkl", 'wb'))
            pickle.dump(labels, open("BERTContrastiveLabels.pkl", 'wb'))

        # print(labels, type(labels))
        print("Preprocessing done!!")


# def main():
#     # testing preprocess for contrastive version
#     preprocessCls = Preprocessing('data/test.csv', "Classification")
#     preprocessCls.preprocess()

#     preprocessCls = Preprocessing('data/test.csv', "Contrastive")
#     preprocessCls.preprocess()


# if __name__ == "__main__":
#     main()


In [14]:
import numpy as np

import pandas as pd
import torch
from transformers import BertModel, BertTokenizer, AdamW
from transformers import AutoTokenizer

from torch.utils.data import Dataset, TensorDataset, DataLoader, RandomSampler
import pickle
# from model import *


def contrastiveEuclideanLoss(output1, output2, target, size_average=True):
    distances = (output2 - output1).pow(2).sum(1)  # squared distances
    losses = 0.5 * (target.float() * distances +
                (1 + -1 * target).float() * F.relu(0 - (distances + 0.00000001).sqrt()).pow(2))
    return losses.mean() if size_average else losses.sum()

def trainBERTClassification(encodings, labels):

    model = BERTClassification().to(device)
    optimizer = AdamW(model.parameters(), lr=1e-5)
    dataset = TensorDataset(encodings['input_ids'], encodings['token_type_ids'], encodings['attention_mask'], labels)
    sampler = RandomSampler(dataset)
    dataloader = DataLoader(dataset, sampler=sampler, batch_size=8)
    epochs = 20
    count = 0

    print("Starting to train!!")


    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        batch_loss = 0

        for input_ids, _, attention_mask, labels in dataloader:
            optimizer.zero_grad()

            prob = model(input_ids, attention_mask)
            print(prob)
            loss_func = nn.BCELoss()
            loss = loss_func(prob, labels)
            epoch_loss += loss.item()
            batch_loss += loss.item()
            loss.backward()
            optimizer.step()
            count += 1

            if count % 2000 == 1999:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, count + 1, batch_loss / 2000))
                batch_loss = 0.0

        print("EPOCH Loss ====================", str(epoch_loss))

    print("Training complete!!")


def trainBERTContrastive(encoding1,encoding2, labels): 
    print(str(torch.cuda.memory_allocated(device)/1000000 ) + 'M')

   
    model = BERTContrastive()

    model = model.to(device)
    print(str(torch.cuda.memory_allocated(device)/1000000 ) + 'M')
    optimizer = AdamW(model.parameters(), lr=1e-5)
    print(encoding1['input_ids'].size())
    dataset = TensorDataset(encoding1['input_ids'], encoding1['token_type_ids'], encoding1['attention_mask'], encoding2['input_ids'], encoding2['token_type_ids'], encoding2['attention_mask'], labels)
    sampler = RandomSampler(dataset)
    dataloader = DataLoader(dataset, sampler=sampler, batch_size=8)
    epochs = 20
    count = 0

    print("Starting to train!!")
    
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        batch_loss = 0

        for input_ids1, _, attention_mask1, input_ids2, _, attention_mask2, labels in dataloader:
            optimizer.zero_grad()
            emd1 = model(input_ids1, attention_mask1)
            emd2 = model(input_ids2, attention_mask2)


            # criterion = nn.CosineEmbeddingLoss()
            # criterion = contrastiveEuclideanLoss
            criterion = nn.MarginRankingLoss()
            loss = criterion(emd1, emd2, 2 * labels - 1)
            epoch_loss += loss.item()
            batch_loss += loss.item()
            loss.backward()
            optimizer.step()
            count += 1

            if count % 2000 == 1999:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, count + 1, batch_loss / 2000))
                batch_loss = 0.0

        print("EPOCH Loss ====================", str(epoch_loss))

    print("Training complete!!")

In [15]:
import numpy as np

import pandas as pd
import torch
import random as rn
from transformers import BertModel, BertTokenizer
from transformers import AutoTokenizer

from torch.utils.data import Dataset, TensorDataset
import pickle

# from train import *
# from preprocess import *


def main():
    
    rn.seed(321)
    np.random.seed(321)
    torch.manual_seed(321)
    torch.cuda.manual_seed(321)

    preprocessed = False

    task = "Classification"
    task = "Contrastive"


    if not preprocessed:
        preprocessCls = Preprocessing('data_dummy.csv', task)
        preprocessCls.preprocess()

        # preprocessCls = Preprocessing('data/data_dummy.csv', task)
        # preprocessCls.preprocess()


    if task == "Classification":
        encodings = pickle.load(open("BERT" + task + "Encodings.pkl", 'rb'))
        labels = pickle.load(open("BERT" + task + "Labels.pkl", 'rb'))
        trainBERTClassification(encodings.to(device), labels.to(device))


    else:
        encodings1 = pickle.load(open("BERT" + task + "Encodings.pkl", 'rb'))
        labels = pickle.load(open("BERT" + task + "Labels.pkl", 'rb'))
        encodings2 = pickle.load(open("BERT" + task + "Encodings.pkl", 'rb'))
        print(str(torch.cuda.memory_allocated(device)/1000000 ) + 'M')


        trainBERTContrastive(encodings1.to(device), encodings2.to(device), labels.to(device))






if __name__ == "__main__":
    main()


Preprocessing done!!
0.0M
1.51808M
Done loading model
441.58208M
torch.Size([76, 416])
Starting to train!!
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size([4, 416]) torch.Size([4, 416])
torch.Size([4, 416]) torch.Size([4, 416])
torch.Size([8, 416]) torch.Size([8, 416])
torch.Size(