In [1]:
from google.colab import drive
drive.mount("/content/drive/")

Mounted at /content/drive/


In [2]:
#!nvidia-smi

In [3]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 4.0 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 74.8 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 61.5 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


In [4]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import shutil
import sys   

In [5]:
train_path = "/content/drive/MyDrive/Colab Notebooks/HuggingFace/dataset/Train_ML.csv"
test_path = "/content/drive/MyDrive/Colab Notebooks/HuggingFace/dataset/Test_submission_netid.csv"

In [6]:
import os
os.getcwd()

'/content'

In [10]:
train_df = pd.read_csv(train_path,nrows = 10000)
test_df = pd.read_csv(test_path)

In [12]:
test_df.head()

Unnamed: 0.1,Unnamed: 0,ID,TITLE,ABSTRACT,Computer Science,Physics,Mathematics,Statistics,Quantitative Biology,Quantitative Finance
0,17910,17911,The Statistical Recurrent Unit,Sophisticated gated recurrent neural network...,,,,,,
1,3892,3893,Stochastic population dynamics in spatially ex...,Spatially extended population dynamics model...,,,,,,
2,11720,11721,Merlin-Arthur with efficient quantum Merlin an...,We introduce a simple sub-universal quantum ...,,,,,,
3,6172,6173,Distribution of the periodic points of the Far...,We expand the cross section of the geodesic ...,,,,,,
4,8155,8156,Learning model-based planning from scratch,Conventional wisdom holds that model-based p...,,,,,,


In [None]:
test_df.drop(['Unnamed: 0','Computer Science', 'Physics',
       'Mathematics', 'Statistics', 'Quantitative Biology',
       'Quantitative Finance'],axis =1, inplace=True)

In [None]:
train_df.head()

Unnamed: 0.1,Unnamed: 0,ID,TITLE,ABSTRACT,Computer Science,Physics,Mathematics,Statistics,Quantitative Biology,Quantitative Finance
0,11662,11663,Diffusivities bounds in the presence of Weyl c...,"In this paper, we investigate the behavior o...",0,1,0,0,0,0
1,11075,11076,Harnessing bistability for directional propuls...,"In most macro-scale robotics systems , propu...",1,0,0,0,0,0
2,18483,18484,Triangular Decomposition of Matrices in a Domain,Deterministic recursive algorithms for the c...,1,0,0,0,0,0
3,1980,1981,Theoretical Analysis of Generalized Sagnac Eff...,The Sagnac effect has been shown in inertial...,0,1,0,0,0,0
4,19866,19867,The geometric classification of Leibniz algebras,We describe all rigid algebras and all irred...,0,0,1,0,0,0


In [None]:
# combining 'title' and 'abstract' column to| get more context
train_df['CONTEXT'] = train_df['TITLE'] + ". " + train_df['ABSTRACT']
test_df['CONTEXT'] = test_df['TITLE'] + ". " + test_df['ABSTRACT']


In [None]:
# dropping useless features/columns
train_df.drop(labels=['Unnamed: 0','TITLE', 'ABSTRACT', 'ID'], axis=1, inplace=True)
test_df.drop(labels=['TITLE', 'ABSTRACT', 'ID'], axis=1, inplace=True)

In [None]:
test_df.columns

Index(['CONTEXT'], dtype='object')

In [None]:
train_df.columns

Index(['Computer Science', 'Physics', 'Mathematics', 'Statistics',
       'Quantitative Biology', 'Quantitative Finance', 'CONTEXT'],
      dtype='object')

In [None]:
# rearranging columns
train_df = train_df[['CONTEXT', 'Computer Science', 'Physics', 'Mathematics', 'Statistics',
                     'Quantitative Biology', 'Quantitative Finance',]]

In [None]:
train_df.head()

Unnamed: 0,CONTEXT,Computer Science,Physics,Mathematics,Statistics,Quantitative Biology,Quantitative Finance
0,Diffusivities bounds in the presence of Weyl c...,0,1,0,0,0,0
1,Harnessing bistability for directional propuls...,1,0,0,0,0,0
2,Triangular Decomposition of Matrices in a Doma...,1,0,0,0,0,0
3,Theoretical Analysis of Generalized Sagnac Eff...,0,1,0,0,0,0
4,The geometric classification of Leibniz algebr...,0,0,1,0,0,0


In [None]:
train_df.shape, test_df.shape

((10000, 7), (4195, 1))

In [None]:
target_list = ['Computer Science', 'Physics', 'Mathematics', 'Statistics',
       'Quantitative Biology', 'Quantitative Finance']

In [None]:
# hyperparameters
MAX_LEN = 256
TRAIN_BATCH_SIZE = 20 #32,10 for CNN model
VALID_BATCH_SIZE = 20 #32,10 for CNN model
EPOCHS = 2
LEARNING_RATE = 1e-05

In [None]:
from transformers import BertTokenizer, BertModel

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

## Loading/Tokenizing Data in Pytorch

In [None]:
class CustomDataset(torch.utils.data.Dataset):

    def __init__(self, df, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.df = df
        self.title = df['CONTEXT']
        self.targets = self.df[target_list].values
        self.max_len = max_len

    def __len__(self):
        return len(self.title)

    def __getitem__(self, index):
        title = str(self.title[index])
        title = " ".join(title.split())

        inputs = self.tokenizer.encode_plus(
            title,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            return_token_type_ids=True,
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt'
        )

        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'token_type_ids': inputs["token_type_ids"].flatten(),
            'targets': torch.FloatTensor(self.targets[index])
        }

In [None]:
train_size = 0.8
train_df2 = train_df.sample(frac=train_size, random_state=200)
val_df = train_df.drop(train_df2.index).reset_index(drop=True)
train_df=train_df2.reset_index(drop=True)

In [None]:
train_dataset = CustomDataset(train_df, tokenizer, MAX_LEN)
valid_dataset = CustomDataset(val_df, tokenizer, MAX_LEN)

In [None]:
train_data_loader = torch.utils.data.DataLoader(train_dataset, 
    batch_size=TRAIN_BATCH_SIZE,
    shuffle=True,
    num_workers=0
)

val_data_loader = torch.utils.data.DataLoader(valid_dataset, 
    batch_size=VALID_BATCH_SIZE,
    shuffle=False,
    num_workers=0
)

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

device(type='cuda')

In [None]:
!pip install GPUtil
from GPUtil import showUtilization as gpu_usage
gpu_usage()  

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
| ID | GPU | MEM |
------------------
|  0 |  0% |  0% |


In [None]:
# import torch
# torch.cuda.empty_cache()
# from numba import cuda
# cuda.select_device(0)
# cuda.close()
# cuda.select_device(0)

## Selecting Model 

In [None]:
class LSTMClass(torch.nn.Module):
    def __init__(self):
        super(LSTMClass, self).__init__()
        self.bert_model = BertModel.from_pretrained('bert-base-uncased',return_dict=True)
        self.dropout = torch.nn.Dropout(0.3)
        #self.lstm = nn.LSTM(input_size=768, hidden_size=768, num_layers=1, batch_first=True)
        self.linear = torch.nn.Linear(768, 6) #change number of labels
        #self.relu = nn.ReLU()
    def forward(self, input_ids, attn_mask, token_type_ids):
        output = self.bert_model(
            input_ids, 
            attention_mask=attn_mask, 
            token_type_ids=token_type_ids
        )       
        output_dropout = self.dropout(output.pooler_output)
        #output, hidden = self.lstm(output_dropout)
        #linear_outout = self.linear(output)
        #final_layer = self.relu(linear_output)
        output = self.linear(output_dropout) 
        return output #final_layer for lstm or just use add_module


In [None]:
class MLPClass(torch.nn.Module): #with MLP ...
    def __init__(self):
        super(MLPClass, self).__init__()
        self.bert_model = BertModel.from_pretrained('bert-base-uncased',return_dict=True)
        self.dropout = torch.nn.Dropout(0.3)
                ### 1st hidden layer
        self.linear_1 = torch.nn.Linear(768,100)
        self.linear_1.weight.detach().normal_(0.0, 0.1)
        self.linear_1.bias.detach().zero_()
                ### Output layer
        self.linear_out = torch.nn.Linear(100, 6) #change number of labels
        self.linear_out.weight.detach().normal_(0.0, 0.1)
        self.linear_out.bias.detach().zero_()
    def forward(self, input_ids, attn_mask, token_type_ids):
        output = self.bert_model(
            input_ids, 
            attention_mask=attn_mask, 
            token_type_ids=token_type_ids
        )       
        output_dropout = self.dropout(output.pooler_output)
        out = self.linear_1(output_dropout)
        out = torch.sigmoid(out)
        logits = self.linear_out(out)
        return logits 


In [None]:
class CNNClass(torch.nn.Module):
    def __init__(self):
        super(CNNClass, self).__init__()
        self.bert_model = BertModel.from_pretrained('bert-base-uncased',return_dict=True)
        self.dropout = torch.nn.Dropout(0.3)
        self.Conv1 = nn.Conv1d(10, 10, 1) # same as your batch size
        self.linear = torch.nn.Linear(768, 6)

    def forward(self, input_ids, attn_mask, token_type_ids):
        output = self.bert_model(
            input_ids,
            attention_mask=attn_mask,
            token_type_ids=token_type_ids
        )
        output_dropout = self.dropout(output.pooler_output)
        output = self.Conv1(output_dropout)
        output = self.linear(output)
        return output

### Using LSTM Layer

In [None]:
model = LSTMClass()

In [None]:
model.add_module("lstm",nn.LSTM(input_size=768, hidden_size=768, num_layers=1, batch_first=True)) # it works without using linear after the lstm ... best clean way to implement LSTM
#model.classifier = nn.Linear(768, 6) ... if you want another FULLY CONNECTED LAYER

In [None]:
model.to(device)

LSTMClass(
  (bert_model): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=Tr

In [None]:
def loss_fn(outputs, targets):
    return torch.nn.BCEWithLogitsLoss()(outputs, targets)
optimizer = torch.optim.Adam(params =  model.parameters(), lr=LEARNING_RATE)

In [None]:
val_targets=[]
val_outputs=[]

In [None]:
def train_model(n_epochs, training_loader, validation_loader, model, 
                optimizer):
   
  # initialize tracker for minimum validation loss
  valid_loss_min = np.Inf
  for epoch in range(1, n_epochs+1):
    train_loss = 0
    valid_loss = 0

    model.train()
    print('############# Epoch {}: Training Start   #############'.format(epoch))
    for batch_idx, data in enumerate(training_loader):
        #print('yyy epoch', batch_idx)
        ids = data['input_ids'].to(device, dtype = torch.long)
        mask = data['attention_mask'].to(device, dtype = torch.long)
        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.float)

        outputs = model(ids, mask, token_type_ids)

        optimizer.zero_grad()
        loss = loss_fn(outputs, targets)
        #if batch_idx%5000==0:
         #   print(f'Epoch: {epoch}, Training Loss:  {loss.item()}')
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #print('before loss data in training', loss.item(), train_loss)
        train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.item() - train_loss))
        #print('after loss data in training', loss.item(), train_loss)
    
    print('############# Epoch {}: Training End     #############'.format(epoch))
    
    print('############# Epoch {}: Validation Start   #############'.format(epoch))
    ######################    
    # validate the model #
    ######################
 
    model.eval()
   
    with torch.no_grad():
      for batch_idx, data in enumerate(validation_loader, 0):
            ids = data['input_ids'].to(device, dtype = torch.long)
            mask = data['attention_mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.float)
            outputs = model(ids, mask, token_type_ids)

            loss = loss_fn(outputs, targets)
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.item() - valid_loss))
            val_targets.extend(targets.cpu().detach().numpy().tolist())
            val_outputs.extend(torch.sigmoid(outputs).cpu().detach().numpy().tolist())

      print('############# Epoch {}: Validation End     #############'.format(epoch))
      # calculate average losses
      #print('before cal avg train loss', train_loss)
      train_loss = train_loss/len(training_loader) 
      valid_loss = valid_loss/len(validation_loader)
      # print training/validation statistics 
      print('Epoch: {} \tAvgerage Training Loss: {:.6f} \tAverage Validation Loss: {:.6f}'.format(
            epoch, 
            train_loss,
            valid_loss
            ))    
    print('############# Epoch {}  Done   #############\n'.format(epoch))
  return model

In [None]:
trained_model = train_model(EPOCHS, train_data_loader, val_data_loader, model, optimizer)

############# Epoch 1: Training Start   #############
############# Epoch 1: Training End     #############
############# Epoch 1: Validation Start   #############
############# Epoch 1: Validation End     #############
Epoch: 1 	Avgerage Training Loss: 0.000786 	Average Validation Loss: 0.002160
############# Epoch 1  Done   #############

############# Epoch 2: Training Start   #############
############# Epoch 2: Training End     #############
############# Epoch 2: Validation Start   #############
############# Epoch 2: Validation End     #############
Epoch: 2 	Avgerage Training Loss: 0.000489 	Average Validation Loss: 0.001882
############# Epoch 2  Done   #############



## Metrics

In [None]:
def return_whole_vals(x):
  fin = []
  for i in x:
    if i > 0.50: # select your threshold
      fin.append(1)
    else:
      fin.append(0)
  return fin

In [None]:
from sklearn.metrics import classification_report, multilabel_confusion_matrix
val_df.columns = ['CONTEXT','A','B','C','D','E','F'] #change depending on labels
val_df.head()

Unnamed: 0,CONTEXT,A,B,C,D,E,F
0,Triangular Decomposition of Matrices in a Doma...,1,0,0,0,0,0
1,Theoretical Analysis of Generalized Sagnac Eff...,0,1,0,0,0,0
2,On the Complexity of Sampling Nodes Uniformly ...,1,0,0,0,0,0
3,Bayesian Cluster Enumeration Criterion for Uns...,1,0,1,1,0,0
4,Magnetic phase diagram of the iron pnictides i...,0,1,0,0,0,0


In [None]:
val_df.shape[0]

2000

In [None]:
# Create an empty list
y_true =[]
# Iterate over each row
for index, rows in val_df.iterrows():
    # Create list for the current row
    my_list =[rows.A, rows.B, rows.C,rows.D,rows.E,rows.F]   
    # append the list to the final list
    y_true.append(my_list)
# Print the list
print(y_true)

[[1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0], [1, 0, 1, 1, 0, 0], [0, 1, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 1, 1, 0, 0, 0], [1, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0], [0, 1, 0, 0, 0, 0], [1, 0, 0, 1, 0, 0], [1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [1, 1, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0], [0, 0, 1, 0, 0, 0], [0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [1, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 1, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0], [1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 0, 1, 1, 0, 0], [0, 0, 1, 0, 0, 0], [1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0], [1, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0], [1, 0, 0, 1, 0, 0], [1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [1, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0], [0, 1, 1, 0, 0, 0], [1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0], [0, 0, 1, 1, 0, 0], [0, 1, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0],

In [None]:
testing = val_df['CONTEXT'].tolist()[:val_df.shape[0]]

In [None]:
# validation prediction
y_predict = []
for example in testing:
  encodings = tokenizer.encode_plus(
      example,
      None,
      add_special_tokens=True,
      max_length=MAX_LEN,
      padding='max_length',
      return_token_type_ids=True,
      truncation=True,
      return_attention_mask=True,
      return_tensors='pt'
  )
  trained_model.eval()
  with torch.no_grad():
      input_ids = encodings['input_ids'].to(device, dtype=torch.long)
      attention_mask = encodings['attention_mask'].to(device, dtype=torch.long)
      token_type_ids = encodings['token_type_ids'].to(device, dtype=torch.long)
      output = trained_model(input_ids, attention_mask, token_type_ids) 
      final_output = torch.sigmoid(output).cpu().detach().numpy().tolist()
      y_predict.append([return_whole_vals(x) for x in final_output])

In [None]:
y_pred = [x for listy in y_predict for x in listy]

In [None]:
labels = train_df.drop('CONTEXT',axis =1).columns.tolist() #drop X .. in this case CONTEXT

In [None]:
print(classification_report(y_true, y_pred, target_names=labels))

                      precision    recall  f1-score   support

    Computer Science       0.83      0.87      0.85       832
             Physics       0.94      0.83      0.88       570
         Mathematics       0.82      0.78      0.80       517
          Statistics       0.74      0.84      0.78       476
Quantitative Biology       0.60      0.25      0.35        60
Quantitative Finance       1.00      0.39      0.56        23

           micro avg       0.83      0.82      0.82      2478
           macro avg       0.82      0.66      0.70      2478
        weighted avg       0.83      0.82      0.82      2478
         samples avg       0.84      0.85      0.83      2478



  _warn_prf(average, modifier, msg_start, len(result))


## Testing Model

In [None]:
test_df.head()

Unnamed: 0,CONTEXT
0,The Statistical Recurrent Unit. Sophisticate...
1,Stochastic population dynamics in spatially ex...
2,Merlin-Arthur with efficient quantum Merlin an...
3,Distribution of the periodic points of the Far...
4,Learning model-based planning from scratch. ...


In [None]:
testing = test_df['CONTEXT'].tolist()

In [None]:
# testing
y_predict = []
for example in testing:
  encodings = tokenizer.encode_plus(
      example,
      None,
      add_special_tokens=True,
      max_length=MAX_LEN,
      padding='max_length',
      return_token_type_ids=True,
      truncation=True,
      return_attention_mask=True,
      return_tensors='pt'
  )
  trained_model.eval()
  with torch.no_grad():
      input_ids = encodings['input_ids'].to(device, dtype=torch.long)
      attention_mask = encodings['attention_mask'].to(device, dtype=torch.long)
      token_type_ids = encodings['token_type_ids'].to(device, dtype=torch.long)
      output = trained_model(input_ids, attention_mask, token_type_ids) 
      final_output = torch.sigmoid(output).cpu().detach().numpy().tolist()
      y_predict.append([return_whole_vals(x) for x in final_output])

In [None]:
test = [x for listy in y_predict for x in listy]

In [None]:
sub_df = pd.DataFrame(test, columns=['Computer Science','Physics','Mathematics','Statistics','Quantitative Biology','Quantitative Finance'])

In [None]:
sub_df.sample(5) #if test has ID put here 

Unnamed: 0,Computer Science,Physics,Mathematics,Statistics,Quantitative Biology,Quantitative Finance
4139,1,0,0,0,0,0
2748,0,0,1,0,0,0
3455,1,0,0,1,0,0
2013,0,0,1,0,0,0
2915,0,1,0,0,0,0


In [None]:
test_checking = pd.read_csv(test_path)
test_checking.head()

Unnamed: 0.1,Unnamed: 0,ID,TITLE,ABSTRACT,Computer Science,Physics,Mathematics,Statistics,Quantitative Biology,Quantitative Finance
0,17910,17911,The Statistical Recurrent Unit,Sophisticated gated recurrent neural network...,,,,,,
1,3892,3893,Stochastic population dynamics in spatially ex...,Spatially extended population dynamics model...,,,,,,
2,11720,11721,Merlin-Arthur with efficient quantum Merlin an...,We introduce a simple sub-universal quantum ...,,,,,,
3,6172,6173,Distribution of the periodic points of the Far...,We expand the cross section of the geodesic ...,,,,,,
4,8155,8156,Learning model-based planning from scratch,Conventional wisdom holds that model-based p...,,,,,,


In [None]:
sub_df['TITLE'] = test_checking['TITLE']
sub_df['ABSTRACT'] = test_checking['ABSTRACT']
sub_df['ID'] = test_checking['ID']


In [None]:
sub_df = sub_df[['ID','TITLE','ABSTRACT', 'Computer Science', 'Physics', 'Mathematics', 'Statistics',
                     'Quantitative Biology', 'Quantitative Finance',]]

In [None]:
sub_df.head()

Unnamed: 0,ID,TITLE,ABSTRACT,Computer Science,Physics,Mathematics,Statistics,Quantitative Biology,Quantitative Finance
0,17911,The Statistical Recurrent Unit,Sophisticated gated recurrent neural network...,1,0,0,1,0,0
1,3893,Stochastic population dynamics in spatially ex...,Spatially extended population dynamics model...,0,1,0,0,1,0
2,11721,Merlin-Arthur with efficient quantum Merlin an...,We introduce a simple sub-universal quantum ...,1,0,0,0,0,0
3,6173,Distribution of the periodic points of the Far...,We expand the cross section of the geodesic ...,0,0,1,0,0,0
4,8156,Learning model-based planning from scratch,Conventional wisdom holds that model-based p...,1,0,0,0,0,0


In [None]:
sub_df.to_csv('Test_submission_netid.csv')

In [None]:
!cp Test_submission_netid_Final_2.csv "drive/My Drive/"

### References 
https://curiousily.com/posts/multi-label-text-classification-with-bert-and-pytorch-lightning/

https://github.com/theartificialguy/NLP-with-Deep-Learning/blob/master/BERT/Multi%20Label%20Text%20Classification%20using%20BERT%20PyTorch/bert_multilabel_pytorch_standard.ipynb

https://github.com/christianversloot/machine-learning-articles/blob/main/creating-a-multilayer-perceptron-with-pytorch-and-lightning.md