In [1]:
pip install transformers

Note: you may need to restart the kernel to use updated packages.


In [2]:
GOOGLE_COLAB = False
if GOOGLE_COLAB:
    from google.colab import drive
    import sys
    drive.mount("/content/drive")
    sys.path.insert(0,"/content/drive/My Drive/Twitter_SA-v1.1")

In [3]:
import deep_learning_modules
import torch
from torch.utils.data import DataLoader
import pandas as pd

BASE_PATH = "drive/MyDrive/Twitter_SA-v1.2/" if GOOGLE_COLAB else ""

  from .autonotebook import tqdm as notebook_tqdm


## Load Model

In [4]:
# 1- Select pretrained model parameters: bertweet or x_distil_bert_l6h256
model_params = deep_learning_modules.bertweet_model_params
# 2- BiLSTM on top of bert or just mean (BiLSTMTransferLearningClassifier or TransferLearningClassifier)
dl_model = deep_learning_modules.TransferLearningClassifier
# 3- freeze bert model or not (freeze_pretrained true or false)
freeze_pretrained = False
# 4- trained model path
model_path = BASE_PATH+"models/model_bertweet_large-epoch_1"

In [5]:
# set model path and model params

model = dl_model(model_params,freeze_pretrained)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.load_state_dict(torch.load(model_path,map_location=device))
model.eval()

Some weights of the model checkpoint at vinai/bertweet-large were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaModel were not initialized from the model checkpoint at vinai/bertweet-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use 

TransferLearningClassifier(
  (base_model): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
            

In [6]:
test_df = pd.read_csv(BASE_PATH+"data/test_cleaned.txt")
test_set = deep_learning_modules.TokenizedDataset(test_df,model_params,False)
test_loader = DataLoader(
    test_set,
    batch_size=1
)

res = model.get_predictions(test_loader,device)
res = pd.DataFrame(res,columns=["text","Prediction"])
res["Prediction"] = (res["Prediction"] *2)-1
res["Id"] = res.index + 1
res=res.reindex(["text","Id","Prediction"],axis=1)
res

100%|██████████| 10000/10000 [32:43<00:00,  5.09it/s]


Unnamed: 0,text,Id,Prediction
0,sea doo professional sea scooter ( sport with ...,1,-1
1,<user> shuck well i work all week so now i can...,2,-1
2,i cant stay away from bug thats my baby,3,1
3,<user> no ma'am ! ! ! laughing out loud Instan...,4,1
4,whenever i fall asleep watching the television...,5,-1
...,...,...,...
9995,had a nice time w / my friend lastnite,9996,1
9996,<user> no it's not ! please stop !,9997,-1
9997,not without my daughter ( dvd two-time oscar (...,9998,-1
9998,<user> have fun in class sweetcheeks,9999,1


In [7]:
res.to_csv(BASE_PATH+"submission-{}.csv".format(model_params.name),index=False)