### GPU info

In [1]:
import torch

# If there's a GPU available...
if torch.cuda.is_available():    

    # Tell PyTorch to use the GPU.    
    device = torch.device("cuda")

    print('There are %d GPU(s) available.' % torch.cuda.device_count())

    print('We will use the GPU:', torch.cuda.get_device_name(0))
    !nvidia-smi

# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")

There are 1 GPU(s) available.
We will use the GPU: Tesla T4
Mon Oct 10 15:07:28 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   69C    P8    13W /  70W |      3MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+---------------------------

### Load the data from google drive

In [2]:
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


### Libraries installation 

In [3]:
!pip install nlp
!pip install transformers
!pip install sh
!pip install absl-py


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting nlp
  Downloading nlp-0.4.0-py3-none-any.whl (1.7 MB)
[K     |████████████████████████████████| 1.7 MB 37.8 MB/s 
Collecting xxhash
  Downloading xxhash-3.0.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[K     |████████████████████████████████| 212 kB 64.6 MB/s 
Installing collected packages: xxhash, nlp
Successfully installed nlp-0.4.0 xxhash-3.0.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.22.2-py3-none-any.whl (4.9 MB)
[K     |████████████████████████████████| 4.9 MB 38.0 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 61.1 MB/s 
Collecting huggingface-hub<1.0,>=0.9.0
  Downloading huggingface

### Importing libraries

In [4]:
from absl import app, flags, logging

In [5]:
import torch as th
import pandas as pd
import tensorflow as tf
from torch.utils.data import Dataset ,DataLoader
import numpy as np
import matplotlib.pyplot as plt


In [6]:
import nlp
import transformers
import sh

### Assign the current device into device variable

In [7]:
import torch

if torch.cuda.is_available():       
    device = torch.device("cuda")
    print(f'There are {torch.cuda.device_count()} GPU(s) available.')
    print('Device name:', torch.cuda.get_device_name(0))

else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")


There are 1 GPU(s) available.
Device name: Tesla T4


### Impoert the cleaned data

In [9]:
path = '/content/drive/MyDrive/Colab Notebooks/CleanedTweetsWithEmoji.csv'
data = pd.read_csv(path)
data[data['text'].isnull()]['text']
data.head()

Unnamed: 0,id,text,sentiment
0,1221875106206638080,والله حسب الأرقام سيكون مخيب للآمال ولكن الأهل...,Positive
1,1221884257490042887,الزعل بيغير ملامحك بيغير نظرة العين بيغير شكلك...,Neutral
2,1226422627436310528,الحب الحقيقي هو اقتسام بعض نفسك مع شخص أخر أقر...,Positive
3,1221880820815798277,النهضة في فتيل 😂,Positive
4,1221884400377499651,ليس حبا في ايران بقدر ماهو نكايه بترامب وحزبه,Neutral


### Convert labels into numbers

In [10]:
class2idx = {'Positive':2, 'Negative':0, 'Neutral':1}

data.sentiment= data.sentiment.map(class2idx)

In [11]:
data

Unnamed: 0,id,text,sentiment
0,1221875106206638080,والله حسب الأرقام سيكون مخيب للآمال ولكن الأهل...,2
1,1221884257490042887,الزعل بيغير ملامحك بيغير نظرة العين بيغير شكلك...,1
2,1226422627436310528,الحب الحقيقي هو اقتسام بعض نفسك مع شخص أخر أقر...,2
3,1221880820815798277,النهضة في فتيل 😂,2
4,1221884400377499651,ليس حبا في ايران بقدر ماهو نكايه بترامب وحزبه,1
...,...,...,...
54995,1255211270388809728,تصنيف دايم للتخصصات الصحيه بالامس تحدثونا عن ا...,1
54996,1255071490757754880,اتمنى تحلون المشكله وش يعني لما نلغي الطلب يرد...,1
54997,1254877579045220352,السلام عليكم عندي استفسار يا الغالي كل ما اجي ...,1
54998,1254759371193618432,حاب استفسر اذا منشآتي من ضمن المنشآت المستثناة...,1


In [22]:
data.dropna(inplace=True)

class weights based on the majority class

In [23]:
#class weights 
Nu,Po,Ne=data['sentiment'].value_counts()
Nu_W,Po_w,Ne_W = Nu/Nu ,Nu/Po,Nu/Ne
print(Nu_W,Po_w,Ne_W)


1.0 4.235487528344671 4.235967796802359


### Building the dataset

In [24]:
class ARSADatasets(Dataset):
    def __init__(self,data):
        #data loading 
        #delimiter to cut the text using it ,skip row we dont want the first row which is the header
        #
        xy = data
        #we dont want the first column so we use 1:
        self.x = xy['text'].values
        #all the samples with only the first colmun
        self.y = xy['sentiment'].values#n_samples ,1
        #row numbers
        self.n_samples = xy.shape[0]
        
        
    def __getitem__(self,index):
        #dataset[0]
        #return tuples
        return self.x[index],self.y[index]
    def __len__(self):
        #len(dataset)
        return  self.n_samples

In [25]:
dataset = ARSADatasets(data)
#(dataset[:][1] == 0).sum()

### Set configuration for the BERT model

In [26]:
#parapmters 
Config = {
'debug' : False,
'epochs' : 10,
'batch_size' : 8,
'lr' : 1e-2,
'momentum' :.9,
'model' :'bert-base-uncased',
'seq_length' : 32,
'percent' : 5 ,
}


### Tokenizer 

In [27]:
from transformers import BertTokenizer

# Load the BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('UBC-NLP/MARBERT')

### Encode tweets

In [28]:
# Encode our concatenated data
encoded_tweets = [tokenizer.encode(sent, add_special_tokens=True) for sent in dataset[:][0]]

# Find the maximum length
max_len = max([len(sent) for sent in encoded_tweets])
print('Max length: ', max_len)

Max length:  70


### Preprocessing for BERT

In [29]:


# Create a function to tokenize a set of texts
def preprocessing_for_bert(data):
    """Perform required preprocessing steps for pretrained BERT.
    @param    data (np.array): Array of texts to be processed.
    @return   input_ids (torch.Tensor): Tensor of token ids to be fed to a model.
    @return   attention_masks (torch.Tensor): Tensor of indices specifying which
                  tokens should be attended to by the model.
                      """
# Create empty lists to store outputs
    input_ids = []
    attention_masks = []

    # For every sentence...
    for sent in data:
        # `encode_plus` will:
        #    (1) Tokenize the sentence
        #    (2) Add the `[CLS]` and `[SEP]` token to the start and end
        #    (3) Truncate/Pad sentence to max length
        #    (4) Map tokens to their IDs
        #    (5) Create attention mask
        #    (6) Return a dictionary of outputs
        encoded_sent = tokenizer.encode_plus(
            text=sent,  # Preprocess sentence
            add_special_tokens=True,        # Add `[CLS]` and `[SEP]`
            max_length=max_len,                  # Max length to truncate/pad
            pad_to_max_length=True,         # Pad sentence to max length
            #return_tensors='pt',           # Return PyTorch tensor
            return_attention_mask=True      # Return attention mask
            )
        
        # Add the outputs to the lists
        input_ids.append(encoded_sent.get('input_ids'))
        attention_masks.append(encoded_sent.get('attention_mask'))

    # Convert lists to tensors
    input_ids = torch.tensor(input_ids)
    attention_masks = torch.tensor(attention_masks)

    return input_ids, attention_masks


### get the input ID and the masks of the data

In [30]:
input_ids, attention_masks = preprocessing_for_bert(dataset[:][0])

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


### Splitting the dataset

In [31]:
from sklearn.model_selection import train_test_split

X = dataset[:][0]
y = dataset[:][1]

X_train, X_val, y_train, y_val =\
    train_test_split(X, y, test_size=0.2, random_state=2020)

### Preview and preprocess the data

In [32]:
# Specify `MAX_LEN`
MAX_LEN = 57

# Print sentence 0 and its encoded token ids
token_ids = list(preprocessing_for_bert([X[0]])[0].squeeze().numpy())
print('Original: ', X[0])
print('Token IDs: ', token_ids)

# Run function `preprocessing_for_bert` on the train set and the validation set
print('Tokenizing data...')
train_inputs, train_masks = preprocessing_for_bert(X_train)
val_inputs, val_masks = preprocessing_for_bert(X_val)



Original:  والله حسب الأرقام سيكون مخيب للآمال ولكن الأهلي قدها برجاله في الملعب 💚 🤍 
Token IDs:  [2, 2188, 4295, 14036, 5867, 94955, 87867, 2659, 3013, 17419, 13413, 1021, 1947, 6298, 796, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Tokenizing data...


### Show the first 100 words of the vocabularies

In [40]:
print(list(tokenizer.vocab.keys())[100:200])

['ρ', 'м', 'ֆ', 'ב', 'ו', 'ט', 'י', 'ל', 'ם', 'ע', 'פ', 'ױ', '،', '؏', '؛', '؟', 'ء', 'ا', 'ب', 'ة', 'ت', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'و', 'ى', 'ي', '٠', '١', '٢', '٣', '٤', '٥', '٦', '٧', '٨', '٩', '٪', '٫', '٬', '٭', 'ٱ', 'ٲ', 'ٳ', 'ٹ', 'ٺ', 'ٻ', 'پ', 'چ', 'ڈ', 'ډ', 'ڑ', 'ړ', 'ڕ', 'ژ', 'ڛ', 'ڤ', 'ڨ', 'ک', 'ڪ', 'ګ', 'گ', 'ڱ', 'ڳ', 'ڵ', 'ڷ', 'ں', 'ڼ', 'ھ', 'ہ', 'ۃ', 'ۄ', 'ۅ', 'ۆ', 'ۈ', 'ۉ', 'ۊ', 'ی', 'ۍ', 'ێ']


### transform the data into tensors and used 

In [43]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler

# Convert other data types to torch.Tensor
train_labels = th.tensor(y_train)
val_labels = th.tensor(y_val)

# For fine-tuning BERT, the authors recommend a batch size of 16 or 32.
batch_size = 16

# Create the DataLoader for our training set
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

# Create the DataLoader for our validation set
val_data = TensorDataset(val_inputs, val_masks, val_labels)
val_sampler = SequentialSampler(val_data)
val_dataloader = DataLoader(val_data, sampler=val_sampler, batch_size=batch_size)


In [44]:
%%time
import torch
import torch.nn as nn
from transformers import BertModel
from transformers import AutoTokenizer, AutoModel

# Create the BertClassfier class
class BertClassifier(nn.Module):
    """Bert Model for Classification Tasks.
    """
    def __init__(self, freeze_bert=False):
        """
        @param    bert: a BertModel object
        @param    classifier: a torch.nn.Module classifier
        @param    freeze_bert (bool): Set `False` to fine-tune the BERT model
        """
        super(BertClassifier, self).__init__()
        # Specify hidden size of BERT, hidden size of our classifier, and number of labels
        D_in, H, D_out = 768, 50, 3

        # Instantiate BERT model
        self.bert = BertModel.from_pretrained('UBC-NLP/MARBERT')

        # Instantiate an one-layer feed-forward classifier
        self.classifier = nn.Sequential(
            nn.Linear(D_in, H),
            nn.ReLU(),
            #nn.Dropout(0.5),
            nn.Linear(H, D_out)
        )

        # Freeze the BERT model
        if freeze_bert:
            for param in self.bert.parameters():
                param.requires_grad = False
        
    def forward(self, input_ids, attention_mask):
        """
        Feed input to BERT and the classifier to compute logits.
        @param    input_ids (torch.Tensor): an input tensor with shape (batch_size,
                      max_length)
        @param    attention_mask (torch.Tensor): a tensor that hold attention mask
                      information with shape (batch_size, max_length)
        @return   logits (torch.Tensor): an output tensor with shape (batch_size,
                      num_labels)
        """
        # Feed input to BERT
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask)
        
        # Extract the last hidden state of the token `[CLS]` for classification task
        last_hidden_state_cls = outputs[0][:, 0, :]

        # Feed input to classifier to compute logits
        logits = self.classifier(last_hidden_state_cls)

        return logits


CPU times: user 33.3 ms, sys: 2.02 ms, total: 35.3 ms
Wall time: 35.7 ms


In [45]:
from transformers import AdamW, get_linear_schedule_with_warmup

def initialize_model(epochs=4):
    """Initialize the Bert Classifier, the optimizer and the learning rate scheduler.
    """
    # Instantiate Bert Classifier
    bert_classifier = BertClassifier(freeze_bert=False)
    # Tell PyTorch to run the model on GPU
    bert_classifier.to(device)

    # Create the optimizer
    optimizer = AdamW(bert_classifier.parameters(),
                      lr=1e-5,    # Default learning rate
                      eps=1e-8    # Default epsilon value
                      )

    # Total number of training steps
    total_steps = len(train_dataloader) * epochs

    # Set up the learning rate scheduler
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0, # Default value
                                                num_training_steps=total_steps)
    return bert_classifier, optimizer, scheduler

### Weighted Random Sampling

In [46]:
import random
import time

# Specify loss function
class_weights = torch.FloatTensor([Nu_W,Po_w,Ne_W]).to(device)
loss_fn = nn.CrossEntropyLoss(weight=class_weights)

def set_seed(seed_value=42):
    """Set seed for reproducibility.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)

def train(model, train_dataloader, val_dataloader=None, epochs=4, evaluation=False):
    """Train the BertClassifier model.
    """
    # Start training loop
    print("Start training...\n")
    PATH = "model.pt"
    for epoch_i in range(epochs):
        # =======================================
        #               Training
        # =======================================
        # Print the header of the result table
        print(f"{'Epoch':^7} | {'Batch':^7} | {'Train Loss':^12} | {'Val Loss':^10} | {'Val Acc':^9} | {'Elapsed':^9}")
        print("-"*70)

        # Measure the elapsed time of each epoch
        t0_epoch, t0_batch = time.time(), time.time()

        # Reset tracking variables at the beginning of each epoch
        total_loss, batch_loss, batch_counts = 0, 0, 0

        # Put the model into the training mode
        model.train()

        # For each batch of training data...
        for step, batch in enumerate(train_dataloader):
            batch_counts +=1
            # Load batch to GPU
            b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)
            
            # Zero out any previously calculated gradients
            model.zero_grad()
 
            # Perform a forward pass. This will return logits.
            logits = model(b_input_ids, b_attn_mask)
            # Compute loss and accumulate the loss values

            loss = loss_fn(logits, b_labels)
            
            batch_loss += loss.item()
            total_loss += loss.item()

            # Perform a backward pass to calculate gradients
            loss.backward()

            # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            # Update parameters and the learning rate
            optimizer.step()
            scheduler.step()

            # Print the loss values and time elapsed for every 20 batches
            if (step % 20 == 0 and step != 0) or (step == len(train_dataloader) - 1):
                # Calculate time elapsed for 20 batches
                time_elapsed = time.time() - t0_batch

                # Print training results
                print(f"{epoch_i + 1:^7} | {step:^7} | {batch_loss / batch_counts:^12.6f} | {'-':^10} | {'-':^9} | {time_elapsed:^9.2f}")

                # Reset batch tracking variables
                batch_loss, batch_counts = 0, 0
                t0_batch = time.time()

        # Calculate the average loss over the entire training data
        avg_train_loss = total_loss / len(train_dataloader)

        print("-"*70)
        
        torch.save({
            'epoch': epoch_i,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': loss,
            }, PATH)
        # =======================================
        #               Evaluation
        # =======================================
        if evaluation == True:
            # After the completion of each training epoch, measure the model's performance
            # on our validation set.
            val_loss, val_accuracy = evaluate(model, val_dataloader)

            # Print performance over the entire training data
            time_elapsed = time.time() - t0_epoch
            
            print(f"{epoch_i + 1:^7} | {'-':^7} | {avg_train_loss:^12.6f} | {val_loss:^10.6f} | {val_accuracy:^9.2f} | {time_elapsed:^9.2f}")
            print("-"*70)
        print("\n")
    
    print("Training complete!")


def evaluate(model, val_dataloader):
    """After the completion of each training epoch, measure the model's performance
    on our validation set.
    """
    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    # Tracking variables
    val_accuracy = []
    val_loss = []

    # For each batch in our validation set...
    for batch in val_dataloader:
        # Load batch to GPU
        b_input_ids, b_attn_mask, b_labels = tuple(t.to(device) for t in batch)

        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)

        # Compute loss
        loss = loss_fn(logits, b_labels)
        val_loss.append(loss.item())

        # Get the predictions
        preds = torch.argmax(logits, dim=1).flatten()

        # Calculate the accuracy rate
        accuracy = (preds == b_labels).cpu().numpy().mean() * 100
        val_accuracy.append(accuracy)

    # Compute the average accuracy and loss over the validation set.
    val_loss = np.mean(val_loss)
    val_accuracy = np.mean(val_accuracy)

    return val_loss, val_accuracy

In [None]:
set_seed(42)    # Set seed for reproducibility
bert_classifier, optimizer, scheduler = initialize_model(epochs=20)
model = BertForSequenceClassification.from_pretrained(model_path, num_labels=len(lab2ind))
train(bert_classifier, train_dataloader, val_dataloader, epochs=20, evaluation=True)


Some weights of the model checkpoint at UBC-NLP/MARBERT were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Start training...

 Epoch  |  Batch  |  Train Loss  |  Val Loss  |  Val Acc  |  Elapsed 
----------------------------------------------------------------------
   1    |   20    |   0.814190   |     -      |     -     |   4.65   
   1    |   40    |   0.619689   |     -      |     -     |   4.24   
   1    |   60    |   0.587433   |     -      |     -     |   4.23   
   1    |   80    |   0.552677   |     -      |     -     |   4.23   
   1    |   100   |   0.546345   |     -      |     -     |   4.23   
   1    |   120   |   0.478782   |     -      |     -     |   4.22   
   1    |   140   |   0.525266   |     -      |     -     |   4.23   
   1    |   160   |   0.511907   |     -      |     -     |   4.23   
   1    |   180   |   0.500985   |     -      |     -     |   4.23   
   1    |   200   |   0.490149   |     -      |     -     |   4.22   
   1    |   220   |   0.448812   |     -      |     -     |   4.23   
   1    |   240   |   0.482199   |     -      |     -     |   4.23   


In [None]:
import torch.nn.functional as F

def bert_predict(model, test_dataloader):
    """Perform a forward pass on the trained BERT model to predict probabilities
    on the test set.
    """
    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    all_logits = []

    # For each batch in our test set...
    for batch in test_dataloader:
        # Load batch to GPU
        b_input_ids, b_attn_mask = tuple(t.to(device) for t in batch)[:2]

        # Compute logits
        with torch.no_grad():
            logits = model(b_input_ids, b_attn_mask)
        all_logits.append(logits)
    
    # Concatenate logits from each batch
    all_logits = torch.cat(all_logits, dim=0)

    # Apply softmax to calculate probabilities
    probs = F.softmax(all_logits, dim=1).cpu().numpy()

    return probs

In [None]:
# Compute predicted probabilities on the test set
probs = bert_predict(bert_classifier, val_dataloader)
val_dataloader
print(probs)
print(val_dataloader
)
# Evaluate the Bert classifier
#evaluate_roc(probs, y_val)

[[9.2447626e-06 7.2547649e-05 9.9991810e-01]
 [1.3426711e-05 9.9998009e-01 6.5340469e-06]
 [9.9987268e-01 8.7135195e-05 4.0225426e-05]
 ...
 [6.0642969e-06 9.9998391e-01 1.0047169e-05]
 [1.9133284e-05 9.9997449e-01 6.3569355e-06]
 [1.7589331e-05 9.9997544e-01 6.9821544e-06]]
<torch.utils.data.dataloader.DataLoader object at 0x7fe1a87f4490>


In [None]:
t_label = torch.tensor([])
for b in val_dataloader:
  _,_,b_labels = b
  t_label = torch.cat((t_label,b_labels),dim=-1)
preds = torch.argmax(torch.from_numpy(probs), dim=1).flatten()


In [None]:
print((t_label ==preds).sum() /  len(t_label))
torch.save(bert_classifier)

tensor(0.7752)


In [None]:
%cd /content/drive/MyDrive/Colab Notebooks

/content/drive/MyDrive/Colab Notebooks


In [None]:
#save model 
model = bert_classifier
path = 'model_with_20_epochs.pt'
torch.save({
            
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            
            }, path)



In [None]:
trained_model 

### Testing 

In [None]:
#reading test data 
test_path ='/content/drive/MyDrive/Colab Notebooks/CleanedTestTweetsWithEmoji.csv'
t_df = pd.read_csv(test_path,encoding='utf-8')
X_test = t_df['Text'].values 

In [None]:
#prepare for BERT 
test_inputs,test_masks = preprocessing_for_bert(X_test)
# Create the DataLoader for our validation set
test_data = TensorDataset(test_inputs, test_masks)
test_sampler = SequentialSampler(test_data)
test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)


In [None]:
test_probs=bert_predict(bert_classifier, test_dataloader)
test_preds = torch.argmax(torch.from_numpy(test_probs), dim=1).flatten()


In [None]:
test_prediction = test_preds.numpy()

In [None]:
t_df['sentiment'] = test_prediction


In [None]:
t_df['sentiment'] = t_df['sentiment'].map({2:1, 0:-1, 1:0})

In [None]:
t_df = t_df.drop(columns=['Text'])

In [None]:
t_df.to_csv('test_for_submitFullDataTrainingEmojiandOnlyWeightedClassesLast.csv',index=False)

In [None]:
df = pd.read_csv('test_for_submitFullDataTrainingEmoji.csv')
df

### Save the model into the drive

In [None]:
PATH = '/content/drive/MyDrive/BestModelASALast.p'
torch.save(bert_classifier.state_dict(), PATH)
