In [None]:
pip install transformers

In [None]:
# Set to True when running on Google Colab; the rest of the notebook reads this
# flag to decide where the project files live.
GOOGLE_COLAB = False

if GOOGLE_COLAB:
    import sys
    from google.colab import drive

    # Mount Google Drive, then put the project folder first on the import path
    # so that project-local modules can be imported from it.
    drive.mount("/content/drive")
    sys.path.insert(0, "/content/drive/My Drive/Twitter_SA-v1.2")

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import pickle
import torch
import deep_learning_modules
from tqdm.auto import tqdm
# Prefix prepended to every data/model path in this notebook.
# On Colab it points into the mounted Drive folder (a relative path, so it
# presumably relies on the working directory being /content — TODO confirm);
# otherwise paths are resolved relative to the current working directory.
if GOOGLE_COLAB:
    BASE_PATH = "drive/MyDrive/Twitter_SA-v1.2/"
else:
    BASE_PATH = ""

# Import pre-processed data

In [None]:
# Read the cleaned training set.
df_train = pd.read_csv(BASE_PATH + "data/train_cleaned.txt")

# Shift and halve the labels: label -> (label + 1) / 2.
# NOTE(review): presumably maps {-1, 1} onto {0, 1} — confirm against the data.
df_train["label"] = (df_train["label"] + 1) / 2

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible across runs.
train_df, valid_df = train_test_split(
    df_train,
    test_size=0.2,
    random_state=2112,
)

In [None]:
# Experiment configuration: edit these three values to change what gets trained.
#
# 1. Pretrained model parameters: bertweet or x_distil_bert_l6h256.
model_params = deep_learning_modules.bertweet_model_params
# 2. Classifier head: a BiLSTM on top of BERT (BiLSTMTransferLearningClassifier)
#    or just the mean (TransferLearningClassifier).
dl_model = deep_learning_modules.TransferLearningClassifier
# 3. Whether to freeze the pretrained BERT model (True) or fine-tune it (False).
freeze_pretrained = False

In [None]:
# Wrap each split in a TokenizedDataset built with the selected model parameters.
# NOTE(review): the meaning of the trailing positional `True` is defined in
# deep_learning_modules.TokenizedDataset — confirm there.
train_dataset = deep_learning_modules.TokenizedDataset(
    train_df,
    model_params,
    True,
)
valid_dataset = deep_learning_modules.TokenizedDataset(
    valid_df,
    model_params,
    True,
)

In [None]:
# Batches of 32 for both splits. Only the training loader shuffles its samples;
# the validation loader keeps DataLoader's default ordering.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=32)

# Model Training

In [None]:
# Build the selected classifier from the chosen pretrained-model parameters.
model = dl_model(model_params, freeze_pretrained=freeze_pretrained)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model to the chosen device (left as the last expression so the
# notebook displays the result).
model.to(device)

In [None]:
# Number of epochs passed to model.start_train_loop.
epochs = 4

In [None]:
# Run the training loop and keep the two statistics sequences it returns.
# NOTE(review): the last argument is presumably the directory where the loop
# stores model checkpoints — confirm in deep_learning_modules.
train_stats, validation_stats = model.start_train_loop(
    train_loader,
    valid_loader,
    epochs,
    device,
    BASE_PATH + "models/",
)

# Display Results

In [None]:
# Pair the training and validation statistics position by position and put them
# in a two-column table (presumably one row per epoch — confirm against
# start_train_loop).
df_stats = pd.DataFrame(
    list(zip(train_stats, validation_stats)),
    columns=["train loss", "validation loss"],
)

# Raw training statistics, printed as-is.
print(train_stats)

# Leave the table as the last expression so the notebook renders it.
df_stats

In [None]:
import matplotlib.pyplot as plt

import seaborn as sns

# Seaborn dark-grid styling with a larger font, on a wide figure.
sns.set(style='darkgrid', font_scale=1.5)
plt.rcParams["figure.figsize"] = (12, 6)

# 1-based epoch numbers, one per row of df_stats. Plotting against these
# (instead of the DataFrame's 0-based index) keeps every point on its own
# epoch tick, and the ticks follow the number of rows rather than being
# hard-coded to four epochs.
epoch_numbers = list(range(1, len(df_stats) + 1))

# Plot the learning curves.
plt.plot(epoch_numbers, df_stats['train loss'], 'b-o', label="Training")
plt.plot(epoch_numbers, df_stats['validation loss'], 'g-o', label="Validation")

# Label the plot.
plt.title("Training & Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()
plt.xticks(epoch_numbers)

plt.show()