In [2]:
# download ktrain
!pip install ktrain



In [4]:
#import ktrain and the ktrain.text modules
import ktrain
from ktrain import text

In [6]:
# read the dataset
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the three CSV files of the dataset (train / test / validation).
train_dataset, test_dataset, valid_dataset = (
    pd.read_csv(csv_path)
    for csv_path in ("datasets/train.csv", "datasets/test.csv", "datasets/val.csv")
)

# Pool every file into one frame; a fresh split is drawn from it below.
list_dataset = [train_dataset, test_dataset, valid_dataset]
dataset = pd.concat(list_dataset)

# Inputs come from the 'Text' column, targets from the 'Emotion' column.
Xfeatures, ylabels = dataset['Text'], dataset['Emotion']

# Hold out 10% of the pooled rows (fixed random_state so the split is
# repeatable), then convert each resulting pandas Series to a plain list.
x_train, x_test, y_train, y_test = (
    split.tolist()
    for split in train_test_split(Xfeatures, ylabels, test_size=0.1, random_state=41)
)

In [7]:
# Create a Transformer instance (ktrain's preprocessor/model factory).
MODEL_NAME = 'distilbert-base-uncased'

# Maximum sequence length. The preprocessing output in this notebook reports
# a 99th-percentile training sequence length of 52, so 256 leaves ample headroom.
MAX_LEN = 256

# Emotion labels present in the dataset's 'Emotion' column.
class_names = [
    'anger',
    'fear',
    'joy',
    'love',
    'sadness',
    'surprise',
]

t = text.Transformer(MODEL_NAME, maxlen=MAX_LEN, class_names=class_names)

In [8]:
# Preprocess the Datasets
# The training texts/labels are preprocessed first, then the held-out 10%
# split is preprocessed with the same Transformer instance `t`.
# `trn` and `val` are later passed to ktrain.get_learner as train/val data.
trn = t.preprocess_train(x_train, y_train)
val = t.preprocess_test(x_test, y_test)

preprocessing train...
language: en
train sequence lengths:
	mean : 19
	95percentile : 41
	99percentile : 52




In [9]:
# Create a Model and Wrap in Learner
# The classifier comes from the Transformer instance `t`; the Learner bundles
# it with the preprocessed training and validation data.
BATCH_SIZE = 12

model = t.get_classifier()
learner = ktrain.get_learner(
    model,
    train_data=trn,
    val_data=val,
    batch_size=BATCH_SIZE,
)

In [10]:
# Estimate a Good Learning Rate
# Simulates training across different learning rates (capped at 2 epochs) and
# shows the resulting plot, which is inspected to pick a rate for training.
learner.lr_find(show_plot=True, max_epochs=2)

simulating training for different learning rates... this may take a few moments...
Epoch 1/2


: 

: 

In [None]:
# Train Model
# Hyperparameters are named so they are easy to find and tune.
LEARNING_RATE = 5e-5  # learning rate handed to fit_onecycle
EPOCHS = 6            # number of training epochs

learner.fit_onecycle(LEARNING_RATE, EPOCHS)

In [None]:
# Evaluate/Inspect Model
# Passing class_names labels the report with the emotion names.
learner.validate(class_names=class_names)

In [None]:
# Show the single (n=1) highest-loss example, i.e. the one we got most wrong.
# The Transformer instance `t` is passed as the preprocessor.
learner.view_top_losses(n=1, preproc=t)

In [None]:
# Make Predictions on New Data
# A Predictor couples the trained model with the preprocessor `t`, so raw
# strings can be classified directly.
sample_text = "I can't imagine how hard it must be for you."

predictor = ktrain.get_predictor(learner.model, preproc=t)
predictor.predict(sample_text)

# Save Model

In [None]:
# Save the Predictor (i.e., model and Preprocessor instance) after partially training.
# A relative path is used instead of "/saved_model": writing to the filesystem
# root usually needs elevated permissions, and it did not match the
# '/content/saved_model' location referenced in the reload cell. Assuming the
# default Colab working directory (/content), "saved_model" resolves to that
# same location; elsewhere it is created next to the notebook.
predictor.save("saved_model")

# Load Model

In [None]:
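# Reload the saved Predictor. Note that ktrain.load_predictor returns a
# Predictor (model + Preprocessor), not a bare model; the path must be the
# same location that predictor.save() wrote to.
#model = ktrain.load_predictor('/content/saved_model')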