# **Install ktrain and tensorflow gpu**

In [None]:
!pip install ktrain

In [None]:
!pip install tensorflow-gpu

# **Import packages**

In [None]:
import pandas as pd
import ktrain
from ktrain import text
from sklearn.model_selection import train_test_split


In [None]:
import tensorflow as tf
from tensorflow import keras


# **Read dataset as dataframe**

In [None]:
# Read the cleaned training CSV into a DataFrame.
# NOTE(review): the path lives under /content/drive, which presumably requires
# Google Drive to be mounted first; no cell in this notebook does that.
df = pd.read_csv(
    filepath_or_buffer='/content/drive/My Drive/goemotions_aug_dairai_train_cleaned.csv'
)

In [None]:
# Keep only the first 100,000 rows so training stays tractable.
df = df.head(100000)

In [None]:
# Preview the first five rows to check the columns that were loaded.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,anger,augmented,cleaned_processed,datasource,fear,joy,sadness
0,206509,0.0,0,i feel that some korea guy are handsome and so...,dairai,0.0,1.0,0.0
1,375242,0.0,0,i put my pen to paper and made a list of thing...,dairai,0.0,0.0,0.0
2,166570,1.0,0,i wish i only had to feel the pain of the pett...,dairai,0.0,0.0,0.0
3,200580,0.0,0,i feel passionate about this journey and stand...,dairai,0.0,1.0,0.0
4,300766,0.0,0,i feel like i have convinced myself of these f...,dairai,0.0,1.0,0.0


In [None]:
# Emotion columns of the DataFrame that serve as the classification targets.
label = [
    'anger',
    'fear',
    'joy',
    'sadness',
]

In [None]:
# Expose the cleaned sentence column under the shorter name 'text'.
df = df.rename({'cleaned_processed': 'text'}, axis='columns')

# **Split the subset into training (trn) and validation (val) sets**

In [None]:
# Hold out 33% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
train, test = train_test_split(
    df,
    random_state=42,
    test_size=0.33,
)

In [None]:
# Input sentences of the training split.
train_x = train.loc[:, 'text']

In [None]:
# Target columns (one per emotion in `label`) of the training split.
train_y = train.loc[:, label]

In [None]:
# Input sentences of the held-out split.
test_x = test.loc[:, 'text']

In [None]:
# Target columns (one per emotion in `label`) of the held-out split.
test_y = test.loc[:, label]

# **get the model: albert-base-v2**


1.   Use `text.Transformer` to create the transformer preprocessor instance.
2.   Preprocess the train and test datasets; this automatically encodes the text and detects whether the task is multi-label. The resulting `trn` and `val` datasets are formatted to fit the model you specified.
3.   Get the classifier (the actual model).
4.   Wrap it in a ktrain learner.
5.   Use the ktrain learner to train the model.



In [None]:
# Identifier of the pretrained transformer checkpoint to fine-tune.
MODEL_NAME = "albert-base-v2"

In [None]:
# Build the ktrain Transformer preprocessor for the chosen checkpoint.
# maxlen=50 is close to the sequence-length percentiles reported during
# preprocessing; longer inputs are presumably truncated - TODO confirm.
t = text.Transformer(
    MODEL_NAME,
    class_names=label,
    maxlen=50,
)

In [None]:
# Encode the training sentences and their label matrix for the model.
trn = t.preprocess_train(
    train_x.to_numpy(),
    train_y.to_numpy(),
)

preprocessing train...
language: en
train sequence lengths:
	mean : 20
	95percentile : 42
	99percentile : 53


Is Multi-Label? True


In [None]:
# Encode the held-out sentences and labels; used as validation data below.
val = t.preprocess_test(
    test_x.to_numpy(),
    test_y.to_numpy(),
)

preprocessing test...
language: en
test sequence lengths:
	mean : 20
	95percentile : 42
	99percentile : 54


In [None]:
# Obtain the classifier model from the Transformer preprocessor `t`
# (configured above with MODEL_NAME and the emotion class names).
model = t.get_classifier()

In [None]:
# TensorBoard callback: writes training logs (including the graph and images)
# under ./logs/ so the run can be inspected afterwards.
tbCallBack = keras.callbacks.TensorBoard(
    log_dir='./logs/',
    write_images=True,
    write_graph=True,
)

In [None]:
# Wrap the model together with its training and validation data in a ktrain
# learner that trains with mini-batches of 16.
learner = ktrain.get_learner(
    model,
    train_data=trn,
    val_data=val,
    batch_size=16,
)

In [None]:
# Train for 5 epochs with the one-cycle policy (max learning rate 3e-5),
# logging to TensorBoard through the callback defined above.
learner.fit_onecycle(
    lr=3e-5,
    epochs=5,
    callbacks=[tbCallBack],
)



begin training using onecycle policy with max lr of 3e-05...
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f5e553a2d30>

In [None]:
# Print the layer-by-layer structure and parameter counts of the trained model.
model.summary()

Model: "tf_albert_for_sequence_classification_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
albert (TFAlbertMainLayer)   multiple                  11683584  
_________________________________________________________________
dropout_9 (Dropout)          multiple                  0         
_________________________________________________________________
classifier (Dense)           multiple                  3076      
Total params: 11,686,660
Trainable params: 11,686,660
Non-trainable params: 0
_________________________________________________________________


# **Save Model as a predictor**

In [None]:
# Bundle the trained model with its preprocessor so raw text can be scored
# directly.
predictor = ktrain.get_predictor(
    model=learner.model,
    preproc=t,
)

In [None]:
# Quick sanity check on a raw string; the displayed result pairs each emotion
# label with a score.
predictor.predict('love surface love lavender')

[('anger', 0.0013034324),
 ('fear', 0.00096694403),
 ('joy', 0.9978543),
 ('sadness', 0.00090368226)]

In [None]:
# Persist the predictor under the Drive path so it can be reloaded later with
# ktrain.load_predictor using the same path.
predictor.save('/content/drive/My Drive/albert_model_on_first100000_goemotions')

# **load model and predict**
If you want to train it again, use `predictor.model` to get the model, wrap it in a ktrain learner, and continue the training process.

In [None]:
# Reload the predictor from the same path it was saved to above.
p = ktrain.load_predictor('/content/drive/My Drive/albert_model_on_first100000_goemotions')

In [None]:
# Score an ad-hoc sentence with the reloaded predictor.
p.predict('how are you BERT, why are you so training so slow')

[('anger', 0.25451094),
 ('fear', 0.4445131),
 ('joy', 0.013134955),
 ('sadness', 0.08884486)]

In [None]:
# Example sentence expressing annoyance.
p.predict('bert is so slow, it annoys me a lot')

[('anger', 0.9948118),
 ('fear', 0.00075041334),
 ('joy', 0.001371897),
 ('sadness', 0.0020669135)]

In [None]:
# Example reprimand; the recorded output spreads its scores over several labels.
p.predict('you are always late, never do that again')

[('anger', 0.30835918),
 ('fear', 0.32899058),
 ('joy', 0.012022821),
 ('sadness', 0.3909705)]

In [None]:
# Example of a plain, seemingly neutral statement.
p.predict('I will be to school tomorrow')

[('anger', 0.5374949),
 ('fear', 0.044114694),
 ('joy', 0.09671777),
 ('sadness', 0.454767)]