# Training the Explainable Classifiers

## Importing Explainable Classifiers

The project's classifier modules are imported from `../classifiers/`, together with NumPy, pandas and scikit-learn.

In [1]:
import sys

import numpy as np
import pandas as pd
import sklearn
import sklearn.datasets

# Project-local classifier modules live outside the notebook directory.
sys.path.append("../classifiers/")
from ExplainableClassifier import ExplainableClassifier
from ExplainableLSTM import ExplainableLSTM
from ExplainableSVM import ExplainableSVM
from ExplainableNaiveBayes import ExplainableNaiveBayes
from ExplainableAttentionLSTM import ExplainableAttentionLSTM

Using TensorFlow backend.


## Loading the GloVe Wordvectors

In [2]:
# Read the word-vector text file once; the resulting `glove` object is passed to
# the LSTM / attention-LSTM constructors and import_model calls of the
# 20newsgroups section.
# NOTE(review): the file name suggests 50-dimensional GloVe 6B vectors — confirm.
glove = ExplainableLSTM.load_glove_wordvectors('../wordvectors/glove.6B.50d.txt')

Load word embeddings
Word embeddings loaded.


# 20newsgroups

### Loading the Training and Test Datasets

In [3]:
def _strip_header(text):
    """Return the part of `text` after the first blank line.

    A 20newsgroups post starts with its header lines, followed by a blank
    line and then the message body. If `text` contains no blank line the
    result is the empty string (that is what `str.partition` yields).
    """
    return text.partition('\n\n')[2]


# fetch_20newsgroups downloads the corpus on first use and reads the local
# cache afterwards.
data_ng = sklearn.datasets.fetch_20newsgroups(subset='train')
data_ng_test = sklearn.datasets.fetch_20newsgroups(subset='test')

# Both splits must share one label vocabulary; otherwise the integer targets
# of the test split would not line up with label_mapping_ng.
assert list(data_ng.target_names) == list(data_ng_test.target_names)

X_ng = np.array([_strip_header(text) for text in data_ng.data])
y_ng = np.array(data_ng.target)
X_ng_test = np.array([_strip_header(text) for text in data_ng_test.data])
y_ng_test = np.array(data_ng_test.target)

# Every document needs exactly one target.
assert len(X_ng) == len(y_ng)
assert len(X_ng_test) == len(y_ng_test)

# One-element list holding the list of class names.
# NOTE(review): presumably one mapping per classifier output — confirm against
# ExplainableClassifier.
label_mapping_ng = [data_ng.target_names]

# The raw Bunch objects are no longer needed once the arrays are extracted.
del data_ng, data_ng_test


## LSTM (20ng)

In [4]:
# Untrained LSTM classifier for 20newsgroups, built on the vectors in `glove`.
lstm_ng = ExplainableLSTM(
    glove,
    max_input_size=300,
    lstm_units=50,
)

In [5]:
# Attach the 20newsgroups class names to the model.
# NOTE(review): how label_mapping is consumed is not visible in this notebook —
# presumably it turns predicted indices into readable names; confirm.
lstm_ng.label_mapping = label_mapping_ng

In [22]:
# Fit the LSTM on the 20newsgroups training split (15 epochs, learning rate 0.005).
lstm_ng.train(
    X_ng,
    y_ng,
    learning_rate=0.005,
    epochs=15,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [28]:
# Score the trained LSTM on the held-out 20newsgroups test split.
# No `metric` is passed, so evaluate() uses its default
# (NOTE(review): presumably accuracy — confirm).
lstm_ng.evaluate(X_ng_test, y_ng_test)

[0.6671534784917684]

In [23]:
# lstm_ng.export_model('../trained_models/lstm_ng')

In [4]:
# Rebuild the LSTM from '../trained_models/lstm_ng' instead of retraining.
# This only works once the (commented-out) export_model call for the same path
# has been run; `glove` must be the word vectors the model was trained with.
lstm_ng2 = ExplainableLSTM.import_model('../trained_models/lstm_ng', glove)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


## Attention LSTM (20ng)

In [7]:
# Untrained attention-LSTM for 20newsgroups; same sizes as the plain LSTM.
att_lstm_ng = ExplainableAttentionLSTM(
    glove,
    max_input_size=300,
    lstm_units=50,
)

In [8]:
# Attach the 20newsgroups class names to the attention model.
att_lstm_ng.label_mapping = label_mapping_ng

In [9]:
# Fit the attention-LSTM on the 20newsgroups training split
# (10 epochs here versus 15 for the plain LSTM; same learning rate).
att_lstm_ng.train(
    X_ng,
    y_ng,
    learning_rate=0.005,
    epochs=10,
)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [11]:
# Score on the test split with metric='f1'; the captured output is a single
# 3-tuple.
# NOTE(review): presumably (precision, recall, F1) averaged over the 20
# classes — confirm against evaluate(). The plain LSTM above was scored with
# the default metric, so the two numbers are not directly comparable.
att_lstm_ng.evaluate(X_ng_test, y_ng_test, metric='f1')

[(0.5182479519381343, 0.5077471649873475, 0.5052878122188977)]

In [29]:
# att_lstm_ng.export_model('../trained_models/att_lstm_ng')

In [10]:
# Rebuild the attention-LSTM from '../trained_models/att_lstm_ng' instead of
# retraining; requires the (commented-out) export for that path to have been run.
att_lstm_ng2 = ExplainableAttentionLSTM.import_model('../trained_models/att_lstm_ng', glove)

Instructions for updating:
Deprecated in favor of operator or tf.math.divide.


## SVM (20ng)

In [17]:
# SVM classifier for 20newsgroups; unlike the LSTMs it takes no word vectors.
svm_ng = ExplainableSVM()
# Attach the 20newsgroups class names, as done for the LSTM models.
svm_ng.label_mapping = label_mapping_ng

In [18]:
# Fit the SVM on the full 20newsgroups training split (no hyperparameters passed).
svm_ng.train(X_ng, y_ng)

In [21]:
# Score the SVM on the full test split with evaluate()'s default metric.
svm_ng.evaluate(X_ng_test, y_ng_test)

[0.8035050451407328]

In [14]:
# svm_ng.export_model('../trained_models/svm_ng')

In [2]:
# Reload the exported SVM from '../trained_models/svm_ng'; no word vectors
# argument is needed for this model type.
svm_ng2 = ExplainableSVM.import_model('../trained_models/svm_ng')

## Naive Bayes (20ng)

In [22]:
# Naive Bayes classifier for 20newsgroups.
naive_ng = ExplainableNaiveBayes()
# Every other 20ng model in this notebook (LSTM, attention LSTM, SVM) receives
# the class-name mapping; attach it here too so all four are configured alike.
# NOTE(review): assumes ExplainableNaiveBayes uses label_mapping the same way
# as its sibling classifiers — confirm.
naive_ng.label_mapping = label_mapping_ng

In [23]:
# Fit Naive Bayes on the full 20newsgroups training split.
naive_ng.train(X_ng, y_ng)

In [24]:
# CAUTION: scored on the first 5000 test documents only, whereas the LSTM,
# attention-LSTM and SVM cells score the whole test split — this number is
# therefore not directly comparable with theirs.
# NOTE(review): presumably truncated to save evaluation time — confirm.
naive_ng.evaluate(X_ng_test[:5000], y_ng_test[:5000])

[0.725]

In [9]:
# naive_ng.export_model('../trained_models/naive_ng')

In [13]:
# Reload the exported Naive Bayes model from '../trained_models/naive_ng'.
naive_ng2 = ExplainableNaiveBayes.import_model('../trained_models/naive_ng')

# Toxic Comments

In [7]:
# CAUTION: this rebinds `glove`. From here on the name refers to the custom
# Toxic Comments vectors, not the '../wordvectors/glove.6B.50d.txt' vectors
# used by the 20newsgroups models. Re-running any 20ng LSTM cell after this one
# (without reloading the original file first) would silently pair those models
# with the wrong embeddings.
glove = ExplainableLSTM.load_glove_wordvectors('../wordvectors/tc_custom_trained_vectors.txt')

Load word embeddings
Word embeddings loaded.


In [8]:
# The six binary label columns of the Toxic Comments data. Kept in one place so
# the train and test splits are guaranteed to use the same columns in the same
# order.
TC_LABEL_COLUMNS = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']


def _load_toxic_comments(csv_path):
    """Read one Toxic Comments CSV and return ``(texts, labels)``.

    ``texts`` is the ``comment_text`` column and ``labels`` the
    ``TC_LABEL_COLUMNS`` columns, both converted with ``to_numpy()``.
    The DataFrame is local to this function, so it is released on return
    and no explicit ``del`` is needed.
    """
    frame = pd.read_csv(csv_path)
    return frame.comment_text.to_numpy(), frame[TC_LABEL_COLUMNS].to_numpy()


X_tc, y_tc = _load_toxic_comments('../datasets/toxic_comments/train_custom.csv')
X_tc_test, y_tc_test = _load_toxic_comments('../datasets/toxic_comments/test_custom.csv')

# Every comment needs exactly one row of labels.
assert len(X_tc) == len(y_tc)
assert len(X_tc_test) == len(y_tc_test)

## LSTM (Toxic Comments)

In [6]:
# Untrained LSTM for Toxic Comments; uses whatever `glove` is currently bound to
# and a shorter maximum input (250) than the 20ng models (300).
lstm_tc = ExplainableLSTM(
    glove,
    max_input_size=250,
    lstm_units=50,
)

In [6]:
# Fit the LSTM on the Toxic Comments training arrays (6 epochs, learning rate 0.005).
lstm_tc.train(
    X_tc,
    y_tc,
    epochs=6,
    learning_rate=0.005,
)

Instructions for updating:
Colocations handled automatically by placer.
Train on 181074 samples, validate on 20120 samples
Instructions for updating:
Use tf.cast instead.
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [7]:
# Score on the Toxic Comments test split with metric='f1'. The captured output
# has six 3-tuples, matching the six label columns of y_tc_test.
# NOTE(review): each tuple is presumably (precision, recall, F1) for one label —
# confirm against evaluate().
lstm_tc.evaluate(X_tc_test, y_tc_test, metric='f1')

[(0.8104395604395604, 0.6866852886405959, 0.7434475806451613),
 (0.5652173913043478, 0.14285714285714285, 0.22807017543859645),
 (0.8300198807157058, 0.6827473426001636, 0.7492148945715568),
 (0.5833333333333334, 0.29577464788732394, 0.39252336448598135),
 (0.7094017094017094, 0.664, 0.6859504132231405),
 (0.6744186046511628, 0.4416243654822335, 0.5337423312883436)]

In [21]:
# lstm_tc.export_model('../trained_models/lstm_tc')

In [9]:
# Rebuild the Toxic Comments LSTM from '../trained_models/lstm_tc'; `glove`
# must be bound to the Toxic Comments vectors when this runs.
lstm_tc2 = ExplainableLSTM.import_model('../trained_models/lstm_tc', glove)

## Attention LSTM (Toxic Comments)

In [22]:
# Untrained attention-LSTM for Toxic Comments; same sizes as the plain LSTM.
att_lstm_tc = ExplainableAttentionLSTM(
    glove,
    max_input_size=250,
    lstm_units=50,
)

In [23]:
# No epochs / learning_rate are passed, so train() falls back to its defaults;
# the captured log shows 3 epochs. The plain LSTM for this dataset was trained
# with explicit settings (6 epochs, learning_rate=0.005), so the two models are
# not trained under the same budget.
att_lstm_tc.train(X_tc, y_tc)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [24]:
# Score on the full Toxic Comments test split; one 3-tuple per label column
# appears in the captured output.
# NOTE(review): presumably (precision, recall, F1) — confirm.
att_lstm_tc.evaluate(X_tc_test, y_tc_test, metric='f1')

[(0.845372460496614, 0.6973929236499069, 0.7642857142857142),
 (0.5833333333333334, 0.07692307692307693, 0.13592233009708737),
 (0.8614900314795383, 0.6713000817661489, 0.7545955882352942),
 (0.8928571428571429, 0.352112676056338, 0.5050505050505051),
 (0.776361529548088, 0.5955555555555555, 0.6740442655935613),
 (0.6577181208053692, 0.49746192893401014, 0.5664739884393064)]

In [25]:
# att_lstm_tc.export_model('../trained_models/att_lstm_tc')

In [18]:
# Rebuild the Toxic Comments attention-LSTM from '../trained_models/att_lstm_tc'.
att_lstm_tc2 = ExplainableAttentionLSTM.import_model('../trained_models/att_lstm_tc', glove)

## SVM (Toxic Comments)

In [8]:
# SVM classifier for Toxic Comments (no label_mapping is assigned for this dataset).
svm_tc = ExplainableSVM()

In [9]:
# CAUTION: fits on the first 150,000 training rows only. The LSTM training log
# for the same X_tc reports 181074 train + 20120 validation samples, so part of
# the data is left out here.
# NOTE(review): presumably to bound SVM training time — confirm.
svm_tc.train(X_tc[:150000], y_tc[:150000])

In [10]:
# Scores at most the first 50,000 test rows (the slice is a no-op if the test
# split is shorter).
# NOTE(review): the other Toxic Comments models are scored on the unsliced test
# arrays; if the split has more than 50,000 rows these numbers cover only a
# prefix — confirm.
svm_tc.evaluate(X_tc_test[:50000], y_tc_test[:50000], metric='f1')

[(0.8296605122096486, 0.6485102420856611, 0.7279853671282989),
 (0.5227272727272727, 0.25274725274725274, 0.34074074074074073),
 (0.8450413223140496, 0.6688470973017171, 0.7466910086718394),
 (0.46875, 0.2112676056338028, 0.2912621359223301),
 (0.7452153110047847, 0.5537777777777778, 0.6353901070882203),
 (0.6161616161616161, 0.3096446700507614, 0.41216216216216217)]

In [19]:
# svm_tc.export_model('../trained_models/svm_tc')

In [20]:
# Reload the exported Toxic Comments SVM from '../trained_models/svm_tc'.
svm_tc2 = ExplainableSVM.import_model('../trained_models/svm_tc')

## Naive Bayes (Toxic Comments)

In [11]:
# Naive Bayes classifier for Toxic Comments.
naive_tc = ExplainableNaiveBayes()

In [12]:
# Fit on the full Toxic Comments training arrays (the SVM above uses only a prefix).
naive_tc.train(X_tc, y_tc)

In [15]:
# Score on the full Toxic Comments test split; one 3-tuple per label column
# appears in the captured output.
# NOTE(review): presumably (precision, recall, F1) — confirm.
naive_tc.evaluate(X_tc_test, y_tc_test, metric='f1')

[(0.6903131115459883, 0.6568901303538175, 0.6731870229007633),
 (0.2553191489361702, 0.5274725274725275, 0.34408602150537626),
 (0.612952968388589, 0.6500408830744072, 0.6309523809523809),
 (0.015384615384615385, 0.014084507042253521, 0.014705882352941176),
 (0.5563492063492064, 0.6231111111111111, 0.5878406708595388),
 (0.24897959183673468, 0.3096446700507614, 0.2760180995475113)]

In [10]:
# naive_tc.export_model('../trained_models/naive_tc')

In [21]:
# Reload the exported Toxic Comments Naive Bayes model from '../trained_models/naive_tc'.
naive_tc2 = ExplainableNaiveBayes.import_model('../trained_models/naive_tc')

----


----



----

----

----

----