In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import random
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.linear_model import Perceptron
from sklearn.calibration import CalibratedClassifierCV

In [2]:
DATA_DIR = "../data/"

In [3]:
#load in the processed and cleaned sentences and tags
sentences = np.load(os.path.join(DATA_DIR,"processed_sents_dl.npy"),allow_pickle=True)
tags = np.load(os.path.join(DATA_DIR,"processed_tags_dl.npy"),allow_pickle=True)


Since we face a class imbalance problem for this task, one of the ways to mitigate it to use ensemble methods. We train a bunch of classifiers and then take the average of all the predictions from all classifiers. Our final prediction is the class with the highest probability from the average.

In [4]:
EMBEDDING_DIM=50

In [5]:
# Loading glove embeddings
embeddings_index = {}
f = open("../embeddings/glove.6B.50d.txt", encoding="utf-8")
for line in f:
    values = line.strip().split(' ')
    word = values[0] # the first entry is the word
    coefs = np.asarray(values[1:], dtype='float32') #50d vectors   
    #representing the word
    embeddings_index[word] = coefs
f.close()

In [6]:
#create the data set with X representing the corresponding word embedding for every word in the cleaned data
#y are the BIO tags
X=[]
y=[]
for i,sent in enumerate(sentences):
    tag=tags[i]
    for j,word in enumerate(sent):
        currentTag = tag[j]
        y.append(currentTag)
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            X.append(embedding_vector)
        else:
            X.append([0]*EMBEDDING_DIM)

In [7]:
X = np.array(X)
y= np.array(y)

In [8]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.25, random_state=98)

In [9]:
print(X_train.shape)
print(X_val.shape)
print(y_train.shape)
print(y_val.shape)

(34952, 50)
(11651, 50)
(34952,)
(11651,)


I used knn, random forest, perceptron and the passive aggressive classifier as my individual models in the ensemble methods. Since the perceptron and passive aggressive classifier do not output class probabilities in sklearn I had to wrap them around a CalibratedClassifierCV to do so.

In [10]:
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(X_train, y_train)
y_pred_knn = neigh.predict_proba(X_val)

In [11]:
randomForest = RandomForestClassifier(n_estimators=50)
randomForest.fit(X_train, y_train)
y_pred_rf = randomForest.predict_proba(X_val)

In [12]:
per = Perceptron(verbose=10, max_iter=50)
calibrated_clf_per = CalibratedClassifierCV(base_estimator=per)
calibrated_clf_per.fit(X_train, y_train)
y_pred_per = calibrated_clf_per.predict_proba(X_val)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.1s remaining:    0.0s


-- Epoch 1
Norm: 16.60, NNZs: 50, Bias: -24.000000, T: 27961, Avg. loss: 0.073596
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 20.14, NNZs: 50, Bias: -22.000000, T: 55922, Avg. loss: 0.080814
Total training time: 0.02 seconds.
-- Epoch 3
Norm: 19.04, NNZs: 50, Bias: -20.000000, T: 83883, Avg. loss: 0.079962
Total training time: 0.02 seconds.
-- Epoch 4
Norm: 18.60, NNZs: 50, Bias: -21.000000, T: 111844, Avg. loss: 0.079043
Total training time: 0.03 seconds.
-- Epoch 5
Norm: 21.67, NNZs: 50, Bias: -23.000000, T: 139805, Avg. loss: 0.079652
Total training time: 0.04 seconds.
-- Epoch 6
Norm: 20.96, NNZs: 50, Bias: -28.000000, T: 167766, Avg. loss: 0.072823
Total training time: 0.04 seconds.
Convergence after 6 epochs took 0.04 seconds
-- Epoch 1
Norm: 12.53, NNZs: 50, Bias: -17.000000, T: 27961, Avg. loss: 0.065313
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 12.41, NNZs: 50, Bias: -20.000000, T: 55922, Avg. loss: 0.065487
Total training time: 0.02 seconds.
-- Epoch 3
Norm:

[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.3s remaining:    0.0s


Norm: 12.10, NNZs: 50, Bias: -21.000000, T: 55922, Avg. loss: 0.088342
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 15.55, NNZs: 50, Bias: -22.000000, T: 83883, Avg. loss: 0.082144
Total training time: 0.02 seconds.
-- Epoch 4
Norm: 15.20, NNZs: 50, Bias: -23.000000, T: 111844, Avg. loss: 0.089349
Total training time: 0.03 seconds.
-- Epoch 5
Norm: 18.64, NNZs: 50, Bias: -22.000000, T: 139805, Avg. loss: 0.082687
Total training time: 0.03 seconds.
-- Epoch 6
Norm: 18.99, NNZs: 50, Bias: -21.000000, T: 167766, Avg. loss: 0.087528
Total training time: 0.04 seconds.
-- Epoch 7
Norm: 18.91, NNZs: 50, Bias: -23.000000, T: 195727, Avg. loss: 0.085445
Total training time: 0.05 seconds.
-- Epoch 8
Norm: 18.87, NNZs: 50, Bias: -22.000000, T: 223688, Avg. loss: 0.088071
Total training time: 0.05 seconds.
Convergence after 8 epochs took 0.05 seconds
-- Epoch 1
Norm: 18.38, NNZs: 50, Bias: -29.000000, T: 27961, Avg. loss: 0.103785
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 19.64, N

[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:    0.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.1s remaining:    0.0s


-- Epoch 1
Norm: 18.62, NNZs: 50, Bias: -25.000000, T: 27961, Avg. loss: 0.076488
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 20.23, NNZs: 50, Bias: -23.000000, T: 55922, Avg. loss: 0.076380
Total training time: 0.02 seconds.
-- Epoch 3
Norm: 19.42, NNZs: 50, Bias: -24.000000, T: 83883, Avg. loss: 0.077738
Total training time: 0.03 seconds.
-- Epoch 4
Norm: 20.98, NNZs: 50, Bias: -22.000000, T: 111844, Avg. loss: 0.080466
Total training time: 0.03 seconds.
-- Epoch 5
Norm: 20.85, NNZs: 50, Bias: -21.000000, T: 139805, Avg. loss: 0.078993
Total training time: 0.04 seconds.
-- Epoch 6
Norm: 23.13, NNZs: 50, Bias: -25.000000, T: 167766, Avg. loss: 0.075402
Total training time: 0.04 seconds.
Convergence after 6 epochs took 0.05 seconds
-- Epoch 1
Norm: 13.15, NNZs: 50, Bias: -13.000000, T: 27961, Avg. loss: 0.065231
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 15.51, NNZs: 50, Bias: -19.000000, T: 55922, Avg. loss: 0.062996
Total training time: 0.02 seconds.
-- Epoch 3
Norm:

[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.3s remaining:    0.0s


Norm: 15.64, NNZs: 50, Bias: -26.000000, T: 139805, Avg. loss: 0.094701
Total training time: 0.03 seconds.
-- Epoch 6
Norm: 16.51, NNZs: 50, Bias: -23.000000, T: 167766, Avg. loss: 0.100928
Total training time: 0.03 seconds.
Convergence after 6 epochs took 0.03 seconds
-- Epoch 1
Norm: 13.83, NNZs: 50, Bias: -22.000000, T: 27961, Avg. loss: 0.082798
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 13.96, NNZs: 50, Bias: -23.000000, T: 55922, Avg. loss: 0.083402
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 13.69, NNZs: 50, Bias: -24.000000, T: 83883, Avg. loss: 0.086077
Total training time: 0.02 seconds.
-- Epoch 4
Norm: 12.99, NNZs: 50, Bias: -23.000000, T: 111844, Avg. loss: 0.083772
Total training time: 0.02 seconds.
-- Epoch 5
Norm: 15.44, NNZs: 50, Bias: -22.000000, T: 139805, Avg. loss: 0.082921
Total training time: 0.03 seconds.
-- Epoch 6
Norm: 15.58, NNZs: 50, Bias: -20.000000, T: 167766, Avg. loss: 0.083868
Total training time: 0.03 seconds.
Convergence after 6 epoch

[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:    0.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


-- Epoch 1
Norm: 19.73, NNZs: 50, Bias: -20.000000, T: 27962, Avg. loss: 0.082932
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 21.36, NNZs: 50, Bias: -23.000000, T: 55924, Avg. loss: 0.081939
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 20.50, NNZs: 50, Bias: -22.000000, T: 83886, Avg. loss: 0.080901
Total training time: 0.02 seconds.
-- Epoch 4
Norm: 21.81, NNZs: 50, Bias: -24.000000, T: 111848, Avg. loss: 0.080510
Total training time: 0.02 seconds.
-- Epoch 5
Norm: 24.74, NNZs: 50, Bias: -23.000000, T: 139810, Avg. loss: 0.083768
Total training time: 0.03 seconds.
-- Epoch 6
Norm: 23.34, NNZs: 50, Bias: -23.000000, T: 167772, Avg. loss: 0.080411
Total training time: 0.04 seconds.
-- Epoch 7
Norm: 20.74, NNZs: 50, Bias: -25.000000, T: 195734, Avg. loss: 0.080871
Total training time: 0.05 seconds.
-- Epoch 8
Norm: 22.22, NNZs: 50, Bias: -19.000000, T: 223696, Avg. loss: 0.081983
Total training time: 0.05 seconds.
Convergence after 8 epochs took 0.05 seconds
-- Epoch 1
Nor

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.1s remaining:    0.0s



Total training time: 0.05 seconds.
Convergence after 8 epochs took 0.05 seconds
-- Epoch 1
Norm: 21.45, NNZs: 50, Bias: -20.000000, T: 27962, Avg. loss: 0.186506
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 23.89, NNZs: 50, Bias: -23.000000, T: 55924, Avg. loss: 0.199105
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 26.28, NNZs: 50, Bias: -19.000000, T: 83886, Avg. loss: 0.190756
Total training time: 0.01 seconds.
-- Epoch 4
Norm: 24.93, NNZs: 50, Bias: -23.000000, T: 111848, Avg. loss: 0.191049
Total training time: 0.02 seconds.
-- Epoch 5
Norm: 32.03, NNZs: 50, Bias: -25.000000, T: 139810, Avg. loss: 0.188767
Total training time: 0.02 seconds.
-- Epoch 6
Norm: 35.18, NNZs: 50, Bias: -20.000000, T: 167772, Avg. loss: 0.195769
Total training time: 0.03 seconds.
Convergence after 6 epochs took 0.03 seconds
-- Epoch 1
Norm: 13.79, NNZs: 50, Bias: -24.000000, T: 27962, Avg. loss: 0.057178
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 17.75, NNZs: 50, Bias: -24.000000, 

[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.3s remaining:    0.0s


Norm: 17.59, NNZs: 50, Bias: -25.000000, T: 139810, Avg. loss: 0.107970
Total training time: 0.03 seconds.
-- Epoch 6
Norm: 19.99, NNZs: 50, Bias: -26.000000, T: 167772, Avg. loss: 0.106328
Total training time: 0.03 seconds.
Convergence after 6 epochs took 0.03 seconds
-- Epoch 1
Norm: 18.83, NNZs: 50, Bias: -22.000000, T: 27962, Avg. loss: 0.137189
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 21.26, NNZs: 50, Bias: -19.000000, T: 55924, Avg. loss: 0.133075
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 20.50, NNZs: 50, Bias: -22.000000, T: 83886, Avg. loss: 0.133954
Total training time: 0.02 seconds.
-- Epoch 4
Norm: 21.54, NNZs: 50, Bias: -22.000000, T: 111848, Avg. loss: 0.133343
Total training time: 0.02 seconds.
-- Epoch 5
Norm: 22.06, NNZs: 50, Bias: -17.000000, T: 139810, Avg. loss: 0.136895
Total training time: 0.03 seconds.
-- Epoch 6
Norm: 25.93, NNZs: 50, Bias: -16.000000, T: 167772, Avg. loss: 0.134856
Total training time: 0.03 seconds.
-- Epoch 7
Norm: 22.62, N

[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:    0.4s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s


-- Epoch 1
Norm: 20.38, NNZs: 50, Bias: -24.000000, T: 27962, Avg. loss: 0.077575
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 17.02, NNZs: 50, Bias: -25.000000, T: 55924, Avg. loss: 0.080674
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 22.06, NNZs: 50, Bias: -24.000000, T: 83886, Avg. loss: 0.077369
Total training time: 0.02 seconds.
-- Epoch 4
Norm: 22.44, NNZs: 50, Bias: -26.000000, T: 111848, Avg. loss: 0.084176
Total training time: 0.03 seconds.
-- Epoch 5
Norm: 22.32, NNZs: 50, Bias: -24.000000, T: 139810, Avg. loss: 0.078185
Total training time: 0.04 seconds.
-- Epoch 6
Norm: 21.32, NNZs: 50, Bias: -22.000000, T: 167772, Avg. loss: 0.079156
Total training time: 0.04 seconds.
Convergence after 6 epochs took 0.04 seconds
-- Epoch 1
Norm: 12.11, NNZs: 50, Bias: -17.000000, T: 27962, Avg. loss: 0.066714
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 13.44, NNZs: 50, Bias: -18.000000, T: 55924, Avg. loss: 0.066477
Total training time: 0.01 seconds.
-- Epoch 3
Norm:

[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.2s remaining:    0.0s



Total training time: 0.05 seconds.
-- Epoch 8
Norm: 21.03, NNZs: 50, Bias: -23.000000, T: 223696, Avg. loss: 0.125174
Total training time: 0.06 seconds.
-- Epoch 9
Norm: 25.11, NNZs: 50, Bias: -13.000000, T: 251658, Avg. loss: 0.123487
Total training time: 0.07 seconds.
-- Epoch 10
Norm: 23.31, NNZs: 50, Bias: -15.000000, T: 279620, Avg. loss: 0.118493
Total training time: 0.07 seconds.
Convergence after 10 epochs took 0.07 seconds
-- Epoch 1
Norm: 18.47, NNZs: 50, Bias: -17.000000, T: 27962, Avg. loss: 0.220132
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 24.26, NNZs: 50, Bias: -21.000000, T: 55924, Avg. loss: 0.212491
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 22.39, NNZs: 50, Bias: -22.000000, T: 83886, Avg. loss: 0.233046
Total training time: 0.02 seconds.
-- Epoch 4
Norm: 22.62, NNZs: 50, Bias: -15.000000, T: 111848, Avg. loss: 0.231130
Total training time: 0.03 seconds.
-- Epoch 5
Norm: 27.45, NNZs: 50, Bias: -22.000000, T: 139810, Avg. loss: 0.225840
Total train

[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.4s remaining:    0.0s


Norm: 14.21, NNZs: 50, Bias: -23.000000, T: 111848, Avg. loss: 0.023841
Total training time: 0.02 seconds.
-- Epoch 5
Norm: 14.47, NNZs: 50, Bias: -24.000000, T: 139810, Avg. loss: 0.022878
Total training time: 0.02 seconds.
-- Epoch 6
Norm: 15.31, NNZs: 50, Bias: -27.000000, T: 167772, Avg. loss: 0.022743
Total training time: 0.03 seconds.
Convergence after 6 epochs took 0.03 seconds
-- Epoch 1
Norm: 13.96, NNZs: 50, Bias: -18.000000, T: 27962, Avg. loss: 0.096673
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 14.14, NNZs: 50, Bias: -19.000000, T: 55924, Avg. loss: 0.100361
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 13.05, NNZs: 50, Bias: -22.000000, T: 83886, Avg. loss: 0.099210
Total training time: 0.01 seconds.
-- Epoch 4
Norm: 11.70, NNZs: 50, Bias: -21.000000, T: 111848, Avg. loss: 0.099881
Total training time: 0.02 seconds.
-- Epoch 5
Norm: 12.84, NNZs: 50, Bias: -21.000000, T: 139810, Avg. loss: 0.098259
Total training time: 0.02 seconds.
-- Epoch 6
Norm: 12.29, N

[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:    0.5s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.



Norm: 19.48, NNZs: 50, Bias: -21.000000, T: 27962, Avg. loss: 0.079966
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 19.18, NNZs: 50, Bias: -24.000000, T: 55924, Avg. loss: 0.078500
Total training time: 0.02 seconds.
-- Epoch 3
Norm: 22.68, NNZs: 50, Bias: -24.000000, T: 83886, Avg. loss: 0.073171
Total training time: 0.02 seconds.
-- Epoch 4
Norm: 23.17, NNZs: 50, Bias: -28.000000, T: 111848, Avg. loss: 0.079827
Total training time: 0.03 seconds.
-- Epoch 5
Norm: 22.81, NNZs: 50, Bias: -24.000000, T: 139810, Avg. loss: 0.081385
Total training time: 0.03 seconds.
-- Epoch 6
Norm: 22.41, NNZs: 50, Bias: -25.000000, T: 167772, Avg. loss: 0.082399
Total training time: 0.04 seconds.
-- Epoch 7
Norm: 23.68, NNZs: 50, Bias: -29.000000, T: 195734, Avg. loss: 0.078714
Total training time: 0.05 seconds.
-- Epoch 8
Norm: 25.04, NNZs: 50, Bias: -24.000000, T: 223696, Avg. loss: 0.084918
Total training time: 0.05 seconds.
Convergence after 8 epochs took 0.05 seconds
-- Epoch 1
Norm: 13.65, 

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:    0.1s remaining:    0.0s


-- Epoch 4
Norm: 27.04, NNZs: 50, Bias: -19.000000, T: 111848, Avg. loss: 0.230682
Total training time: 0.02 seconds.
-- Epoch 5
Norm: 27.35, NNZs: 50, Bias: -18.000000, T: 139810, Avg. loss: 0.227636
Total training time: 0.03 seconds.
-- Epoch 6
Norm: 29.85, NNZs: 50, Bias: -15.000000, T: 167772, Avg. loss: 0.219568
Total training time: 0.04 seconds.
-- Epoch 7
Norm: 34.55, NNZs: 50, Bias: -17.000000, T: 195734, Avg. loss: 0.235954
Total training time: 0.05 seconds.
-- Epoch 8
Norm: 32.31, NNZs: 50, Bias: -16.000000, T: 223696, Avg. loss: 0.225844
Total training time: 0.06 seconds.
Convergence after 8 epochs took 0.06 seconds
-- Epoch 1
Norm: 19.43, NNZs: 50, Bias: -22.000000, T: 27962, Avg. loss: 0.193288
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 25.34, NNZs: 50, Bias: -20.000000, T: 55924, Avg. loss: 0.190061
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 26.24, NNZs: 50, Bias: -19.000000, T: 83886, Avg. loss: 0.196699
Total training time: 0.02 seconds.
-- Epoch 4
Nor

[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed:    0.3s remaining:    0.0s


Norm: 16.46, NNZs: 50, Bias: -27.000000, T: 223696, Avg. loss: 0.023851
Total training time: 0.05 seconds.
Convergence after 8 epochs took 0.05 seconds
-- Epoch 1
Norm: 12.02, NNZs: 50, Bias: -18.000000, T: 27962, Avg. loss: 0.096023
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 12.22, NNZs: 50, Bias: -21.000000, T: 55924, Avg. loss: 0.096710
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 17.52, NNZs: 50, Bias: -19.000000, T: 83886, Avg. loss: 0.096128
Total training time: 0.02 seconds.
-- Epoch 4
Norm: 13.18, NNZs: 50, Bias: -20.000000, T: 111848, Avg. loss: 0.098432
Total training time: 0.03 seconds.
-- Epoch 5
Norm: 15.86, NNZs: 50, Bias: -24.000000, T: 139810, Avg. loss: 0.095951
Total training time: 0.03 seconds.
-- Epoch 6
Norm: 14.20, NNZs: 50, Bias: -20.000000, T: 167772, Avg. loss: 0.100165
Total training time: 0.04 seconds.
Convergence after 6 epochs took 0.04 seconds
-- Epoch 1
Norm: 16.12, NNZs: 50, Bias: -20.000000, T: 27962, Avg. loss: 0.079561
Total training t

[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed:    0.4s remaining:    0.0s
[Parallel(n_jobs=1)]: Done  13 out of  13 | elapsed:    0.6s finished


Norm: 13.83, NNZs: 50, Bias: -18.000000, T: 55924, Avg. loss: 0.084448
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 13.73, NNZs: 50, Bias: -19.000000, T: 83886, Avg. loss: 0.084744
Total training time: 0.02 seconds.
-- Epoch 4
Norm: 16.11, NNZs: 50, Bias: -25.000000, T: 111848, Avg. loss: 0.082488
Total training time: 0.03 seconds.
-- Epoch 5
Norm: 17.39, NNZs: 50, Bias: -22.000000, T: 139810, Avg. loss: 0.084999
Total training time: 0.03 seconds.
-- Epoch 6
Norm: 16.93, NNZs: 50, Bias: -22.000000, T: 167772, Avg. loss: 0.085221
Total training time: 0.04 seconds.
Convergence after 6 epochs took 0.04 seconds
-- Epoch 1
Norm: 29.65, NNZs: 50, Bias: 4.000000, T: 27962, Avg. loss: 0.961092
Total training time: 0.01 seconds.
-- Epoch 2
Norm: 34.74, NNZs: 50, Bias: 3.000000, T: 55924, Avg. loss: 0.962623
Total training time: 0.01 seconds.
-- Epoch 3
Norm: 37.98, NNZs: 50, Bias: 2.000000, T: 83886, Avg. loss: 0.954440
Total training time: 0.02 seconds.
-- Epoch 4
Norm: 45.51, NNZs: 50,

In [13]:
pa =PassiveAggressiveClassifier()
calibrated_clf_pa = CalibratedClassifierCV(base_estimator=pa)
calibrated_clf_pa.fit(X_train, y_train)
y_pred_pa = calibrated_clf_pa.predict_proba(X_val)

In [14]:
print(y_pred_knn.shape)
print(y_pred_rf.shape)
print(y_pred_per.shape)
print(y_pred_pa.shape)

(11651, 13)
(11651, 13)
(11651, 13)
(11651, 13)


In [15]:
assert all(neigh.classes_) == all(calibrated_clf_pa.classes_) == all(calibrated_clf_per.classes_) == all(randomForest.classes_)

In [16]:
#as before we are going to evaluate the model on the F1 score using all classes except 'O'
all_classes = list(np.unique(y_train))

new_classes = all_classes.copy()

new_classes.remove('O')

In [17]:
#given the predictions from the 4 models above, calculate the average of all and return the class with the highest probability
def ensemble_predict(pred_knn,pred_rf,pred_per,pred_pa,groundTruth):

    mean_pred = np.mean([pred_knn,pred_rf,pred_per,pred_pa], axis=0)

    return randomForest.classes_[np.argmax(mean_pred)]

In [18]:
#loop over all the predictions from the 4 models and get the final prediction
predictions=[]

for i in range(X_val.shape[0]):
    groundTruth = y_val[i]
    pred_knn,pred_rf,pred_per,pred_pa = y_pred_knn[i],y_pred_rf[i],y_pred_per[i],y_pred_pa[i]
    predictions.append(ensemble_predict(pred_knn,pred_rf,pred_per,pred_pa,groundTruth))
    
predictions=np.array(predictions)

report = classification_report(y_val, predictions, digits=4,labels=new_classes)

  _warn_prf(average, modifier, msg_start, len(result))


In [19]:
#overall ensemble learning didn't do very well having a F1 score of just 0.22
print(report)

                 precision    recall  f1-score   support

  B-corporation     0.9412    0.3265    0.4848        49
B-creative-work     0.0000    0.0000    0.0000        38
        B-group     0.3333    0.0175    0.0333        57
     B-location     0.8125    0.1024    0.1818       127
       B-person     0.8026    0.3742    0.5105       163
      B-product     1.0000    0.1935    0.3243        31
  I-corporation     0.0000    0.0000    0.0000        12
I-creative-work     0.0000    0.0000    0.0000        54
        I-group     0.0000    0.0000    0.0000        27
     I-location     0.4000    0.0308    0.0571        65
       I-person     0.8462    0.1375    0.2366        80
      I-product     0.0000    0.0000    0.0000        42

      micro avg     0.8088    0.1477    0.2497       745
      macro avg     0.4280    0.0985    0.1524       745
   weighted avg     0.5689    0.1477    0.2210       745

