In [0]:
# Uncomment and Execute first 2 cells steps for running in Colab
# from google.colab import drive
# drive.mount('/content/drive/')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive/


**Please note that you may have to change the path below based on the location of the folder**

In [0]:
# %cd '/content/drive/My Drive/fnc-main'

/content/drive/My Drive/Colab Notebooks/fnc-main


In [1]:
#import libraries
from __future__ import print_function
import os
import sys
import numpy as np
import json
import pandas as pd
import time

from xgboost import XGBClassifier
from sklearn.ensemble import GradientBoostingClassifier
from feature_engineering import refuting_features, polarity_features, hand_features, gen_or_load_feats
from feature_engineering import word_overlap_features, NMF_cos_50, LDA_cos_25
from utils.dataset import DataSet
from utils.generate_test_splits import kfold_split, get_stances_for_folds
from utils.score import report_score, LABELS, score_submission
from utils.system import parse_params, check_version

#Model 2 dependencies
from sklearn.model_selection import train_test_split
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten,BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import accuracy_score,confusion_matrix,f1_score
import matplotlib.pyplot as plt
import random
from random import choice

Using TensorFlow backend.


In [2]:
#setting seed to replicate results
seed=786
np.random.seed(seed)
from tensorflow import set_random_seed
set_random_seed(seed)

In [0]:
#save features in dataframe
train_feature_data =  pd.DataFrame(columns=['headline','body_id','stance'])
comp_feature_data =  pd.DataFrame(columns=['headline','body_id','stance'])

In [0]:
root_path="./hierarchicalModel-data/"

In [0]:
#genereate features to train
def generate_features(stances,dataset,name):
    h, b, y = [],[],[]
    rows = []
    for stance in stances:
        row = []
        y.append(LABELS.index(stance['Stance']))
        h.append(stance['Headline'])
        b.append(dataset.articles[stance['Body ID']])
        row.append(stance['Headline'])
        row.append(dataset.articles[stance['Body ID']])
        row.append(LABELS.index(stance['Stance']))
        rows.append(row)

    X_overlap = gen_or_load_feats(word_overlap_features, h, b, "features/overlap."+name+".npy")
    X_refuting = gen_or_load_feats(refuting_features, h, b, "features/refuting."+name+".npy")
    X_polarity = gen_or_load_feats(polarity_features, h, b, "features/polarity."+name+".npy")
    X_hand = gen_or_load_feats(hand_features, h, b, "features/hand."+name+".npy")

    ######Topic Modelling - New Features Added######
    X_NMF = gen_or_load_feats(NMF_cos_50, h, b, "features/nmf."+name+".npy")
    X_LDA = gen_or_load_feats(LDA_cos_25, h, b, "features/lda-25."+name+".npy")

    X = np.c_[X_hand, X_polarity, X_refuting, X_overlap, X_NMF, X_LDA]
    if(name == "competition"):
        if not (os.path.isfile(root_path+'comp_feature_data.csv')):
            comp_feature_data['stance'] = y
            comp_feature_data['headline'] = h
            comp_feature_data['body_id'] = b
            for i in range(0,X.shape[1]):
                comp_feature_data[i] = X[:,i]

    if(name == "full"):
        if not (os.path.isfile(root_path+'train_feature_data.csv')):
            train_feature_data['stance'] = y
            train_feature_data['headline'] = h
            train_feature_data['body_id'] = b
            for i in range(0,X.shape[1]):
                train_feature_data[i] = X[:,i]
    return X,y

In [0]:
#Load the training dataset and generate folds
d = DataSet()

X_full,y_full = generate_features(d.stances,d,"full")
#for binary classification - related and unrelated
y_full = [x if x==3 else 2 for x in y_full]

#removing folds return train and holdout split - check if distribution same - does it matter
folds,hold_out = kfold_split(d,n_folds=10)
fold_stances, hold_out_stances = get_stances_for_folds(d,folds,hold_out)

X_holdout,y_holdout = generate_features(hold_out_stances,d,"holdout")
y_holdout = [x if x==3 else 2 for x in y_holdout]

#load training data
X_train, y_train = generate_features(fold_stances, d, "train_n")
y_train = [x if x==3 else 2 for x in y_train]

# Load the competition dataset
competition_dataset = DataSet("competition_test")
X_competition, y_competition = generate_features(competition_dataset.stances, competition_dataset, "competition")
y_competition = [x if x==3 else 2 for x in y_competition]

Reading dataset
Total stances: 49972
Total bodies: 1683
Reading dataset
Total stances: 25413
Total bodies: 904


Classifier 1 training (XGBoost) starts here :

In [0]:
#Train classifier on 2 classes
param = {'eta':1, 'objectve' : "binary:logistic" , 'n_estimators':150, 'seed':10}

clf = XGBClassifier(**param)
start = int(round(time.time()*1000))
end = int(round(time.time()*1000))
train_time = end - start
clf.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_holdout, y_holdout)], verbose=True)


[0]	validation_0-error:0.019033	validation_1-error:0.04417
[1]	validation_0-error:0.018538	validation_1-error:0.042819
[2]	validation_0-error:0.017918	validation_1-error:0.042507
[3]	validation_0-error:0.017869	validation_1-error:0.042195
[4]	validation_0-error:0.017893	validation_1-error:0.041052
[5]	validation_0-error:0.017596	validation_1-error:0.040636
[6]	validation_0-error:0.01715	validation_1-error:0.040948
[7]	validation_0-error:0.0172	validation_1-error:0.040532
[8]	validation_0-error:0.017571	validation_1-error:0.041052
[9]	validation_0-error:0.017323	validation_1-error:0.041364
[10]	validation_0-error:0.017299	validation_1-error:0.041052
[11]	validation_0-error:0.01715	validation_1-error:0.039493
[12]	validation_0-error:0.017249	validation_1-error:0.041052
[13]	validation_0-error:0.017125	validation_1-error:0.039701
[14]	validation_0-error:0.01715	validation_1-error:0.038765
[15]	validation_0-error:0.017076	validation_1-error:0.039805
[16]	validation_0-error:0.017026	validat

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, eta=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=150, n_jobs=1,
              nthread=None, objective='binary:logistic',
              objectve='binary:logistic', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=1, seed=10, silent=None,
              subsample=1, verbosity=1)

In [0]:
#predict on train and holdout
y_pred_train = clf.predict(X_train)
y_pred = clf.predict(X_holdout)
y_pred_onfull = clf.predict(X_full)

In [0]:
#save features for training data
if not (os.path.isfile(root_path+'train_feature_data.csv')):
  train_feature_data['predicted_stance'] = y_pred_onfull
  train_feature_data.to_csv(root_path+'train_feature_data.csv', index = False)
  #check file
  feature_df = pd.read_csv(root_path+'train_feature_data.csv')
  print("train data file size : ", feature_df.shape)
  print("train data file: ", feature_df.head())

In [0]:
#get scores for binary classification: all Related mapped to 'discuss' class
predicted = [LABELS[int(a)] for a in y_pred_train]
actual = [LABELS[int(a)] for a in y_train]
print("Scores on the train set")
report_score(actual,predicted)
print("")
print("")

predicted = [LABELS[int(a)] for a in y_pred]
actual = [LABELS[int(a)] for a in y_holdout]
print("Scores on the dev set")
report_score(actual,predicted)
print("")
print("")

test_pred = clf.predict(X_competition)
predicted = [LABELS[int(a)] for a in test_pred]
actual = [LABELS[int(a)] for a in y_competition]

print("Scores on the test set")
report_score(actual,predicted)

Scores on the train set
-------------------------------------------------------------
|           |   agree   | disagree  |  discuss  | unrelated |
-------------------------------------------------------------
|   agree   |     0     |     0     |     0     |     0     |
-------------------------------------------------------------
| disagree  |     0     |     0     |     0     |     0     |
-------------------------------------------------------------
|  discuss  |     0     |     0     |   10544   |    159    |
-------------------------------------------------------------
| unrelated |     0     |     0     |    361    |   29286   |
-------------------------------------------------------------
Score: 17865.5 out of 18114.75	(98.62404946245464%)


Scores on the dev set
-------------------------------------------------------------
|           |   agree   | disagree  |  discuss  | unrelated |
-------------------------------------------------------------
|   agree   |     0     |     0 

96.31370024675464

In [0]:
#save features of competition dataset
if not (os.path.isfile(root_path+'comp_feature_data.csv')):
    comp_feature_data['predicted_stance'] = test_pred
    comp_feature_data.to_csv(root_path+'comp_feature_data.csv', index = False)
    #check file
    feature_df = pd.read_csv(root_path+'comp_feature_data.csv')
    print("comp data file size : ", feature_df.shape)
    print("comp data file: ", feature_df.head())

print("train time: ",train_time)

train time:  0


Hierarchical Architecture along with code forClassifier 2 (BERT + DNN ) starts here : 

In [0]:
# Getting BERT Embeddings for Train and Test Data
df_train=pd.read_csv(root_path+"Train_BERT.csv")
df_test=pd.read_csv(root_path+"Test_BERT.csv")
df_train=df_train.drop(["Unnamed: 0"],axis=1)
df_test=df_test.drop(["Unnamed: 0"],axis=1)

In [0]:
# Filtering BERT Embeddings only for Training Data for 3 Classes i.e. Dropping Rows for related class because they are not used to train DNN model. 
df_related=df_train[df_train["Stance"]!=4]
df_train=df_related

In [0]:
# Separating Stance from Embeddings for Training Data.
X=df_train.drop(["Stance"],axis=1)
y=df_train["Stance"]
# Separating Stance from Embeddings for Testing Data.
X_comp=df_test.drop(["Stance"],axis=1)
y_comp=df_test["Stance"]

In [0]:
# Training and Validation Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=seed)

In [0]:
y_gold=y_comp


In [0]:
## Data Cleaning because of label offset in input files
y_train=np.where(y_train==1, 0, y_train) 
y_train=np.where(y_train==2, 1, y_train)
y_train=np.where(y_train==3, 2, y_train)

y_test=np.where(y_test==1, 0, y_test) 
y_test=np.where(y_test==2, 1, y_test)
y_test=np.where(y_test==3, 2, y_test)

y_t=y_train
y_val=y_test
## Encdoding label for multiclass classification with DNN in Keras
y_train = keras.utils.to_categorical(y_train, 3)
y_test = keras.utils.to_categorical(y_test, 3)


In [0]:
# Training XG Boost Classifier on BERT Embeddings for 3 class classification
model_xg = XGBClassifier()
model_xg.fit(X_train, y_t)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0,
              learning_rate=0.1, max_delta_step=0, max_depth=3,
              min_child_weight=1, missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='multi:softprob', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample=1, verbosity=1)

In [0]:
# Prediction of Training and Validation Results
y_pred_xg_train = model_xg.predict(X_train)
y_pred_xg_val = model_xg.predict(X_test)


In [0]:
print("Training Confusion Matrix for 3 Class prediction by XGBoost+BERT \n",confusion_matrix(y_t,y_pred_xg_train),"\nF1 Score Train ",f1_score(y_t,y_pred_xg_train,average='macro'))
print("Validation Confusion Matrix for 3 Class prediction by XGBoost+BERT\n",confusion_matrix(y_val,y_pred_xg_val),"\nF1 Score Train ",f1_score(y_val,y_pred_xg_val,average='macro'))

Training Confusion Matrix for 3 Class prediction by XGBoost+BERT 
 [[1725   72  806]
 [ 181  189  225]
 [ 328   32 5841]] 
F1 Score Train  0.6775083941333011
Validation Confusion Matrix for 3 Class prediction by XGBoost+BERT
 [[ 646   35  395]
 [  93   61   91]
 [ 162   23 2523]] 
F1 Score Train  0.6237703376578914


In [0]:
y_comp_xg = model_xg.predict(X_comp)

In [0]:
# Model 1 for BERT + DNN
model = Sequential()
model.add(Dense(768))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(300))
model.add(BatchNormalization())
model.add(Activation('relu'))
# model.add(Dropout(0.3))
# model.add(Dense(200))
# model.add(BatchNormalization())
# model.add(Activation('relu'))
model.add(Dense(3))
model.add(Activation('softmax'))

In [0]:
# Model2 BERT+DNN with different class weights
model2 = Sequential()
model2.add(Dense(768))
model2.add(BatchNormalization())
model2.add(Activation('relu'))
model2.add(Dropout(0.5))
model2.add(Dense(500))
model2.add(BatchNormalization())
model2.add(Activation('relu'))
model2.add(Dropout(0.5))
model2.add(Dense(500))
model2.add(Activation('relu'))
model2.add(Dropout(0.3))
model2.add(Dense(500))
model2.add(BatchNormalization())
model2.add(Activation('relu'))
model2.add(Dense(3))
model2.add(Activation('softmax'))

In [0]:
# Initiate adam optimizer and compile both models
opt = keras.optimizers.adam()

model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
model2.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [0]:
#Defining Class weights for model2
class_weight = {0: 1.,
                1: 2.,
                2: 1.}

In [0]:
# Code to train model 1 and save best check point
filepath=root_path+"weights.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]
history=model.fit(X_train.values, y_train,
              batch_size=32,
              callbacks=callbacks_list,
              epochs=20,
              validation_data=(X_test.values, y_test),
              shuffle=True)

Train on 9399 samples, validate on 4029 samples
Epoch 1/20

Epoch 00001: val_loss improved from -inf to 0.52896, saving model to ./hierarchicalModel-data/weights.best.hdf5
Epoch 2/20

Epoch 00002: val_loss did not improve from 0.52896
Epoch 3/20

Epoch 00003: val_loss did not improve from 0.52896
Epoch 4/20

Epoch 00004: val_loss did not improve from 0.52896
Epoch 5/20

Epoch 00005: val_loss did not improve from 0.52896
Epoch 6/20

Epoch 00006: val_loss did not improve from 0.52896
Epoch 7/20

Epoch 00007: val_loss did not improve from 0.52896
Epoch 8/20

Epoch 00008: val_loss did not improve from 0.52896
Epoch 9/20

Epoch 00009: val_loss did not improve from 0.52896
Epoch 10/20

Epoch 00010: val_loss did not improve from 0.52896
Epoch 11/20

Epoch 00011: val_loss did not improve from 0.52896
Epoch 12/20

Epoch 00012: val_loss did not improve from 0.52896
Epoch 13/20

Epoch 00013: val_loss did not improve from 0.52896
Epoch 14/20

Epoch 00014: val_loss did not improve from 0.52896
Epoc

In [0]:
#Training for Model2 and Saving best weights
filepath=root_path+"weights2.best.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]
history2=model2.fit(X_train.values, y_train,
              batch_size=32,
              callbacks=callbacks_list,
              epochs=20,
              validation_data=(X_test.values, y_test),
              shuffle=True,class_weight=class_weight)

Train on 9399 samples, validate on 4029 samples
Epoch 1/20

Epoch 00001: val_loss improved from -inf to 0.64078, saving model to ./hierarchicalModel-data/weights2.best.hdf5
Epoch 2/20

Epoch 00002: val_loss did not improve from 0.64078
Epoch 3/20

Epoch 00003: val_loss did not improve from 0.64078
Epoch 4/20

Epoch 00004: val_loss did not improve from 0.64078
Epoch 5/20

Epoch 00005: val_loss did not improve from 0.64078
Epoch 6/20

Epoch 00006: val_loss did not improve from 0.64078
Epoch 7/20

Epoch 00007: val_loss did not improve from 0.64078
Epoch 8/20

Epoch 00008: val_loss did not improve from 0.64078
Epoch 9/20

Epoch 00009: val_loss did not improve from 0.64078
Epoch 10/20

Epoch 00010: val_loss did not improve from 0.64078
Epoch 11/20

Epoch 00011: val_loss did not improve from 0.64078
Epoch 12/20

Epoch 00012: val_loss did not improve from 0.64078
Epoch 13/20

Epoch 00013: val_loss did not improve from 0.64078
Epoch 14/20

Epoch 00014: val_loss did not improve from 0.64078
Epo

In [0]:
# Loading Best weights into the mode
model.load_weights(root_path+"weights.best.hdf5")
model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
model2.load_weights(root_path+"weights2.best.hdf5")
model2.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [0]:
#Predictions  for Train, Validation and Test data for both models
p_model1=model.predict_classes(X_comp.values)
p_model2=model2.predict_classes(X_comp.values)
p_model1_train=model.predict_classes(X_train.values)
p_model1_val=model.predict_classes(X_test.values)
p_model2_train=model2.predict_classes(X_train.values)
p_model2_val=model2.predict_classes(X_test.values)

In [0]:
# Printing Confusion matrix and F1 Score for all the models 
print("Training Confusion Matrix for 3 Class prediction by BERT+DNN Model1 \n",confusion_matrix(y_t,p_model1_train),"\nF1 Score Train ",f1_score(y_t,p_model1_train,average='macro'))
print("Validation Confusion Matrix for 3 Class prediction by BERT+DNN Model1\n",confusion_matrix(y_val,p_model1_val),"\nF1 Score Train ",f1_score(y_val,p_model1_val,average='macro'))
print("Training Confusion Matrix for 3 Class prediction by BERT+DNN Model2\n",confusion_matrix(y_t,p_model2_train),"\nF1 Score Train ",f1_score(y_t,p_model2_train,average='macro'))
print("Validation Confusion Matrix for 3 Class prediction by BERT+DNN Model2\n",confusion_matrix(y_val,p_model2_val),"\nF1 Score Train ",f1_score(y_val,p_model2_val,average='macro'))
print("Training Confusion Matrix for 3 Class prediction by BERT+XGboost Model1 \n",confusion_matrix(y_t,y_pred_xg_train),"\nF1 Score Train ",f1_score(y_t,y_pred_xg_train,average='macro'))
print("Validation Confusion Matrix for 3 Class prediction by BERT+XGBoost Model1\n",confusion_matrix(y_val,y_pred_xg_val),"\nF1 Score Train ",f1_score(y_val,y_pred_xg_val,average='macro'))

Training Confusion Matrix for 3 Class prediction by BERT+DNN Model1 
 [[1698  188  717]
 [ 162  266  167]
 [ 493  124 5584]] 
F1 Score Train  0.6734299286846528
Validation Confusion Matrix for 3 Class prediction by BERT+DNN Model1
 [[ 660   79  337]
 [  84   92   69]
 [ 211   66 2431]] 
F1 Score Train  0.6361649508957462
Training Confusion Matrix for 3 Class prediction by BERT+DNN Model2
 [[1731  143  729]
 [ 248  183  164]
 [ 904   92 5205]] 
F1 Score Train  0.612924739907069
Validation Confusion Matrix for 3 Class prediction by BERT+DNN Model2
 [[ 707   42  327]
 [ 119   56   70]
 [ 404   39 2265]] 
F1 Score Train  0.5833173790464009
Training Confusion Matrix for 3 Class prediction by BERT+XGboost Model1 
 [[1725   72  806]
 [ 181  189  225]
 [ 328   32 5841]] 
F1 Score Train  0.6775083941333011
Validation Confusion Matrix for 3 Class prediction by BERT+XGBoost Model1
 [[ 646   35  395]
 [  93   61   91]
 [ 162   23 2523]] 
F1 Score Train  0.6237703376578914


In [0]:
# Predictions from Classifier 1 or Stage 1 or Relatedness layer
df_classifier1=pd.read_csv(root_path+"comp_feature_data.csv")
p_classifier1=df_classifier1["predicted_stance"].values
y_gold=df_classifier1["stance"].values

**Models Tested Individually and with Ensembles**

In [0]:
### Classfier1 + Classifier2(BERT+DNN Model1)
final_label=[]
for i,item in enumerate(p_classifier1):
  if item==3:
    final_label.append(item) 
  else:
    final_label.append(p_model1[i])
    
    
final_label=np.array(final_label) 
report_score([LABELS[e] for e in y_gold],[LABELS[e] for e in final_label])

-------------------------------------------------------------
|           |   agree   | disagree  |  discuss  | unrelated |
-------------------------------------------------------------
|   agree   |   1113    |    17     |    667    |    106    |
-------------------------------------------------------------
| disagree  |    303    |    28     |    259    |    107    |
-------------------------------------------------------------
|  discuss  |   1206    |    139    |   2954    |    165    |
-------------------------------------------------------------
| unrelated |    30     |     2     |    139    |   18178   |
-------------------------------------------------------------
Score: 9287.25 out of 11651.25	(79.7103315094947%)


79.7103315094947

In [0]:
### Classfier1 + Classifier2(BERT+DNN Model 2)
final_label2=[]
for i,item in enumerate(p_classifier1):
  if item==3:
    final_label2.append(item) 
  else:
    final_label2.append(p_model2[i])
    
final_label2=np.array(final_label2) 
 
report_score([LABELS[e] for e in y_gold],[LABELS[e] for e in final_label2])

-------------------------------------------------------------
|           |   agree   | disagree  |  discuss  | unrelated |
-------------------------------------------------------------
|   agree   |   1067    |    17     |    713    |    106    |
-------------------------------------------------------------
| disagree  |    286    |    29     |    275    |    107    |
-------------------------------------------------------------
|  discuss  |   1255    |    90     |   2954    |    165    |
-------------------------------------------------------------
| unrelated |    46     |     0     |    125    |   18178   |
-------------------------------------------------------------
Score: 9253.5 out of 11651.25	(79.42066301898937%)


79.42066301898937

In [0]:
### Classfier1 + Classifier2(BERT+XGBoost)
final_label3=[]
for i,item in enumerate(p_classifier1):
  if item==3:
    final_label3.append(item) 
  else:
    final_label3.append(y_comp_xg[i])
    
final_label3=np.array(final_label3)
report_score([LABELS[e] for e in y_gold],[LABELS[e] for e in final_label3])

-------------------------------------------------------------
|           |   agree   | disagree  |  discuss  | unrelated |
-------------------------------------------------------------
|   agree   |    763    |     0     |   1034    |    106    |
-------------------------------------------------------------
| disagree  |    189    |     0     |    401    |    107    |
-------------------------------------------------------------
|  discuss  |    687    |     0     |   3612    |    165    |
-------------------------------------------------------------
| unrelated |    25     |     0     |    146    |   18178   |
-------------------------------------------------------------
Score: 9497.25 out of 11651.25	(81.5127132281944%)


81.5127132281944

**BEST PERFORMING ENSEMBLE in terms of F1 Score performance. Codalab results reflects this**

In [0]:
### Ensemble1
final_label4=[]
for i,item in enumerate(p_classifier1):
  if item==3:
    final_label4.append(item)
  else:
    if final_label[i]==1 or final_label[i]==1 :
      final_label4.append(1)
    else:
      final_label4.append(final_label3[i])
    
final_label4=np.array(final_label4) 
report_score([LABELS[e] for e in y_gold],[LABELS[e] for e in final_label4])

-------------------------------------------------------------
|           |   agree   | disagree  |  discuss  | unrelated |
-------------------------------------------------------------
|   agree   |    759    |    17     |   1021    |    106    |
-------------------------------------------------------------
| disagree  |    184    |    28     |    378    |    107    |
-------------------------------------------------------------
|  discuss  |    606    |    139    |   3554    |    165    |
-------------------------------------------------------------
| unrelated |    24     |     2     |    145    |   18178   |
-------------------------------------------------------------
Score: 9471.75 out of 11651.25	(81.29385259092372%)


81.29385259092372

In [0]:
### Ensemble2
final_label5=[]
for i,item in enumerate(p_classifier1):
  if item==3:
    final_label5.append(item)
  else:
    if final_label[i]==1 or final_label2[i]==1 :
      final_label5.append(choice([final_label[i],final_label2[i]]))
    else:
      final_label5.append(choice([final_label[i],final_label2[i],final_label2[i]]))
    
final_label5=np.array(final_label5) 
report_score([LABELS[e] for e in y_gold],[LABELS[e] for e in final_label5])

-------------------------------------------------------------
|           |   agree   | disagree  |  discuss  | unrelated |
-------------------------------------------------------------
|   agree   |   1068    |    16     |    713    |    106    |
-------------------------------------------------------------
| disagree  |    292    |    28     |    270    |    107    |
-------------------------------------------------------------
|  discuss  |   1208    |    116    |   2975    |    165    |
-------------------------------------------------------------
| unrelated |    44     |     1     |    126    |   18178   |
-------------------------------------------------------------
Score: 9269.25 out of 11651.25	(79.55584164789185%)


79.55584164789185