In [None]:
import pandas as pd 
import numpy as np 
import pickle
import xgboost as xgb
from sklearn.metrics import precision_recall_fscore_support
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.experimental import enable_hist_gradient_boosting 
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from tqdm import tqdm
import bma_python
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Read the pickled training feature bundle from disk.
# NOTE: unpickling can execute arbitrary code, so this file must come from a
# trusted source.
with open('../data/features_training_sarc_twitter.p', 'rb') as train_file:
    train = pickle.load(train_file)

In [None]:
# Pull every feature group out of `train` once so the matrices below can be
# assembled from named pieces.
train_bert = train['bert_embed']
train_emoji = train['emoji']['emoji']
# These two entries are given an extra axis at position 1 so they can be
# concatenated column-wise (presumably they are 1-D per-sample values).
train_emoji_pos = np.expand_dims(train['emoji']['emoji_positive'], axis=1)
train_emoji_neg = np.expand_dims(train['emoji']['emoji_negative'], axis=1)
train_pos = train['pos']
train_punc = train['punc']
train_onom = train['onom']
train_init = train['init']
train_polarity = train['polarity']

# BERT embeddings only.
X_train_embed = train_bert

# BERT + emoji / punctuation / onomatopoeia / initialism groups (BERT first).
X_train_pp = np.concatenate(
    [train_bert, train_emoji, train_emoji_pos, train_emoji_neg,
     train_punc, train_onom, train_init],
    axis=1,
)

# 'pos' features followed by BERT.
X_train_pos = np.concatenate([train_pos, train_bert], axis=1)

# Emoji groups, 'pos', punctuation, onomatopoeia, initialisms, then BERT.
X_train_pp_pos = np.concatenate(
    [train_emoji, train_emoji_pos, train_emoji_neg, train_pos, train_punc,
     train_onom, train_init, train_bert],
    axis=1,
)

# Same column layout as X_train_pp_pos with the polarity features appended.
X_train_pp_pos_pol = np.concatenate(
    [train_emoji, train_emoji_pos, train_emoji_neg, train_pos, train_punc,
     train_onom, train_init, train_bert, train_polarity],
    axis=1,
)

# Training targets.
y = train['label']

In [None]:
# Shared seed for every estimator: without it the tree ensembles (and any
# other stochastic estimator below) give different results on each run, which
# makes the reported metrics impossible to reproduce.
RANDOM_SEED = 42

# Hyper-parameters are hard-coded; presumably they come from an earlier
# tuning run that is not part of this section of the notebook.
xgb_model = xgb.XGBClassifier(
    colsample_bytree=0.7776353921686654,
    learning_rate=0.063062124248497,
    max_depth=9,
    min_child_weight=4,
    n_estimators=173,
    subsample=0.834149882785828,
    random_state=RANDOM_SEED,
)
randomf_model = RandomForestClassifier(
    max_depth=18,
    min_samples_leaf=8,
    min_samples_split=2,
    n_estimators=193,
    random_state=RANDOM_SEED,
)
hist_model = HistGradientBoostingClassifier(
    learning_rate=0.09137860709617293,
    max_depth=23,
    min_samples_leaf=16,
    random_state=RANDOM_SEED,
)  # pos, polarity, pp random
logi_model = LogisticRegression(
    C=6.3851824328733695,
    penalty='l2',
    max_iter=10000,
    random_state=RANDOM_SEED,
)  # pos, polarity, bayes
ada_model = AdaBoostClassifier(
    learning_rate=0.9679679358717436,
    n_estimators=158,
    random_state=RANDOM_SEED,
)  # pos pp bayes

In [None]:
# Candidate models handed to BMA, keyed by a short display name. The same
# names are used later to look up each model's row in outputs['Weights'].
model_diz = {
    'XGB': xgb_model,
    'Random F': randomf_model,
    'Hist': hist_model,
    'Logistic': logi_model,
    'Ada': ada_model,
}

In [None]:
# Run the project's BMA routine (presumably Bayesian model averaging — TODO
# confirm against bma_python) over all candidate models, using the feature
# matrix that includes the polarity columns. Later cells read per-model
# weights from outputs['Weights'].
outputs = bma_python.BMA(model_diz, X_train_pp_pos_pol, y)

In [None]:
# Persist the BMA results so they can be reloaded later without recomputing.
# `pickle` is already imported in the notebook's first cell, so the redundant
# mid-notebook import has been dropped.
with open('../data/bma_results_sarcasm.p', 'wb') as results_file:
    pickle.dump(outputs, results_file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Reload the stored BMA results, replacing the in-memory `outputs`.
# NOTE: unpickling can execute arbitrary code; only load files you produced.
with open('../data/bma_results_sarcasm.p', 'rb') as results_file:
    outputs = pickle.load(results_file)

In [None]:
# Fit the XGBoost and logistic-regression models on the full training matrix
# (all feature groups including polarity); both are used for inference below.
xgb_model.fit(X_train_pp_pos_pol, y)
logi_model.fit(X_train_pp_pos_pol, y)

In [None]:
# Fit the random forest on the same training matrix as the other models.
# NOTE(review): hist_model and ada_model are not fitted in the cells shown
# here; only XGB, logistic and random forest are used for inference below.
randomf_model.fit(X_train_pp_pos_pol, y)

## Test Sarcasm

In [None]:
# Load the two pickled test feature bundles (Ghosh and Riloff).
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open('../data/ghosh_test_sarc.p', 'rb') as ghosh_file:
    ghosh_test = pickle.load(ghosh_file)

with open('../data/riloff_test_sarc.p', 'rb') as riloff_file:
    riloff_test = pickle.load(riloff_file)

In [None]:
def _build_test_features(data):
    """Assemble the evaluation feature matrices for one test set.

    The column order of each matrix mirrors the corresponding training matrix
    built earlier in the notebook, which is required for the fitted models to
    see features in the positions they were trained on.

    Parameters
    ----------
    data : mapping
        Must provide the keys read below: 'emoji' (itself providing 'emoji',
        'emoji_positive' and 'emoji_negative'), 'pos', 'punc', 'onom',
        'init', 'bert_embed', 'polarity' and 'label'.

    Returns
    -------
    tuple
        ``(pos, pp_pos, pp_pos_pol, labels)`` where ``pos`` is 'pos' + BERT,
        ``pp_pos`` is emoji groups + 'pos' + 'punc' + 'onom' + 'init' + BERT,
        ``pp_pos_pol`` is ``pp_pos`` with the polarity columns appended, and
        ``labels`` is ``data['label']`` unchanged.
    """
    pp_pos_parts = [
        data['emoji']['emoji'],
        # Extra axis at position 1 so these can be concatenated column-wise.
        np.expand_dims(data['emoji']['emoji_positive'], axis=1),
        np.expand_dims(data['emoji']['emoji_negative'], axis=1),
        data['pos'],
        data['punc'],
        data['onom'],
        data['init'],
        data['bert_embed'],
    ]
    pos = np.concatenate([data['pos'], data['bert_embed']], axis=1)
    pp_pos = np.concatenate(pp_pos_parts, axis=1)
    pp_pos_pol = np.concatenate(pp_pos_parts + [data['polarity']], axis=1)
    return pos, pp_pos, pp_pos_pol, data['label']


# Same assembly for both test sets; previously this was copy-pasted per set.
ghosh_pos, ghosh_pp_pos, ghosh_pp_pos_pol, y_ghosh = _build_test_features(ghosh_test)
riloff_pos, riloff_pp_pos, riloff_pp_pos_pol, y_riloff = _build_test_features(riloff_test)

In [None]:
# Per-model BMA inference on the Riloff test set. Each call receives the
# fitted model, the test matrix and that model's row of the weight table
# (presumably returning weighted per-class scores — TODO confirm).
bma_weights = outputs['Weights']
out1 = bma_python.inference_bma(xgb_model, riloff_pp_pos_pol, bma_weights.loc['XGB'])
out2 = bma_python.inference_bma(randomf_model, riloff_pp_pos_pol, bma_weights.loc['Random F'])
out3 = bma_python.inference_bma(logi_model, riloff_pp_pos_pol, bma_weights.loc['Logistic'])

In [None]:
# Add the XGB and random-forest outputs element-wise, then take the index of
# the largest value along the last axis as the predicted label.
# NOTE(review): out3 (logistic) is computed in the previous cell but is not
# part of this sum — confirm that leaving it out is intentional.
sum_test = np.sum((out1, out2), axis=0).argmax(axis=-1)

In [None]:
# Per-class precision / recall / F1 for the ensemble on the Riloff test set.
# print() is used so the multi-line report string renders with line breaks.
print(classification_report(y_riloff, sum_test))

In [None]:
# Overall accuracy of the ensemble on the Riloff test set (shown as cell output).
accuracy_score(y_riloff, sum_test)

In [None]:
# Per-model BMA inference on the Ghosh test set. This overwrites the
# out1/out2/out3 values produced for the Riloff set earlier.
bma_weights = outputs['Weights']
out1 = bma_python.inference_bma(xgb_model, ghosh_pp_pos_pol, bma_weights.loc['XGB'])
out2 = bma_python.inference_bma(randomf_model, ghosh_pp_pos_pol, bma_weights.loc['Random F'])
out3 = bma_python.inference_bma(logi_model, ghosh_pp_pos_pol, bma_weights.loc['Logistic'])

In [None]:
# Add the XGB and random-forest outputs element-wise, then take the index of
# the largest value along the last axis as the predicted label.
# NOTE(review): out3 (logistic) is computed in the previous cell but is not
# part of this sum — confirm that leaving it out is intentional.
sum_test = np.sum((out1, out2), axis=0).argmax(axis=-1)

In [None]:
# Overall accuracy of the ensemble on the Ghosh test set (shown as cell output).
accuracy_score(y_ghosh, sum_test)

In [None]:
# Per-class precision / recall / F1 for the ensemble on the Ghosh test set.
# print() is used so the multi-line report string renders with line breaks.
print(classification_report(y_ghosh, sum_test))