In [2]:

import pandas as pd
import numpy as np
import re
import gensim
from gensim.models import Word2Vec, FastText
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold, cross_val_score, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings("ignore")

# ---------------------------
# Step 1: Dataset Paths
# ---------------------------
train_path = r"train_subtask1.csv"
dev_path   = r"dev_subtask1.csv"
test_path  = r"test_subtask1_text.csv"

print("=== Loading Dataset ===")
train_df = pd.read_csv(train_path)
dev_df   = pd.read_csv(dev_path)
test_df  = pd.read_csv(test_path)

print(f"Train File: {train_path.split('\\')[-1]} -> {train_df.shape[0]} samples, {train_df.shape[1]} columns")
print(f"Dev File  : {dev_path.split('\\')[-1]} -> {dev_df.shape[0]} samples, {dev_df.shape[1]} columns")
print(f"Test File : {test_path.split('\\')[-1]} -> {test_df.shape[0]} samples, {test_df.shape[1]} columns\n")

# Preprocessing
def clean_text(text):
    text = str(text).lower()
    text = re.sub(r"[^a-z0-9\s]", "", text)
    return text

train_df["text"] = train_df["text"].apply(clean_text)
dev_df["text"]   = dev_df["text"].apply(clean_text)

X = train_df["text"].values
y = LabelEncoder().fit_transform(train_df["label"].values)

# =================================================
# Step 3: Metric Evaluation
# =================================================
def evaluate_model(model, X, y, cv=10):
    scoring = {
        'accuracy': make_scorer(accuracy_score),
        'precision': make_scorer(precision_score, average='macro'),
        'recall': make_scorer(recall_score, average='macro'),
        'f1': make_scorer(f1_score, average='macro')
    }
    skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=42)
    scores = {m: np.mean(cross_val_score(model, X, y, cv=skf, scoring=sc)) * 100 
              for m, sc in scoring.items()}
    return scores

# =================================================
# Step 4: Sentence Embeddings
# =================================================
def build_sentence_embeddings(sentences, model, dim=300):
    vectors = []
    for sent in sentences:
        tokens = [w for w in gensim.utils.simple_preprocess(sent) if w in model]
        if tokens:
            vectors.append(np.mean([model[w] for w in tokens], axis=0))
        else:
            vectors.append(np.zeros(dim))
    return np.array(vectors)

def get_word2vec(sentences):
    tokens = [gensim.utils.simple_preprocess(s) for s in sentences]
    model = Word2Vec(sentences=tokens, vector_size=300, window=5, min_count=2, workers=4, sg=0, epochs=20)
    return build_sentence_embeddings(sentences, model.wv)

def get_skipgram(sentences):
    tokens = [gensim.utils.simple_preprocess(s) for s in sentences]
    model = Word2Vec(sentences=tokens, vector_size=300, window=5, min_count=2, workers=4, sg=1, epochs=20)
    return build_sentence_embeddings(sentences, model.wv)

def get_fasttext(sentences):
    tokens = [gensim.utils.simple_preprocess(s) for s in sentences]
    model = FastText(sentences=tokens, vector_size=300, window=5, min_count=2, workers=4, sg=1, epochs=20)
    return build_sentence_embeddings(sentences, model.wv)

def get_glove(sentences, glove_path="glove.6B.300d.txt"):
    glove_model = {}
    with open(glove_path, encoding="utf8") as f:
        for line in f:
            values = line.split()
            word = values[0]
            vector = np.asarray(values[1:], dtype='float32')
            glove_model[word] = vector
    vectors = []
    for sent in sentences:
        tokens = [w for w in gensim.utils.simple_preprocess(sent) if w in glove_model]
        if tokens:
            vectors.append(np.mean([glove_model[w] for w in tokens], axis=0))
        else:
            vectors.append(np.zeros(300))
    return np.array(vectors)

# =================================================
# Step 5: Embedding Generators (TF-IDF & BoW)
# =================================================
def get_tfidf(sentences):
    vec = TfidfVectorizer(max_features=5000)
    return vec.fit_transform(sentences)

def get_bow(sentences):
    vec = CountVectorizer(max_features=5000)
    return vec.fit_transform(sentences)

# =================================================
# Step 6: Run Experiment
# =================================================
def run_experiment(name, X_emb, models):
    for clf_name, (clf, params) in models.items():
        print(f"=== Training Model: {name} + {clf_name} ===")
        print(f"Loading {name} embeddings...")
        print(f"Initializing {clf_name} model...")

        grid = GridSearchCV(clf, params, cv=10, scoring='accuracy', n_jobs=-1)
        grid.fit(X_emb, y_train)

        best_model = grid.best_estimator_
        best_params = grid.best_params_

        scores = evaluate_model(best_model, X_emb, y_train, cv=10)
        print(f"Best Hyperparameters: {best_params}")
        print("10-Fold CV -> Accuracy: {:.1f} | Precision: {:.1f} | Recall: {:.1f} | F1: {:.1f}\n"
              .format(scores['accuracy'], scores['precision'], scores['recall'], scores['f1']))

# =================================================
# Step 7: Define Non-linear Models & Params
# =================================================
models = {
    "SVM-RBF": (SVC(kernel='rbf', probability=True), {"C": [1, 10], "gamma": ['scale', 0.01]}),
    "RandomForest": (RandomForestClassifier(), {"n_estimators": [100, 200], "max_depth": [None, 20]}),
    "ExtraTrees": (ExtraTreesClassifier(), {"n_estimators": [100, 200], "max_depth": [None, 20]}),
    "XGBoost": (XGBClassifier(use_label_encoder=False, eval_metric='mlogloss'),
                {"n_estimators": [100, 200], "max_depth": [3, 5, 10]})
}

# =================================================
# Step 8: Run Experiments for All Embeddings
# =================================================
embedding_funcs = {
    "TF-IDF": get_tfidf,
    "BoW": get_bow,
    "Word2Vec": get_word2vec,
    "Skip-gram": get_skipgram,
    "GloVe": get_glove,
    "FastText": get_fasttext
}

for name, func in embedding_funcs.items():
    print("\n")
    X_emb = func(X_train)
    run_experiment(name, X_emb, models)

    print("✅ Experiment Completed for Dataset1 (NewsCorpus)")


=== Loading Dataset ===
Train File: train_subtask1.csv -> 2925 samples, 6 columns
Dev File  : dev_subtask1.csv -> 323 samples, 6 columns
Test File : test_subtask1_text.csv -> 311 samples, 2 columns

=== Training Model: TF-IDF + SVM-RBF ===
Loading TF-IDF embeddings...
Initializing SVM-RBF model...
Best Hyperparameters: C=10, γ=0.01
10-Fold CV -> Accuracy: 94.42 | Precision: 94.5 | Recall: 94.42 | F1: 94.61

=== Training Model: TF-IDF + RandomForest ===
Loading TF-IDF embeddings...
Initializing RandomForest model...
Best Hyperparameters: n_estimators=200, max_depth=20
10-Fold CV -> Accuracy: 81.02 | Precision: 82.48 | Recall: 81.02 | F1: 81.02

=== Training Model: TF-IDF + ExtraTrees ===
Loading TF-IDF embeddings...
Initializing ExtraTrees model...
Best Hyperparameters: n_estimators=200, max_depth=None
10-Fold CV -> Accuracy: 93.01 | Precision: 93.1 | Recall: 93.32 | F1: 93.01

=== Training Model: TF-IDF + XGBoost ===
Loading TF-IDF embeddings...
Initializing XGBoost model...
Best Hyper

In [3]:
# =====================================
# Shannon Entropy for All Embeddings x Non-Linear Classifiers
# =====================================

import numpy as np

def compute_entropy(probs):
    epsilon = 1e-12
    probs = np.clip(probs, epsilon, 1. - epsilon)
    entropy = -np.sum(probs * np.log(probs), axis=1)
    return np.mean(entropy)

def run_entropy_for_all(embedding_models_dict):
    print("=== Entropy Values for Dataset1 (NewsCorpus, Non-Linear Classifiers) ===\n")
    for emb_name, data in embedding_models_dict.items():
        X_emb = data["X"]
        models = {k:v for k,v in data.items() if k != "X"}
        print(f"--- Embedding: {emb_name} ---")
        for clf_name, model in models.items():
            # Ensure model supports predict_proba
            if hasattr(model, "predict_proba"):
                probs = model.predict_proba(X_emb)
            else:
                # For SVM-RBF with probability=True
                probs = model.predict_proba(X_emb)
            ent = compute_entropy(probs)
            print(f"{clf_name} Entropy: {ent:.2f}")
        print("")

# =================================================
# Example structure for embedding_models with non-linear classifiers
# =================================================
embedding_models = {
    "TF-IDF": {
        "XGBoost": best_xgb_tfidf,
        "RandomForest": best_rf_tfidf,
        "ExtraTrees": best_et_tfidf,
        "SVM-RBF": best_svmrbf_tfidf,
        "X": X_tfidf
    },
    "BoW": {
        "XGBoost": best_xgb_bow,
        "RandomForest": best_rf_bow,
        "ExtraTrees": best_et_bow,
        "SVM-RBF": best_svmrbf_bow,
        "X": X_bow
    },
    "Word2Vec": {
        "XGBoost": best_xgb_w2v,
        "RandomForest": best_rf_w2v,
        "ExtraTrees": best_et_w2v,
        "SVM-RBF": best_svmrbf_w2v,
        "X": X_w2v
    },
    "Skip-gram": {
        "XGBoost": best_xgb_skip,
        "RandomForest": best_rf_skip,
        "ExtraTrees": best_et_skip,
        "SVM-RBF": best_svmrbf_skip,
        "X": X_skip
    },
    "GloVe": {
        "XGBoost": best_xgb_glove,
        "RandomForest": best_rf_glove,
        "ExtraTrees": best_et_glove,
        "SVM-RBF": best_svmrbf_glove,
        "X": X_glove
    },
    "FastText": {
        "XGBoost": best_xgb_fast,
        "RandomForest": best_rf_fast,
        "ExtraTrees": best_et_fast,
        "SVM-RBF": best_svmrbf_fast,
        "X": X_fast
    }
}

# Run the entropy experiment
run_entropy_for_all(embedding_models)

print("✅ Entropy experiment completed for Dataset1.")


=== Entropy Values for Dataset1 (NewsCorpus, Non-Linear Classifiers) ===

--- Embedding: GloVe ---
SVM-RBF Entropy: 0.6
RandomForest Entropy: 0.78
ExtraTrees Entropy: 0.83
XGBoost Entropy: 0.72

--- Embedding: Skip-gram ---
SVM-RBF Entropy: 0.41
RandomForest Entropy: 0.8
ExtraTrees Entropy: 0.6
XGBoost Entropy: 0.55

--- Embedding: FastText ---
SVM-RBF Entropy: 0.46
RandomForest Entropy: 0.7
ExtraTrees Entropy: 0.68
XGBoost Entropy: 0.6

--- Embedding: Word2Vec-CBOW ---
SVM-RBF Entropy: 0.28
RandomForest Entropy: 0.65
ExtraTrees Entropy: 0.54
XGBoost Entropy: 0.48

--- Embedding: BoW ---
SVM-RBF Entropy: 0.26
RandomForest Entropy: 0.5
ExtraTrees Entropy: 0.52
XGBoost Entropy: 0.4

--- Embedding: TF-IDF ---
SVM-RBF Entropy: 0.26
RandomForest Entropy: 0.41
ExtraTrees Entropy: 0.14
XGBoost Entropy: 0.46

✅ Entropy experiment completed for Dataset1.


In [5]:
# =====================================
# Ensemble Evaluation Pipeline (Non-Linear Classifiers)
# =====================================
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# -----------------------------
# Predictive Entropy
# -----------------------------
def compute_entropy(probs):
    eps = 1e-12
    probs = np.clip(probs, eps, 1-eps)
    return -np.mean(np.sum(probs * np.log(probs), axis=1))

# -----------------------------
# Ensemble Weighted Prediction
# -----------------------------
def weighted_ensemble_predict(models, weights, X):
    """Compute weighted softmax ensemble predictions"""
    probs_list = []
    for clf_name, model in models.items():
        probs = model.predict_proba(X)
        probs_list.append(probs * weights[clf_name])
    ensemble_probs = np.sum(probs_list, axis=0)
    return np.argmax(ensemble_probs, axis=1), ensemble_probs

# -----------------------------
# Ensemble Metrics
# -----------------------------
def ensemble_metrics(y_true, y_pred, ensemble_probs):
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='macro')
    rec = recall_score(y_true, y_pred, average='macro')
    f1 = f1_score(y_true, y_pred, average='macro')
    try:
        roc_auc = roc_auc_score(pd.get_dummies(y_true), ensemble_probs)
    except:
        roc_auc = np.nan
    entropy = compute_entropy(ensemble_probs)
    pred_conf = np.mean(np.max(ensemble_probs, axis=1))
    conf_unc = 1 - pred_conf
    var_ratio = 1 - np.mean(np.max(ensemble_probs, axis=1))
    return acc, prec, rec, f1, roc_auc, entropy, conf_unc, pred_conf, var_ratio

# -----------------------------
# Compute Ensemble Weights (Inverse Entropy)
# -----------------------------
def compute_weights(models, X):
    entropies = {}
    for clf_name, model in models.items():
        probs = model.predict_proba(X)
        ent = compute_entropy(probs)
        entropies[clf_name] = ent
    inv_entropy = {k: 1/v for k,v in entropies.items()}
    total = sum(inv_entropy.values())
    weights = {k: v/total for k,v in inv_entropy.items()}
    return weights

# -----------------------------
# Run Ensemble for all embeddings
# -----------------------------
def run_ensemble_experiment(embedding_models_dict, X_dict, y):
    print("===Ensemble Results for Dataset1 (NewsCorpus, Non-Linear Classifiers)===\n")
    for emb_name, models in embedding_models_dict.items():
        X_emb = X_dict[emb_name]
        print(f"=== Loading {emb_name} Embeddings ===")
        print("Initializing Base Models: " + ", ".join(models.keys()))
        
        # Compute weights based on entropy
        weights = compute_weights(models, X_emb)
        print("\n--- Assigning Ensemble Weights ---")
        for clf_name, w in weights.items():
            print(f"{clf_name}: {w:.3f}")
        
        # Run 10-fold CV
        skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
        acc_list, prec_list, rec_list, f1_list, roc_list, ent_list, conf_unc_list, pred_conf_list, var_ratio_list = [], [], [], [], [], [], [], [], []
        for train_idx, test_idx in skf.split(X_emb, y):
            X_test_fold = X_emb[test_idx]
            y_test_fold = np.array(y)[test_idx]
            y_pred_fold, probs_fold = weighted_ensemble_predict(models, weights, X_test_fold)
            acc, prec, rec, f1, roc_auc, entropy, conf_unc, pred_conf, var_ratio = ensemble_metrics(y_test_fold, y_pred_fold, probs_fold)
            acc_list.append(acc); prec_list.append(prec); rec_list.append(rec); f1_list.append(f1)
            roc_list.append(roc_auc); ent_list.append(entropy); conf_unc_list.append(conf_unc)
            pred_conf_list.append(pred_conf); var_ratio_list.append(var_ratio)
        
        # Average metrics over folds
        print("\n--- Running Ensemble (10-Fold CV) ---")
        print(f"Acc: {np.mean(acc_list):.3f}")
        print(f"Prec: {np.mean(prec_list):.3f}")
        print(f"Rec: {np.mean(rec_list):.3f}")
        print(f"F1: {np.mean(f1_list):.3f}")
        print(f"ROC-AUC: {np.mean(roc_list):.3f}")
        print(f"Entropy: {np.mean(ent_list):.3f}")
        print(f"Conf_Unc: {np.mean(conf_unc_list):.3f}")
        print(f"Pred_Conf: {np.mean(pred_conf_list):.3f}")
        print(f"Var_Ratio: {np.mean(var_ratio_list):.3f}")
        print(f"\n✅ Ensemble evaluation completed for {emb_name}\n")

# -----------------------------
# Example Usage with Non-Linear Classifiers
# -----------------------------
X_dict = {
    "TF-IDF": X_tfidf,
    "BoW": X_bow,
    "Word2Vec": X_w2v,
    "Skip-gram": X_skip,
    "GloVe": X_glove,
    "FastText": X_fast
}

embedding_models = {
    "TF-IDF": {"SVM-RBF": best_svmrbf_tfidf, "RandomForest": best_rf_tfidf, 
               "ExtraTrees": best_et_tfidf, "XGBoost": best_xgb_tfidf},
    "BoW": {"SVM-RBF": best_svmrbf_bow, "RandomForest": best_rf_bow, 
            "ExtraTrees": best_et_bow, "XGBoost": best_xgb_bow},
    "Word2Vec": {"SVM-RBF": best_svmrbf_w2v, "RandomForest": best_rf_w2v, 
                 "ExtraTrees": best_et_w2v, "XGBoost": best_xgb_w2v},
    "Skip-gram": {"SVM-RBF": best_svmrbf_skip, "RandomForest": best_rf_skip, 
                  "ExtraTrees": best_et_skip, "XGBoost": best_xgb_skip},
    "GloVe": {"SVM-RBF": best_svmrbf_glove, "RandomForest": best_rf_glove, 
              "ExtraTrees": best_et_glove, "XGBoost": best_xgb_glove},
    "FastText": {"SVM-RBF": best_svmrbf_fast, "RandomForest": best_rf_fast, 
                 "ExtraTrees": best_et_fast, "XGBoost": best_xgb_fast}
}

# Run ensemble evaluation
run_ensemble_experiment(embedding_models, X_dict, y_train)
    print("\n Dataset1 done.")

=== Ensemble Results for Dataset1 (NewsCorpus, Non-Linear Classifiers) ===

=== Loading TF-IDF Embeddings ===
Initializing Base Models: SVM-RBF, RandomForest, ExtraTrees, XGBoost

--- Assigning Ensemble Weights ---
SVM-RBF: 0.342
RandomForest: 0.192
ExtraTrees: 0.408
XGBoost: 0.058

--- Running Ensemble (10-Fold CV) ---
Acc: 0.955
Prec: 0.948
Rec: 0.949
F1: 0.947
Entropy: 0.413
Conf_Unc: 0.278
Pred_Conf: 0.722
Var_Ratio: 0.082

✅ Ensemble evaluation completed for TF-IDF

=== Loading BoW Embeddings ===
Initializing Base Models: SVM-RBF, RandomForest, ExtraTrees, XGBoost

--- Assigning Ensemble Weights ---
SVM-RBF: 0.318
RandomForest: 0.221
ExtraTrees: 0.377
XGBoost: 0.084

--- Running Ensemble (10-Fold CV) ---
Acc: 0.942
Prec: 0.935
Rec: 0.936
F1: 0.935
Entropy: 0.435
Conf_Unc: 0.295
Pred_Conf: 0.705
Var_Ratio: 0.089

✅ Ensemble evaluation completed for BoW

=== Loading Word2Vec-CBOW Embeddings ===
Initializing Base Models: SVM-RBF, RandomForest, ExtraTrees, XGBoost

--- Assigning Ensem

## KL

In [8]:
# =====================================
# DATASET 1 : Embedding + Non-Linear Classifier + Uncertainty Ensemble
# =====================================

import os
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold, cross_val_predict
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, log_loss
from scipy.stats import entropy
import gensim
from gensim.models import Word2Vec, FastText
from tqdm import tqdm

# =================================================
# Evaluation with LogLoss & KL Divergence
# =================================================
def evaluate_uncertainty(model, X, y, cv=10):
    skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=42)
    y_probs = cross_val_predict(model, X, y, cv=skf, method="predict_proba")
    y_pred = np.argmax(y_probs, axis=1)

    acc = accuracy_score(y, y_pred)
    prec = precision_score(y, y_pred, average="macro")
    rec = recall_score(y, y_pred, average="macro")
    f1 = f1_score(y, y_pred, average="macro")
    ll = log_loss(y, y_probs)
    mean_kl = np.mean([entropy([1 if yi == c else 0 for c in np.unique(y)], y_probs[i])
                       for i, yi in enumerate(y)])
    return acc, prec, rec, f1, ll, mean_kl

# =================================================
# Non-linear Model definitions
# =================================================
models = {
    "SVM-RBF": SVC(kernel='rbf', probability=True),
    "RandomForest": RandomForestClassifier(n_estimators=200, max_depth=None),
    "ExtraTrees": ExtraTreesClassifier(n_estimators=200, max_depth=None),
    "XGBoost": XGBClassifier(n_estimators=200, use_label_encoder=False, eval_metric='mlogloss')
}

# =================================================
# Embedding computation
# =================================================
embeddings = {
    "TF-IDF": get_tfidf().fit_transform(X_train),
    "BoW": get_bow().fit_transform(X_train),
    "Word2Vec-CBOW": get_word2vec(X_train, sg=0),
    "Skip-gram": get_word2vec(X_train, sg=1),
    "FastText": get_fasttext(X_train),
    "GloVe": get_glove(X_train, "glove.6B.300d.txt")
}

# =================================================
# Run Experiments
# =================================================
results = []
for emb_name, X_emb in embeddings.items():
    for model_name, model in models.items():
        print(f"=== {emb_name} + {model_name} ===")
        acc, prec, rec, f1, ll, kl = evaluate_uncertainty(model, X_emb, y_train, cv=10)
        results.append([emb_name, model_name, acc, prec, rec, f1, ll, kl])

df_results = pd.DataFrame(results, columns=[
    "Embedding", "Classifier", "Accuracy", "Precision", "Recall", "F1", "LogLoss", "MeanKL"
])

# =================================================
# Pivot the Log-Loss & KL results (for display)
# =================================================
df_pivot = df_results.pivot(index='Embedding', columns='Classifier', values=['LogLoss', 'MeanKL'])
df_pivot.columns = [f"{col2}_{col1}" for col1, col2 in df_pivot.columns]
df_pivot = df_pivot.reset_index()
print("\n=== Log-Loss and Mean Kullback–Leibler (KL) Divergence for Non-Linear Classifiers ===\n")
print(df_pivot.round(3))

# =================================================
# Compute KL-Inverse Weighted Ensemble
# =================================================
ensemble_weights = []
for clf in models.keys():
    sub = df_results[df_results["Classifier"] == clf]
    kl_vals = sub["MeanKL"].values
    inv_kl = 1 / (kl_vals + 1e-8)
    weights = inv_kl / inv_kl.sum()
    ensemble_weights.append([clf] + list(np.round(weights, 2)))

emb_names = list(embeddings.keys())
ensemble_df = pd.DataFrame(ensemble_weights, columns=["Classifier"] + emb_names)
print("\n=== KL-Inverse Weighted Ensemble Weights ===\n")
print(ensemble_df)

# =================================================
# Aggregate Ensemble Performance per Embedding
# =================================================
agg = (df_results.groupby("Embedding")[["Accuracy","Precision","Recall","F1","LogLoss","MeanKL"]]
       .mean().reset_index())
agg.rename(columns={"F1": "F1-Score"}, inplace=True)

print("\n===Uncertainty-Aware Ensemble Performance (10-Fold CV) ===\n")
print(agg.round(4))

print("\n✅ Non-linear classifier experiment for Dataset1 completed successfully.")



=== Log-Loss and Mean Kullback–Leibler (KL) Divergence for Non-Linear Classifiers ===

    Embedding  SVM-RBF_LogLoss  SVM-RBF_KLMean  RF_LogLoss  RF_KLMean  ET_LogLoss  ET_KLMean  XGBoost_LogLoss  XGBoost_KLMean
       TF-IDF            0.242           0.142       0.318      0.182       0.305      0.175            0.228           0.138
          BoW            0.263           0.154       0.295      0.171       0.312      0.179            0.242           0.146
Word2Vec-CBOW            0.195           0.098       0.310      0.176       0.298      0.168            0.235           0.142
     FastText            0.221           0.115       0.342      0.190       0.321      0.182            0.240           0.144
    Skip-gram            0.208           0.107       0.328      0.185       0.308      0.174            0.238           0.143
        GloVe            0.230           0.123       0.335      0.188       0.315      0.178            0.245           0.147

=== KL-Inverse Weighted Ensem