In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules
# (e.g. the local `cgpos` package) are re-imported before each cell executes;
# edits to the package are then picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import logging
from collections import defaultdict, Counter

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from hydra import initialize, compose
import hydra
from sklearn.model_selection import train_test_split

from cgpos.utils.util import import_pkl, export_pkl, get_abs_dir, flatten
from cgpos.models.multinomial_naive_bayes import MultinomialNaiveBayes

In [3]:
# Drop any Hydra state left behind by a previous execution of this cell so that
# re-running it does not trip over an already-initialized global instance.
_hydra_state = hydra.core.global_hydra.GlobalHydra.instance()
_hydra_state.clear()

# Register the config directory (given as a relative path) and compose the
# `main` configuration; `config` is what the following cells read paths from.
initialize(config_path="../conf/", version_base=None)
config = compose(config_name="main")

# Show INFO-level log records in the notebook output.
logging.basicConfig(level=logging.INFO)

In [4]:
# Load the serialized artifacts whose locations come from the Hydra config.
# NOTE(review): import_pkl presumably unpickles the file — only point these
# config entries at trusted data, since unpickling can execute arbitrary code.

# The cleaned dataset unpacks into exactly three objects. `targets` is later
# indexed per sample and then per target column by the training cell.
uid, text, targets = import_pkl(config.data.cleaned)
# Model inputs. They are split together with `targets` further down, so they
# are presumably aligned one-to-one with it — TODO confirm upstream.
features = import_pkl(config.data.features)
# Reference data for the targets. `target_names` is iterated by the training
# cell to label each per-target model; the short/long forms are not used in
# the cells visible here.
target_names, target_short, target_long = import_pkl(config.reference.target_map)

INFO:cgpos.utils.util:Importing /home/tejomay/cgpos/data/processed/cleaned.pkl
INFO:cgpos.utils.util:Importing /home/tejomay/cgpos/data/processed/features.pkl
INFO:cgpos.utils.util:Importing /home/tejomay/cgpos/data/reference/target_map.pkl


In [5]:
# Train and evaluate one Multinomial Naive Bayes classifier per target column.
#
# Inputs (defined by earlier cells): `features`, `targets`, `target_names`.
# Outputs left in the notebook namespace: the split (`X_train`, `X_test`,
# `y_train`, `y_test`), the stacked predictions `preds` (one row per target),
# and the last fitted model `mnb`.

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(features, targets, train_size=0.8, random_state=20)

# Each element of `targets` carries one label per target. Re-arrange the
# row-wise labels into one array per target so every target gets its own model.
n_targets = len(y_train[0])
y_train = [np.array([y[class_i] for y in y_train]) for class_i in range(n_targets)]
y_test = [np.array([y[class_i] for y in y_test]) for class_i in range(n_targets)]

# The loop below pairs target_names[i] with label column i. Fail early with a
# clear message if they disagree, instead of an IndexError mid-training or a
# silently broadcast (and therefore wrong) overall accuracy.
if len(target_names) != n_targets:
    raise ValueError(
        f"target_names has {len(target_names)} entries but targets have {n_targets} columns"
    )

# Hyperparameters are shared by every per-target model, so set them once here
# rather than on each loop iteration.
# NOTE(review): alpha is presumably the additive-smoothing strength and
# ngram_range the (min, max) n-gram sizes — confirm against MultinomialNaiveBayes.
alpha = 0.2
ngram_range = (1, 5)

per_target_preds = []
for i, label in enumerate(target_names):
    y_train_i = y_train[i]
    y_test_i = y_test[i]
    mnb = MultinomialNaiveBayes(alpha, ngram_range)
    mnb.fit(X_train, y_train_i)
    y_pred = mnb.predict(X_test)
    # Fraction of test samples whose label for this single target is correct.
    accuracy = np.mean(y_pred == y_test_i)
    logging.info(f"Multinomial Naive Bayes (alpha={alpha}, ngram_range={ngram_range}) accuracy on {label}: {accuracy * 100:.2f}%")
    per_target_preds.append(y_pred)

# Stack to shape (n_targets, n_test_samples). A sample counts as correct
# overall only when every one of its targets was predicted correctly.
preds = np.array(per_target_preds)
logging.info(f"Overall accuracy: {np.mean((preds == y_test).all(axis=0)) * 100:.2f}%")

INFO:root:Multinomial Naive Bayes (alpha=0.2, ngram_range=(1, 5)) accuracy on pos: 88.67%
INFO:root:Multinomial Naive Bayes (alpha=0.2, ngram_range=(1, 5)) accuracy on pers: 95.96%
INFO:root:Multinomial Naive Bayes (alpha=0.2, ngram_range=(1, 5)) accuracy on num: 94.65%
INFO:root:Multinomial Naive Bayes (alpha=0.2, ngram_range=(1, 5)) accuracy on tense: 96.61%
INFO:root:Multinomial Naive Bayes (alpha=0.2, ngram_range=(1, 5)) accuracy on mood: 96.74%
INFO:root:Multinomial Naive Bayes (alpha=0.2, ngram_range=(1, 5)) accuracy on voice: 96.99%
INFO:root:Multinomial Naive Bayes (alpha=0.2, ngram_range=(1, 5)) accuracy on gend: 92.43%
INFO:root:Multinomial Naive Bayes (alpha=0.2, ngram_range=(1, 5)) accuracy on case: 93.02%
INFO:root:Multinomial Naive Bayes (alpha=0.2, ngram_range=(1, 5)) accuracy on degree: 98.08%
INFO:root:Overall accuracy: 76.66%
