In [1]:
# Enable IPython's autoreload extension. Mode 2 re-imports modules before each
# cell runs, so edits to the local cgpos package show up without restarting
# the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import logging
from collections import defaultdict, Counter

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from hydra import initialize, compose
import hydra
from sklearn.model_selection import train_test_split

from cgpos.utils.util import import_pkl, export_pkl, get_abs_dir, flatten
from cgpos.models.multinomial_naive_bayes import MultinomialNaiveBayes

In [3]:
# Hydra keeps global state; clear it first so this cell can be re-run in the
# same kernel.
hydra.core.global_hydra.GlobalHydra.instance().clear()

# Point Hydra at the config directory and compose the "main" configuration.
initialize(config_path="../conf/", version_base=None)
config = compose(config_name="main")

# Configure the root logger so INFO-level messages from later cells are shown.
logging.basicConfig(level=logging.INFO)

In [4]:
# Load the cleaned dataset; the pickle unpacks into three objects.
# NOTE(review): presumably uid/text/targets are parallel per-sample sequences
# - confirm against the cleaning step that writes this file.
uid, text, targets = import_pkl(config.data.cleaned)
# Model inputs; split together with the targets in the training cell below.
features = import_pkl(config.data.features)
# target_map[0] is iterated in the training cell to obtain the attribute names
# (the order is assumed to match the positions inside each target - TODO confirm).
target_map = import_pkl(config.reference.target_map)

INFO:cgpos.utils.util:Importing /home/tejomay/cgpos/data/processed/cleaned.pkl
INFO:cgpos.utils.util:Importing /home/tejomay/cgpos/data/processed/features.pkl
INFO:cgpos.utils.util:Importing /home/tejomay/cgpos/data/reference/target_map.pkl


In [5]:
# Hyperparameters are shared by every per-attribute classifier, so they are
# set once instead of being reassigned on each loop iteration.
alpha = 1
ngram_range = (1, 5)

# train_test_split's shuffle depends only on the number of samples and
# random_state, so a single split of (features, targets) gives the same
# partition that splitting inside the loop produced for every attribute.
X_train, X_test, targets_train, targets_test = train_test_split(
    features, targets, train_size=0.8, random_state=20
)

# Held-out accuracy per attribute name, kept so later cells can inspect or
# plot the results instead of re-reading them from the log output.
accuracies = {}
for class_i, class_name in enumerate(target_map[0]):
    # Each target holds one label per attribute; pick out the labels for the
    # attribute currently being modelled.
    y_train = np.array([target[class_i] for target in targets_train])
    y_test = np.array([target[class_i] for target in targets_test])

    # Fit an independent classifier for this attribute and score it on the
    # held-out 20% of the data.
    mnb = MultinomialNaiveBayes(alpha=alpha, ngram_range=ngram_range)
    mnb.fit(X_train, y_train)
    y_pred = mnb.predict(X_test)
    accuracy = np.mean(y_pred == y_test)
    accuracies[class_name] = accuracy
    logging.info(f"Multinomial Naive Bayes (alpha={alpha}, ngram_range={ngram_range}) accuracy on {class_name}: {accuracy * 100:.2f}%")

INFO:root:Multinomial Naive Bayes (alpha=1, ngram_range=(1, 5)) accuracy on pos: 87.53%
INFO:root:Multinomial Naive Bayes (alpha=1, ngram_range=(1, 5)) accuracy on pers: 96.92%
INFO:root:Multinomial Naive Bayes (alpha=1, ngram_range=(1, 5)) accuracy on num: 94.96%
INFO:root:Multinomial Naive Bayes (alpha=1, ngram_range=(1, 5)) accuracy on tense: 96.80%
INFO:root:Multinomial Naive Bayes (alpha=1, ngram_range=(1, 5)) accuracy on mood: 97.01%
INFO:root:Multinomial Naive Bayes (alpha=1, ngram_range=(1, 5)) accuracy on voice: 97.47%
INFO:root:Multinomial Naive Bayes (alpha=1, ngram_range=(1, 5)) accuracy on gend: 92.31%
INFO:root:Multinomial Naive Bayes (alpha=1, ngram_range=(1, 5)) accuracy on case: 92.96%
INFO:root:Multinomial Naive Bayes (alpha=1, ngram_range=(1, 5)) accuracy on degree: 99.74%
