# In [5]:
import pandas as pd
import numpy as np
import pickle

from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# In [7]:
class Model:
    """Text classifier: TF-IDF features fed to a multinomial naive Bayes model.

    The fitted ``(vectorizer, model)`` pair is cached on disk as a pickle.
    If the cache file exists it is loaded; otherwise the pair is trained
    from ``X``/``Y`` and written to the cache file.

    Attributes:
        vectorizer: Fitted vectorizer exposing ``transform``.
        model: Fitted classifier exposing ``predict``.
    """

    def __init__(self, X=None, Y=None, file_path="MultinomialNB.pkl", debug=False):
        """Load a cached classifier, or train one and cache it.

        Args:
            X: Raw training documents (iterable of strings). Only used when
                ``file_path`` does not exist.
            Y: Labels aligned with ``X``. Only used when ``file_path`` does
                not exist.
            file_path: Location of the pickled ``(vectorizer, model)`` tuple.
            debug: When true, print accuracy and a classification report
                computed on the held-out 30% split after training.

        Raises:
            ValueError: If ``file_path`` does not exist and ``X`` or ``Y``
                is missing, so there is nothing to load and nothing to
                train on.
        """
        # Plain None (no trailing comma): a trailing comma would store the
        # one-element tuple ``(None,)`` instead.
        self.vectorizer = None
        self.model = None

        cache = Path(file_path)
        if cache.exists():
            # NOTE(security): unpickling runs arbitrary code embedded in the
            # file. Only point ``file_path`` at files this code produced.
            with cache.open("rb") as fh:
                self.vectorizer, self.model = pickle.load(fh)
            return

        if X is None or Y is None:
            raise ValueError(
                f"No saved model at {file_path!r}; X and Y are required to train one."
            )

        x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)

        # Fit the vectorizer on the training split only, so the held-out
        # evaluation below is not influenced by test-set vocabulary.
        self.vectorizer = TfidfVectorizer()
        train_features = self.vectorizer.fit_transform(x_train)

        # Training Model
        self.model = MultinomialNB()
        self.model.fit(train_features, y_train)

        # Training Results stats
        if debug:
            predicted = self.model.predict(self.vectorizer.transform(x_test))
            print(f"Accuracy: {accuracy_score(y_test, predicted)}")
            print("Classification Report:")
            print(classification_report(y_test, predicted))

        with cache.open("wb") as fh:
            pickle.dump((self.vectorizer, self.model), fh)

    def predict(self, test):
        """Predict labels for one document or a batch of documents.

        Args:
            test: A ``pandas.Series`` of documents, a list/tuple of
                documents, or a single document (wrapped into a one-element
                Series).

        Returns:
            Whatever ``self.model.predict`` returns for the vectorized
            input; one prediction per document.
        """
        if isinstance(test, (list, tuple)):
            # Treat a plain sequence as a batch rather than as one document.
            test = pd.Series(list(test))
        elif not isinstance(test, pd.Series):
            test = pd.Series([test])
        features = self.vectorizer.transform(test)
        return self.model.predict(features)
    