# Importing Library And Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as mpt

In [2]:
df = pd.read_csv('../dataset/titleDataClean200.csv')
df.shape

(200, 5)

In [3]:
df = df.astype({'label' : 'category'})
df = df.astype({'stemmed' : 'string'})
df.dtypes

title               object
Token               object
stopword            object
stemmed     string[python]
label             category
dtype: object

# Feature Extraction With TF-IDF

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer

tf = TfidfVectorizer()
text_tf = tf.fit_transform(df['stemmed'])

# Split Data

In [5]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(text_tf, df['label'], test_size=0.2, random_state=5)

In [7]:
# tf = TfidfVectorizer()

# ctmTr = tf.fit_transform(x_train) 
# X_test_dtm = tf.transform(x_test)


In [6]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
import sklearn.naive_bayes as nb

clf = nb.MultinomialNB().fit(x_train, y_train)
predicted_clf = clf.predict(x_test)

In [7]:
print("MultinomialNB Accuracy: ", accuracy_score(y_test, predicted_clf))
print("MultinomialNB Precision: ", precision_score(y_test, predicted_clf, average='weighted'))
print("MultinomialNB Recall: ", recall_score(y_test, predicted_clf, average='weighted'))
print("MultinomialNB f1_score: ", f1_score(y_test, predicted_clf, average='weighted'))


print(f"Confussion matrix: \n {confusion_matrix(y_test,predicted_clf)}")
print("===============================================================")
print(classification_report(y_test, predicted_clf, zero_division=0))

MultinomialNB Accuracy:  0.7
MultinomialNB Precision:  0.8008333333333333
MultinomialNB Recall:  0.7
MultinomialNB f1_score:  0.6300340136054422
Confussion matrix: 
 [[ 7  1  0]
 [ 0 19  0]
 [ 1 10  2]]
              precision    recall  f1-score   support

           0       0.88      0.88      0.88         8
           1       0.63      1.00      0.78        19
           2       1.00      0.15      0.27        13

    accuracy                           0.70        40
   macro avg       0.84      0.68      0.64        40
weighted avg       0.80      0.70      0.63        40



# Save Model And Try to reuse it

## Using Pickle

In [11]:
import joblib as jl
import pickle

In [12]:
with open('../outputModel/modelNaiveBayes', 'wb') as f:
    pickle.dump(clf, f)

In [13]:
with open('../outputModel/modelNaiveBayes', 'rb') as f:
    model = pickle.load(f)

In [14]:
usingModel = model.predict(x_test)


In [15]:
print(accuracy_score(y_test, usingModel))

0.7


# Using Joblib

In [27]:
jl.dump(clf,'../outputModeltitle/modelNaiveBayesJl.pkl')

['../outputModel/modelNaiveBayesJl.joblib']

In [46]:
classifier = jl.load('../outputModel/modelNaiveBayesJl.pkl')

In [60]:
sentence = [['Seorang anak membunuh kakanya sendiri karna tidak di berikan remote TV']]
tf = TfidfVectorizer()
vect = tf.fit_transform(df['stemmed'])
vect.shape

(150, 5600)

In [54]:
usingmodel = classifier.predict(text_tf)

In [55]:
print(usingModel)

['POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF'
 'POSITIF' 'NETRAL' 'POSITIF' 'NETRAL' 'POSITIF' 'POSITIF' 'POSITIF'
 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'NETRAL' 'POSITIF'
 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF'
 'POSITIF' 'POSITIF']


In [56]:
print(len(usingModel))

30
