# Importing Library And Data

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as mpt

In [42]:
df = pd.read_csv('../dataset/data_clean150.csv')
df.shape

(150, 5)

In [4]:
df = df.astype({'Value' : 'category'})
df = df.astype({'stemmed' : 'string'})
df.dtypes

Konten              object
Token               object
stopword            object
stemmed     string[python]
Value             category
dtype: object

# Feature Extraction With TF-IDF

In [30]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer

tf = TfidfVectorizer()
text_tf = tf.fit_transform(df['stemmed'])

In [31]:
x = df['stemmed']
y = df['Value']

# Split Data

In [32]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(text_tf, df['Value'], test_size=0.2, random_state=5)

In [33]:
# tf = TfidfVectorizer()

# ctmTr = tf.fit_transform(x_train) 
# X_test_dtm = tf.transform(x_test)


In [34]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
import sklearn.naive_bayes as nb

clf = nb.MultinomialNB().fit(x_train, y_train)
predicted_clf = clf.predict(x_test)

In [35]:
print("MultinomialNB Accuracy: ", accuracy_score(y_test, predicted_clf))
print("MultinomialNB Precision: ", precision_score(y_test, predicted_clf, average='weighted'))
print("MultinomialNB Recall: ", recall_score(y_test, predicted_clf, average='weighted'))
print("MultinomialNB f1_score: ", f1_score(y_test, predicted_clf, average='weighted'))


print(f"Confussion matrix: \n {confusion_matrix(y_test,predicted_clf)}")
print("===============================================================")
print(classification_report(y_test, predicted_clf, zero_division=0))

MultinomialNB Accuracy:  0.7
MultinomialNB Precision:  0.5358024691358025
MultinomialNB Recall:  0.7
MultinomialNB f1_score:  0.6056737588652482
Confussion matrix: 
 [[ 0  0  7]
 [ 0  2  1]
 [ 0  1 19]]
              precision    recall  f1-score   support

     NEGATIF       0.00      0.00      0.00         7
      NETRAL       0.67      0.67      0.67         3
     POSITIF       0.70      0.95      0.81        20

    accuracy                           0.70        30
   macro avg       0.46      0.54      0.49        30
weighted avg       0.54      0.70      0.61        30



  _warn_prf(average, modifier, msg_start, len(result))


# Save Model And Try to reuse it

## Using Pickle

In [11]:
import joblib as jl
import pickle

In [12]:
with open('../outputModel/modelNaiveBayes', 'wb') as f:
    pickle.dump(clf, f)

In [13]:
with open('../outputModel/modelNaiveBayes', 'rb') as f:
    model = pickle.load(f)

In [14]:
usingModel = model.predict(x_test)


In [15]:
print(accuracy_score(y_test, usingModel))

0.7


# Using Joblib

In [27]:
jl.dump(clf,'../outputModel/modelNaiveBayesJl.joblib')

['../outputModel/modelNaiveBayesJl.joblib']

In [46]:
classifier = jl.load('../outputModel/modelNaiveBayesJl.pkl')

In [60]:
sentence = [['Seorang anak membunuh kakanya sendiri karna tidak di berikan remote TV']]
tf = TfidfVectorizer()
vect = tf.fit_transform(df['stemmed'])
vect.shape

(150, 5600)

In [54]:
usingmodel = classifier.predict(text_tf)

In [55]:
print(usingModel)

['POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF'
 'POSITIF' 'NETRAL' 'POSITIF' 'NETRAL' 'POSITIF' 'POSITIF' 'POSITIF'
 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'NETRAL' 'POSITIF'
 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF' 'POSITIF'
 'POSITIF' 'POSITIF']


In [56]:
print(len(usingModel))

30
