In [2]:
import pandas as pd
from joblib import load
from modul import praproses as pps
from modul import stemnstopword as stm
from modul import normalisasi as nrm
from modul import replace_karakter as rk

In [3]:
def praproses(teks):
    """Run one text through the project's preprocessing steps, in order.

    The steps are the helpers imported from ``modul``:
    ``rk.normalisasikarakter`` -> ``pps.case_folding`` -> ``nrm.normalisasi``
    -> ``stm.stemmer_kata`` -> ``stm.stop_word``. Each step receives the
    return value of the previous one.

    NOTE(review): going by their names, the steps are character
    normalisation, case folding, word normalisation, stemming and stop-word
    handling; the helpers themselves are not shown here -- confirm in
    ``modul``.

    Args:
        teks: the text to preprocess.

    Returns:
        Whatever the last step (``stm.stop_word``) returns.
    """
    langkah = (
        rk.normalisasikarakter,
        pps.case_folding,
        nrm.normalisasi,
        stm.stemmer_kata,
        stm.stop_word,
    )
    hasil = teks
    for fungsi in langkah:
        hasil = fungsi(hasil)
    return hasil

def praproses_exe(teks):
    """Preprocess a single text or a list of texts; always return a list.

    Args:
        teks: either a ``list`` of texts or a single text. Only an exact
            ``list`` instance is treated as a collection; anything else is
            handed to ``praproses`` as one item.

    Returns:
        A list with one ``praproses`` result per input item (a one-element
        list when a single text was given).

    Side effects:
        Prints ``list`` when the input is a list.
    """
    # Single item: wrap the result so callers always get a list back.
    if type(teks) is not list:
        return [praproses(teks)]

    print("list")
    return [praproses(satu_teks) for satu_teks in teks]

# Load Model

In [4]:
# Deserialize the three saved objects used later in this notebook:
# `tfidf` is used through .transform(), `svm` and `mnb` through .predict().
# NOTE(security): joblib.load unpickles the file contents, which can execute
# arbitrary code -- only load model files that come from a trusted source.
tfidf = load("model/tfidf.w")
svm = load("model/svm.m")
mnb = load("model/mnb.m")

# Load Data Uji

In [8]:
# Read the test set; the sheet must provide `komentar` and `label` columns.
data_uji = pd.read_excel("komentar/data_uji_100.xlsx")

# Keep the texts and their labels as two parallel Python lists.
komentar = data_uji["komentar"].tolist()
label = data_uji["label"].tolist()

# Preview the first rows of the loaded frame.
data_uji.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,komentar,label
295,407.0,424,siapa blg mslh blg mslah cma ksih sran aja alx...,non spam
296,384.0,250,sllu ajar usaha tahan kualitas costumer neng s...,spam
297,1126.0,103,aasi ane o a uda nenain odu yan sana eui eecay...,spam
298,632.0,16,awal ny ga percaya aku coba nyata hasil cepet ...,spam
299,,4756,film rillis tgl teh fan ily from 3800ft sama k...,non spam


# Melakukan Prediksi terhadap data uji

In [9]:
# `komentar` is a list of texts, so it has to go through praproses_exe, which
# applies praproses to every item; praproses_exe only ever hands praproses one
# item at a time. The result gets its own name so the raw `komentar` list stays
# intact and re-running this cell does not preprocess already-processed text.
komentar_praproses = praproses_exe(komentar)

# Vectorise once and feed the same features to both classifiers.
fitur_uji = tfidf.transform(komentar_praproses)

hasil_predisksi_svm = svm.predict(fitur_uji)
hasil_predisksi_mnb = mnb.predict(fitur_uji)

list


In [6]:
# dictt = {
#     "komentar":komentar_praproses,
#     "label":label
# }
# df = pd.DataFrame.from_dict(dictt)
# df.to_excel("komentar/data_uji_1255_bersih.xlsx")
# df

# Confusion Matrix

In [10]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# Fix the class order deterministically. Iterating a set of strings has no
# guaranteed order (it can change between kernel sessions), and the metric
# cells below unpack the confusion matrix positionally, so a flipped order
# would silently swap tn/fp/fn/tp. Sorting yields "non spam" before "spam",
# which makes "spam" (the last label) the positive class.
labels = sorted(set(label))
y_true = label

for i in labels:
    print(i)

non spam
spam


## 1. Hasil Prediksi MNB

In [11]:
# Evaluate the MNB predictions against the true labels.
y_pred = hasil_predisksi_mnb

# Rows are the true classes and columns the predicted classes, both ordered
# as given in `labels`.
cf = confusion_matrix(y_true, y_pred, labels=labels)
print(cf)

# For two labels the flattened matrix reads (tn, fp, fn, tp), with the second
# entry of `labels` acting as the positive class.
tn, fp, fn, tp = cf.ravel()

precision = tp/(tp+fp)
recall = tp/(tp+fn)
f1 = 2*((precision*recall)/(precision+recall))

# Accuracy is printed unrounded, like the other metrics here and like the SVM
# cell, so the two models can be compared at the same precision.
print(" accuracy:",accuracy_score(y_true, y_pred))
print("precision:",precision)
print("   recall:",recall)
print("       f1:",f1)

[[115   6]
 [  4 175]]
 accuracy: 0.97
precision: 0.9668508287292817
   recall: 0.9776536312849162
       f1: 0.9722222222222221


## 2. Hasil Prediksi SVM

In [12]:
# Score the SVM predictions against the ground-truth labels.
y_pred = hasil_predisksi_svm

cf = confusion_matrix(y_true, y_pred, labels=labels)
print(cf)

# Two-class matrix laid out as [[tn, fp], [fn, tp]]; unpack it row by row.
(tn, fp), (fn, tp) = cf

precision = tp / (tp + fp)
recall = tp / (tp + fn)
f1 = 2 * ((precision * recall) / (precision + recall))

# Print each metric under the same right-aligned caption as before.
for nama, nilai in (
    (" accuracy:", accuracy_score(y_true, y_pred)),
    ("precision:", precision),
    ("   recall:", recall),
    ("       f1:", f1),
):
    print(nama, nilai)

[[117   4]
 [  4 175]]
 accuracy: 0.9733333333333334
precision: 0.9776536312849162
   recall: 0.9776536312849162
       f1: 0.9776536312849162
