In [3]:
import pandas as pd
from joblib import load
from modul import praproses as pps
from modul import stemnstopword as stm
from modul import normalisasi as nrm
from modul import replace_karakter as rk

In [4]:
def praproses (teks):
    teks = rk.normalisasikarakter(teks)
    teks = pps.case_folding(teks)
    teks = nrm.normalisasi(teks)
    teks = stm.stemmer_kata(teks)
    teks = stm.stop_word(teks)
    return teks

def praproses_exe (teks):
    if type(teks) == list:
        print("list")
        l = list()
        for i in teks:
            l.append(praproses(i))
        return l
    else:
        return [praproses(teks)]

# Load Model

In [5]:
tfidf = load("model/tfidf.w")
svm = load("model/svm.m")
mnb = load("model/mnb.m")

# Load Data Uji

In [6]:
data_uji = pd.read_excel("komentar/data_uji_100.xlsx")

komentar = data_uji.komentar.tolist()
komentar_praproses = data_uji.komentar.tolist()
label = data_uji.label.tolist()

#len(data_uji)
data_uji.head()

Unnamed: 0,Column1,label,komentar
0,0,spam,Sennengg banngget dehh biisaa kenall sammaa pe...
1,1,spam,😘😍Maaf Kakaa_endorsee yaa@DOKTER_TINGGI_LANGSI...
2,2,spam,Bantu like https://www.instagram.com/p/BmdbxaV...
3,3,spam,𝐒𝐞𝐠𝐞𝐫𝐚 𝐑𝐚𝐢𝐡 𝐊𝐞𝐦𝐞𝐧𝐚𝐧𝐠𝐚𝐧 𝐓𝐞𝐫𝐛𝐞𝐬𝐚𝐫 𝐘𝐚𝐧𝐠 𝐌𝐮𝐝𝐚𝐡 𝐔𝐧𝐭...
4,4,spam,Ａｓｓａｌａｍｕａｌａｉｋｕｍ #pënïnġġï_tërpërċäÿä_müräh #pë...


# Melakukan Prediksi terhadap data uji

In [18]:
komentar = praproses_exe(komentar)

hasil_prediksi_svm = svm.predict(tfidf.transform(komentar))
hasil_prediksi_mnb = mnb.predict(tfidf.transform(komentar))

list


In [20]:
 dictt = {
     "prediksi_mnb":hasil_prediksi_mnb,
     "prediksi_svm":hasil_prediksi_svm,
     "komentar":komentar_praproses,
     "label":label
 }

 df = pd.DataFrame.from_dict(dictt)
 df.to_excel("komentar/data_hasil_prediksi.xlsx")
 df

Unnamed: 0,prediksi_mnb,prediksi_svm,komentar,label
0,spam,spam,Sennengg banngget dehh biisaa kenall sammaa pe...,spam
1,spam,spam,😘😍Maaf Kakaa_endorsee yaa@DOKTER_TINGGI_LANGSI...,spam
2,non spam,spam,Bantu like https://www.instagram.com/p/BmdbxaV...,spam
3,spam,spam,𝐒𝐞𝐠𝐞𝐫𝐚 𝐑𝐚𝐢𝐡 𝐊𝐞𝐦𝐞𝐧𝐚𝐧𝐠𝐚𝐧 𝐓𝐞𝐫𝐛𝐞𝐬𝐚𝐫 𝐘𝐚𝐧𝐠 𝐌𝐮𝐝𝐚𝐡 𝐔𝐧𝐭...,spam
4,spam,spam,Ａｓｓａｌａｍｕａｌａｉｋｕｍ #pënïnġġï_tërpërċäÿä_müräh #pë...,spam
5,spam,spam,MINUS & SILINDER turun hanya dalam 1-2 minggu ...,spam
6,spam,spam,Yang butuh pekerjaan untuk daerah jkarta dan s...,spam
7,spam,spam,"PRORMO SPESIAL RAMADAN, KHUSUS BELI 2 UNIT DAP...",spam
8,spam,spam,"Indihome menyediakan 3 paket, diantaranya. 3P ...",spam
9,spam,spam,Hi Everyone .......💗💗💗 Rumah Idaman bukan tida...,spam


# Confusion Matrix

In [7]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

labels = list(reversed(list(set(label))))
y_true = label

for i in labels:
    print(i)

spam
non spam


## 1. Hasil Prediksi MNB

In [22]:
y_pred = hasil_prediksi_mnb

cf = confusion_matrix(y_true, y_pred, labels=labels)
print(cf)

tn, fp, fn, tp = cf.ravel()

precision = tp/(tp+fp)
recall = tp/(tp+fn)
f1 = 2*((precision*recall)/(precision+recall))

print(" accuracy:",round(accuracy_score(y_true, y_pred), 2))
print("precision:",precision)
print("   recall:",recall)
print("       f1:",f1)

[[48  2]
 [ 5 45]]
 accuracy: 0.93
precision: 0.9574468085106383
   recall: 0.9
       f1: 0.9278350515463918


## 2. Hasil Prediksi SVM

In [23]:
y_pred = hasil_prediksi_svm

cf = confusion_matrix(y_true, y_pred, labels=labels)
print(cf)

tn, fp, fn, tp = cf.ravel()

precision = tp/(tp+fp)
recall = tp/(tp+fn)
f1 = 2*((precision*recall)/(precision+recall))

print(" accuracy:",accuracy_score(y_true, y_pred))
print("precision:",precision)
print("   recall:",recall)
print("       f1:",f1)

[[49  1]
 [ 1 49]]
 accuracy: 0.98
precision: 0.98
   recall: 0.98
       f1: 0.98
