# 5. Data Extracting

In [1]:
import pandas as pd

data_clean = pd.read_csv('Dataset/ulasanapp_3000_hasillabeling.csv')
data_clean = pd.DataFrame(data_clean)

A = data_clean['text_tokens_stemmed']
B = data_clean['Label']

In [2]:
data_clean = data_clean.drop(columns=['score', 'text_clean', 'text_Stopword', 'text_tokens'])
# data_clean.drop(columns='content')

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Handle missing values by filling them with an empty string

A = A.fillna('')

tfid_vectorizer = TfidfVectorizer()

A_fit_tfid = tfid_vectorizer.fit_transform(A)
A_tfid = tfid_vectorizer.transform(A)

In [4]:
print(A.shape)
print(A_tfid.shape)

(548,)
(548, 1852)


In [5]:
A_tfid.toarray()

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.33261949],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

# 6. Penerapan Algoritma

# a. Naive Bayes

In [6]:
from sklearn.naive_bayes import MultinomialNB

nb = MultinomialNB()
nb.fit(A_tfid, B)

In [7]:
B_pred = nb.predict(A_tfid)
data_clean['Label NB'] = B_pred
data_clean = pd.DataFrame(data_clean)
data_clean.to_csv('Dataset/ulasanapp_3000_hasil_sentimen_NB.csv', index=False)

data_clean.head()

Unnamed: 0,content,text_tokens_stemmed,Label,Label NB
0,"Terletak di depan Lapangan Jetayu, menempati b...",letak lapang jetayu tempat bangun tua khas tin...,positif,positif
1,"Baru pertama kemuseum ini, dan dari beberapa m...",museum museum batik yg kunjung koleksi museum ...,positif,positif
2,Koleksi batik lengkap dan bisa hands-on nyoba ...,koleksi batik lengkap handson nyoba batik muda...,positif,positif
3,Akhirnya kesampean buat mampir ke museum ini. ...,sampean mampir museum htm 7 ribu rupiah unjung...,positif,positif
4,"Suka sekali, tiket nya Murmer cm 7000 per org....",suka tiket nya murmer cm 7000 org bs batik trs...,positif,positif


# b. Support Vector Machine (SVM)

In [8]:
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

for c in [0.01, 0.05, 0.25, 0.5, 0.75, 1]:
    svm = LinearSVC(C=c)
    svm.fit(A_tfid, B)
    print ("Accuracy for C=%s: %s" % (c, accuracy_score(B, svm.predict(A_tfid))))

svm = LinearSVC(C = 1)
svm.fit(A_tfid, B)

Accuracy for C=0.01: 0.9708029197080292
Accuracy for C=0.05: 0.9708029197080292
Accuracy for C=0.25: 0.9708029197080292
Accuracy for C=0.5: 0.9817518248175182
Accuracy for C=0.75: 0.9927007299270073
Accuracy for C=1: 0.9927007299270073


In [9]:
B_pred = svm.predict(A_tfid)
data_clean['Label SVM'] = B_pred
data_clean.to_csv('Dataset/ulasanapp_3000_hasil_sentimen_SVM.csv', index=False)
data_clean.head()

Unnamed: 0,content,text_tokens_stemmed,Label,Label NB,Label SVM
0,"Terletak di depan Lapangan Jetayu, menempati b...",letak lapang jetayu tempat bangun tua khas tin...,positif,positif,positif
1,"Baru pertama kemuseum ini, dan dari beberapa m...",museum museum batik yg kunjung koleksi museum ...,positif,positif,positif
2,Koleksi batik lengkap dan bisa hands-on nyoba ...,koleksi batik lengkap handson nyoba batik muda...,positif,positif,positif
3,Akhirnya kesampean buat mampir ke museum ini. ...,sampean mampir museum htm 7 ribu rupiah unjung...,positif,positif,positif
4,"Suka sekali, tiket nya Murmer cm 7000 per org....",suka tiket nya murmer cm 7000 org bs batik trs...,positif,positif,positif


In [10]:
data_clean.to_csv('Dataset/ulasanapp_3000_hasil_sentimen_NB_dan_SVM.csv', index=False)

In [11]:
loaddataclean = pd.read_csv('Dataset/ulasanapp_3000_hasil_sentimen_NB_dan_SVM.csv')
loaddataclean.head()

Unnamed: 0,content,text_tokens_stemmed,Label,Label NB,Label SVM
0,"Terletak di depan Lapangan Jetayu, menempati b...",letak lapang jetayu tempat bangun tua khas tin...,positif,positif,positif
1,"Baru pertama kemuseum ini, dan dari beberapa m...",museum museum batik yg kunjung koleksi museum ...,positif,positif,positif
2,Koleksi batik lengkap dan bisa hands-on nyoba ...,koleksi batik lengkap handson nyoba batik muda...,positif,positif,positif
3,Akhirnya kesampean buat mampir ke museum ini. ...,sampean mampir museum htm 7 ribu rupiah unjung...,positif,positif,positif
4,"Suka sekali, tiket nya Murmer cm 7000 per org....",suka tiket nya murmer cm 7000 org bs batik trs...,positif,positif,positif
