In [1]:
import numpy as np
import pandas as pd
import io, os
import nltk, time, re, string, pickle
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from string import punctuation
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory, StopWordRemover, ArrayDictionary
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from sklearn.metrics import confusion_matrix

In [2]:
import ast 
def get_file():
    with open('dataset/normalisasi.txt') as f:
        data_normalisai = f.read()
    normalization_words = ast.literal_eval(data_normalisai)

    with open('dataset/stopwords.txt') as f:
        data_stopwords = f.read()
        stopwords = ast.literal_eval(data_stopwords)

    return normalization_words, stopwords

normalization_words, stopwords = get_file()

In [3]:
def normalisasi(texts):
    finalText = []
    splitted_text = texts.split()
    for text in splitted_text:
        if text in normalization_words:
            finalText.append(normalization_words[text])
        else:
            finalText.append(text)
      
    return " ".join(finalText)

In [4]:
def hapus_stopword(text):
    stopword_factory = stopwords

    sw_dict = ArrayDictionary(stopword_factory)

    temp = StopWordRemover(sw_dict)

    text = temp.remove(text)
    return text

In [5]:
def hapus_duplikasi_kata(text):
    res = []
    text = text.split()
    for i in text:
        if i in res:
            text.remove(i)
        else:
            res.append(i)
    return " ".join(text)

In [6]:
def stemming(text):
    factory = StemmerFactory()
    stemmer = factory.create_stemmer()
    text = stemmer.stem(text)
    return text

In [7]:
def case_folding(text):
    text = text.lower()
    # remove space in front of and at the end text
    text = text.strip()
    # remove space
    text = re.sub(r'\s+', ' ', text)
    # remove number
    text = re.sub(r"\d+", " ", text)
    # remove punctuation
    for i in text:
        if i in list(string.punctuation):
            text = text.replace(i, " ")

    return text

In [8]:
def preprocessing_data(opinion):
    opinion = case_folding(opinion)
    opinion = normalisasi(opinion)
    opinion = hapus_stopword(opinion)
    opinion = hapus_duplikasi_kata(opinion)
    opinion = stemming(opinion)
    return opinion

In [9]:
train=pd.read_excel("DataKuesioner_Done.xlsx")
train.head()

Unnamed: 0,Aspect,Sentiment,Tempat,Opinion
0,Tempat,Positif,4,Karena tempatnya memadai dan antrian teratur
1,Tempat,Negatif,2,lumayan berdesakan dan tdk menerapkan social d...
2,Tempat,Positif,3,terkadang terlalu ramai
3,Tempat,Positif,4,Tempat bersih dan luas
4,Tempat,Positif,4,Tempat vaksinasi sangat layak karena pihak pen...


In [10]:
test=pd.read_excel("app/dataset/DatasetMedsosPlus.xlsx")
test.head()

Unnamed: 0,Opinion,Aspect,Sentiment
0,Vaksinasi saat ini sudah mudah ditemukan diman...,Informasi,Positif
1,banyak infonya,Informasi,Positif
2,Karena dapat info vaksin hanya dari kenalan yg...,Informasi,Negatif
3,Jd ak puas km ak akhirnya tny ke temenku dan d...,Informasi,Positif
4,Puas krn informasi udh bnyk beredar d twitter ...,Informasi,Positif


In [11]:
# Save to txt for vocabulary
train_vocab = []
for i,row in train.iterrows():
    preprocessedVocab = preprocessing_data(row['Opinion'])
    train_vocab.append(preprocessedVocab)
file_vocab.write(str(train_vocab))

# Experiments

In [12]:
from sklearn.model_selection import KFold 
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
import lightgbm as lgb
from sklearn.feature_extraction.text import CountVectorizer

In [14]:
# model = pickle.load(open('app/model/model_gridsearch_2/lgbm_sentiment_K.sav','rb'))
# model_aspect = pickle.load(open('app/model/model_gridsearch/lgbm_aspect_K.sav','rb'))

model = pickle.load(open('app/model/model_gridsearch_2/rf_sentiment_K.sav','rb'))
model_aspect = pickle.load(open('app/model/model_gridsearch_2/rf_aspect_K_2.sav','rb'))

# model = pickle.load(open('app/model/model_gridsearch_2/svm_sentiment_K.sav','rb'))
# model_aspect = pickle.load(open('app/model/model_gridsearch_2/svm_aspect_K.sav','rb'))

# model = pickle.load(open('app/model/model_baru/nb_sentiment_K.sav','rb'))
# model_aspect = pickle.load(open('app/model/model_baru/nb_aspect_K.sav','rb'))

In [14]:
count_vect = CountVectorizer(lowercase='false')
count_vect.fit(train_vocab)

CountVectorizer(lowercase='false')

### Note: 

Stem = stemming
Case = case_folding
N = Normalization / normalisasi
Stop = Stopword Removal / hapus_stopword
D = Duplicate Words Removal / hapus_duplikasi_kata

#  All Prep / Stem + Case + N + Stop + D 

In [15]:
X_train_processed=[]
X_test_processed=[]
y_sentiment_train =[]
y_aspect_train =[]
y_sentiment_test=[]
y_aspect_test=[]

for index, row in train.iterrows():
    processedText = preprocessing_data(str(row['Opinion']))
    X_train_processed.append(processedText)
    y_sentiment_train.append(row['Sentiment'])
    y_aspect_train.append(row['Aspect'])

for index, row in test.iterrows():
    processedText = preprocessing_data(str(row['Opinion']))
    X_test_processed.append(processedText)
    y_sentiment_test.append(row['Sentiment'])
    y_aspect_test.append(row['Aspect'])
    
    
X_train_vect = count_vect.transform(X_train_processed)
X_test_vect = count_vect.transform(X_test_processed)

X_train_vect = X_train_vect.toarray().astype(float)
X_train_vect = np.array(X_train_vect, dtype='float64')

X_test_vect = X_test_vect.toarray().astype(float)
X_test_vect = np.array(X_test_vect, dtype='float64')

model.fit(X_train_vect,y_sentiment_train)
pred_values = model.predict(X_test_vect)

acc = metrics.accuracy_score(y_sentiment_test, pred_values)
precision = metrics.precision_score(y_sentiment_test, pred_values, average='macro')
recall = metrics.recall_score(y_sentiment_test,pred_values, average='macro')
f1 = metrics.f1_score(y_sentiment_test,pred_values, average='macro')    
    
print("ALL:")
print(":Sentiment:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

model_aspect.fit(X_train_vect,y_aspect_train)
pred_aspect_values = model_aspect.predict(X_test_vect)

acc = metrics.accuracy_score(y_aspect_test, pred_aspect_values)
precision = metrics.precision_score(y_aspect_test, pred_aspect_values, average='macro')
recall = metrics.recall_score(y_aspect_test,pred_aspect_values, average='macro')
f1 = metrics.f1_score(y_aspect_test,pred_aspect_values, average='macro')    
    
print(":Aspect:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

print('\n')
print('\n')
print('\n')
tn, fp, fn, tp = confusion_matrix(y_sentiment_test, pred_values).ravel()
print(tn, fp, fn, tp)
print(confusion_matrix(y_aspect_test,pred_aspect_values))

ALL:
:Sentiment:
Accuracy:  0.8494623655913979
Precision:  0.7930992235477894
Recall:  0.8351457840819543
f1_score:  0.8093301007208293
:Aspect:
Accuracy:  0.8261648745519713
Precision:  0.8344511163860249
Recall:  0.8253296295859801
f1_score:  0.8272740879575263






109 26 58 365
[[101   4   4   5   8]
 [  1  79   1  24   2]
 [  0   0  90   0  10]
 [  1  14   0  82   7]
 [  1   3   1  11 109]]


# Stem

In [16]:
X_train_processed=[]
X_test_processed=[]
y_sentiment_train =[]
y_aspect_train =[]
y_sentiment_test=[]
y_aspect_test=[]

for index, row in train.iterrows():
    processedText = stemming(str(row['Opinion']))
    X_train_processed.append(processedText)
    y_sentiment_train.append(row['Sentiment'])
    y_aspect_train.append(row['Aspect'])

for index, row in test.iterrows():
    processedText = stemming(str(row['Opinion']))
    X_test_processed.append(processedText)
    y_sentiment_test.append(row['Sentiment'])
    y_aspect_test.append(row['Aspect'])
    
    
X_train_vect = count_vect.transform(X_train_processed)
X_test_vect = count_vect.transform(X_test_processed)

X_train_vect = X_train_vect.toarray().astype(float)
X_train_vect = np.array(X_train_vect, dtype='float64')

X_test_vect = X_test_vect.toarray().astype(float)
X_test_vect = np.array(X_test_vect, dtype='float64')


model.fit(X_train_vect,y_sentiment_train)
pred_values = model.predict(X_test_vect)

acc = metrics.accuracy_score(y_sentiment_test, pred_values)
precision = metrics.precision_score(y_sentiment_test, pred_values, average='macro')
recall = metrics.recall_score(y_sentiment_test,pred_values, average='macro')
f1 = metrics.f1_score(y_sentiment_test,pred_values, average='macro')  

print("STEM:")
print(":Sentiment:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

model_aspect.fit(X_train_vect,y_aspect_train)
pred_aspect_values = model_aspect.predict(X_test_vect)

acc = metrics.accuracy_score(y_aspect_test, pred_aspect_values)
precision = metrics.precision_score(y_aspect_test, pred_aspect_values, average='macro')
recall = metrics.recall_score(y_aspect_test,pred_aspect_values, average='macro')
f1 = metrics.f1_score(y_aspect_test,pred_aspect_values, average='macro')    

print(":Aspect:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

print('\n')
print('\n')
print('\n')
tn, fp, fn, tp = confusion_matrix(y_sentiment_test, pred_values).ravel()
print(tn, fp, fn, tp)
print(confusion_matrix(y_aspect_test,pred_aspect_values))

STEM:
:Sentiment:
Accuracy:  0.8422939068100358
Precision:  0.783799120442616
Recall:  0.8152876280535855
f1_score:  0.796812155519142
:Aspect:
Accuracy:  0.8315412186379928
Precision:  0.8381051467349024
Recall:  0.8312619485922381
f1_score:  0.8324726388374108






103 32 56 367
[[103   2   4   6   7]
 [  1  82   1  22   1]
 [  1   0  89   1   9]
 [  1  10   0  84   9]
 [  0   5   5   9 106]]


# Case Folding + N

In [17]:
X_train_processed=[]
X_test_processed=[]
y_sentiment_train =[]
y_aspect_train =[]
y_sentiment_test=[]
y_aspect_test=[]

for index, row in train.iterrows():
    processedText = case_folding(str(row['Opinion']))
    processedText = normalisasi(processedText)
    X_train_processed.append(processedText)
    y_sentiment_train.append(row['Sentiment'])
    y_aspect_train.append(row['Aspect'])

for index, row in test.iterrows():
    processedText = case_folding(str(row['Opinion']))
    processedText = normalisasi(processedText)
    X_test_processed.append(processedText)
    y_sentiment_test.append(row['Sentiment'])
    y_aspect_test.append(row['Aspect'])
    
    
X_train_vect = count_vect.transform(X_train_processed)
X_test_vect = count_vect.transform(X_test_processed)


X_train_vect = X_train_vect.toarray().astype(float)
X_train_vect = np.array(X_train_vect, dtype='float64')

X_test_vect = X_test_vect.toarray().astype(float)
X_test_vect = np.array(X_test_vect, dtype='float64')


model.fit(X_train_vect,y_sentiment_train)
pred_values = model.predict(X_test_vect)

acc = metrics.accuracy_score(y_sentiment_test, pred_values)
precision = metrics.precision_score(y_sentiment_test, pred_values, average='macro')
recall = metrics.recall_score(y_sentiment_test,pred_values, average='macro')
f1 = metrics.f1_score(y_sentiment_test,pred_values, average='macro')         
    
print("CF+N:")
print(":Sentiment:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

model_aspect.fit(X_train_vect,y_aspect_train)
pred_aspect_values = model_aspect.predict(X_test_vect)

acc = metrics.accuracy_score(y_aspect_test, pred_aspect_values)
precision = metrics.precision_score(y_aspect_test, pred_aspect_values, average='macro')
recall = metrics.recall_score(y_aspect_test,pred_aspect_values, average='macro')
f1 = metrics.f1_score(y_aspect_test,pred_aspect_values, average='macro')       
    
print(":Aspect:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

print('\n')
print('\n')
print('\n')
tn, fp, fn, tp = confusion_matrix(y_sentiment_test, pred_values).ravel()
print(tn, fp, fn, tp)
print(confusion_matrix(y_aspect_test,pred_aspect_values))

CF+N:
:Sentiment:
Accuracy:  0.8207885304659498
Precision:  0.7574123989218329
Recall:  0.785973207249803
f1_score:  0.7691047221808431
:Aspect:
Accuracy:  0.8118279569892473
Precision:  0.817004011102382
Recall:  0.8110193609975133
f1_score:  0.8117342984450675






97 38 62 361
[[ 98   6   6   4   8]
 [  2  78   0  21   6]
 [  0   0  88   0  12]
 [  2  12   3  81   6]
 [  0   6   4   7 108]]


#  Stop + D

In [18]:
X_train_processed=[]
X_test_processed=[]
y_sentiment_train =[]
y_aspect_train =[]
y_sentiment_test=[]
y_aspect_test=[]

for index, row in train.iterrows():
    processedText = hapus_stopword(str(row['Opinion']))
    processedText = hapus_duplikasi_kata(processedText)
    X_train_processed.append(processedText)
    y_sentiment_train.append(row['Sentiment'])
    y_aspect_train.append(row['Aspect'])

for index, row in test.iterrows():
    processedText = hapus_stopword(str(row['Opinion']))
    processedText = hapus_duplikasi_kata(processedText)
    X_test_processed.append(processedText)
    y_sentiment_test.append(row['Sentiment'])
    y_aspect_test.append(row['Aspect'])
    
    
X_train_vect = count_vect.transform(X_train_processed)
X_test_vect = count_vect.transform(X_test_processed)


X_train_vect = X_train_vect.toarray().astype(float)
X_train_vect = np.array(X_train_vect, dtype='float64')

X_test_vect = X_test_vect.toarray().astype(float)
X_test_vect = np.array(X_test_vect, dtype='float64')


model.fit(X_train_vect,y_sentiment_train)
pred_values = model.predict(X_test_vect)

acc = metrics.accuracy_score(y_sentiment_test, pred_values)
precision = metrics.precision_score(y_sentiment_test, pred_values, average='macro')
recall = metrics.recall_score(y_sentiment_test,pred_values, average='macro')
f1 = metrics.f1_score(y_sentiment_test,pred_values, average='macro')    
    
print("SW+D:")
print(":Sentiment:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

model_aspect.fit(X_train_vect,y_aspect_train)
pred_aspect_values = model_aspect.predict(X_test_vect)

acc = metrics.accuracy_score(y_aspect_test, pred_aspect_values)
precision = metrics.precision_score(y_aspect_test, pred_aspect_values, average='macro')
recall = metrics.recall_score(y_aspect_test,pred_aspect_values, average='macro')
f1 = metrics.f1_score(y_aspect_test,pred_aspect_values, average='macro')    
    
print(":Aspect:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

print('\n')
print('\n')
print('\n')
tn, fp, fn, tp = confusion_matrix(y_sentiment_test, pred_values).ravel()
print(tn, fp, fn, tp)
print(confusion_matrix(y_aspect_test,pred_aspect_values))

SW+D:
:Sentiment:
Accuracy:  0.8082437275985663
Precision:  0.7402941176470588
Recall:  0.7575256107171
f1_score:  0.7479004370131104
:Aspect:
Accuracy:  0.7885304659498208
Precision:  0.7918037233037233
Recall:  0.7858826649067188
f1_score:  0.7875002029255362






89 46 61 362
[[100   5   8   2   7]
 [  2  78   2  18   7]
 [  3   0  81   0  16]
 [  3  12   4  76   9]
 [  3   5   4   8 105]]


# Stem + Case + N

In [31]:
X_train_processed=[]
X_test_processed=[]
y_sentiment_train =[]
y_aspect_train =[]
y_sentiment_test=[]
y_aspect_test=[]

for index, row in train.iterrows():
    processedText = stemming(str(row['Opinion']))
    processedText = case_folding(processedText)
    processedText = normalisasi(processedText)
    X_train_processed.append(processedText)
    y_sentiment_train.append(row['Sentiment'])
    y_aspect_train.append(row['Aspect'])

for index, row in test.iterrows():
    processedText = stemming(str(row['Opinion']))
    processedText = case_folding(processedText)
    processedText = normalisasi(processedText)
    X_test_processed.append(processedText)
    y_sentiment_test.append(row['Sentiment'])
    y_aspect_test.append(row['Aspect'])
    
    
X_train_vect = count_vect.transform(X_train_processed)
X_test_vect = count_vect.transform(X_test_processed)

X_train_vect = X_train_vect.toarray().astype(float)
X_train_vect = np.array(X_train_vect, dtype='float64')

X_test_vect = X_test_vect.toarray().astype(float)
X_test_vect = np.array(X_test_vect, dtype='float64')

model.fit(X_train_vect,y_sentiment_train)
pred_values = model.predict(X_test_vect)

acc = metrics.accuracy_score(y_sentiment_test, pred_values)
precision = metrics.precision_score(y_sentiment_test, pred_values, average='macro')
recall = metrics.recall_score(y_sentiment_test,pred_values, average='macro')
f1 = metrics.f1_score(y_sentiment_test,pred_values, average='macro')     
    
print("S+CF+N:")
print(":Sentiment:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

model_aspect.fit(X_train_vect,y_aspect_train)
pred_aspect_values = model_aspect.predict(X_test_vect)

acc = metrics.accuracy_score(y_aspect_test, pred_aspect_values)
precision = metrics.precision_score(y_aspect_test, pred_aspect_values, average='macro')
recall = metrics.recall_score(y_aspect_test,pred_aspect_values, average='macro')
f1 = metrics.f1_score(y_aspect_test,pred_aspect_values, average='macro')    
    
print(":Aspect:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

print('\n')
print('\n')
print('\n')
tn, fp, fn, tp = confusion_matrix(y_sentiment_test, pred_values).ravel()
print(tn, fp, fn, tp)
print(confusion_matrix(y_aspect_test,pred_aspect_values))

S+CF+N:
:Sentiment:
Accuracy:  0.8530465949820788
Precision:  0.7971111283684087
Recall:  0.8349881796690308
f1_score:  0.8122938579937973
:Aspect:
Accuracy:  0.8297491039426523
Precision:  0.8286510560146925
Recall:  0.828492953530306
f1_score:  0.8278667631062687






108 27 55 368
[[104   4   4   7   3]
 [  1  91   1  14   0]
 [  7   0  88   1   4]
 [  2  16   4  74   8]
 [  3   9   3   4 106]]


# Stem + Stop + D

In [20]:
X_train_processed=[]
X_test_processed=[]
y_sentiment_train =[]
y_aspect_train =[]
y_sentiment_test=[]
y_aspect_test=[]

for index, row in train.iterrows():
    processedText = stemming(str(row['Opinion']))
    processedText = hapus_stopword(processedText)
    processedText = hapus_duplikasi_kata(processedText)
    X_train_processed.append(processedText)
    y_sentiment_train.append(row['Sentiment'])
    y_aspect_train.append(row['Aspect'])

for index, row in test.iterrows():
    processedText = stemming(str(row['Opinion']))
    processedText = hapus_stopword(processedText)
    processedText = hapus_duplikasi_kata(processedText)
    X_test_processed.append(processedText)
    y_sentiment_test.append(row['Sentiment'])
    y_aspect_test.append(row['Aspect'])
    
    
X_train_vect = count_vect.transform(X_train_processed)
X_test_vect = count_vect.transform(X_test_processed)

X_train_vect = X_train_vect.toarray().astype(float)
X_train_vect = np.array(X_train_vect, dtype='float64')

X_test_vect = X_test_vect.toarray().astype(float)
X_test_vect = np.array(X_test_vect, dtype='float64')

model.fit(X_train_vect,y_sentiment_train)
pred_values = model.predict(X_test_vect)

acc = metrics.accuracy_score(y_sentiment_test, pred_values)
precision = metrics.precision_score(y_sentiment_test, pred_values, average='macro')
recall = metrics.recall_score(y_sentiment_test,pred_values, average='macro')
f1 = metrics.f1_score(y_sentiment_test,pred_values, average='macro')        
    
print("STEM+SW+D:")
print(":Sentiment:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

model_aspect.fit(X_train_vect,y_aspect_train)
pred_aspect_values = model_aspect.predict(X_test_vect)

acc = metrics.accuracy_score(y_aspect_test, pred_aspect_values)
precision = metrics.precision_score(y_aspect_test, pred_aspect_values, average='macro')
recall = metrics.recall_score(y_aspect_test,pred_aspect_values, average='macro')
f1 = metrics.f1_score(y_aspect_test,pred_aspect_values, average='macro')      
    
print(":Aspect:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

print('\n')
print('\n')
print('\n')
tn, fp, fn, tp = confusion_matrix(y_sentiment_test, pred_values).ravel()
print(tn, fp, fn, tp)
print(confusion_matrix(y_aspect_test,pred_aspect_values))

STEM+SW+D:
:Sentiment:
Accuracy:  0.8440860215053764
Precision:  0.7859493670886076
Recall:  0.8164696611505122
f1_score:  0.7986804291300869
:Aspect:
Accuracy:  0.8082437275985663
Precision:  0.8177699152875559
Recall:  0.8061738742030148
f1_score:  0.8093068939721049






103 32 55 368
[[ 99   3   5   5  10]
 [  2  81   1  20   3]
 [  1   0  85   1  13]
 [  3  13   0  77  11]
 [  0   3   2  11 109]]


#  Case + N + Stop + D

In [21]:
X_train_processed=[]
X_test_processed=[]
y_sentiment_train =[]
y_aspect_train =[]
y_sentiment_test=[]
y_aspect_test=[]

for index, row in train.iterrows():
    processedText = case_folding(str(row['Opinion']))
    processedText = normalisasi(processedText)
    processedText = hapus_stopword(processedText)
    processedText = hapus_duplikasi_kata(processedText)
    X_train_processed.append(processedText)
    y_sentiment_train.append(row['Sentiment'])
    y_aspect_train.append(row['Aspect'])

for index, row in test.iterrows():
    processedText = case_folding(str(row['Opinion']))
    processedText = normalisasi(processedText)
    processedText = hapus_stopword(processedText)
    processedText = hapus_duplikasi_kata(processedText)
    X_test_processed.append(processedText)
    y_sentiment_test.append(row['Sentiment'])
    y_aspect_test.append(row['Aspect'])
    
    
X_train_vect = count_vect.transform(X_train_processed)
X_test_vect = count_vect.transform(X_test_processed)


X_train_vect = X_train_vect.toarray().astype(float)
X_train_vect = np.array(X_train_vect, dtype='float64')

X_test_vect = X_test_vect.toarray().astype(float)
X_test_vect = np.array(X_test_vect, dtype='float64')

model.fit(X_train_vect,y_sentiment_train)
pred_values = model.predict(X_test_vect)

acc = metrics.accuracy_score(y_sentiment_test, pred_values)
precision = metrics.precision_score(y_sentiment_test, pred_values, average='macro')
recall = metrics.recall_score(y_sentiment_test,pred_values, average='macro')
f1 = metrics.f1_score(y_sentiment_test,pred_values, average='macro')      
    
print("CF+N+SW+D:")
print(":Sentiment:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

model_aspect.fit(X_train_vect,y_aspect_train)
pred_aspect_values = model_aspect.predict(X_test_vect)

acc = metrics.accuracy_score(y_aspect_test, pred_aspect_values)
precision = metrics.precision_score(y_aspect_test, pred_aspect_values, average='macro')
recall = metrics.recall_score(y_aspect_test,pred_aspect_values, average='macro')
f1 = metrics.f1_score(y_aspect_test,pred_aspect_values, average='macro')  
    
print(":Aspect:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

print('\n')
print('\n')
print('\n')
tn, fp, fn, tp = confusion_matrix(y_sentiment_test, pred_values).ravel()
print(tn, fp, fn, tp)
print(confusion_matrix(y_aspect_test,pred_aspect_values))

CF+N+SW+D:
:Sentiment:
Accuracy:  0.8243727598566308
Precision:  0.7655144801815027
Recall:  0.8135539795114264
f1_score:  0.7820034443168771
:Aspect:
Accuracy:  0.7831541218637993
Precision:  0.7904221849966819
Recall:  0.7814177393312983
f1_score:  0.7832823984861899






107 28 70 353
[[ 96   6   7   3  10]
 [  1  76   0  23   7]
 [  0   1  86   0  13]
 [  2  14   3  73  12]
 [  3   4   4   8 106]]


# Normalisasi

In [22]:
X_train_processed=[]
X_test_processed=[]
y_sentiment_train =[]
y_aspect_train =[]
y_sentiment_test=[]
y_aspect_test=[]

for index, row in train.iterrows():
    processedText = normalisasi(str(row['Opinion']))
    X_train_processed.append(processedText)
    y_sentiment_train.append(row['Sentiment'])
    y_aspect_train.append(row['Aspect'])

for index, row in test.iterrows():
    processedText = normalisasi(str(row['Opinion']))
    X_test_processed.append(processedText)
    y_sentiment_test.append(row['Sentiment'])
    y_aspect_test.append(row['Aspect'])
    
    
X_train_vect = count_vect.transform(X_train_processed)
X_test_vect = count_vect.transform(X_test_processed)

X_train_vect = X_train_vect.toarray().astype(float)
X_train_vect = np.array(X_train_vect, dtype='float64')

X_test_vect = X_test_vect.toarray().astype(float)
X_test_vect = np.array(X_test_vect, dtype='float64')


model.fit(X_train_vect,y_sentiment_train)
pred_values = model.predict(X_test_vect)

acc = metrics.accuracy_score(y_sentiment_test, pred_values)
precision = metrics.precision_score(y_sentiment_test, pred_values, average='macro')
recall = metrics.recall_score(y_sentiment_test,pred_values, average='macro')
f1 = metrics.f1_score(y_sentiment_test,pred_values, average='macro')  

print("N:")
print(":Sentiment:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

model_aspect.fit(X_train_vect,y_aspect_train)
pred_aspect_values = model_aspect.predict(X_test_vect)

acc = metrics.accuracy_score(y_aspect_test, pred_aspect_values)
precision = metrics.precision_score(y_aspect_test, pred_aspect_values, average='macro')
recall = metrics.recall_score(y_aspect_test,pred_aspect_values, average='macro')
f1 = metrics.f1_score(y_aspect_test,pred_aspect_values, average='macro')     
    
print(":Aspect:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

N:
:Sentiment:
Accuracy:  0.8154121863799283
Precision:  0.7518261730840659
Recall:  0.7849487785657998
f1_score:  0.7647237016083805
:Aspect:
Accuracy:  0.8010752688172043
Precision:  0.8065570448132384
Recall:  0.8002118843619993
f1_score:  0.800706772155048


# Case Folding

In [23]:
X_train_processed=[]
X_test_processed=[]
y_sentiment_train =[]
y_aspect_train =[]
y_sentiment_test=[]
y_aspect_test=[]

for index, row in train.iterrows():
    processedText = case_folding(str(row['Opinion']))
    X_train_processed.append(processedText)
    y_sentiment_train.append(row['Sentiment'])
    y_aspect_train.append(row['Aspect'])

for index, row in test.iterrows():
    processedText = case_folding(str(row['Opinion']))
    X_test_processed.append(processedText)
    y_sentiment_test.append(row['Sentiment'])
    y_aspect_test.append(row['Aspect'])
    
    
X_train_vect = count_vect.transform(X_train_processed)
X_test_vect = count_vect.transform(X_test_processed)

X_train_vect = X_train_vect.toarray().astype(float)
X_train_vect = np.array(X_train_vect, dtype='float64')

X_test_vect = X_test_vect.toarray().astype(float)
X_test_vect = np.array(X_test_vect, dtype='float64')


model.fit(X_train_vect,y_sentiment_train)
pred_values = model.predict(X_test_vect)

acc = metrics.accuracy_score(y_sentiment_test, pred_values)
precision = metrics.precision_score(y_sentiment_test, pred_values, average='macro')
recall = metrics.recall_score(y_sentiment_test,pred_values, average='macro')
f1 = metrics.f1_score(y_sentiment_test,pred_values, average='macro')    
    
print("CF:")
print(":Sentiment:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

model_aspect.fit(X_train_vect,y_aspect_train)
pred_aspect_values = model_aspect.predict(X_test_vect)

acc = metrics.accuracy_score(y_aspect_test, pred_aspect_values)
precision = metrics.precision_score(y_aspect_test, pred_aspect_values, average='macro')
recall = metrics.recall_score(y_aspect_test,pred_aspect_values, average='macro')
f1 = metrics.f1_score(y_aspect_test,pred_aspect_values, average='macro')    
    
print(":Aspect:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

CF:
:Sentiment:
Accuracy:  0.8010752688172043
Precision:  0.7324913892078071
Recall:  0.7553191489361701
f1_score:  0.7420056232427366
:Aspect:
Accuracy:  0.7939068100358423
Precision:  0.7977552353159771
Recall:  0.7924580382081532
f1_score:  0.7927687753592786


# Stopwords

In [24]:
X_train_processed=[]
X_test_processed=[]
y_sentiment_train =[]
y_aspect_train =[]
y_sentiment_test=[]
y_aspect_test=[]

for index, row in train.iterrows():
    processedText = hapus_stopword(str(row['Opinion']))
    X_train_processed.append(processedText)
    y_sentiment_train.append(row['Sentiment'])
    y_aspect_train.append(row['Aspect'])

for index, row in test.iterrows():
    processedText = hapus_stopword(str(row['Opinion']))
    X_test_processed.append(processedText)
    y_sentiment_test.append(row['Sentiment'])
    y_aspect_test.append(row['Aspect'])
    
    
X_train_vect = count_vect.transform(X_train_processed)
X_test_vect = count_vect.transform(X_test_processed)

X_train_vect = X_train_vect.toarray().astype(float)
X_train_vect = np.array(X_train_vect, dtype='float64')

X_test_vect = X_test_vect.toarray().astype(float)
X_test_vect = np.array(X_test_vect, dtype='float64')


model.fit(X_train_vect,y_sentiment_train)
pred_values = model.predict(X_test_vect)

acc = metrics.accuracy_score(y_sentiment_test, pred_values)
precision = metrics.precision_score(y_sentiment_test, pred_values, average='macro')
recall = metrics.recall_score(y_sentiment_test,pred_values, average='macro')
f1 = metrics.f1_score(y_sentiment_test,pred_values, average='macro')   
    
print("SW:")
print(":Sentiment:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

model_aspect.fit(X_train_vect,y_aspect_train)
pred_aspect_values = model_aspect.predict(X_test_vect)

acc = metrics.accuracy_score(y_aspect_test, pred_aspect_values)
precision = metrics.precision_score(y_aspect_test, pred_aspect_values, average='macro')
recall = metrics.recall_score(y_aspect_test,pred_aspect_values, average='macro')
f1 = metrics.f1_score(y_aspect_test,pred_aspect_values, average='macro')    
    
print(":Aspect:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

SW:
:Sentiment:
Accuracy:  0.8064516129032258
Precision:  0.7393776704734978
Recall:  0.7639085894405043
f1_score:  0.7495345125681607
:Aspect:
Accuracy:  0.7939068100358423
Precision:  0.7969810625778871
Recall:  0.7910844303543859
f1_score:  0.7928041214125927


#  Duplicate

In [25]:
X_train_processed=[]
X_test_processed=[]
y_sentiment_train =[]
y_aspect_train =[]
y_sentiment_test=[]
y_aspect_test=[]

for index, row in train.iterrows():
    processedText = hapus_duplikasi_kata(str(row['Opinion']))
    X_train_processed.append(processedText)
    y_sentiment_train.append(row['Sentiment'])
    y_aspect_train.append(row['Aspect'])

for index, row in test.iterrows():
    processedText = hapus_duplikasi_kata(str(row['Opinion']))
    X_test_processed.append(processedText)
    y_sentiment_test.append(row['Sentiment'])
    y_aspect_test.append(row['Aspect'])
    
    
X_train_vect = count_vect.transform(X_train_processed)
X_test_vect = count_vect.transform(X_test_processed)

X_train_vect = X_train_vect.toarray().astype(float)
X_train_vect = np.array(X_train_vect, dtype='float64')

X_test_vect = X_test_vect.toarray().astype(float)
X_test_vect = np.array(X_test_vect, dtype='float64')


model.fit(X_train_vect,y_sentiment_train)
pred_values = model.predict(X_test_vect)

acc = metrics.accuracy_score(y_sentiment_test, pred_values)
precision = metrics.precision_score(y_sentiment_test, pred_values, average='macro')
recall = metrics.recall_score(y_sentiment_test,pred_values, average='macro')
f1 = metrics.f1_score(y_sentiment_test,pred_values, average='macro')    
    
print("D:")
print(":Sentiment:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

model_aspect.fit(X_train_vect,y_aspect_train)
pred_aspect_values = model_aspect.predict(X_test_vect)

acc = metrics.accuracy_score(y_aspect_test, pred_aspect_values)
precision = metrics.precision_score(y_aspect_test, pred_aspect_values, average='macro')
recall = metrics.recall_score(y_aspect_test,pred_aspect_values, average='macro')
f1 = metrics.f1_score(y_aspect_test,pred_aspect_values, average='macro')   
    
print(":Aspect:")
print("Accuracy: ", acc)
print("Precision: ", precision)
print("Recall: ", recall)
print("f1_score: ", f1)

D:
:Sentiment:
Accuracy:  0.8154121863799283
Precision:  0.7506329113924051
Recall:  0.7773837667454688
f1_score:  0.7616561402344706
:Aspect:
Accuracy:  0.7939068100358423
Precision:  0.7962484815065283
Recall:  0.7918512286242942
f1_score:  0.7921259533525017
