In [15]:
import csv
import pandas as pd 

from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

#-------------------------------------------------------------
#Reading file from \Tweet.csv\
def baca_file():
    """Read ``Tweet.csv`` and return its non-empty rows.

    The file is parsed as semicolon-delimited CSV and decoded as latin-1
    (the original author switched to latin-1 after UTF-8 decoding failed).

    Returns:
        list[list[str]]: one list of column strings per non-empty row, in
        file order.

    Raises:
        OSError: if ``Tweet.csv`` cannot be opened.
    """
    csvF1 = 'Tweet.csv'

    # newline='' lets the csv module interpret line endings itself, as the
    # csv documentation recommends.  The with-block closes the file, so no
    # explicit close() is needed.
    with open(csvF1, "r", encoding='latin1', newline='') as rCsv:
        readCsv = csv.reader(rCsv, delimiter=';')
        # csv.reader yields an empty list for a blank line; drop those.
        return [row for row in readCsv if row]

    #--------------------------------------------------------------
    #Procedure for displaying the result to the console
def tampil_csv(f2):
    """Print ``f2`` to the console rendered as a pandas DataFrame.

    Args:
        f2: data accepted by ``pd.DataFrame`` (e.g. a list of rows).
    """
    print(pd.DataFrame(f2))

    #--------------------------------------------------------------  
    #Function that removes stop words from each text (no stemming is done, despite the name) and returns the feature and target lists
def stemmingFile(fCsv):
    """Strip stop words from each row's text and encode each row's label.

    Despite the name, no stemming is performed: the text in column 0 is
    passed through Sastrawi's stop-word remover only.

    Args:
        fCsv: iterable of rows; ``row[0]`` is the text and ``row[1]`` the
            label string.

    Returns:
        tuple: ``(texts, codes)`` where ``texts`` holds the cleaned text of
        every row and ``codes`` holds ``'1'`` for ``'Keluhan'``, ``'2'`` for
        ``'Respon'`` and ``'3'`` for any other label.
    """
    remover = StopWordRemoverFactory().create_stop_word_remover()

    # Known labels and their codes; everything else falls back to '3'.
    kode_label = {'Keluhan': '1', 'Respon': '2'}

    teks_bersih = []
    kode_target = []
    for baris in fCsv:
        teks_bersih.append(remover.remove(baris[0]))
        kode_target.append(kode_label.get(baris[1], '3'))

    return (teks_bersih, kode_target)

    #-------------------------------------------------------------
    #Functions that train and evaluate a classifier on the samples read from Tweet.csv
def _fit_and_report(title, classifier, lRead, rRead):
    """Train a TF-IDF + ``classifier`` pipeline and print hold-out metrics.

    Shared implementation of the three public training functions below,
    which previously duplicated this body verbatim.

    Args:
        title: heading printed before the metrics.
        classifier: unfitted scikit-learn estimator used as the final step.
        lRead: sequence of text samples (features).
        rRead: sequence of target labels, aligned with ``lRead``.

    Returns:
        The fitted ``Pipeline`` (TF-IDF vectorizer followed by ``classifier``).
    """
    # Hold out 20% of the samples for validation; the fixed seed makes the
    # split reproducible between runs and identical for every model.
    validation_size = 0.20
    seed = 7
    X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
        lRead, rRead, test_size=validation_size, random_state=seed)

    print(title)
    model = Pipeline([
        # TF-IDF turns the raw text into a numeric feature vector.
        ('xPipe', TfidfVectorizer()),
        # The final step is named 'knn' for every estimator; the name is
        # kept so existing lookups such as named_steps['knn'] still work.
        ('knn', classifier),
    ])

    model.fit(X_train, Y_train)
    predictions = model.predict(X_validation)

    print('Akurasi = ', accuracy_score(Y_validation, predictions))
    print('Matrix Confussion')
    print(confusion_matrix(Y_validation, predictions))
    print(classification_report(Y_validation, predictions))

    return model


def classiLogRegressi(lRead, rRead):
    """Train and evaluate a TF-IDF + logistic-regression pipeline.

    Prints accuracy, confusion matrix and classification report for a 20%
    validation split and returns the fitted pipeline.
    """
    return _fit_and_report('Logistic Regresion', LogisticRegression(), lRead, rRead)


def classKNeighborsClassifier(lRead, rRead):
    """Train and evaluate a TF-IDF + k-nearest-neighbours pipeline.

    Prints accuracy, confusion matrix and classification report for a 20%
    validation split and returns the fitted pipeline.
    """
    return _fit_and_report('K-Neighborhood ', KNeighborsClassifier(), lRead, rRead)


def classDecisionTree(lRead, rRead):
    """Train and evaluate a TF-IDF + decision-tree pipeline.

    Prints accuracy, confusion matrix and classification report for a 20%
    validation split and returns the fitted pipeline.
    """
    return _fit_and_report('Decision Tree', DecisionTreeClassifier(), lRead, rRead)


    #----------------------------------------------------------------
def singleTextLogisticRegression(xText, mknn):
    """Predict the label of a single text with the given model.

    Args:
        xText: the text to classify.
        mknn: object exposing ``predict``; it is called with ``[xText]``.

    Returns:
        Whatever ``mknn.predict`` returns for the one-element batch.
    """
    # predict() works on batches, so wrap the text in a one-item list.
    return mknn.predict([xText])

    #----------------------------------------------------------------

def singleTextKNeighbor(xText, cKboar):
    """Predict the label of a single text with the given model.

    Args:
        xText: the text to classify.
        cKboar: object exposing ``predict``; it is called with ``[xText]``.

    Returns:
        Whatever ``cKboar.predict`` returns for the one-element batch.
    """
    # predict() works on batches, so wrap the text in a one-item list.
    return cKboar.predict([xText])

    #-----------------------------------------------------------------

def singleTextDecisionTree(xText, dTree):
    """Predict the label of a single text with the given model.

    Args:
        xText: the text to classify.
        dTree: object exposing ``predict``; it is called with ``[xText]``.

    Returns:
        Whatever ``dTree.predict`` returns for the one-element batch.
    """
    # predict() works on batches, so wrap the text in a one-item list.
    return dTree.predict([xText])

    #-----------------------------------------------------------------
def singleTextNaiveBayes(xText, mBayes):
    """Predict the label of a single text with the given model.

    Args:
        xText: the text to classify.
        mBayes: object exposing ``predict``; it is called with ``[xText]``.

    Returns:
        Whatever ``mBayes.predict`` returns for the one-element batch.
    """
    # predict() works on batches, so wrap the text in a one-item list.
    return mBayes.predict([xText])
    #-----------------------------------------------------------------

def konversiPrediksi(pre):
    """Convert a predicted label code into its human-readable name.

    Accepts either the bare code (``'1'``) or a container holding exactly
    one code, such as the one-element array returned by ``predict`` or a
    one-element list/tuple.  The container is unwrapped explicitly instead
    of relying on the truth value of an element-wise array comparison.

    Args:
        pre: label code, or a one-element list/tuple/array containing it.

    Returns:
        str: ``'Keluhan'`` for ``'1'``, ``'Respon'`` for ``'2'``, otherwise
        ``'Not Keluhan/Respon'``.

    Raises:
        ValueError: if ``pre`` is a container that does not hold exactly
            one prediction.
    """
    # Array-like objects (e.g. numpy arrays) expose tolist(); convert them
    # to plain Python values so they are handled like lists below.
    if hasattr(pre, 'tolist'):
        pre = pre.tolist()

    if isinstance(pre, (list, tuple)):
        if len(pre) != 1:
            raise ValueError(
                'konversiPrediksi expects exactly one prediction, got %d'
                % len(pre))
        pre = pre[0]

    if pre == '1':
        return 'Keluhan'
    if pre == '2':
        return 'Respon'
    return 'Not Keluhan/Respon'

#-----Program utama----------------------------------------------- 
if __name__ == '__main__': 

    # Read Tweet.csv, strip stop words from the texts and encode the labels.
    # dList holds the cleaned texts, fList the matching label codes.
    dList, fList = stemmingFile(baca_file())

    #---train the three models; each call also prints its validation metrics---

    logRes   = classiLogRegressi(dList, fList)
    Neighbor = classKNeighborsClassifier(dList, fList)
    DesTree  = classDecisionTree(dList, fList)  


    # Ask the user for one tweet text to classify with every trained model.
    testing = input('Masukkan text tweet = ')

    # Prediction of the logistic-regression pipeline for the entered text.
    l = singleTextLogisticRegression(testing, logRes)

    # Prediction of the k-nearest-neighbours pipeline for the entered text.
    k = singleTextKNeighbor(testing, Neighbor)

    # Prediction of the decision-tree pipeline for the entered text.
    t = singleTextDecisionTree(testing, DesTree)


    # Translate each predicted label code back to its name and print it.
    print('Prediksi dengan Logistic Regression = ',konversiPrediksi(l))
    print('Prediksi dengan K-Nearest Neighborhood =',konversiPrediksi(k))
    print('Prediksi dengan Decision Tree = ', konversiPrediksi(t))
#----End of Program-------------------------------------------------

Logistic Regresion
Akurasi =  0.8791500664010624
Matrix Confussion
[[477   1  74]
 [  1 164  19]
 [ 72  15 683]]
              precision    recall  f1-score   support

           1       0.87      0.86      0.87       552
           2       0.91      0.89      0.90       184
           3       0.88      0.89      0.88       770

    accuracy                           0.88      1506
   macro avg       0.89      0.88      0.88      1506
weighted avg       0.88      0.88      0.88      1506

K-Neighborhood 
Akurasi =  0.8247011952191236
Matrix Confussion
[[442  22  88]
 [  1 172  11]
 [116  26 628]]
              precision    recall  f1-score   support

           1       0.79      0.80      0.80       552
           2       0.78      0.93      0.85       184
           3       0.86      0.82      0.84       770

    accuracy                           0.82      1506
   macro avg       0.81      0.85      0.83      1506
weighted avg       0.83      0.82      0.82      1506

Decision Tree
A