# Ekstraksi Fitur

In [119]:
import pandas as pd

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

In [120]:
# Read the preprocessed dataset; the file is decoded as ISO-8859-1.
data = pd.read_csv(
    'keluhan_processed.csv',
    encoding='ISO-8859-1',
)

# X holds the processed tweet text, y the target label column.
X, y = data['TweetProcessed'], data['Keluhan']

### Vektor dokumen (X1)

In [121]:
# Raw term counts over bigrams only: ngram_range=(2, 2) sets both the minimum
# and maximum n-gram length to 2.
# NOTE(review): the vectorizer is fit on the whole corpus before the
# train/test split further down, so the vocabulary is built from rows that
# later end up in the test set.
# NOTE(review): the name `vectorizer` is rebound by the next two feature
# cells, so this fitted instance is not kept.
vectorizer = CountVectorizer(
    ngram_range=(2, 2),
)
X1 = vectorizer.fit_transform(X)

### Vektor TF-IDF tanpa normalisasi (X2)

In [122]:
# TF-IDF weights over bigrams only, with norm=None so the resulting document
# vectors are not normalized.
# NOTE(review): fit on the whole corpus before the train/test split further
# down, so vocabulary and IDF statistics include rows that later end up in the
# test set.
# NOTE(review): the name `vectorizer` is rebound again by the next feature
# cell, so this fitted instance is not kept.
vectorizer = TfidfVectorizer(
    ngram_range=(2, 2),
    norm=None,
)
X2 = vectorizer.fit_transform(X)

### Vektor TF-IDF dengan normalisasi (X3)

In [123]:
# TF-IDF weights over bigrams only; `norm` is left at its default, which the
# section heading describes as the normalized variant.
# NOTE(review): fit on the whole corpus before the train/test split further
# down, so vocabulary and IDF statistics include rows that later end up in the
# test set.
vectorizer = TfidfVectorizer(
    ngram_range=(2, 2),
)
X3 = vectorizer.fit_transform(X)

# _Modeling_

In [124]:
from joblib import dump, load

from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier

# Hold out 40% of the rows of each feature matrix for evaluation.
# NOTE(review): each representation is split with a different random_state
# (0, 1, 2), so X1, X2 and X3 are trained and scored on different row
# partitions; scores across representations are therefore not measured on the
# same held-out rows. Confirm whether this is intentional.
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1, y, test_size=0.4, random_state=0
)
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2, y, test_size=0.4, random_state=1
)
X3_train, X3_test, y3_train, y3_test = train_test_split(
    X3, y, test_size=0.4, random_state=2
)

## _Decision tree model_

In [125]:
# One decision-tree estimator with a fixed seed; the cells below refit this
# same instance on X1, X2 and X3 in turn.
tree_classifier = DecisionTreeClassifier(
    random_state=0,
)

### Train: X1

In [126]:
# Fit the decision tree on the X1 (bigram count) training split and persist it.
# NOTE(review): assumes the directory model/tree/ already exists — confirm.
tree_classifier.fit(X1_train, y1_train)
dump(tree_classifier, 'model/tree/tree1.joblib')

# Evaluate on the held-out X1 split. scikit-learn metrics are declared as
# metric(y_true, y_pred), so the ground-truth labels are passed first.
prediction = tree_classifier.predict(X1_test)
print('F1 score: ', f1_score(y1_test, prediction))
print('Accuracy: ', accuracy_score(y1_test, prediction))

F1 score:  0.6994652406417112
Accuracy:  0.8133510461640651


### Train: X2

In [127]:
# Refit the decision tree on the X2 (unnormalized TF-IDF) training split and
# persist it.
# NOTE(review): assumes the directory model/tree/ already exists — confirm.
tree_classifier.fit(X2_train, y2_train)
dump(tree_classifier, 'model/tree/tree2.joblib')

# Evaluate on the held-out X2 split. scikit-learn metrics are declared as
# metric(y_true, y_pred), so the ground-truth labels are passed first.
prediction = tree_classifier.predict(X2_test)
print('F1 score: ', f1_score(y2_test, prediction))
print('Accuracy: ', accuracy_score(y2_test, prediction))

F1 score:  0.7233160621761657
Accuracy:  0.8226502822982398


### Train: X3

In [128]:
# Refit the decision tree on the X3 (normalized TF-IDF) training split and
# persist it.
# NOTE(review): assumes the directory model/tree/ already exists — confirm.
tree_classifier.fit(X3_train, y3_train)
dump(tree_classifier, 'model/tree/tree3.joblib')

# Evaluate on the held-out X3 split. scikit-learn metrics are declared as
# metric(y_true, y_pred), so the ground-truth labels are passed first.
prediction = tree_classifier.predict(X3_test)
print('F1 score: ', f1_score(y3_test, prediction))
print('Accuracy: ', accuracy_score(y3_test, prediction))

F1 score:  0.7170809095716552
Accuracy:  0.8223181667220193


## _SVM model_

In [129]:
# One linear SVM with a fixed seed and an iteration cap of 5000; the cells
# below refit this same instance on X1, X2 and X3 in turn.
svm_classifier = LinearSVC(
    random_state=0,
    max_iter=5000,
)

### Train: X1

In [130]:
# Fit the linear SVM on the X1 (bigram count) training split and persist it.
# NOTE(review): assumes the directory model/svm/ already exists — confirm.
svm_classifier.fit(X1_train, y1_train)
dump(svm_classifier, 'model/svm/svm1.joblib')

# Evaluate on the held-out X1 split. scikit-learn metrics are declared as
# metric(y_true, y_pred), so the ground-truth labels are passed first.
prediction = svm_classifier.predict(X1_test)
print('F1 score: ', f1_score(y1_test, prediction))
print('Accuracy: ', accuracy_score(y1_test, prediction))

F1 score:  0.7447033898305085
Accuracy:  0.8399202922617071


### Train: X2

In [131]:
# Refit the linear SVM on the X2 (unnormalized TF-IDF) training split and
# persist it.
# NOTE(review): assumes the directory model/svm/ already exists — confirm.
svm_classifier.fit(X2_train, y2_train)
dump(svm_classifier, 'model/svm/svm2.joblib')

# Evaluate on the held-out X2 split. scikit-learn metrics are declared as
# metric(y_true, y_pred), so the ground-truth labels are passed first.
prediction = svm_classifier.predict(X2_test)
print('F1 score: ', f1_score(y2_test, prediction))
print('Accuracy: ', accuracy_score(y2_test, prediction))

F1 score:  0.7992370052455889
Accuracy:  0.8601793424111591




### Train: X3

In [132]:
# Refit the linear SVM on the X3 (normalized TF-IDF) training split and
# persist it.
# NOTE(review): assumes the directory model/svm/ already exists — confirm.
svm_classifier.fit(X3_train, y3_train)
dump(svm_classifier, 'model/svm/svm3.joblib')

# Evaluate on the held-out X3 split. scikit-learn metrics are declared as
# metric(y_true, y_pred), so the ground-truth labels are passed first.
prediction = svm_classifier.predict(X3_test)
print('F1 score: ', f1_score(y3_test, prediction))
print('Accuracy: ', accuracy_score(y3_test, prediction))

F1 score:  0.7937438905180841
Accuracy:  0.8598472268349385


## _MLP model_

In [133]:
# One multi-layer perceptron with two hidden layers of 50 units each and a
# fixed seed; the cells below refit this same instance on X1, X2 and X3 in turn.
mlp_classifier = MLPClassifier(
    hidden_layer_sizes=(50, 50),
    random_state=0,
)

### Train: X1

In [134]:
# Fit the MLP on the X1 (bigram count) training split and persist it.
# NOTE(review): assumes the directory model/mlp/ already exists — confirm.
mlp_classifier.fit(X1_train, y1_train)
dump(mlp_classifier, 'model/mlp/mlp1.joblib')

# Evaluate on the held-out X1 split. scikit-learn metrics are declared as
# metric(y_true, y_pred), so the ground-truth labels are passed first.
prediction = mlp_classifier.predict(X1_test)
print('F1 score: ', f1_score(y1_test, prediction))
print('Accuracy: ', accuracy_score(y1_test, prediction))

F1 score:  0.7086614173228346
Accuracy:  0.8279641315177682


### Train: X2

In [135]:
# Refit the MLP on the X2 (unnormalized TF-IDF) training split and persist it.
# NOTE(review): assumes the directory model/mlp/ already exists — confirm.
mlp_classifier.fit(X2_train, y2_train)
dump(mlp_classifier, 'model/mlp/mlp2.joblib')

# Evaluate on the held-out X2 split. scikit-learn metrics are declared as
# metric(y_true, y_pred), so the ground-truth labels are passed first.
prediction = mlp_classifier.predict(X2_test)
print('F1 score: ', f1_score(y2_test, prediction))
print('Accuracy: ', accuracy_score(y2_test, prediction))

F1 score:  0.8102658111824015
Accuracy:  0.8625041514447027


### Train: X3

In [137]:
# Refit the MLP on the X3 (normalized TF-IDF) training split and persist it.
# NOTE(review): assumes the directory model/mlp/ already exists — confirm.
mlp_classifier.fit(X3_train, y3_train)
dump(mlp_classifier, 'model/mlp/mlp3.joblib')

# Evaluate on the held-out X3 split. scikit-learn metrics are declared as
# metric(y_true, y_pred), so the ground-truth labels are passed first.
prediction = mlp_classifier.predict(X3_test)
print('F1 score: ', f1_score(y3_test, prediction))
print('Accuracy: ', accuracy_score(y3_test, prediction))

F1 score:  0.7983034872761545
Accuracy:  0.8578545333776154
