## Import Libraries

In [1]:
# Fetch the NLTK resources needed by the preprocessing cell. Each call is a
# no-op when the package is already present locally.
import nltk
# Tokenizer models; presumably what word_tokenize relies on.
nltk.download('punkt')
# Corpus read by stopwords.words('english').
nltk.download('stopwords')
# Lexical database backing WordNetLemmatizer.
nltk.download('wordnet')
# English tagger model; presumably what pos_tag relies on.
nltk.download('averaged_perceptron_tagger_eng')
# Presumably the tokenizer tables required by newer NLTK releases -- TODO confirm.
nltk.download('punkt_tab')
from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
# NOTE(review): TfidfVectorizer and pandas are not referenced in the visible cells.
from sklearn.feature_extraction.text import TfidfVectorizer
import re, string, requests
import pandas as pd
import joblib
# NOTE(review): the next five lines repeat imports already made above. They are
# harmless at runtime but could be collapsed when the cell is next tidied.
import re, string, requests, joblib
from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
import numpy as np

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


## Melakukan Preprocessing

In [2]:
# English stop words, stored as a set so the per-token membership test in
# processing_text is a hash lookup rather than a list scan.
stop_words = set(stopwords.words('english'))

def get_slang_word_list(url, timeout=10):
    """Download a ``key: value`` slang dictionary and return it as a dict.

    The resource at ``url`` is read as text, one entry per line. A line is
    split on its first ``:``; the stripped left part becomes the key and the
    stripped remainder the value. Lines without a ``:`` are ignored. When a
    key occurs more than once, the last occurrence wins.

    Args:
        url: Location of the plain-text slang list.
        timeout: Seconds to wait for the server before giving up. Requests
            applies no timeout by default, so without this a stalled
            connection would block the notebook indefinitely.

    Returns:
        dict mapping each slang term to its replacement text.

    Raises:
        requests.exceptions.RequestException: If the request fails, times out,
            or the server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of parsing an error page into a
    # bogus (or silently empty) dictionary.
    response.raise_for_status()

    slang_dict = {}
    for line in response.text.splitlines():
        key, separator, value = line.partition(':')
        if separator:
            slang_dict[key.strip()] = value.strip()
    return slang_dict

# One lemmatizer instance, reused for every token handled by processing_text.
lemmatizer = WordNetLemmatizer()

# Slang dictionary, fetched once when this cell runs (needs network access).
slang_url = 'https://raw.githubusercontent.com/lhquan244/SlangWord/main/SlangWord_2/SlangWordOriginal.txt'
slangwords = get_slang_word_list(slang_url)

def get_wordnet_pos(tag):
    """Translate a POS tag string into the matching WordNet POS constant.

    The decision is made on the tag's leading letter: ``J`` -> adjective,
    ``V`` -> verb, ``N`` -> noun, ``R`` -> adverb. Any other tag, including
    an empty string, falls back to noun.
    """
    prefix_to_pos = (
        ('J', wordnet.ADJ),
        ('V', wordnet.VERB),
        ('N', wordnet.NOUN),
        ('R', wordnet.ADV),
    )
    for prefix, wordnet_pos in prefix_to_pos:
        if tag.startswith(prefix):
            return wordnet_pos
    return wordnet.NOUN

def processing_text(text):
    """Clean one raw text and return it as a space-joined string of lemmas.

    Steps, in order:
      1. Delete @mentions, #hashtags, ``RT`` markers, URLs, digits and every
         character that is neither a word character nor whitespace.
      2. Turn newlines into spaces, trim, lowercase, then drop whatever
         ``string.punctuation`` characters are left.
      3. Tokenize and POS-tag the result.
      4. Skip tokens found in ``stop_words``; replace the remaining tokens
         with their ``slangwords`` entry when one exists, and lemmatize them
         using the WordNet POS derived from the token's tag.

    NOTE(review): presumably this must stay identical to the preprocessing
    used when the saved models were trained -- confirm before changing any
    step.
    """
    removal_patterns = (
        r'@[A-Za-z0-9_]+',  # @mentions
        r'#[A-Za-z0-9_]+',  # hashtags
        # NOTE(review): no word boundary, so "RT" at the end of a longer
        # uppercase word is removed as well ("SMART phone" -> "SMAphone").
        r'RT[\s]+',
        r'http\S+',         # URLs
        r'\d+',             # digits
        r'[^\w\s]',         # anything that is not a word character or whitespace
    )
    for pattern in removal_patterns:
        text = re.sub(pattern, '', text)

    text = text.replace('\n', ' ').strip().lower()
    # Underscores count as word characters, so they survive the regex pass
    # above; string.punctuation contains '_' and removes them here.
    punctuation_table = str.maketrans('', '', string.punctuation)
    text = text.translate(punctuation_table)

    cleaned_tokens = []
    for word, pos in pos_tag(word_tokenize(text)):
        # The stop-word test is applied to the raw token, before slang expansion.
        if word in stop_words:
            continue
        expanded = slangwords.get(word, word)
        cleaned_tokens.append(lemmatizer.lemmatize(expanded, get_wordnet_pos(pos)))
    return ' '.join(cleaned_tokens)

## Memuat Model

In [3]:
# All artifacts below are read from the current working directory.
# NOTE(security): joblib.load unpickles its input, which can execute arbitrary
# code -- only load .pkl files that come from a trusted source.

# TensorFlow model: Keras network plus the text tokenizer and label encoder
# saved alongside it (presumably the ones fitted at training time).
model_tf = tf.keras.models.load_model('tf_model.h5')
tokenizer = joblib.load('tf_tokenizer.pkl')
label_encoder_tf = joblib.load('tf_label_encoder.pkl')

# Logistic Regression: classifier, its TF-IDF vectorizer and a label encoder.
# NOTE(review): label_encoder_logreg is not used by the visible inference
# cells; the classifier's predictions are printed directly.
logreg_model = joblib.load('logreg_model.pkl')
tfidf_logreg = joblib.load('tfidf_vectorizer.pkl')
label_encoder_logreg = joblib.load('label_encoder.pkl')

# SVM model: classifier and its own TF-IDF vectorizer.
svm_model = joblib.load('svm_model.pkl')
tfidf_svm = joblib.load('svm_vectorizer.pkl')



## Inference Data

In [4]:
# Hand-written sample reviews used to compare the three models.
teks_baru = [
    "I love the clean interface and how easy it is to navigate the app.",
    "It keeps freezing whenever I try to join a call. Super frustrating.",
    "Everything works as expected, nothing special but no major issues either.",
    "I regret installing it. Crashes every time I open it.",
    "Audio quality is poor when using Bluetooth headsets.",
    "I’ve just started using the app, so I can’t say much yet.",
    "Just a regular app, not bad but not amazing either.",
]

# Cleaned counterpart of every sample, in the same order as teks_baru.
teks_baru_bersih = list(map(processing_text, teks_baru))

## Inference per-Model

In [5]:
# TensorFlow: texts -> integer sequences -> fixed-length padded batch -> scores.
seqs_tf = tokenizer.texts_to_sequences(teks_baru_bersih)
# NOTE(review): maxlen=100 and padding='post' presumably mirror the settings
# used when the network was trained -- TODO confirm against the training notebook.
pad_tf = tf.keras.preprocessing.sequence.pad_sequences(seqs_tf, maxlen=100, padding='post')
pred_tf = model_tf.predict(pad_tf)
# Take the index of the highest score in each row and map it back to its label.
label_tf = label_encoder_tf.inverse_transform(np.argmax(pred_tf, axis=1))

# Logistic Regression: vectorize with its own TF-IDF vectorizer, then predict.
# The predictions are used as-is, without label_encoder_logreg.
X_logreg = tfidf_logreg.transform(teks_baru_bersih)
pred_logreg = logreg_model.predict(X_logreg)

# SVM: vectorize with the SVM-specific TF-IDF vectorizer, then predict.
X_svm = tfidf_svm.transform(teks_baru_bersih)
pred_svm = svm_model.predict(X_svm)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step


## Display Result

In [6]:
# Print every original (uncleaned) text followed by the label from each model.
print("=== Inference Comparison ===\n")
separator = "-" * 60
for teks, tf_label, logreg_label, svm_label in zip(teks_baru, label_tf, pred_logreg, pred_svm):
    print(f"Teks: {teks}")
    print(f"=> TensorFlow: {tf_label}")
    print(f"=> Logistic Regression: {logreg_label}")
    print(f"=> SVM: {svm_label}")
    print(separator)


=== Inference Comparison ===

Teks: I love the clean interface and how easy it is to navigate the app.
=> TensorFlow: positive
=> Logistic Regression: positive
=> SVM: positive
------------------------------------------------------------
Teks: It keeps freezing whenever I try to join a call. Super frustrating.
=> TensorFlow: negative
=> Logistic Regression: negative
=> SVM: negative
------------------------------------------------------------
Teks: Everything works as expected, nothing special but no major issues either.
=> TensorFlow: positive
=> Logistic Regression: neutral
=> SVM: positive
------------------------------------------------------------
Teks: I regret installing it. Crashes every time I open it.
=> TensorFlow: negative
=> Logistic Regression: negative
=> SVM: negative
------------------------------------------------------------
Teks: Audio quality is poor when using Bluetooth headsets.
=> TensorFlow: negative
=> Logistic Regression: negative
=> SVM: negative
-----------

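Dari hasil perbandingan inference, ketiga model (TensorFlow, Logistic Regression, dan SVM) memberikan prediksi yang konsisten satu sama lain pada teks yang jelas positif atau jelas negatif. Namun, pada teks yang bernuansa netral atau ambigu, perbedaan pendekatan antar model dalam memahami konteks menyebabkan variasi hasil klasifikasi. Contohnya, teks ketiga ("Everything works as expected, nothing special but no major issues either.") diprediksi positive oleh TensorFlow dan SVM, tetapi neutral oleh Logistic Regression. Karena teks uji ini tidak memiliki label sebenarnya, perbandingan ini hanya menunjukkan tingkat kesepakatan antar model, bukan akurasinya.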