# Import Library


In [1]:
# Library Pembantu
import numpy as np

# Library Pre-processing Text
import string
import re

import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.corpus import wordnet

from sklearn.preprocessing import LabelEncoder

# Library Akses Github
import requests
import csv
from io import StringIO

# Library import model
import joblib
import tensorflow as tf

In [2]:
# Download every NLTK resource this notebook relies on so that a fresh
# kernel (Restart & Run All) works without leftover state:
#   - stopwords : English stop-word list used when filtering tokens
#   - wordnet   : data used by WordNetLemmatizer
#   - punkt / punkt_tab : tokenizer models required by word_tokenize
#     (newer NLTK releases look for punkt_tab, older ones for punkt)
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')
nltk.download('punkt_tab')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

# Load Model

In [3]:
# Load the fitted artifacts used for inference.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load artifacts that come from a trusted source.

# TF-IDF transformer (used in inference() to turn cleaned text into features)
tfidf = joblib.load("/content/TFIDF_model_sentiment.pkl")
# Logistic Regression model (stored with an .h5 extension but read via joblib)
model_lr = joblib.load("/content/LR_model_sentiment.h5")
# Dense Neural Network (Keras model file)
model_dnn = tf.keras.models.load_model("/content/DNN_model_sentiment.h5")



# Inference

In [4]:
# Sentiment labels. inference() indexes this list with the integer produced by
# le_sentiment.transform(...) and with the argmax of the DNN output, so the
# list order must agree with both encodings.
# NOTE(review): presumably this matches the label order used at training
# time -- confirm against the training notebook.
sentiment = ['negative', 'neutral', 'positive']
le_sentiment = LabelEncoder()
le_sentiment.fit(sentiment)

In [5]:
# Text cleaning helper
def cleaningText(text):
    """Strip social-media noise, digits and punctuation from ``text``.

    Steps, in order: remove @mentions, #hashtags, the retweet marker "RT",
    links, digits and punctuation; turn newlines into spaces; trim the ends.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string. Inner whitespace is not collapsed, so removed
        pieces may leave consecutive spaces behind.

    NOTE(review): the mention/hashtag/RT patterns are stricter than the earlier
    ASCII-only versions. If the models were trained on text cleaned with the
    older patterns, confirm the training pipeline is updated accordingly.
    """
    # \w also covers "_" so handles such as @user_name are removed completely
    # instead of leaving "_name" behind.
    text = re.sub(r'@\w+', '', text)  # remove mentions
    text = re.sub(r'#\w+', '', text)  # remove hashtags
    # \b anchors "RT" at a word start so words ending in "RT" (e.g. "START ")
    # are no longer mangled.
    text = re.sub(r'\bRT\s', '', text)  # remove the retweet marker
    text = re.sub(r'http\S+', '', text)  # remove links
    text = re.sub(r'[0-9]+', '', text)  # remove digits
    text = re.sub(r'[^\w\s]', '', text)  # remove everything except word chars and whitespace

    text = text.replace('\n', ' ')  # replace newlines with spaces
    # The regex above keeps "_" (it is a word character); this pass removes it
    # together with any remaining ASCII punctuation.
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = text.strip()  # trim leading and trailing whitespace
    return text

# Case-folding helper
def casefoldingText(text):
    """Return ``text`` with every character converted to lowercase."""
    return text.lower()

# Slang dictionary: maps a lowercased slang term to its meaning.
# It is filled from a public CSV; if the download fails the dictionary stays
# empty and fix_slangwords() leaves the text unchanged.
slangwords = dict()
slangwords_url = 'https://raw.githubusercontent.com/bodhwani/NLP-VIT-BOT/master/slangs.csv'

try:
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(slangwords_url, timeout=10)
except requests.RequestException as error:
    response = None
    print(f"Warning: could not download slang dictionary ({error}); slang replacement is disabled.")

if response is not None and response.status_code == 200:
    reader = csv.reader(StringIO(response.text), delimiter=',')

    # Skip the header row (the default avoids StopIteration on an empty file)
    next(reader, None)

    for row in reader:
        # Ignore blank or malformed rows that lack the slang/meaning columns
        if len(row) < 3:
            continue

        # Slang term is in the 2nd column
        slang = row[1].strip().lower()

        # Meaning is in the 3rd column
        meaning = row[2].strip()

        slangwords[slang] = meaning
elif response is not None:
    # Make the degraded mode visible instead of silently continuing
    print(f"Warning: slang dictionary request returned HTTP {response.status_code}; slang replacement is disabled.")

def fix_slangwords(text):
    """Replace whitespace-separated words found in ``slangwords`` with their meaning.

    The lookup is case-insensitive; words without an entry are kept as-is.
    The result is re-joined with single spaces.
    """
    replaced = [slangwords.get(token.lower(), token) for token in text.split()]
    return ' '.join(replaced)

# Tokenizing helper
def tokenizingText(text):
    """Split a string into a list of tokens via ``word_tokenize``."""
    return word_tokenize(text)

# Stop-word filtering
# English stop words from NLTK, kept in a set for membership tests in filteringText()
listStopwords = set(stopwords.words('english'))

def filteringText(text):
    """Return the tokens of ``text`` that are not in ``listStopwords``.

    The comparison is exact (case-sensitive); token order is preserved.
    """
    return [token for token in text if token not in listStopwords]

# Lemmatization
# Shared WordNet lemmatizer instance used by lemmatizingText()
lemmatizer = WordNetLemmatizer()

def lemmatizingText(text):
    """Lemmatize every token in ``text`` as a verb.

    Each token is lowercased before it is handed to the lemmatizer so that
    processing is consistent regardless of the input casing.
    """
    return [lemmatizer.lemmatize(token.lower(), pos=wordnet.VERB) for token in text]

# Join helper
def toSentence(list_words):
    """Join a list of words into one space-separated sentence."""
    return ' '.join(list_words)

# Full preprocessing pipeline
def preprocessing(text):
    """Run ``text`` through every preprocessing step and return the result.

    Order: cleaning -> case folding -> slang replacement -> tokenizing ->
    stop-word filtering -> lemmatizing -> joining back into a sentence.
    """
    pipeline = (
        cleaningText,
        casefoldingText,
        fix_slangwords,
        tokenizingText,
        filteringText,
        lemmatizingText,
        toSentence,
    )
    result = text
    for step in pipeline:
        result = step(result)
    return result

# Inference on user input
def inference(text):
    """Predict the sentiment of ``text`` with both loaded models and print the results.

    The text is preprocessed, vectorized with the loaded ``tfidf`` object and
    passed to the Logistic Regression and Dense Neural Network models.
    Nothing is returned; one line per model is printed.
    """
    cleaned_text = preprocessing(text)
    features = tfidf.transform([cleaned_text])

    # Raw predictions from each model; the DNN receives the output of .toarray()
    lr_output = model_lr.predict(features)
    dnn_output = model_dnn.predict(features.toarray())

    # Map the LR prediction through the label encoder to an index into `sentiment`
    lr_index = le_sentiment.transform(lr_output)[0]
    lr_label = sentiment[lr_index]

    # argmax without an axis works on the flattened output, so this assumes a
    # single-sample prediction
    dnn_index = np.argmax(dnn_output)
    dnn_label = sentiment[dnn_index]

    # Output
    print("Hasil prediksi sentiment menggunakan model Logistic Regression:", lr_label)
    print("Hasil prediksi sentiment menggunakan model Dense Neural Network:", dnn_label)

# Input User

In [6]:
# Read a review from the user and print each model's predicted sentiment
input_user = input("Masukkan review: ")
inference(input_user)

Masukkan review: this app sucks ngl
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 769ms/step
Hasil prediksi sentiment menggunakan model Logistic Regression: negative
Hasil prediksi sentiment menggunakan model Dense Neural Network: negative


In [7]:
# Read a review from the user and print each model's predicted sentiment
input_user = input("Masukkan review: ")
inference(input_user)

Masukkan review: hello my name is nelson
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 296ms/step
Hasil prediksi sentiment menggunakan model Logistic Regression: neutral
Hasil prediksi sentiment menggunakan model Dense Neural Network: neutral


In [8]:
# Read a review from the user and print each model's predicted sentiment
input_user = input("Masukkan review: ")
inference(input_user)

Masukkan review: i love this app
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
Hasil prediksi sentiment menggunakan model Logistic Regression: positive
Hasil prediksi sentiment menggunakan model Dense Neural Network: positive
