## Import needed library
*Before that, do not forget to download required library from requirement.txt*

In [12]:
import numpy as np
import pandas as pd
import re
import string
import emoji
import nltk
import spacy
import joblib
import tkinter as tk
from nltk import pos_tag
from googletrans import Translator
from spellchecker import SpellChecker
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense, LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

#nltk.download('stopwords')
#nltk.download('punkt')
#nltk.download('averaged_perceptron_tagger')
#nltk.download('wordnet')

**Declare path to dataset that contains non_english comments and label of (positive , nuetral or negative)**

In [2]:
csv_file_path = 'NLP dataset4.csv'
df = pd.read_csv(csv_file_path)

**Translate comments in dataset from Malay language to english**

In [3]:
# Function to translate text to English
def translate_text_function(text):
    def translate_to_english(text):
        translator = Translator()
        try:
            if text:  # Ensure the text is not None or empty
                translation = translator.translate(text, src='ms', dest='en')
                return translation.text
            else:
                return text
        except Exception as e:
            print(f"Error translating from Malay to English: {e}")
            return text  # Return the original text in case of an error
    
    def auto_translate(text):
        translator = Translator()
        try:
            if text:  # Ensure the text is not None or empty
                translation = translator.translate(text, dest='en')
                return translation.text
            else:
                return text
        except Exception as e:
            print(f"Error auto-translating to English: {e}")
            return text  # Return the original text in case of an error
    
    if text:  # Ensure the text is not None or empty
        auto_translated_text = auto_translate(text)
        malay_translated_text = translate_to_english(auto_translated_text)
        translated_sentence = f"{malay_translated_text}"
    else:
        translated_sentence = text  # Handle None or empty text
    
    return translated_sentence

**Remove empty value then add a column of translated comment**

In [4]:
# Remove rows with None or empty strings in the 'Review' column
df = df.dropna(subset=['Review'])
df = df[df['Review'].str.strip() != '']

# Apply the translation function
df['translated_Review'] = df['Review'].apply(translate_text_function)

Error auto-translating to English: timed out
Error translating from Malay to English: timed out


In [5]:
# Print the entire DataFrame
print(df)

                                                Review     Label  \
0    Barang sudah sampai.tp foto tak Ada sangkutan....  Negative   
1    slow penghantaran and bad service no response ...  Negative   
2    Good price good product quality good job good ...  Positive   
3               penantian yang berbaloi..kasut too bad  Negative   
4    Penghantaran cepat walaupun dari China. 7 hari...  Positive   
..                                                 ...       ...   
244  Terimaksih seller barang sudah sampai..harga t...  Positive   
245                   allhamdulillah kasut soft gitu..   Neutral   
246  Barang dah sampai dalam keadaan yang tidak bai...  Negative   
247  barang dah lama smpai tapi baru ada masa, sorr...   Neutral   
248  Alhamdulillah kasut selamat sampai Cepat juga ...  Positive   

                                     translated_Review  
0    The goods have arrived, but the photos are not...  
1    slow delivery and bad service no response from...  
2    Good pr

**Truns the words into lowercase for easier understand by model in the later process**

In [6]:
df["Lowercase"] = df["translated_Review"].str.lower()
df.head()

Unnamed: 0,Review,Label,translated_Review,Lowercase
0,Barang sudah sampai.tp foto tak Ada sangkutan....,Negative,"The goods have arrived, but the photos are not...","the goods have arrived, but the photos are not..."
1,slow penghantaran and bad service no response ...,Negative,slow delivery and bad service no response from...,slow delivery and bad service no response from...
2,Good price good product quality good job good ...,Positive,Good price good product quality good job good ...,good price good product quality good job good ...
3,penantian yang berbaloi..kasut too bad,Negative,worth the wait..shoes too bad,worth the wait..shoes too bad
4,Penghantaran cepat walaupun dari China. 7 hari...,Positive,Fast delivery even from China. It arrived in 7...,fast delivery even from china. it arrived in 7...


**Remove punctuation(e.g. full stop, comma, and brackets) as it does not help in machine understand**

In [7]:
PUNCT_TO_REMOVE = string.punctuation
def remove_punctuation(text):
    """custom function to remove the punctuation"""
    return text.translate(str.maketrans('', '', PUNCT_TO_REMOVE))

df["clearpunc"] = df["Lowercase"].apply(lambda text: remove_punctuation(text))
df.head()

Unnamed: 0,Review,Label,translated_Review,Lowercase,clearpunc
0,Barang sudah sampai.tp foto tak Ada sangkutan....,Negative,"The goods have arrived, but the photos are not...","the goods have arrived, but the photos are not...",the goods have arrived but the photos are not ...
1,slow penghantaran and bad service no response ...,Negative,slow delivery and bad service no response from...,slow delivery and bad service no response from...,slow delivery and bad service no response from...
2,Good price good product quality good job good ...,Positive,Good price good product quality good job good ...,good price good product quality good job good ...,good price good product quality good job good ...
3,penantian yang berbaloi..kasut too bad,Negative,worth the wait..shoes too bad,worth the wait..shoes too bad,worth the waitshoes too bad
4,Penghantaran cepat walaupun dari China. 7 hari...,Positive,Fast delivery even from China. It arrived in 7...,fast delivery even from china. it arrived in 7...,fast delivery even from china it arrived in 7 ...


**these are all the stop words**

In [8]:
", ".join(stopwords.words('english'))

"i, me, my, myself, we, our, ours, ourselves, you, you're, you've, you'll, you'd, your, yours, yourself, yourselves, he, him, his, himself, she, she's, her, hers, herself, it, it's, its, itself, they, them, their, theirs, themselves, what, which, who, whom, this, that, that'll, these, those, am, is, are, was, were, be, been, being, have, has, had, having, do, does, did, doing, a, an, the, and, but, if, or, because, as, until, while, of, at, by, for, with, about, against, between, into, through, during, before, after, above, below, to, from, up, down, in, out, on, off, over, under, again, further, then, once, here, there, when, where, why, how, all, any, both, each, few, more, most, other, some, such, no, nor, not, only, own, same, so, than, too, very, s, t, can, will, just, don, don't, should, should've, now, d, ll, m, o, re, ve, y, ain, aren, aren't, couldn, couldn't, didn, didn't, doesn, doesn't, hadn, hadn't, hasn, hasn't, haven, haven't, isn, isn't, ma, mightn, mightn't, mustn, mus

**Remove stop word as it does not help in machine understand**

In [9]:
STOPWORDS = set(stopwords.words('english'))
def remove_stopwords(text):
    return " ".join([word for word in str(text).split() if word not in STOPWORDS])

df["clearstop"] = df["clearpunc"].apply(lambda text: remove_stopwords(text))
df.head()

Unnamed: 0,Review,Label,translated_Review,Lowercase,clearpunc,clearstop
0,Barang sudah sampai.tp foto tak Ada sangkutan....,Negative,"The goods have arrived, but the photos are not...","the goods have arrived, but the photos are not...",the goods have arrived but the photos are not ...,goods arrived photos connecteddelivery slow
1,slow penghantaran and bad service no response ...,Negative,slow delivery and bad service no response from...,slow delivery and bad service no response from...,slow delivery and bad service no response from...,slow delivery bad service response seller bad
2,Good price good product quality good job good ...,Positive,Good price good product quality good job good ...,good price good product quality good job good ...,good price good product quality good job good ...,good price good product quality good job good ...
3,penantian yang berbaloi..kasut too bad,Negative,worth the wait..shoes too bad,worth the wait..shoes too bad,worth the waitshoes too bad,worth waitshoes bad
4,Penghantaran cepat walaupun dari China. 7 hari...,Positive,Fast delivery even from China. It arrived in 7...,fast delivery even from china. it arrived in 7...,fast delivery even from china it arrived in 7 ...,fast delivery even china arrived 7 days beauti...


**Apply lemmatization technique(reduce a given word to its root word) to help machine understanding**

In [10]:
lemmatizer = WordNetLemmatizer()
def lemmatize_words(text):
    return " ".join([lemmatizer.lemmatize(word) for word in text.split()])

df["lemmatized"] = df["clearstop"].apply(lambda text: lemmatize_words(text))
df

Unnamed: 0,Review,Label,translated_Review,Lowercase,clearpunc,clearstop,lemmatized
0,Barang sudah sampai.tp foto tak Ada sangkutan....,Negative,"The goods have arrived, but the photos are not...","the goods have arrived, but the photos are not...",the goods have arrived but the photos are not ...,goods arrived photos connecteddelivery slow,good arrived photo connecteddelivery slow
1,slow penghantaran and bad service no response ...,Negative,slow delivery and bad service no response from...,slow delivery and bad service no response from...,slow delivery and bad service no response from...,slow delivery bad service response seller bad,slow delivery bad service response seller bad
2,Good price good product quality good job good ...,Positive,Good price good product quality good job good ...,good price good product quality good job good ...,good price good product quality good job good ...,good price good product quality good job good ...,good price good product quality good job good ...
3,penantian yang berbaloi..kasut too bad,Negative,worth the wait..shoes too bad,worth the wait..shoes too bad,worth the waitshoes too bad,worth waitshoes bad,worth waitshoes bad
4,Penghantaran cepat walaupun dari China. 7 hari...,Positive,Fast delivery even from China. It arrived in 7...,fast delivery even from china. it arrived in 7...,fast delivery even from china it arrived in 7 ...,fast delivery even china arrived 7 days beauti...,fast delivery even china arrived 7 day beautif...
...,...,...,...,...,...,...,...
244,Terimaksih seller barang sudah sampai..harga t...,Positive,"Thank you seller, the goods have arrived...the...","thank you seller, the goods have arrived...the...",thank you seller the goods have arrivedthe pri...,thank seller goods arrivedthe price affordable...,thank seller good arrivedthe price affordable ...
245,allhamdulillah kasut soft gitu..,Neutral,"Alhamdulillah, such soft shoes..","alhamdulillah, such soft shoes..",alhamdulillah such soft shoes,alhamdulillah soft shoes,alhamdulillah soft shoe
246,Barang dah sampai dalam keadaan yang tidak bai...,Negative,The goods have arrived in a bad condition and ...,the goods have arrived in a bad condition and ...,the goods have arrived in a bad condition and ...,goods arrived bad condition package messy,good arrived bad condition package messy
247,"barang dah lama smpai tapi baru ada masa, sorr...",Neutral,The item took a long time to arrive but just h...,the item took a long time to arrive but just h...,the item took a long time to arrive but just h...,item took long time arrive time sorry late review,item took long time arrive time sorry late review


**Apply Tokenization and Pos_Tags that are crutial steps in Natural Language Processing**

In [13]:
def tokenize_with_pos(text):
    tokens = word_tokenize(text)
    # Apply POS tagging
    pos_tags = pos_tag(tokens)
    # Convert tokens to lowercase
    tokens_lower = [token.lower() for token, _ in pos_tags]
    return tokens_lower, pos_tags

# Apply the tokenization function to the 'lemmatized' column
df[['Tokenized', 'POS_Tags']] = df['lemmatized'].apply(lambda x: pd.Series(tokenize_with_pos(x)) if isinstance(x, str) else (None, None))
df

Unnamed: 0,Review,Label,translated_Review,Lowercase,clearpunc,clearstop,lemmatized,Tokenized,POS_Tags
0,Barang sudah sampai.tp foto tak Ada sangkutan....,Negative,"The goods have arrived, but the photos are not...","the goods have arrived, but the photos are not...",the goods have arrived but the photos are not ...,goods arrived photos connecteddelivery slow,good arrived photo connecteddelivery slow,"[good, arrived, photo, connecteddelivery, slow]","[(good, JJ), (arrived, VBD), (photo, JJ), (con..."
1,slow penghantaran and bad service no response ...,Negative,slow delivery and bad service no response from...,slow delivery and bad service no response from...,slow delivery and bad service no response from...,slow delivery bad service response seller bad,slow delivery bad service response seller bad,"[slow, delivery, bad, service, response, selle...","[(slow, JJ), (delivery, NN), (bad, JJ), (servi..."
2,Good price good product quality good job good ...,Positive,Good price good product quality good job good ...,good price good product quality good job good ...,good price good product quality good job good ...,good price good product quality good job good ...,good price good product quality good job good ...,"[good, price, good, product, quality, good, jo...","[(good, JJ), (price, NN), (good, JJ), (product..."
3,penantian yang berbaloi..kasut too bad,Negative,worth the wait..shoes too bad,worth the wait..shoes too bad,worth the waitshoes too bad,worth waitshoes bad,worth waitshoes bad,"[worth, waitshoes, bad]","[(worth, NN), (waitshoes, NNS), (bad, JJ)]"
4,Penghantaran cepat walaupun dari China. 7 hari...,Positive,Fast delivery even from China. It arrived in 7...,fast delivery even from china. it arrived in 7...,fast delivery even from china it arrived in 7 ...,fast delivery even china arrived 7 days beauti...,fast delivery even china arrived 7 day beautif...,"[fast, delivery, even, china, arrived, 7, day,...","[(fast, RB), (delivery, NN), (even, RB), (chin..."
...,...,...,...,...,...,...,...,...,...
244,Terimaksih seller barang sudah sampai..harga t...,Positive,"Thank you seller, the goods have arrived...the...","thank you seller, the goods have arrived...the...",thank you seller the goods have arrivedthe pri...,thank seller goods arrivedthe price affordable...,thank seller good arrivedthe price affordable ...,"[thank, seller, good, arrivedthe, price, affor...","[(thank, NN), (seller, NN), (good, JJ), (arriv..."
245,allhamdulillah kasut soft gitu..,Neutral,"Alhamdulillah, such soft shoes..","alhamdulillah, such soft shoes..",alhamdulillah such soft shoes,alhamdulillah soft shoes,alhamdulillah soft shoe,"[alhamdulillah, soft, shoe]","[(alhamdulillah, NN), (soft, JJ), (shoe, NN)]"
246,Barang dah sampai dalam keadaan yang tidak bai...,Negative,The goods have arrived in a bad condition and ...,the goods have arrived in a bad condition and ...,the goods have arrived in a bad condition and ...,goods arrived bad condition package messy,good arrived bad condition package messy,"[good, arrived, bad, condition, package, messy]","[(good, JJ), (arrived, VBD), (bad, JJ), (condi..."
247,"barang dah lama smpai tapi baru ada masa, sorr...",Neutral,The item took a long time to arrive but just h...,the item took a long time to arrive but just h...,the item took a long time to arrive but just h...,item took long time arrive time sorry late review,item took long time arrive time sorry late review,"[item, took, long, time, arrive, time, sorry, ...","[(item, NN), (took, VBD), (long, JJ), (time, N..."


**Ignore Emoji and correct the spelling using SpellChecker**
*Emoji couldn't provide meaning to machine learning as one emoji may have many other meaning*
     E.g. 'You wouldn't know 😄 emoji that send to you by your wife means happy or angry'

In [14]:
# Function to check if a token contains emoji characters
def contains_emoji(token):
    return any(char in emoji.EMOJI_DATA for char in token)

# Function to correct tokens using SpellChecker, excluding emojis
def correct_spelling(tokens):
    corrected_tokens = []
    spell_checker = SpellChecker()
    
    for token in tokens:
        # Check if the token is an emoji
        if contains_emoji(token):
            # Skip emojis by not adding them to the corrected tokens
            continue
        else:
            # Spell check the token
            corrected_tokens.append(spell_checker.correction(token))
    return corrected_tokens

# Apply the correction function to the 'Tokenized_Lowercase' column
df['Corrected_Tokens'] = df['Tokenized'].apply(correct_spelling)
df

Unnamed: 0,Review,Label,translated_Review,Lowercase,clearpunc,clearstop,lemmatized,Tokenized,POS_Tags,Corrected_Tokens
0,Barang sudah sampai.tp foto tak Ada sangkutan....,Negative,"The goods have arrived, but the photos are not...","the goods have arrived, but the photos are not...",the goods have arrived but the photos are not ...,goods arrived photos connecteddelivery slow,good arrived photo connecteddelivery slow,"[good, arrived, photo, connecteddelivery, slow]","[(good, JJ), (arrived, VBD), (photo, JJ), (con...","[good, arrived, photo, None, slow]"
1,slow penghantaran and bad service no response ...,Negative,slow delivery and bad service no response from...,slow delivery and bad service no response from...,slow delivery and bad service no response from...,slow delivery bad service response seller bad,slow delivery bad service response seller bad,"[slow, delivery, bad, service, response, selle...","[(slow, JJ), (delivery, NN), (bad, JJ), (servi...","[slow, delivery, bad, service, response, selle..."
2,Good price good product quality good job good ...,Positive,Good price good product quality good job good ...,good price good product quality good job good ...,good price good product quality good job good ...,good price good product quality good job good ...,good price good product quality good job good ...,"[good, price, good, product, quality, good, jo...","[(good, JJ), (price, NN), (good, JJ), (product...","[good, price, good, product, quality, good, jo..."
3,penantian yang berbaloi..kasut too bad,Negative,worth the wait..shoes too bad,worth the wait..shoes too bad,worth the waitshoes too bad,worth waitshoes bad,worth waitshoes bad,"[worth, waitshoes, bad]","[(worth, NN), (waitshoes, NNS), (bad, JJ)]","[worth, None, bad]"
4,Penghantaran cepat walaupun dari China. 7 hari...,Positive,Fast delivery even from China. It arrived in 7...,fast delivery even from china. it arrived in 7...,fast delivery even from china it arrived in 7 ...,fast delivery even china arrived 7 days beauti...,fast delivery even china arrived 7 day beautif...,"[fast, delivery, even, china, arrived, 7, day,...","[(fast, RB), (delivery, NN), (even, RB), (chin...","[fast, delivery, even, china, arrived, 7, day,..."
...,...,...,...,...,...,...,...,...,...,...
244,Terimaksih seller barang sudah sampai..harga t...,Positive,"Thank you seller, the goods have arrived...the...","thank you seller, the goods have arrived...the...",thank you seller the goods have arrivedthe pri...,thank seller goods arrivedthe price affordable...,thank seller good arrivedthe price affordable ...,"[thank, seller, good, arrivedthe, price, affor...","[(thank, NN), (seller, NN), (good, JJ), (arriv...","[thank, seller, good, None, price, affordable,..."
245,allhamdulillah kasut soft gitu..,Neutral,"Alhamdulillah, such soft shoes..","alhamdulillah, such soft shoes..",alhamdulillah such soft shoes,alhamdulillah soft shoes,alhamdulillah soft shoe,"[alhamdulillah, soft, shoe]","[(alhamdulillah, NN), (soft, JJ), (shoe, NN)]","[None, soft, shoe]"
246,Barang dah sampai dalam keadaan yang tidak bai...,Negative,The goods have arrived in a bad condition and ...,the goods have arrived in a bad condition and ...,the goods have arrived in a bad condition and ...,goods arrived bad condition package messy,good arrived bad condition package messy,"[good, arrived, bad, condition, package, messy]","[(good, JJ), (arrived, VBD), (bad, JJ), (condi...","[good, arrived, bad, condition, package, messy]"
247,"barang dah lama smpai tapi baru ada masa, sorr...",Neutral,The item took a long time to arrive but just h...,the item took a long time to arrive but just h...,the item took a long time to arrive but just h...,item took long time arrive time sorry late review,item took long time arrive time sorry late review,"[item, took, long, time, arrive, time, sorry, ...","[(item, NN), (took, VBD), (long, JJ), (time, N...","[item, took, long, time, arrive, time, sorry, ..."


**Drop pre-processing columns**

In [16]:
#Done preprocessed
columns_to_drop = ['translated_Review', 'Lowercase', 'clearpunc', 'clearstop', 'lemmatized', 'Tokenized']
new_df = df.drop(columns=columns_to_drop)
new_df

Unnamed: 0,Review,Label,POS_Tags,Corrected_Tokens
0,Barang sudah sampai.tp foto tak Ada sangkutan....,Negative,"[(good, JJ), (arrived, VBD), (photo, JJ), (con...","[good, arrived, photo, None, slow]"
1,slow penghantaran and bad service no response ...,Negative,"[(slow, JJ), (delivery, NN), (bad, JJ), (servi...","[slow, delivery, bad, service, response, selle..."
2,Good price good product quality good job good ...,Positive,"[(good, JJ), (price, NN), (good, JJ), (product...","[good, price, good, product, quality, good, jo..."
3,penantian yang berbaloi..kasut too bad,Negative,"[(worth, NN), (waitshoes, NNS), (bad, JJ)]","[worth, None, bad]"
4,Penghantaran cepat walaupun dari China. 7 hari...,Positive,"[(fast, RB), (delivery, NN), (even, RB), (chin...","[fast, delivery, even, china, arrived, 7, day,..."
...,...,...,...,...
244,Terimaksih seller barang sudah sampai..harga t...,Positive,"[(thank, NN), (seller, NN), (good, JJ), (arriv...","[thank, seller, good, None, price, affordable,..."
245,allhamdulillah kasut soft gitu..,Neutral,"[(alhamdulillah, NN), (soft, JJ), (shoe, NN)]","[None, soft, shoe]"
246,Barang dah sampai dalam keadaan yang tidak bai...,Negative,"[(good, JJ), (arrived, VBD), (bad, JJ), (condi...","[good, arrived, bad, condition, package, messy]"
247,"barang dah lama smpai tapi baru ada masa, sorr...",Neutral,"[(item, NN), (took, VBD), (long, JJ), (time, N...","[item, took, long, time, arrive, time, sorry, ..."


**Train test split**

In [17]:
# Assuming 'Corrected_Tokens' is your preprocessed text column
X = new_df['Corrected_Tokens'].astype(str)
y = new_df['Label']

In [18]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

**Perform Vectorization for machine understanding**
*Vectorization is the process of converting words into numbers*

In [19]:
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Save the model and vectorizer using joblib
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')

['tfidf_vectorizer.pkl']

In [20]:
# Train the Naive Bayes model
nb_model = MultinomialNB()
nb_model.fit(X_train_vectorized, y_train)

In [21]:
# Make predictions
predictions = nb_model.predict(X_train_vectorized)


**Naive Bayes Model**

In [22]:
# Assuming 'Corrected_Tokens' is your preprocessed text column
X = new_df['Corrected_Tokens'].astype(str)
y = new_df['Label']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature extraction using TF-IDF
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Train the Naive Bayes model
nb_model = MultinomialNB()
nb_model.fit(X_train_vectorized, y_train)

# Save the trained model to a .pkl file
joblib.dump(nb_model, 'Naive_Bayes_model.pkl')

# Make predictions
predictions = nb_model.predict(X_test_vectorized)

# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
print(f'Naive Bayes Accuracy: {accuracy}')
print(classification_report(y_test, predictions))


Naive Bayes Accuracy: 0.52
              precision    recall  f1-score   support

    Negative       1.00      0.26      0.41        27
     Neutral       0.57      0.67      0.62        12
    Positive       0.38      1.00      0.55        11

    accuracy                           0.52        50
   macro avg       0.65      0.64      0.53        50
weighted avg       0.76      0.52      0.49        50



**SVM model**

In [23]:
# Train a SVM classifier on the selected features
svm_model = SVC(kernel='linear')
svm_model.fit(X_train_vectorized, y_train)

# Save the trained model to a .pkl file
joblib.dump(svm_model, 'svm_model.pkl')

svm_predictions = svm_model.predict(X_test_vectorized)

svm_accuracy = accuracy_score(y_test, svm_predictions)
print(f'SVM Accuracy: {svm_accuracy}\n')
print(classification_report(y_test, svm_predictions))

SVM Accuracy: 0.72

              precision    recall  f1-score   support

    Negative       1.00      0.56      0.71        27
     Neutral       0.55      0.92      0.69        12
    Positive       0.67      0.91      0.77        11

    accuracy                           0.72        50
   macro avg       0.74      0.79      0.72        50
weighted avg       0.82      0.72      0.72        50



**Logistic Regression Model**

In [24]:
#Logistic Regression model
logreg_model = LogisticRegression()
logreg_model.fit(X_train_vectorized, y_train)

# Save the trained model to a .pkl file
joblib.dump(logreg_model, 'logistic_regression_model.pkl')

# Make predictions
predictions = logreg_model.predict(X_test_vectorized)

# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
print(f'Logistic Regression Accuracy: {accuracy}')
print(classification_report(y_test, predictions))

Logistic Regression Accuracy: 0.62
              precision    recall  f1-score   support

    Negative       1.00      0.41      0.58        27
     Neutral       0.48      0.83      0.61        12
    Positive       0.56      0.91      0.69        11

    accuracy                           0.62        50
   macro avg       0.68      0.72      0.62        50
weighted avg       0.78      0.62      0.61        50



**Random Forest Model**

In [25]:
# Train the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_vectorized, y_train)

# Save the trained model to a .pkl file
joblib.dump(rf_model, 'Random_Forest_model.pkl')

# Make predictions
predictions = rf_model.predict(X_test_vectorized)

# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
print(f'Random Forest Accuracy: {accuracy}')
print(classification_report(y_test, predictions))

Random Forest Accuracy: 0.58
              precision    recall  f1-score   support

    Negative       1.00      0.30      0.46        27
     Neutral       0.43      0.83      0.57        12
    Positive       0.58      1.00      0.73        11

    accuracy                           0.58        50
   macro avg       0.67      0.71      0.59        50
weighted avg       0.77      0.58      0.55        50



**KNN Model**

In [26]:
# Train the KNN model
knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train_vectorized, y_train)

# Save the trained model to a .pkl file
joblib.dump(knn_model, 'KNN_model.pkl')

# Make predictions
predictions = knn_model.predict(X_test_vectorized)

# Evaluate the model
accuracy = accuracy_score(y_test, predictions)
print(f'KNN Accuracy: {accuracy}')
print(classification_report(y_test, predictions))

KNN Accuracy: 0.68
              precision    recall  f1-score   support

    Negative       0.84      0.59      0.70        27
     Neutral       0.47      0.75      0.58        12
    Positive       0.75      0.82      0.78        11

    accuracy                           0.68        50
   macro avg       0.69      0.72      0.69        50
weighted avg       0.73      0.68      0.69        50



**ARtificial Nueral Network Model**

In [27]:
# Assuming 'Corrected_Tokens' is your preprocessed text column
X = df['Corrected_Tokens'].astype(str)
y = df['Label']

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)

X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

# Pad sequences for uniform length
X_train_padded = pad_sequences(X_train_sequences)
X_test_padded = pad_sequences(X_test_sequences, maxlen=X_train_padded.shape[1])

# Build the neural network model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128, input_length=X_train_padded.shape[1]))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Use 1 neuron for binary classification

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the mod
model.fit(X_train_padded, y_train, epochs=5, batch_size=32, validation_split=0.1)

# Save the trained model to a .pkl file
joblib.dump(model, 'ANN_model.pkl')

# Evaluate the model
predictions = model.predict(X_test_padded)
predictions_binary = np.round(predictions)  # Convert probabilities to binary predictions (0 or 1)

# Convert predictions to original labels
predictions_labels = label_encoder.inverse_transform(predictions_binary.flatten().astype(int))

# Evaluate accuracy
accuracy = accuracy_score(y_test, predictions_binary)
print(f'ANN Accuracy: {accuracy}')


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
ANN Accuracy: 0.24


In [28]:
**LSTM Model**

SyntaxError: invalid syntax (3332604788.py, line 1)

In [29]:
# Assuming 'Corrected_Tokens' is your preprocessed text column
X = df['Corrected_Tokens'].astype(str)
y = df['Label']

# Label encoding for the target variable
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Tokenization and padding
max_words = 5000
max_len = 50

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

X_train_padded = pad_sequences(X_train_sequences, maxlen=max_len, padding='post', truncating='post')
X_test_padded = pad_sequences(X_test_sequences, maxlen=max_len, padding='post', truncating='post')

# Define the LSTM model
embedding_dim = 50

model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=embedding_dim, input_length=max_len))
model.add(LSTM(units=100))
model.add(Dense(units=1, activation='sigmoid'))

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_padded, y_train, epochs=100, batch_size=32, validation_split=0.1, verbose=1)

# Evaluate the model on the test set
y_pred = (model.predict(X_test_padded) > 0.5).astype("int32")
accuracy = accuracy_score(y_test, y_pred)
print(f'LSTM Accuracy: {accuracy}\n')
print(classification_report(y_test, y_pred))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


**Choose the best performence Model as sentiment analysis system's model**

In [32]:
# Load the pre-trained Logistic Regression model and vectorizer
model = joblib.load('logistic_regression_model.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')

def preprocess_input(text):
    # Translate text to English
    def translate_text_function(text):
        translator = Translator()
        translation = translator.translate(text, dest='en')
        return translation.text

    # Preprocess text
    def preprocess_text(text):
        lowercase_text = text.lower()
        without_punctuation = "".join([char for char in lowercase_text if char not in string.punctuation])
        without_stopwords = " ".join([word for word in without_punctuation.split() if word not in stopwords.words('english')])
        return without_stopwords

    translated_text = translate_text_function(text)
    preprocessed_text = preprocess_text(translated_text)

    return preprocessed_text, translated_text

# Function to predict sentiment
def predict_sentiment(review_text):
    # Preprocess user input
    preprocessed_text, translated_text = preprocess_input(review_text)

    # Vectorize the input
    input_vectorized = vectorizer.transform([preprocessed_text])

    # Make predictions
    confidence = model.predict_proba(input_vectorized)[:, 1]  # Confidence for positive class
    prediction = model.predict(input_vectorized)

    return confidence[0], prediction[0], translated_text

# Function to get user input and display result
def analyze_review():
    user_input = entry.get()
    confidence, result, translated_sentence = predict_sentiment(user_input)
    result_label.config(text=f"Sentiment: {result}")
    confidence_label.config(text=f"Confidence: {confidence:.2%}")
    translated_label.config(text=f"Translated Sentence: {translated_sentence}")

# Create the main window
window = tk.Tk()
window.title("Sentiment Analysis")

# Create an entry widget for user input
entry = tk.Entry(window, width=50)
entry.pack(pady=10)

# Create a button to trigger sentiment analysis
analyze_button = tk.Button(window, text="Analyze", command=analyze_review)
analyze_button.pack(pady=10)

# Create labels to display the result, confidence, and translated sentence
result_label = tk.Label(window, text="")
result_label.pack(pady=10)

confidence_label = tk.Label(window, text="")
confidence_label.pack(pady=10)

translated_label = tk.Label(window, text="")
translated_label.pack(pady=10)

# Start the GUI event loop
window.mainloop()


**Example for testing purpose**
#negative - lambat lah delivery ni, bad kualiti also, sakit hati.

#neutral - baru received, tidak pernah try lagi.

#positive - husband saya like it so much, design dia sangat cantik.