# Ridwan Halim

# Import

In [1]:
import numpy as np
import joblib
from gensim.models import Word2Vec

# Download the models from my GitHub: [ridwaanhall](https://github.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/tree/main/01_project/03_model)

In [3]:
# Download the three serialized artifacts from the GitHub repository; going by
# their filenames they are the XGBoost classifier, the Word2Vec model and the
# label encoder. `-O` stores each one under a fixed name in the working directory.
# NOTE(review): these files are later passed to joblib.load, which unpickles
# them, so only run this against a source you trust.
!wget -O xgboost_word2vec_model.joblib https://github.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/raw/main/01_project/03_model/xgboost_word2vec_model.joblib
!wget -O word2vec_model.joblib https://github.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/raw/main/01_project/03_model/word2vec_model.joblib
!wget -O label_encoder.joblib https://github.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/raw/main/01_project/03_model/label_encoder.joblib

--2024-07-21 13:15:39--  https://github.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/raw/main/01_project/03_model/xgboost_word2vec_model.joblib
Resolving github.com (github.com)... 20.27.177.113
Connecting to github.com (github.com)|20.27.177.113|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/main/01_project/03_model/xgboost_word2vec_model.joblib [following]
--2024-07-21 13:15:40--  https://raw.githubusercontent.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/main/01_project/03_model/xgboost_word2vec_model.joblib
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1173433 (1.1M) [application/octet-stream]
Saving to: ‘xgboost_word2

# SentimentAnalyzer

In [4]:
class SentimentAnalyzer:
    """Sentiment predictor built from three joblib-serialized artifacts.

    The instance holds a classifier exposing ``predict_proba``, an embedding
    model exposing ``.wv`` and ``.vector_size`` (presumably a gensim Word2Vec
    model -- confirm against the training notebook), and a label encoder
    exposing ``classes_``.
    """

    def __init__(self, model_path, word2vec_path, label_encoder_path):
        """Load the classifier, the embedding model and the label encoder.

        Args:
            model_path: Path of the serialized classifier.
            word2vec_path: Path of the serialized embedding model.
            label_encoder_path: Path of the serialized label encoder.
        """
        # NOTE(review): joblib.load unpickles arbitrary objects; only point
        # these paths at files from a trusted source.
        self.model = joblib.load(model_path)
        self.word2vec_model = joblib.load(word2vec_path)
        self.label_encoder = joblib.load(label_encoder_path)

    def vectorize(self, text):
        """Turn ``text`` into one vector by averaging its known word vectors.

        The text is split on whitespace and tokens are looked up verbatim;
        tokens missing from the vocabulary are skipped.
        NOTE(review): no lowercasing or punctuation stripping happens here --
        confirm this matches the preprocessing used at training time.

        Returns:
            The element-wise mean of the found word vectors, or a zero vector
            of length ``vector_size`` when no token is in the vocabulary.
        """
        found_vectors = []
        for token in text.split():
            if token in self.word2vec_model.wv:
                found_vectors.append(self.word2vec_model.wv[token])

        if not found_vectors:
            # Nothing recognised (empty text or only unknown words).
            return np.zeros(self.word2vec_model.vector_size)
        return np.mean(found_vectors, axis=0)

    def predict_proba(self, text):
        """Return the classifier's class probabilities for ``text``."""
        # The classifier is called with a 2-D batch, so wrap the single
        # vector as a one-row matrix and unwrap the single result row.
        feature_row = self.vectorize(text).reshape(1, -1)
        return self.model.predict_proba(feature_row)[0]

    def get_percentage_predictions(self, text):
        """Map every class label to its predicted probability in percent.

        Labels come from ``label_encoder.classes_`` and are paired
        positionally with the classifier's probability outputs.
        """
        class_probabilities = self.predict_proba(text)
        percentages = {}
        for label, probability in zip(self.label_encoder.classes_, class_probabilities):
            percentages[label] = probability * 100
        return percentages

    def print_predictions(self, text):
        """Print the top class followed by all classes, highest score first."""
        percentages = self.get_percentage_predictions(text)

        # Rank (label, percentage) pairs in descending order of percentage.
        ranked = sorted(percentages.items(), key=lambda pair: pair[1], reverse=True)

        # The predicted class is the label carrying the largest percentage.
        predicted_class, _ = max(ranked, key=lambda pair: pair[1])

        print(f"Your text: {text}")
        print(f"Predict: {predicted_class.capitalize()}")
        print("\nDetail:")
        for label, percentage in ranked:
            print(f"{label.capitalize()}: {percentage:.5f}%")

# Local filenames of the three serialized artifacts, in the order the
# SentimentAnalyzer constructor takes them.
model_path, word2vec_path, label_encoder_path = (
    'xgboost_word2vec_model.joblib',
    'word2vec_model.joblib',
    'label_encoder.joblib',
)

# Load all artifacts once; the cells below reuse this single instance.
analyzer = SentimentAnalyzer(
    model_path=model_path,
    word2vec_path=word2vec_path,
    label_encoder_path=label_encoder_path,
)

configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.



# Test

## Negative

In [5]:
# Short negative sample (Indonesian, roughly: "the items are all expensive").
input_text = "harga barangnya mahal mahal"
analyzer.print_predictions(input_text)

Your text: harga barangnya mahal mahal
Predict: Negatif

Detail:
Negatif: 90.94258%
Positif: 8.83364%
Netral: 0.22378%


In [26]:
# Longer negative review (Indonesian): unsatisfying shopping experience, item
# not matching its description, slow customer service.
input_text = "Pengalaman belanja di Tokopedia kurang memuaskan. Barang yang saya pesan tidak sesuai dengan deskripsi. Pelayanan pelanggan juga lambat dalam merespons keluhan."
analyzer.print_predictions(input_text)

Your text: Pengalaman belanja di Tokopedia kurang memuaskan. Barang yang saya pesan tidak sesuai dengan deskripsi. Pelayanan pelanggan juga lambat dalam merespons keluhan.
Predict: Negatif

Detail:
Negatif: 82.37570%
Positif: 17.58235%
Netral: 0.04196%


## Positive

In [6]:
# Short positive sample (Indonesian, roughly: "the items are all cheap").
input_text = "harga barangnya murah murah"
analyzer.print_predictions(input_text)

Your text: harga barangnya murah murah
Predict: Positif

Detail:
Positif: 98.47952%
Negatif: 1.48496%
Netral: 0.03553%


In [8]:
# Longer positive review (Indonesian): easy-to-use app, attractive promos,
# fast delivery, responsive customer service.
input_text = "Aplikasi Tokopedia sangat mudah digunakan dan selalu memberikan promo menarik! Pengalaman belanja jadi lebih menyenangkan dan hemat. Pengiriman cepat dan customer service juga sangat responsif. Terima kasih, Tokopedia!"
analyzer.print_predictions(input_text)

Your text: Aplikasi Tokopedia sangat mudah digunakan dan selalu memberikan promo menarik! Pengalaman belanja jadi lebih menyenangkan dan hemat. Pengiriman cepat dan customer service juga sangat responsif. Terima kasih, Tokopedia!
Predict: Positif

Detail:
Positif: 99.99403%
Negatif: 0.00457%
Netral: 0.00140%


## Neutral

In [7]:
# Neutral sample (Indonesian, roughly: "don't forget to watch our ads on Tokopedia").
input_text = "jangan lupa saksikan iklan kami di tokopedia"
analyzer.print_predictions(input_text)

Your text: jangan lupa saksikan iklan kami di tokopedia
Predict: Netral

Detail:
Netral: 60.01596%
Negatif: 30.59819%
Positif: 9.38585%


In [21]:
# Borderline sample (Indonesian, roughly: "don't forget to shop").
input_text = "jangan lupa berbelanja"
analyzer.print_predictions(input_text)

# In the recorded run this is predicted as positive, but the neutral
# probability is almost as high.

Your text: jangan lupa berbelanja
Predict: Positif

Detail:
Positif: 50.69026%
Netral: 48.54724%
Negatif: 0.76250%


## Boleh dicoba lho, kakak-kakak reviewer :) Tinggal run lalu masukkan teksnya. Tapi terkadang hasilnya masih nggak akurat.