# Ridwan Halim

# Import

In [1]:
import numpy as np
import joblib
from gensim.models import Word2Vec

# Download the models from my GitHub: [ridwaanhall](https://github.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/tree/main/01_project/03_model)

In [3]:
# Fetch the three serialized artifacts (classifier, Word2Vec model, label encoder)
# into the working directory; `-O` fixes the local filename and overwrites any
# existing file of that name.
# SECURITY: these are joblib (pickle-based) files that are later passed to
# joblib.load, which can execute arbitrary code -- only download from a source you trust.
!wget -O randomforest_word2vec_model_new.joblib https://github.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/raw/main/01_project/03_model/randomforest_word2vec_model_new.joblib
!wget -O word2vec_model_new.joblib https://github.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/raw/main/01_project/03_model/word2vec_model_new.joblib
!wget -O label_encoder_new.joblib https://github.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/raw/main/01_project/03_model/label_encoder_new.joblib

--2024-07-21 16:15:13--  https://github.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/raw/main/01_project/03_model/randomforest_word2vec_model_new.joblib
Resolving github.com (github.com)... 140.82.114.4
Connecting to github.com (github.com)|140.82.114.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/main/01_project/03_model/randomforest_word2vec_model_new.joblib [following]
--2024-07-21 16:15:13--  https://raw.githubusercontent.com/ridwaanhall/Dicoding-Machine-Learning-Intermediate/main/01_project/03_model/randomforest_word2vec_model_new.joblib
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 19428417 (19M) [application/octet-stream]


# SentimentAnalyzer

In [4]:
class SentimentAnalyzer:
    """Sentiment classifier over averaged Word2Vec token vectors.

    Wraps three joblib-serialized artifacts:

    * ``model`` -- a classifier exposing ``predict_proba``;
    * ``word2vec_model`` -- an object exposing ``wv`` (token -> vector lookup
      supporting ``in`` and ``[]``) and ``vector_size``;
    * ``label_encoder`` -- an object exposing ``classes_``.
    """

    def __init__(self, model_path, word2vec_path, label_encoder_path):
        """Load the classifier, the Word2Vec model and the label encoder.

        Args:
            model_path: Path to the serialized classifier.
            word2vec_path: Path to the serialized Word2Vec model.
            label_encoder_path: Path to the serialized label encoder.
        """
        # SECURITY: joblib.load unpickles its input, which can execute
        # arbitrary code. Only pass files obtained from a trusted source.
        self.model = joblib.load(model_path)
        self.word2vec_model = joblib.load(word2vec_path)
        self.label_encoder = joblib.load(label_encoder_path)

    def vectorize(self, text):
        """Vectorize text using Word2Vec.

        The text is split on whitespace and each token is looked up verbatim
        (no lowercasing, no punctuation stripping); tokens missing from the
        vocabulary are ignored.
        NOTE(review): confirm this matches the preprocessing used when the
        Word2Vec model and the classifier were trained.

        Args:
            text: Input string.

        Returns:
            The element-wise mean of the vectors of all in-vocabulary tokens,
            or a zero vector of length ``vector_size`` when no token is known.
        """
        # Resolve the lookup table once instead of once per token.
        keyed_vectors = self.word2vec_model.wv
        vectors = [keyed_vectors[word] for word in text.split() if word in keyed_vectors]
        if not vectors:
            return np.zeros(self.word2vec_model.vector_size)
        return np.mean(vectors, axis=0)

    def predict_proba(self, text):
        """Predict the sentiment probabilities for the input text.

        Args:
            text: Input string.

        Returns:
            The first (and only) row returned by ``model.predict_proba``.
        """
        # The classifier expects a 2-D batch, so wrap the single vector.
        input_vector = self.vectorize(text).reshape(1, -1)
        return self.model.predict_proba(input_vector)[0]

    def get_percentage_predictions(self, text):
        """Get sentiment predictions with percentages.

        Args:
            text: Input string.

        Returns:
            Dict mapping each entry of ``label_encoder.classes_`` to its
            probability multiplied by 100.

        Raises:
            ValueError: If the number of labels differs from the number of
                probabilities returned by the model.
        """
        probabilities = self.predict_proba(text)
        class_labels = self.label_encoder.classes_
        # zip() would silently truncate on a mismatch and attach probabilities
        # to the wrong labels, so fail loudly instead.
        if len(class_labels) != len(probabilities):
            raise ValueError(
                f"Label encoder has {len(class_labels)} classes but the model "
                f"returned {len(probabilities)} probabilities."
            )
        return {label: prob * 100 for label, prob in zip(class_labels, probabilities)}

    def print_predictions(self, text):
        """Print sentiment predictions sorted from highest to lowest.

        Prints the input text, the top-ranked label and then every label with
        its percentage formatted to five decimals.

        Args:
            text: Input string.
        """
        percentages = self.get_percentage_predictions(text)
        # Highest percentage first; the sort is stable, so ties keep label order.
        ranked = sorted(percentages.items(), key=lambda item: item[1], reverse=True)

        # The first ranked entry is the prediction -- no second pass needed.
        predicted_class = ranked[0][0]

        print(f"Your text: {text}")
        print(f"Predict: {predicted_class.capitalize()}")
        print("\nDetail:")
        for label, percentage in ranked:
            print(f"{label.capitalize()}: {percentage:.5f}%")

# Local filenames of the three serialized artifacts, in constructor order.
model_path, word2vec_path, label_encoder_path = (
    'randomforest_word2vec_model_new.joblib',
    'word2vec_model_new.joblib',
    'label_encoder_new.joblib',
)

# One analyzer instance, reused by every cell below.
analyzer = SentimentAnalyzer(
    model_path=model_path,
    word2vec_path=word2vec_path,
    label_encoder_path=label_encoder_path,
)

# Test

## Negative

In [5]:
# Short price complaint; the example for the "Negative" section.
input_text = "harga barangnya mahal mahal"
analyzer.print_predictions(input_text)

Your text: harga barangnya mahal mahal
Predict: Negatif

Detail:
Negatif: 52.00000%
Positif: 37.00000%
Netral: 11.00000%


In [6]:
# Multi-sentence complaint (capitalized words and punctuation included) for the "Negative" section.
input_text = "Pengalaman belanja di Tokopedia kurang memuaskan. Barang yang saya pesan tidak sesuai dengan deskripsi. Pelayanan pelanggan juga lambat dalam merespons keluhan."
analyzer.print_predictions(input_text)

Your text: Pengalaman belanja di Tokopedia kurang memuaskan. Barang yang saya pesan tidak sesuai dengan deskripsi. Pelayanan pelanggan juga lambat dalam merespons keluhan.
Predict: Negatif

Detail:
Negatif: 54.00000%
Positif: 43.00000%
Netral: 3.00000%


## Positive

In [7]:
# Same sentence shape as the short negative example, with "murah" in place of "mahal".
input_text = "harga barangnya murah murah"
analyzer.print_predictions(input_text)

Your text: harga barangnya murah murah
Predict: Positif

Detail:
Positif: 54.00000%
Negatif: 37.00000%
Netral: 9.00000%


In [8]:
# Multi-sentence praise (capitalized words and punctuation included) for the "Positive" section.
input_text = "Aplikasi Tokopedia sangat mudah digunakan dan selalu memberikan promo menarik! Pengalaman belanja jadi lebih menyenangkan dan hemat. Pengiriman cepat dan customer service juga sangat responsif. Terima kasih, Tokopedia!"
analyzer.print_predictions(input_text)

Your text: Aplikasi Tokopedia sangat mudah digunakan dan selalu memberikan promo menarik! Pengalaman belanja jadi lebih menyenangkan dan hemat. Pengiriman cepat dan customer service juga sangat responsif. Terima kasih, Tokopedia!
Predict: Positif

Detail:
Positif: 99.00000%
Negatif: 1.00000%
Netral: 0.00000%


## Neutral

In [22]:
# A feature question with no stated opinion; the example for the "Neutral" section.
input_text = "apakah tokopedia punya fitur dark mode?"
analyzer.print_predictions(input_text)

Your text: apakah tokopedia punya fitur dark mode?
Predict: Netral

Detail:
Netral: 39.00000%
Negatif: 33.00000%
Positif: 28.00000%


## Boleh dicoba, kakak-kakak reviewer :) Tinggal jalankan semua sel, lalu masukkan teks sendiri. Namun, hasil prediksinya terkadang masih kurang akurat.