# Challenges on the text classification with the best model

## Import libraries

In [1]:
from text_preprocess import clean_text
from get_more_features import *
from embedding_word import *
from gensim.models import Word2Vec
import pandas as pd
from scipy.sparse import csr_matrix, hstack
import joblib

## Load Data

In [2]:

# Read the challenge reviews, one review per line (line terminators removed).
# The context manager closes the file handle as soon as the read finishes
# instead of leaving it open until garbage collection.
with open('Mini_Project_2_Data/challenge_data.txt', encoding='utf-8') as challenge_file:
    new_reviews = challenge_file.read().splitlines()


## Load necessary components 

In [3]:

# Locations of the artifacts needed to featurise the new reviews.
word2vec_path = "Deploy_streamlit/word2vec_model.bin"
scaler_path = 'Deploy_streamlit/maxabs_scaler.pkl'

# Word2Vec model used to turn the cleaned reviews into embeddings.
model = Word2Vec.load(word2vec_path)

# Scaler object restored with joblib; it is only loaded here and applied
# to the feature matrix in a later cell.
scaler = joblib.load(scaler_path)


In [11]:
# Serialized classifiers. joblib.load is pickle-based, so only load files
# that come from a trusted source.
mlp_model_path = 'best_mlp.pkl'
nb_tfidf_path = 'nb_tfidf.pkl'

# MLP classifier used for the predictions.
mlp_model = joblib.load(mlp_model_path)

# NOTE(review): nb_tfidf is not referenced by the other cells visible in
# this notebook -- confirm whether it is still needed.
nb_tfidf = joblib.load(nb_tfidf_path)

## Text Preprocessing and cleaning

In [5]:

# Clean every raw review with the project's clean_text helper.
cleaned_new_reviews = list(map(clean_text, new_reviews))

# Word2Vec embeddings for the cleaned reviews.
new_reviews_transformed = text_to_word_embeddings(cleaned_new_reviews, model)

# Hand-crafted features: column name -> function computing it from one
# cleaned review. Insertion order fixes the column order of the DataFrame.
feature_extractors = {
    'count_positive_words': count_positive_words,
    'count_negative_words': count_negative_words,
    'contain_no': contain_no,
    'contain_not': contain_not,
    'contain_exclamation': contain_exclamation,
    'log_review_length': log_review_length,
    'emotion_label': get_emotion_label,
    'sentiment_score': calculate_sentiment_score,
}
new_reviews_numeric_features = pd.DataFrame({
    column: [extract(text) for text in cleaned_new_reviews]
    for column, extract in feature_extractors.items()
})

# Stack the embeddings and the extra features side by side: embeddings
# first, then the numeric feature columns.
numeric_block = csr_matrix(new_reviews_numeric_features)
new_reviews_data = hstack([new_reviews_transformed, numeric_block])


In [6]:
# Apply the loaded scaler to the combined (embeddings + numeric) feature matrix.
# NOTE(review): presumably the column order must match the layout the scaler
# was fitted on -- confirm against the training code.
new_reviews_scaled = scaler.transform(new_reviews_data)

## Predictions

In [7]:
# Predict a label for each scaled review with the loaded MLP model
# (only the MLP is used for predictions in this cell).
mlp_predictions = mlp_model.predict(new_reviews_scaled)

# Display the predictions in the notebook output.
print("MLPClassifier Predictions:", mlp_predictions)


MLPClassifier Predictions: [0 1 1 ... 1 1 0]


## Save Result

In [None]:
# Save the predictions to group6_mini_project_2_challenge.txt.
# NOTE(review): the labels are written back-to-back with no separator or
# newline between them -- confirm this matches the expected submission format.
with open("group6_mini_project_2_challenge.txt", "w", encoding="utf-8") as result_file:
    result_file.writelines(str(label) for label in mlp_predictions)

