Import libraries

In [160]:
import json
import pickle
import re
from collections import Counter

import joblib
import pandas as pd
import scipy.sparse as sp

In [134]:
def calculate_repetition_features(text):
    """Compute word- and character-repetition counts for one piece of text.

    Parameters
    ----------
    text : str or any
        Text to analyse. Any value that is not a ``str`` (for example NaN
        or None) is treated as an empty string.

    Returns
    -------
    pandas.Series
        Two values, in this order:

        * word repetition -- the summed occurrence counts of every
          whitespace-separated token that appears more than once
          (a token seen three times contributes 3);
        * letter repetition -- the number of non-overlapping runs in which
          the same character occurs three or more times in a row.
    """
    if not isinstance(text, str):
        text = ""  # Replace non-string or NaN values with an empty string

    # Tally every token in a single pass; calling list.count() once per
    # distinct word would rescan the whole list each time (quadratic).
    word_counts = Counter(text.split())
    word_repetition = sum(count for count in word_counts.values() if count > 1)

    # (.)\1{2,} matches any character except a newline followed by at least
    # two more copies of itself, so punctuation and digits count too.
    letter_repetition = len(re.findall(r'(.)\1{2,}', text))

    return pd.Series([word_repetition, letter_repetition])

Load the model and vectorizer

In [137]:
# Restore the two persisted artifacts from the working directory.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load .pkl files that come from a trusted source.
loaded_model = joblib.load('sentiment_model.pkl')
loaded_vectorizer = joblib.load('tfidf_vectorizer.pkl')

Load the JSON file

In [140]:
# Parse reviews.json (relative to the working directory) into `df`.
df = pd.read_json('reviews.json')

Convert to CSV file

In [143]:
# Save as CSV; index=False leaves the row index out of the file.
df.to_csv('reviews.csv', index=False)

Load CSV data

In [146]:
# Read the CSV back into a separate frame, `data`, used by the later cells.
# pandas.read_csv parses empty fields as NaN by default.
csv_file_path = 'reviews.csv'
data = pd.read_csv(csv_file_path)

In [128]:
# Assuming 'review' column contains the text
# NOTE(review): X_new is assigned again in the following cell with missing
# values filled; this raw selection looks redundant -- confirm before removing.
X_new = data['review']

In [150]:
# Normalise the review text in two steps: blank out missing entries first,
# then coerce whatever remains to str.
reviews_filled = data['review'].fillna("")
X_new = reviews_filled.astype(str)

In [152]:
# Build the per-review repetition features and label the two resulting columns.
repetition_column_names = ['word_repetition', 'letter_repetition']
repetition_features_new = X_new.apply(calculate_repetition_features)
repetition_features_new.columns = repetition_column_names

In [156]:
# Transform the text using the loaded vectorizer.
# Only transform() is called -- the loaded vectorizer is not refit on this data.
X_tfidf_new = loaded_vectorizer.transform(X_new)

In [162]:
# Convert the repetition features to a sparse matrix, then append them as
# extra columns to the right of the TF-IDF matrix.
repetition_sparse_new = sp.csr_matrix(repetition_features_new.values)
X_combined_new = sp.hstack([X_tfidf_new, repetition_sparse_new])

In [166]:
# Predict the sentiments by running the loaded model on the combined
# (TF-IDF + repetition) feature matrix.
# NOTE(review): presumably the model was trained on features in this same
# column order -- confirm against the training notebook.
predictions = loaded_model.predict(X_combined_new)

In [168]:
# Add predictions to the dataset
data['predicted_sentiment'] = predictions

# Save the results to a CSV. The path is held in one variable so the
# confirmation message always names the file that was actually written.
output_csv_path = 'datasetb_withpredictions.csv'
data.to_csv(output_csv_path, index=False)

print(f"Predictions saved to '{output_csv_path}'")

Predictions saved to 'predicted_sentiments.csv'
