In [1]:
import pandas as pd

# Read the combined reviews CSV into a DataFrame; later cells use its 'review' column
reviews_df = pd.read_csv(
    filepath_or_buffer='combined-dataset/final_reviews_data.csv',
)

# Text normalisation applied to every review before vectorisation
def preprocess_text(text):
    """Return ``text`` lower-cased with surrounding whitespace removed.

    Missing values (``None`` or a float NaN, which is how pandas represents
    empty CSV cells) are mapped to an empty string instead of raising
    ``AttributeError``. Any other non-string value is converted with ``str``
    before being normalised.
    """
    # NaN is the only float that is not equal to itself
    if text is None or (isinstance(text, float) and text != text):
        return ''
    return str(text).lower().strip()

# Normalise the 'review' column by running every entry through preprocess_text
reviews_df['review'] = reviews_df['review'].map(preprocess_text)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF vectoriser limited to 5000 vocabulary terms
tfidf = TfidfVectorizer(max_features=5000)

# Learn the vocabulary from the review column and build the document-term matrix in one step
tfidf_matrix = tfidf.fit_transform(reviews_df['review'])

# Densify into a DataFrame with one column per vocabulary term
tfidf_df = pd.DataFrame(
    data=tfidf_matrix.toarray(),
    columns=tfidf.get_feature_names_out(),
)


In [None]:
from textblob import TextBlob

# Polarity scoring helper
def get_sentiment(review):
    """Return the TextBlob sentiment polarity of ``review``."""
    return TextBlob(review).sentiment.polarity

# Score every review and store the polarity in a new 'sentiment' column
reviews_df['sentiment'] = reviews_df['review'].map(get_sentiment)


In [None]:
# Build the model's input features from the TF-IDF matrix only.
# The TextBlob sentiment score is the regression target (y) in the training
# cell, so it must not also be an input column: that would leak the label into
# X and would give the network one more input than the TF-IDF-only features
# that are built for new reviews at prediction time.
# .copy() keeps tfidf_df itself unmodified, so this cell can be re-run safely.
combined_features = tfidf_df.copy()

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input

# Feed-forward regressor: two ReLU hidden layers, each followed by 50% dropout
model = Sequential()
model.add(Input(shape=(combined_features.shape[1],)))  # one input per feature column
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='linear'))  # single linear unit: regression output

# Optimise mean squared error with Adam and also report mean absolute error
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])
model.summary()


In [None]:
# Feature matrix and regression target as NumPy arrays
X = combined_features.to_numpy()
y = reviews_df['sentiment'].to_numpy()

# Hold out 20% of the rows for testing (seeded so the split is repeatable)
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit for 150 epochs in mini-batches of 32, holding back 20% of the training rows for validation
model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=150,
    validation_split=0.2,
)


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Run the trained model over the held-out features
y_pred = model.predict(X_test)

# Score the predictions against the true targets
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

print(f'Mean Squared Error: {mse:.2f}, Mean Absolute Error: {mae:.2f}')


In [None]:
# Sample reviews to score (illustrative)
new_reviews = [
    "Great product, very satisfied!",
    "The service was terrible.",
    "Average experience, nothing special.",
    "Fantastic! Exceeded expectations.",
    "Not what I expected, quite disappointing."
]

# Normalise them with the same preprocess_text used on the training reviews
new_reviews = list(map(preprocess_text, new_reviews))

# Vectorise with the already-fitted TF-IDF vocabulary (transform only, no re-fit)
new_tfidf_matrix = tfidf.transform(new_reviews)
new_tfidf_df = pd.DataFrame(
    data=new_tfidf_matrix.toarray(),
    columns=tfidf.get_feature_names_out(),
)

# The prediction features are the TF-IDF columns alone; no sentiment column is appended
new_combined_features = new_tfidf_df


In [None]:
# Score the new reviews with the trained network
predicted_sentiments = model.predict(x=new_combined_features)