In [1]:
import pandas as pd

# Load the reviews data from a CSV path that is resolved relative to the
# current working directory. Later cells read the 'review' column of this frame.
reviews_df = pd.read_csv('combined-dataset/final_reviews_data.csv')

# Example preprocessing function
def preprocess_text(text):
    """Lower-case a review and strip its leading/trailing whitespace.

    Args:
        text: The raw review. Normally a ``str``. ``None`` and float NaN
            (what pandas yields for an empty CSV cell) are treated as an
            empty review; any other non-string value is converted with
            ``str()`` before normalising.

    Returns:
        str: The normalised text, or ``""`` when the value is missing.
    """
    if text is None:
        return ""
    # NaN is the only float that is not equal to itself. Catching it here
    # avoids the AttributeError that .lower() would raise on a missing cell.
    if isinstance(text, float) and text != text:
        return ""
    return str(text).lower().strip()

# Overwrite the 'review' column with the result of running preprocess_text
# on each of its values.
reviews_df['review'] = reviews_df['review'].map(preprocess_text)


In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Vectorizer configured with max_features=5000, which bounds the number of
# vocabulary terms (and therefore feature columns) it will produce.
tfidf = TfidfVectorizer(max_features=5000)

# Learn the vocabulary from the review text and vectorize it in one pass.
tfidf_matrix = tfidf.fit_transform(reviews_df['review'])

# Expand the vectorizer output into a dense array and wrap it in a DataFrame
# whose columns are labelled with the learned feature names.
tfidf_df = pd.DataFrame(
    data=tfidf_matrix.toarray(),
    columns=tfidf.get_feature_names_out(),
)


In [3]:
from textblob import TextBlob

# Function to get sentiment
def get_sentiment(review):
    """Return the TextBlob polarity score for a single review.

    Args:
        review: Review text, passed unchanged to ``TextBlob``.

    Returns:
        The ``sentiment.polarity`` value TextBlob reports for the text.
    """
    return TextBlob(review).sentiment.polarity

# Score each review with get_sentiment and store the result in a new
# 'sentiment' column.
reviews_df['sentiment'] = reviews_df['review'].map(get_sentiment)


In [4]:
# Combine TF-IDF with sentiment.
# NOTE(review): this binds a second name to tfidf_df rather than copying it, so
# the column assignment below also adds 'sentiment' to tfidf_df itself.
combined_features = tfidf_df
# NOTE(review): if 'sentiment' is itself one of the TF-IDF vocabulary terms,
# this assignment overwrites that term's column instead of appending a new
# one — TODO confirm against tfidf.get_feature_names_out().
# The assignment aligns on index; presumably both frames share the default
# RangeIndex — verify if reviews_df is ever filtered or re-indexed.
combined_features['sentiment'] = reviews_df['sentiment']

In [5]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation, dropout masks and batch shuffling start
# from the same random state on every Restart & Run All. The value matches the
# random_state used for the train/test split.
tf.keras.utils.set_random_seed(42)

# Define the model: a feed-forward regressor with two hidden ReLU layers
# (256 and 128 units), each followed by 50% dropout, and one linear output.
model = Sequential([
    Input(shape=(combined_features.shape[1],)),  # one input per feature column
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='linear')  # Output layer for regression
])

# Optimise mean squared error with Adam; report mean absolute error alongside.
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
model.summary()


In [7]:
# Prepare training data: X is every column of combined_features, y is the
# TextBlob polarity stored in reviews_df['sentiment'].
# NOTE(review): combined_features already carries that same 'sentiment' column
# (added when the features were combined), so the target is present verbatim
# among the inputs. The near-zero loss in the training log is consistent with
# this leakage — TODO confirm the intended target, or drop 'sentiment' from X
# (and rebuild the model with the matching input width).
X = combined_features.values
y = reviews_df['sentiment'].values

# Train/test split: hold out 20% of the rows, using a fixed random_state.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model for 150 epochs in batches of 32. validation_split=0.2 sets
# aside a further 20% of X_train for per-epoch validation; X_test / y_test are
# not used in this cell.
# NOTE(review): the History object returned by fit() is not assigned, so it is
# only echoed as the cell output and cannot be inspected afterwards.
model.fit(X_train, y_train, epochs=150, batch_size=32, validation_split=0.2)


Epoch 1/150
484/484 ━━━━━━━━━━━━━━━━━━━━ 5s 11ms/step - loss: 0.0026 - mae: 0.0357 - val_loss: 0.0014 - val_mae: 0.0275
Epoch 2/150
484/484 ━━━━━━━━━━━━━━━━━━━━ 5s 11ms/step - loss: 0.0025 - mae: 0.0349 - val_loss: 0.0010 - val_mae: 0.0237
Epoch 3/150
484/484 ━━━━━━━━━━━━━━━━━━━━ 5s 11ms/step - loss: 0.0025 - mae: 0.0354 - val_loss: 8.5949e-04 - val_mae: 0.0216
Epoch 4/150
484/484 ━━━━━━━━━━━━━━━━━━━━ 5s 11ms/step - loss: 0.0024 - mae: 0.0342 - val_loss: 0.0010 - val_mae: 0.0245
Epoch 5/150
484/484 ━━━━━━━━━━━━━━━━━━━━ 5s 11ms/step - loss: 0.0024 - mae: 0.0342 - val_loss: 0.0010 - val_mae: 0.0233
Epoch 6/150
484/484 ━━━━━━━━━━━━━━━━━━━━ 5s 10ms/step - loss: 0.0024 - mae: 0.0348 - val_loss: 0.0013 - val_mae: 0.0261
Epoch 7/150
484/484 ━━━━━━━━━━━━━━━━━━━━ 5

<keras.src.callbacks.history.History at 0x16ed9587850>