In [11]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD
from textblob import TextBlob

# Sentiment-driven recommendation sketch.
#
# Pipeline: load preprocessed reviews -> score each review's polarity with
# TextBlob -> pivot into a user x product matrix -> derive (a) user-user cosine
# similarity, (b) truncated-SVD latent factors, and (c) the product with the
# highest mean sentiment.

# Load the CSV file into a pandas DataFrame
csv_file_path = 'preprocessing/preprocessed_sample.csv'
data = pd.read_csv(csv_file_path)

# Drop rows with NaN values in the 'corrected_review' column
data = data.dropna(subset=['corrected_review'])

# Coerce to str: TextBlob only accepts strings, and a review that consists
# solely of digits may have been parsed by pandas as a number.
documents = [str(review) for review in data['corrected_review']]

# Calculate sentiment scores for each review.
sentiment_scores = [TextBlob(review).sentiment.polarity for review in documents]

# Positional assignment: the list is in the same row order as `data`.
data['sentiment_score'] = sentiment_scores

# Create a user-item matrix. pivot_table's default aggregation is the mean, so
# each cell holds the user's average sentiment across their reviews of that
# product.
user_item_matrix = data.pivot_table(index='user_id', columns='product_id', values='sentiment_score')

# Fill NaN values with 0 (neutral sentiment) for user/product pairs with no review
user_item_matrix = user_item_matrix.fillna(0)

# Collaborative Filtering - User-Based
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(user_similarity, index=user_item_matrix.index, columns=user_item_matrix.index)

# Matrix Factorization - Singular Value Decomposition (SVD)
# TruncatedSVD rejects n_components larger than the number of features, so cap
# the requested 50 components by the number of product columns (keeping at
# least one). Large matrices still get the original 50 components.
n_products = user_item_matrix.shape[1]
n_components = max(1, min(50, n_products - 1))
# Fixed seed: the default randomized solver otherwise yields different factors
# on every run.
svd = TruncatedSVD(n_components=n_components, random_state=42)
user_factors = svd.fit_transform(user_item_matrix)
item_factors = svd.components_

# Content-Based Filtering: recommend the product with the highest average
# sentiment. (Indexing `data['product_id'][0]` is a label lookup that raises
# KeyError once row 0 has been dropped, and never looked at sentiment at all.)
average_sentiment_by_product = data.groupby('product_id')['sentiment_score'].mean()
recommended_product = average_sentiment_by_product.idxmax()

print("User-Based Collaborative Filtering Similarity Matrix:")
print(user_similarity_df.head())
print("=" * 40)
print("Matrix Factorization - User Factors:")
print(user_factors)
print("=" * 40)
print("Matrix Factorization - Item Factors:")
print(item_factors)
print("=" * 40)
print("Recommended Product based on Average Sentiment:")
print(recommended_product)


Similar Review 28: GoodREAD MORE
Similar Review 93: GoodREAD MORE
Similar Review 141: GoodREAD MORE
Similar Review 151: GoodREAD MORE
Similar Review 180: Very good ✌️😍😍✌️READ MORE
