# Content-Based Filtering

In [1]:
# Import Libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from scipy.sparse import hstack

# Load Data
# Load review datasets and concatenate
df1 = pd.read_csv("C:/Users/lms26/MachineLearning/dataset/reviews_0-250.csv")
df2 = pd.read_csv("C:/Users/lms26/MachineLearning/dataset/reviews_250-500.csv")
df3 = pd.read_csv("C:/Users/lms26/MachineLearning/dataset/reviews_500-750.csv")
df4 = pd.read_csv("C:/Users/lms26/MachineLearning/dataset/reviews_750-1250.csv")
df5 = pd.read_csv("C:/Users/lms26/MachineLearning/dataset/reviews_1250-end.csv")
df_test = pd.concat([df1,df2,df3,df4,df5], ignore_index=True)

# Load product information with error handling for encoding
try:
    product_info = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/product_info.csv', encoding='utf-8', low_memory=False)
except UnicodeDecodeError:
    product_info = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/product_info.csv', encoding='ISO-8859-1', low_memory=False)

# Filter to keep only relevant products
df_test = df_test[df_test['product_id'].isin(product_info['product_id'])]
df = product_info[product_info['product_id'].isin(df_test['product_id'])].reset_index(drop=True)

# Preprocessing
# Drop columns with missing values
df = df.dropna(axis=1)

# Combine text fields for feature extraction
df['combined_features'] = df['product_name'] + ' ' + df['brand_name'] + ' ' + df['primary_category'] + ' ' + df['secondary_category']


# TF-IDF Vectorization on combined text features
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['combined_features'])

# Standardize numeric features
numeric_features = df[['rating', 'loves_count', 'price_usd']].values
scaler = StandardScaler()
numeric_features_scaled = scaler.fit_transform(numeric_features)

# Model (Cosine Similarity)
# Combine TF-IDF and numeric features
features_combined = hstack([tfidf_matrix, numeric_features_scaled])

# Compute cosine similarity
cosine_sim = cosine_similarity(features_combined, features_combined)

# Recommendation and Evaluation Functions
# Single product recommendation based on cosine similarity
def get_recommendations(product_idx, cosine_sim=cosine_sim, k=5):
    similarity_scores = list(enumerate(cosine_sim[product_idx]))
    sorted_similar_products = sorted(similarity_scores, key=lambda x: x[1], reverse=True)[1:k]
    similar_products_idx = [x[0] for x in sorted_similar_products]
    similar_products = df.iloc[similar_products_idx]
    return similar_products

def recommend_products2(product_name, num_recommendations=5):

    product_idx = df[df['product_name'] == product_name].index[0]
    similarity_scores = list(enumerate(cosine_sim[product_idx]))
    
    # 유사도 점수를 기준으로 내림차순 정렬 (자기 자신 제외)
    sorted_similar_products = sorted(similarity_scores, key=lambda x: x[1], reverse=True)[1:num_recommendations+1]
    
    # 유사도가 높은 제품의 인덱스 출력
    similar_products_idx = [x[0] for x in sorted_similar_products]
    similar_products = df.iloc[similar_products_idx]
    
    return similar_products
    
recommended_products = recommend_products2("Wake Up Honey Eye Cream with Brightening Vitamin C")

print(recommended_products[['product_name', 'brand_name']])
print('-----------------------------------------------------------------')

# Precision, Recall, and F1 Score calculation functions
def precision_at_k(recommended, relevant, k=10):
    recommended_top_k = recommended[:k]
    relevant_set = set(relevant)
    recommended_set = set(recommended_top_k)
    intersection = len(recommended_set.intersection(relevant_set))
    return intersection / len(recommended_top_k) if len(recommended_top_k) > 0 else 0

def recall_at_k(recommended, relevant, k=5):
    recommended_top_k = recommended[:k]
    relevant_set = set(relevant)
    recommended_set = set(recommended_top_k)
    intersection = len(recommended_set.intersection(relevant_set))
    return intersection / len(relevant_set) if len(relevant_set) > 0 else 0

def f1_score_at_k(recommended, relevant, k=5):
    precision = precision_at_k(recommended, relevant, k)
    recall = recall_at_k(recommended, relevant, k)
    return 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

# Get relevant products based on user_id
def get_relevant_products(user_id, df_test):
    user_reviews = df_test[df_test['author_id'] == user_id]
    return user_reviews['product_id'].tolist() if not user_reviews.empty else []

# Evaluate recommendations for a specific user_id
def evaluate_recommendations(user_id, k=10):
    print(f"Evaluating recommendations for user_id: {user_id} with top {k} recommendations")
    
    # Step 1: Get relevant products for the given user
    relevant_products = get_relevant_products(user_id, df_test)
    print(f"Relevant products for user {user_id}: {relevant_products}")

    if not relevant_products:
        print(f"No relevant products found for user {user_id}. Exiting evaluation.")
        return 0, 0, 0

    # Step 2: Generate recommendations based on relevant products
    all_recommended = []
    for product_id in relevant_products[-5:]:
        if product_id in df['product_id'].values:
            product_idx = df[df['product_id'] == product_id].index[0]
            recommended_products = get_recommendations(product_idx=product_idx, k=int(k/5))['product_id'].tolist()
            print(f"Recommended products for product_id {product_id}: {recommended_products}")
            all_recommended.extend(recommended_products)
        else:
            print(f"Product ID {product_id} not found in product dataset.")

    # Step 3: Process final recommendations
    all_recommended = list(set(all_recommended))[:k]
    print(f"Final unique recommended products (Top {k}): {all_recommended}")

    # Step 4: Calculate evaluation metrics
    precision = precision_at_k(all_recommended, relevant_products, k)
    recall = recall_at_k(all_recommended, relevant_products, k)
    f1 = f1_score_at_k(all_recommended, relevant_products, k)
    
    print(f"Evaluation Results - Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")
    return precision, recall, f1

# Test the function with a specific user_id
user_id = 1288462295  # Example user_id
precision, recall, f1 = evaluate_recommendations(user_id, k=10)
print(f"Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}")

  df1 = pd.read_csv("C:/Users/lms26/MachineLearning/dataset/reviews_0-250.csv")
  df4 = pd.read_csv("C:/Users/lms26/MachineLearning/dataset/reviews_750-1250.csv")
  df5 = pd.read_csv("C:/Users/lms26/MachineLearning/dataset/reviews_1250-end.csv")


                                           product_name           brand_name
1697                  Eye Revival Brightening Eye Cream             ROSE INC
709                          1% Vitamin A Retinol Serum              Farmacy
1927  Brightening Eye Cream Mineral SPF 15 with Pept...      Soleil Toujours
2345  Peptides + C Energy Eye Concentrate with Vitam...  Youth To The People
1910  barrier+ Triple Lipid + Collagen Brightening E...              Skinfix
-----------------------------------------------------------------
Evaluating recommendations for user_id: 1288462295 with top 10 recommendations
Relevant products for user 1288462295: ['P420652', 'P420652', 'P7880', 'P441101', 'P422905', 'P439061', 'P427406', 'P173726', 'P426836', 'P4016', 'P466123', 'P432829', 'P463371', 'P456218', 'P399623', 'P469502', 'P470065', 'P475181', 'P416923', 'P397624', 'P432048', 'P462344', 'P4032', 'P441323', 'P397310', 'P467118', 'P419466', 'P397627', 'P427641', 'P409816', 'P470533', 'P480280', 'P44915

# User-based Filtering

In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from math import sqrt
from random import sample
from concurrent.futures import ThreadPoolExecutor, as_completed
import os

# Step 1: Load and Preprocess Data
# --------------------------------

# Load datasets
# Load datasets with specific encoding
df_product = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/product_info.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
df_review_1 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_0-250.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
df_review_2 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_250-500.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
df_review_3 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_500-750.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
df_review_4 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_750-1250.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
df_review_5 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_1250-end.csv', dtype={'product_id': str}, encoding='ISO-8859-1')


# Combine reviews and keep necessary columns
df_reviews_combined = pd.concat([df_review_1, df_review_2, df_review_3, df_review_4, df_review_5], ignore_index=True)
df_reviews_filtered = df_reviews_combined[['author_id', 'product_id', 'rating']].dropna()

# Filter out users and products with fewer than n number.
user_counts = df_reviews_filtered['author_id'].value_counts()
product_counts = df_reviews_filtered['product_id'].value_counts()
active_users = user_counts[user_counts >= 5].index # Minimum 5 reviews for users
popular_products = product_counts[product_counts >= 10].index # Minimum 10 reviews for products

df_reviews_filtered = df_reviews_filtered[(df_reviews_filtered['author_id'].isin(active_users)) & 
                                          (df_reviews_filtered['product_id'].isin(popular_products))]

print("Data Loading and Filtering Complete")

# Step 2: Create User-Item Matrix and Normalize Ratings
# ------------------------------------------------------

# Create user-item matrix
user_item_matrix = df_reviews_filtered.pivot_table(index='author_id', columns='product_id', values='rating')

# Center ratings by subtracting the user's mean rating from each rating
user_means = user_item_matrix.mean(axis=1)  # Calculate mean rating for each user
user_item_matrix = user_item_matrix.sub(user_means, axis=0).fillna(0)  # Center ratings and fill NaN with 0

# Convert to sparse matrix for memory efficiency
user_item_sparse = csr_matrix(user_item_matrix)

# Step 3: Train-Test Split
# ------------------------

# Split data into training and testing sets
train_data, test_data = train_test_split(df_reviews_filtered, test_size=0.2, random_state=42)
train_user_item_matrix = train_data.pivot_table(index='author_id', columns='product_id', values='rating')

# Center training data by subtracting the user's mean rating
train_user_means = train_user_item_matrix.mean(axis=1)
train_user_item_matrix = train_user_item_matrix.sub(train_user_means, axis=0).fillna(0)

# Convert to sparse matrix
train_user_item_sparse = csr_matrix(train_user_item_matrix)

print("Train-Test Split Complete")

# Step 4: Calculate User Similarity
# ---------------------------------

# Calculate user similarity matrix using cosine similarity
user_similarity = cosine_similarity(train_user_item_sparse)
user_similarity_df = pd.DataFrame(user_similarity, index=train_user_item_matrix.index, columns=train_user_item_matrix.index)
print("User Similarity Calculation Complete")

# Step 5: Implement Recommendation System
# ---------------------------------------

def user_based_recommendations(user_id, num_recommendations=5):
    # Ensure the user exists in the training data
    if user_id not in user_similarity_df.index:
        return pd.DataFrame(columns=['product_id', 'score'])

    # Find similar users
    similar_users = user_similarity_df[user_id].sort_values(ascending=False).iloc[1:num_recommendations+1].index
    
    # Get ratings of similar users
    similar_user_ratings = train_user_item_matrix.loc[similar_users]
    user_ratings = train_user_item_matrix.loc[user_id]
    
    # Recommend products that the user has not rated yet
    recommendations = similar_user_ratings.mean().loc[user_ratings[user_ratings == 0].index]
    recommendations = recommendations.sort_values(ascending=False).head(num_recommendations)

    # Map product_id to product name and brand
    recommendation_df = pd.DataFrame(recommendations).reset_index()
    recommendation_df.columns = ['product_id', 'score']
    recommendation_df = recommendation_df.merge(df_product[['product_id', 'product_name', 'brand_name']], on='product_id', how='left')
    
    return recommendation_df[['product_id', 'product_name', 'brand_name', 'score']]



# Step 6: Evaluate Recommendation System with RMSE
# ------------------------------------------------------------------------

def evaluate_recommendation_system(batch_size=100, sample_size=500):
    actual_ratings = []
    predicted_ratings = []

    test_sample = test_data.sample(n=sample_size, random_state=42)

    for i in range(0, len(test_sample), batch_size):
        batch = test_sample.iloc[i:i+batch_size]

        for _, row in batch.iterrows():
            user_id = row['author_id']
            product_id = row['product_id']
            actual_rating = row['rating']

            if user_id in user_similarity_df.index and product_id in train_user_item_matrix.columns:
                user_recommendations = user_based_recommendations(user_id)
                if product_id in user_recommendations['product_id'].values:
                    predicted_rating = user_recommendations[user_recommendations['product_id'] == product_id]['score'].values[0] + train_user_means[user_id]
                else:
                    predicted_rating = train_user_means[user_id]
            else:
                predicted_rating = train_user_item_matrix.values.mean()
            
            actual_ratings.append(actual_rating)
            predicted_ratings.append(predicted_rating)
    
    rmse = sqrt(mean_squared_error(actual_ratings, predicted_ratings))
    return rmse

# Example Usage with Formatted Output
# -----------------------------------

user_id = input("Enter the user ID for product recommendations: ")

if user_id in user_similarity_df.index:
    recommendations = user_based_recommendations(user_id, num_recommendations=5)
    print(f"\nRecommended Products for user {user_id}:\n")
    print(f"{'[ Brand ]':<20} {'[ Product Name ]':<60}")
    for _, row in recommendations.iterrows():
        print(f"{row['brand_name']:<20} {row['product_name']:<60}")
else:
    print(f"User ID {user_id} not found in the dataset.")

# Evaluate model performance with RMSE
print("\nEvaluating model performance (RMSE)...")
rmse_score = evaluate_recommendation_system()
print(f"RMSE of the recommendation system: {rmse_score}")


  df_review_1 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_0-250.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
  df_review_4 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_750-1250.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
  df_review_5 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_1250-end.csv', dtype={'product_id': str}, encoding='ISO-8859-1')


Data Loading and Filtering Complete
Train-Test Split Complete
User Similarity Calculation Complete

Recommended Products for user 11392228549:

[ Brand ]            [ Product Name ]                                            
Augustinus Bader     The Cream Cleansing Gel with TFC8 Gentle Cleanser           
Sunday Riley         Good Genes All-In-One AHA Lactic Acid Treatment             
Summer Fridays       Jet Lag Mask                                                
Dermalogica          Mini Daily Microfoliant Exfoliator                          
Summer Fridays       Mini Jet Lag Mask                                           

Evaluating model performance (RMSE)...
RMSE of the recommendation system: 0.9233250184502947


# Hybrid Filtering Code

In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import mean_squared_error
from sklearn.feature_extraction.text import TfidfVectorizer
from math import sqrt

# Step 1: Load and Preprocess Data
# --------------------------------

# Load datasets
# Load datasets with specific encoding
df_product = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/product_info.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
df_review_1 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_0-250.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
df_review_2 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_250-500.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
df_review_3 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_500-750.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
df_review_4 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_750-1250.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
df_review_5 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_1250-end.csv', dtype={'product_id': str}, encoding='ISO-8859-1')


# Combine reviews and keep necessary columns
df_reviews_combined = pd.concat([df_review_1, df_review_2, df_review_3, df_review_4, df_review_5], ignore_index=True)
df_reviews_filtered = df_reviews_combined[['author_id', 'rating', 'product_id']]
df_product = df_product[['product_id', 'product_name', 'brand_name', 'ingredients', 'primary_category', 'secondary_category', 'tertiary_category']]

# Ensure product_id is the same type in both dataframes
df_product['product_id'] = df_product['product_id'].astype(str)
df_reviews_filtered['product_id'] = df_reviews_filtered['product_id'].astype(str)

# Filter out products with 'Mini Size' in any category
df_product = df_product[
    ~(df_product['primary_category'] == 'Mini Size') &
    ~(df_product['secondary_category'] == 'Mini Size') &
    ~(df_product['tertiary_category'] == 'Mini Size')
]

# Filter out users and products with fewer than n number of reviews
user_counts = df_reviews_filtered['author_id'].value_counts()
product_counts = df_reviews_filtered['product_id'].value_counts()
active_users = user_counts[user_counts >= 10].index  # Minimum 10 reviews for users
popular_products = product_counts[product_counts >= 10].index  # Minimum 10 reviews for products
df_reviews_filtered = df_reviews_filtered[df_reviews_filtered['author_id'].isin(active_users) & df_reviews_filtered['product_id'].isin(popular_products)]

# Merge review and product data
user_product_review = pd.merge(df_reviews_filtered, df_product, on='product_id')

# User-Centric Normalization
user_mean_rating = user_product_review.groupby('author_id')['rating'].mean()
user_product_review = user_product_review.join(user_mean_rating, on='author_id', rsuffix='_mean')
user_product_review['rating'] = user_product_review['rating'] - user_product_review['rating_mean']

# Step 2: Encoding and Creating User-Item Matrix
# ----------------------------------------------

# Encode user and product IDs to numerical values
encoder_user = LabelEncoder()
encoder_product = LabelEncoder()
user_product_review['original_author_id'] = user_product_review['author_id']  # Save original IDs for reference
user_product_review['author_id'] = encoder_user.fit_transform(user_product_review['author_id'].astype(str))
user_product_review['product_id'] = encoder_product.fit_transform(user_product_review['product_id'].astype(str))

# Create user-item rating matrix (CSR format for memory efficiency)
num_users = user_product_review['author_id'].nunique()
num_products = user_product_review['product_id'].nunique()
user_item_matrix = csr_matrix((user_product_review['rating'],
                               (user_product_review['author_id'], user_product_review['product_id'])),
                               shape=(num_users, num_products))

# Step 3: Train-Test Split
# ------------------------

# Split data into training and testing sets
train_data, test_data = train_test_split(user_product_review, test_size=0.2, random_state=42)
train_user_item_matrix = csr_matrix((train_data['rating'],
                                     (train_data['author_id'], train_data['product_id'])),
                                     shape=(num_users, num_products))
test_user_item_matrix = csr_matrix((test_data['rating'],
                                    (test_data['author_id'], test_data['product_id'])),
                                    shape=(num_users, num_products))

# Step 4: User-basd Collaborative Filtering (KNN model for user similarity)
# -------------------------------------------------------------------------

k = 10
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=k, n_jobs=-1)
model_knn.fit(train_user_item_matrix)

# Step 5: Content-Based Filtering (TD-IDF vectorization on product ingredients)
# -----------------------------------------------------------------------------

tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df_product['ingredients'].astype(str))
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Step 6: Generate Recommendations
# --------------------------------

# Get user input for generating recommendations
pd.set_option('display.max_colwidth', None)
original_user_id = str(input("Enter the user ID for product recommendations: ")) # valid id: 11392228549
if original_user_id not in user_product_review['original_author_id'].astype(str).values:
    raise ValueError("The user ID you entered is invalid. Please try again.")

user_idx = encoder_user.transform([original_user_id])[0]

# Find k similar users, excluding the user themselves
if user_idx < num_users:
    distances, indices = model_knn.kneighbors(train_user_item_matrix[user_idx], n_neighbors=k + 1)
    similar_users = indices.flatten()[1:]  # Exclude the user themselves

    # Collaborative Filtering scores for recommendation
    product_scores_cf = np.zeros(num_products)
    for similar_user in similar_users[:k]:
        product_scores_cf += train_user_item_matrix[similar_user].toarray().flatten()

    # Combine Collaborative Filtering and Content-Based scores
    hybrid_scores = product_scores_cf
    for product_id in range(num_products):
        if product_id < cosine_sim.shape[1]:
            hybrid_scores[product_id] += cosine_sim[user_idx % cosine_sim.shape[0], product_id]

    # Recommend top products
    recommended_products = np.argsort(hybrid_scores)[-10:][::-1]
    recommended_product_info = df_product[df_product['product_id'].isin(encoder_product.inverse_transform(recommended_products))]

    # Display top 5 unique recommended products
    pd.set_option('display.width', 1000)
    pd.set_option('display.max_colwidth', None)
    recommended_product_info = recommended_product_info[['brand_name', 'product_name']].drop_duplicates().reset_index(drop=True).head(5)
    recommended_product_info = recommended_product_info.rename(columns={'brand_name': '[ Brand ]', 'product_name': '[ Product Name ]'})
    print(f"\nRecommended Products for user {original_user_id} :\n")
    print(recommended_product_info.to_string(index=False))

else:
    print(f"User ID {user_idx} is invalid. Please try again.")

# Step 7: Evaluation - RMSE
# -------------------------

y_true = test_data['rating'] # actual ratings from test set

y_pred = [] # predicted ratings for test set
for user_id, product_id in zip(test_data['author_id'], test_data['product_id']):
    if train_user_item_matrix[user_id, product_id] != 0:
        y_pred.append(train_user_item_matrix[user_id, product_id])
    else:
        y_pred.append(np.mean(train_user_item_matrix[user_id].data))  # Use user's mean rating if specific prediction is unavailable

# Remove NaN values from predictions and true values
y_pred = np.array(y_pred)
valid_idx = ~np.isnan(y_pred)
y_true = y_true[valid_idx]
y_pred = y_pred[valid_idx]

# Calculate Root Mean Squared Error (RMSE) for evaluation
rmse = sqrt(mean_squared_error(y_true, y_pred))
print(f"\nRMSE of Recommendation System: {rmse}")

  df_review_1 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_0-250.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
  df_review_4 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_750-1250.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
  df_review_5 = pd.read_csv('C:/Users/lms26/MachineLearning/dataset/reviews_1250-end.csv', dtype={'product_id': str}, encoding='ISO-8859-1')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_reviews_filtered['product_id'] = df_reviews_filtered['product_id'].astype(str)



Recommended Products for user 1979739038 :

     [ Brand ]                             [ Product Name ]
      CLINIQUE           Moisture Surge Overnight Face Mask
Drunk Elephant      T.L.C. Sukari Babyfacial AHA + BHA Mask
         fresh     Sugar Recovery Lip Mask Advanced Therapy
   Glow Recipe   Watermelon Glow PHA + BHA Pore-Tight Toner
     Herbivore Blue Tansy BHA and Enzyme Pore Refining Mask

RMSE of Recommendation System: 0.7983647394160953
