# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import seaborn as sns
import warnings
from sklearn.decomposition import TruncatedSVD
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from keras.optimizers import Adam
from keras.layers import Input, Dense, Dropout
from keras.models import Model
import pyodbc # pip install pyodbc
from pymongo import MongoClient
from bson import json_util
import json

warnings.filterwarnings('ignore')

In [3]:
def parse_json(data):
    return json.loads(json_util.dumps(data, indent=4))

def db_mongo_find_a_document(Connection_String, dbName, dbCollection, query):

    #Connecting to mongodb compass
    client = MongoClient(Connection_String)   

    #Accessing the database
    db = client[dbName]

    #Accessing the collection within the database
    collection = db[dbCollection]
    
    # finding a particular document
    x = collection.find(query) 
    res = []
    
    for data in x:
        res.append(data)

    return parse_json(res)

def db_mongo_find_documents(Connection_String, dbName, dbCollection, query):

    #Connecting to mongodb compass
    client = MongoClient(Connection_String)   

    #Accessing the database
    db = client[dbName]

    #Accessing the collection within the database
    collection = db[dbCollection]
    
    # finding a particular document
    x = collection.find(query) 
    res = []
    
    for data in x:
        res.append(data)

    return parse_json(res)

In [5]:
productRes = db_mongo_find_documents('mongodb+srv://qwerty:Qwerty123@myecomcluster.jqhkqxz.mongodb.net/?retryWrites=true&w=majority&ssl=true','MyDb','products', {})
products_dataset = pd.DataFrame(productRes)

In [12]:
reviewRes = db_mongo_find_documents('mongodb+srv://qwerty:Qwerty123@myecomcluster.jqhkqxz.mongodb.net/?retryWrites=true&w=majority&ssl=true','MyDb','reviews', {})
review_dataset = pd.DataFrame(reviewRes)

In [9]:
userRes = db_mongo_find_documents('mongodb+srv://qwerty:Qwerty123@myecomcluster.jqhkqxz.mongodb.net/?retryWrites=true&w=majority&ssl=true','MyDb','users', {})
users_dataset = pd.DataFrame(userRes)

# Recommendation Approach 2 (Autoencoder based Collaborative Filter Model)
## Giving recommended produts to a particular user by taking user_id as parameter

In [43]:
# Train Data (subset of all interactions)
amazon_review_data = review_dataset
amazon_review_data = amazon_review_data[['user_id', 'product_id', 'rating']]
amazon_review_data = amazon_review_data.drop_duplicates()
amazon_review_data.head(5)

Unnamed: 0,user_id,product_id,rating
0,AEWAZDZZJLQUYVOVGBEUKSLXHQ5A,B08HDJ86NZ,5.0
1,AG5HTSFRRE6NL3M5SGCUQBP7YSCA,B08HDJ86NZ,5.0
2,AH725ST5NW2Y4JZPKUNTIJCUK2BA,B08HDJ86NZ,5.0
3,AHV3TXIFCJPMS4D5JATCEUR266MQ,B08HDJ86NZ,4.5
4,AGWIGDEMFIIUAOXYY2QATNBSUGHA,B08HDJ86NZ,4.3


In [44]:
# Creating a sparse pivot table with users in rows and items in columns
users_items_matrix_df = amazon_review_data.pivot(index   = 'user_id', 
                                 columns = 'product_id', 
                                 values  = 'rating').fillna(0)
users_items_matrix_df.head(10)

product_id,B002PD61Y4,B003B00484,B003L62T7W,B004IO5BMQ,B005FYNT3G,B005LJQMCK,B0088TKTY2,B008FWZGSG,B008YW8M0G,B00935MD1C,...,B0BNDGL26T,B0BNDRK886,B0BNQMF152,B0BNV7JM5Y,B0BNXFDTZ2,B0BP18W8TM,B0BP7XLX48,B0BPCJM7TB,B0BQRJ3C47,B0BR4F878Q
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AE22E2AXODSPNK3EBIHNGYS5LOSA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AE22MK2NXQD3ZARLIOL3SLD4GU6A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AE27PFEMMMJS44GT27KPL6VUOQUQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AE27QPJRG7545VJX7LYRK2EO3I4Q,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AE27UOZENYSWCQVQRRUQIV2ZM7VA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AE2BZUBJGOBQS2A3U66VXDUV5FRQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AE2CMOCWNJRTN53KESNTBUNXV37A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AE2E632GMYL5U2ESNXOX5UT5D34A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AE2EO67O5G5BPFX5QGUUBOF22LQQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AE2FPXNWO4ROL5WOAVLZWUE4OIAQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [46]:
def autoEncoder(X):
    '''
    Autoencoder for Collaborative Filter Model
    '''

    # Input
    input_layer = Input(shape=(X.shape[1],), name='UserScore')
    
    # Encoder
    # -----------------------------
    enc = Dense(512, activation='selu', name='EncLayer1')(input_layer)

    # Latent Space
    # -----------------------------
    lat_space = Dense(256, activation='selu', name='LatentSpace')(enc)
    lat_space = Dropout(0.8, name='Dropout')(lat_space) # Dropout

    # Decoder
    # -----------------------------
    dec = Dense(512, activation='selu', name='DecLayer1')(lat_space)

    # Output
    output_layer = Dense(X.shape[1], activation='linear', name='UserScorePred')(dec)

    # this model maps an input to its reconstruction
    model = Model(input_layer, output_layer)    
    
    return model

In [47]:
# input
X = users_items_matrix_df.values
y = users_items_matrix_df.values

In [48]:
# Build model
model = autoEncoder(X)
model.compile(optimizer = 'Adam', loss='mse')
model.summary()

In [49]:
# Predict new Matrix Interactions, set score zero on visualized games
new_matrix = model.predict(X) * (X == 0)

[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step


In [50]:
# converting the reconstructed matrix back to a Pandas dataframe
new_users_items_matrix_df  = pd.DataFrame(new_matrix, 
                                          columns = users_items_matrix_df.columns, 
                                          index   = users_items_matrix_df.index)
new_users_items_matrix_df.head()

product_id,B002PD61Y4,B003B00484,B003L62T7W,B004IO5BMQ,B005FYNT3G,B005LJQMCK,B0088TKTY2,B008FWZGSG,B008YW8M0G,B00935MD1C,...,B0BNDGL26T,B0BNDRK886,B0BNQMF152,B0BNV7JM5Y,B0BNXFDTZ2,B0BP18W8TM,B0BP7XLX48,B0BPCJM7TB,B0BQRJ3C47,B0BR4F878Q
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AE22E2AXODSPNK3EBIHNGYS5LOSA,-0.300942,-0.164598,-0.40736,-0.621365,-0.02996,0.271363,-0.043799,0.21066,0.379368,-0.001755,...,-0.136387,0.668819,-0.475646,-0.02514,-0.51837,-0.590264,0.199718,-0.615582,-0.460917,0.475156
AE22MK2NXQD3ZARLIOL3SLD4GU6A,-0.20369,0.227014,-0.084425,0.630152,0.019776,-0.513784,0.396148,-0.207232,-0.104944,0.200661,...,-0.120622,0.015913,-0.119759,0.180941,0.339088,0.44303,0.453643,0.604156,-0.343788,0.596964
AE27PFEMMMJS44GT27KPL6VUOQUQ,0.002298,0.196408,-0.481381,0.152345,-0.076602,-0.318518,0.317565,-0.459665,-0.186093,0.152163,...,-0.311254,0.223321,-0.087847,0.192138,-0.056887,0.340588,0.184763,0.829049,0.203771,0.680505
AE27QPJRG7545VJX7LYRK2EO3I4Q,-0.495807,-0.205345,0.313031,0.052459,-0.105447,-0.0332,-0.395583,-0.010363,-0.2032,-0.032382,...,0.13339,0.121068,-0.019014,-0.056713,-0.024559,0.141687,-0.084589,0.043008,-0.037566,0.034743
AE27UOZENYSWCQVQRRUQIV2ZM7VA,0.370383,-0.334383,-0.02492,0.674401,0.052985,-0.683136,-0.155775,-0.01564,0.187036,0.630314,...,0.029763,-0.424589,-0.462523,-0.916621,0.087366,0.04836,0.051413,-0.259736,-0.363036,0.298936


In [54]:
# Content Data of products
amazon_review_data_ref = products_dataset
amazon_review_data_ref = amazon_review_data_ref.drop(['_id','product_description','category','img_link','product_link','rating',"no_of_ratings","product_recommendations","price"], axis=1)
amazon_review_data_ref.head(4)


Unnamed: 0,product_id,product_name
0,B08HDJ86NZ,boAt Deuce USB 300 2 in 1 Type-C & Micro USB S...
1,B08Y1TFSP6,pTron Solero TB301 3A Type-C Data and Fast Cha...
2,B08DDRGWTJ,MI Usb Type-C Cable Smartphone (Black)
3,B08CF3D7QR,Portronics Konnect L POR-1081 Fast Charging 3A...


In [95]:
def recommender_for_user(user_id, interact_matrix, df_content, topn = 10):
    '''
    Recommender Products for UserWarning
    '''
    pred_scores = interact_matrix.loc[user_id].values

    df_scores   = pd.DataFrame({'product_id': list(users_items_matrix_df.columns), 
                               'score': pred_scores})

    df_rec      = df_scores.set_index('product_id')\
                    .join(df_content.set_index('product_id'))\
                    .sort_values('score', ascending=False)\
                    .head(topn)[['score', 'product_name']]
    
    return df_rec[df_rec.score > 0].reset_index()

In [96]:
# Games previously purchased by the user
testTest = recommender_for_user(
    user_id         = "AE22E2AXODSPNK3EBIHNGYS5LOSA", 
    interact_matrix = users_items_matrix_df, 
    df_content      = amazon_review_data_ref)

In [99]:
testTest

Unnamed: 0,product_id,score,product_name
0,B0B31FR4Y2,5.0,"Boult Audio Omega with 30dB ANC+ ENC, 32H Play..."


In [100]:
userProductRecommendationList = []
for index, row in testTest.iterrows():
    productId = row['product_id']
    score = row["score"]
    productName = row["product_name"]
    obj = {"product_id":productId,"score":score,"product_name":productName}
    print(obj)    
    


{'product_id': 'B0B31FR4Y2', 'score': 5.0, 'product_name': 'Boult Audio Omega with 30dB ANC+ ENC, 32H Playtime, 45ms Latency Gaming Mode, Quad Mic Zen ENC, 3 Equalizer Modes, ANC, Type-C Fast Charging, IPX5 True Wireless in Ear Bluetooth Earbuds (Black)'}


In [102]:
userProductRecommendationList = []
for index, row in users_dataset.iterrows():
    userId = row['user_id']
    tempDf = recommender_for_user(user_id= userId, interact_matrix = users_items_matrix_df,df_content= amazon_review_data_ref)
    productsRecommendedListTemp = []
    for innerIndex, innerDfRow in tempDf.iterrows():
        productId = innerDfRow['product_id']
        score = innerDfRow["score"]
        productName = innerDfRow["product_name"]
        obj = {"product_id":productId,"score":score,"product_name":productName}
        productsRecommendedListTemp.append(obj)
    fullObj = {"user_id":userId,"recommended_products":productsRecommendedListTemp}
    userProductRecommendationList.append(fullObj)
    


In [106]:
count =0
for data in userProductRecommendationList:
    if len(data['recommended_products']) > 3:
        count = count+1
        
print(count)

0


# Recommendation Approach 4 (Variational autoEncoder - By User Id)
## Spliting the data into train and test and fitting into the autoencoder

In [108]:
user_product_rating_dataset = review_dataset.drop(['_id','review_id','review_title','review_content'], axis=1)

# Create user-item interaction matrix
user_product_matrix = user_product_rating_dataset.pivot(index='user_id', columns='product_id', values='rating').fillna(0).values

# Split the data into training and testing sets
train_data, test_data = train_test_split(user_product_matrix, test_size=0.2, random_state=42)

# Build the autoencoder model
num_users, num_items = user_product_matrix.shape
latent_dim = 50

input_layer = Input(shape=(num_items,))
encoded = Dense(latent_dim, activation='relu')(input_layer)
decoded = Dense(num_items, activation='sigmoid')(encoded)

autoencoder = Model(inputs=input_layer, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mean_squared_error')

# Train the autoencoder
autoencoder.fit(train_data, train_data, epochs=10, batch_size=64, shuffle=True, validation_data=(test_data, test_data))

# Extract user and item representations from the encoder part of the autoencoder
encoder = Model(inputs=input_layer, outputs=encoded)
user_embeddings = encoder.predict(user_product_matrix)

Epoch 1/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.2419 - val_loss: 0.1352
Epoch 2/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0999 - val_loss: 0.0444
Epoch 3/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0377 - val_loss: 0.0308
Epoch 4/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0288 - val_loss: 0.0278
Epoch 5/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0269 - val_loss: 0.0267
Epoch 6/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0262 - val_loss: 0.0262
Epoch 7/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0257 - val_loss: 0.0259
Epoch 8/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0253 - val_loss: 0.0258
Epoch 9/10
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [109]:
# Example: Recommend items for a specific user
user_id = "AENGU523SXMOS7JPDTW52PNNVWGQ"  # Replace with the desired user ID
user_index = user_product_rating_dataset.index[user_product_rating_dataset['user_id'] == user_id].tolist()[0]
user_representation = user_embeddings[user_index - 1]

# Calculate the predicted ratings for all items
predicted_ratings = np.dot(user_embeddings, user_representation)

# Display top N recommendations
top_n = np.argsort(predicted_ratings)[::-1][:10]

# print(f"Top recommendations for User {user_id}: {top_n + 1}")
print(f"Top recommendations for User {user_id}:")
for x in top_n:
    print(user_product_rating_dataset.loc[x+1,"product_id"])
    

Top recommendations for User AENGU523SXMOS7JPDTW52PNNVWGQ:
B09LHYZ3GJ
B09KH58JZR
B015ZXUDD0
B09MTLG4TP
B07YNTJ8ZM
B09XXZXQC1
B09JPC82QC
B09PLFJ7ZW
B07N42JB4S
B09BNXQ6BR


In [None]:
userProductRecommendationList = []
    for index, row in users_dataset.iterrows():
        userId = row['user_id']
        tempDf = recommender_for_user(users_items_matrix_df,user_id= userId, interact_matrix = users_items_matrix_df,df_content= amazon_review_data_ref)
        productsRecommendedListTemp = []
        for innerIndex, innerDfRow in tempDf.iterrows():
            productId = innerDfRow['product_id']
            score = innerDfRow["score"]
            productName = innerDfRow["product_name"]
            obj = {"product_id":productId,"score":score,"product_name":productName}
            productsRecommendedListTemp.append(obj)
        fullObj = {"user_id":userId,"recommended_products":productsRecommendedListTemp}
        userProductRecommendationList.append(fullObj)