In [1]:
import pandas as pd
import re
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sqlalchemy import inspect
import ast
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from connection import create_db_connection

In [2]:
# Membuat koneksi ke database
engine = create_db_connection()

# Inspect the database to get a list of tables
inspector = inspect(engine)
tables = inspector.get_table_names()

# Check if the connection was successful and if tables were retrieved
if tables:
    print("Connection to the database was successful.")
    print("Tables in the database:", tables)
else:
    print("Failed to connect to the database.")

Connection to the database was successful.
Tables in the database: ['categories', 'failed_jobs', 'migrations', 'password_reset_tokens', 'personal_access_tokens', 'product_images', 'product_reviews', 'products', 'profiles', 'subcategories', 'users']


In [3]:
# Query untuk mengambil data dari tabel products, product_reviews, dan profiles
q_profiles_rating = """
    SELECT 
        u.id AS user_id, u.name AS user_name, pf.gender, pf.age, pf.skin_type_face, 
        pf.hair_issue, pf.skin_type_body, pf.allergy_history, pf.preferred_products, 
        pf.avoided_products, pf.specific_needs,
        pr.id AS review_id, pr.product_id, pr.rating
    FROM 
        users u
    LEFT JOIN 
        profiles pf ON u.id = pf.user_id
    LEFT JOIN 
        product_reviews pr ON u.id = pr.user_id;
"""

# Mengambil data dari database menggunakan query
dt_profiles_rating_df = pd.read_sql(q_profiles_rating, engine)

# Menggantikan nilai-nilai yang kosong dengan nilai 0
dt_profiles_rating_df['review_id'] = dt_profiles_rating_df['review_id'].fillna(0).astype(int)
dt_profiles_rating_df['product_id'] = dt_profiles_rating_df['product_id'].fillna(0).astype(int)
dt_profiles_rating_df['rating'] = dt_profiles_rating_df['rating'].fillna(0).astype(float)

display(dt_profiles_rating_df.dtypes)
display(dt_profiles_rating_df)

user_id                 int64
user_name              object
gender                 object
age                    object
skin_type_face         object
hair_issue             object
skin_type_body         object
allergy_history        object
preferred_products     object
avoided_products       object
specific_needs         object
review_id               int32
product_id              int32
rating                float64
dtype: object

Unnamed: 0,user_id,user_name,gender,age,skin_type_face,hair_issue,skin_type_body,allergy_history,preferred_products,avoided_products,specific_needs,review_id,product_id,rating
0,1,Operator,,,,,,,,,,0,0,0.0
1,2,tester1,Laki-laki,18-25,kering,kering,kering,"[""tidak_ada_alergi""]","[""bahan_alami"", ""pewangi""]","[""vegan"", ""paraben"", ""sls""]","[""meratakan_warna_kulit"", ""menghilangkan_jeraw...",0,0,0.0
2,3,Elisa Regina Simanjuntak,Perempuan,18-25,berminyak,berminyak,normal,"[""tidak_ada_alergi""]","[""bahan_alami""]","[""pewangi""]","[""meratakan_warna_kulit"",""menghilangkan_jerawa...",2,44,5.0
3,3,Elisa Regina Simanjuntak,Perempuan,18-25,berminyak,berminyak,normal,"[""tidak_ada_alergi""]","[""bahan_alami""]","[""pewangi""]","[""meratakan_warna_kulit"",""menghilangkan_jerawa...",5,316,5.0
4,3,Elisa Regina Simanjuntak,Perempuan,18-25,berminyak,berminyak,normal,"[""tidak_ada_alergi""]","[""bahan_alami""]","[""pewangi""]","[""meratakan_warna_kulit"",""menghilangkan_jerawa...",14,60,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
444,67,Wilona,Perempuan,18-25,kering,kering,kering,"[""pewangi"",""kandungan_kimia""]","[""bahan_alami""]","[""pewangi""]","[""menghidrasi"",""perlindungan_matahari""]",435,315,4.0
445,67,Wilona,Perempuan,18-25,kering,kering,kering,"[""pewangi"",""kandungan_kimia""]","[""bahan_alami""]","[""pewangi""]","[""menghidrasi"",""perlindungan_matahari""]",436,166,4.0
446,67,Wilona,Perempuan,18-25,kering,kering,kering,"[""pewangi"",""kandungan_kimia""]","[""bahan_alami""]","[""pewangi""]","[""menghidrasi"",""perlindungan_matahari""]",437,168,5.0
447,67,Wilona,Perempuan,18-25,kering,kering,kering,"[""pewangi"",""kandungan_kimia""]","[""bahan_alami""]","[""pewangi""]","[""menghidrasi"",""perlindungan_matahari""]",438,262,4.0


In [4]:
# Preprocessing data teks
def preprocess_text(text):
    # Case folding
    text = str(text).lower() 
    # Punctuational removal
    text = re.sub(r'[^\w\s]', '', text)
    # Tokenizing
    tokens = word_tokenize(text)
    # Stop words removal
    stop_words = set(stopwords.words('indonesian'))  # Menggunakan stop words bahasa Indonesia
    tokens = [word for word in tokens if word not in stop_words]
    # Stemming
    factory = StemmerFactory()
    stemmer = factory.create_stemmer()
    tokens = [stemmer.stem(word) for word in tokens]
    # Menggabungkan kembali token menjadi kalimat
    text = ' '.join(tokens)
    return text

In [5]:
# Preprocessing kolom teks
text_columns = ['gender','skin_type_face', 'hair_issue', 
                'skin_type_body', 'allergy_history', 'preferred_products', 
                'avoided_products', 'specific_needs']
for column in text_columns:
    dt_profiles_rating_df[column] = dt_profiles_rating_df[column].apply(preprocess_text)

display(dt_profiles_rating_df)

Unnamed: 0,user_id,user_name,gender,age,skin_type_face,hair_issue,skin_type_body,allergy_history,preferred_products,avoided_products,specific_needs,review_id,product_id,rating
0,1,Operator,none,,none,none,none,none,none,none,none,0,0,0.0
1,2,tester1,lakilaki,18-25,kering,kering,kering,tidak ada alergi,bahan alami wangi,vegan paraben sls,rata warna kulit hilang jerawat hidrasi lindun...,0,0,0.0
2,3,Elisa Regina Simanjuntak,perempuan,18-25,minyak,minyak,normal,tidak ada alergi,bahan alami,wangi,rata warna kulitmenghilangkan jerawatmenghidra...,2,44,5.0
3,3,Elisa Regina Simanjuntak,perempuan,18-25,minyak,minyak,normal,tidak ada alergi,bahan alami,wangi,rata warna kulitmenghilangkan jerawatmenghidra...,5,316,5.0
4,3,Elisa Regina Simanjuntak,perempuan,18-25,minyak,minyak,normal,tidak ada alergi,bahan alami,wangi,rata warna kulitmenghilangkan jerawatmenghidra...,14,60,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
444,67,Wilona,perempuan,18-25,kering,kering,kering,pewangikandungan kimia,bahan alami,wangi,menghidrasiperlindungan matahari,435,315,4.0
445,67,Wilona,perempuan,18-25,kering,kering,kering,pewangikandungan kimia,bahan alami,wangi,menghidrasiperlindungan matahari,436,166,4.0
446,67,Wilona,perempuan,18-25,kering,kering,kering,pewangikandungan kimia,bahan alami,wangi,menghidrasiperlindungan matahari,437,168,5.0
447,67,Wilona,perempuan,18-25,kering,kering,kering,pewangikandungan kimia,bahan alami,wangi,menghidrasiperlindungan matahari,438,262,4.0


In [6]:
print(dt_profiles_rating_df["gender"].unique())
print(dt_profiles_rating_df["skin_type_face"].unique())
print(dt_profiles_rating_df["hair_issue"].unique())
print(dt_profiles_rating_df["skin_type_body"].unique())

['none' 'lakilaki' 'perempuan']
['none' 'kering' 'minyak' 'normal' 'sensitif' 'kombinasi']
['none' 'kering' 'minyak' 'ketombe' 'normal' 'rontok' 'cabang']
['none' 'kering' 'normal' 'minyak' 'kombinasi']


In [7]:
# convert skin type, hair issue, skin type body to numeric value (int)
def convert_skin_type_face(skin_type): 
    skin_type_dict = {'normal': 0, 'kering': 1, 'minyak': 2, 'sensitif': 3, 'kombinasi': 4}
    return skin_type_dict.get(skin_type, 0)

def convert_hair_issue(hair_issue): 
    hair_issue_dict = {'normal': 1, 'ketombe': 1, 'kering': 2, 'minyak': 3, 'rontok': 4, 'cabang': 5}
    return hair_issue_dict.get(hair_issue, 0)

def convert_skin_type_body(skin_type): 
    skin_type_dict = {'normal': 0, 'kering': 1, 'minyak': 2, 'kombinasi': 3} 
    return skin_type_dict.get(skin_type, 0)

In [8]:
dt_profiles_rating_df["skin_type_face"] = dt_profiles_rating_df["skin_type_face"].apply(convert_skin_type_face) 
dt_profiles_rating_df["hair_issue"] = dt_profiles_rating_df["hair_issue"].apply(convert_hair_issue) 
dt_profiles_rating_df["skin_type_body"] = dt_profiles_rating_df["skin_type_body"].apply(convert_skin_type_body)
display(dt_profiles_rating_df)

Unnamed: 0,user_id,user_name,gender,age,skin_type_face,hair_issue,skin_type_body,allergy_history,preferred_products,avoided_products,specific_needs,review_id,product_id,rating
0,1,Operator,none,,0,0,0,none,none,none,none,0,0,0.0
1,2,tester1,lakilaki,18-25,1,2,1,tidak ada alergi,bahan alami wangi,vegan paraben sls,rata warna kulit hilang jerawat hidrasi lindun...,0,0,0.0
2,3,Elisa Regina Simanjuntak,perempuan,18-25,2,3,0,tidak ada alergi,bahan alami,wangi,rata warna kulitmenghilangkan jerawatmenghidra...,2,44,5.0
3,3,Elisa Regina Simanjuntak,perempuan,18-25,2,3,0,tidak ada alergi,bahan alami,wangi,rata warna kulitmenghilangkan jerawatmenghidra...,5,316,5.0
4,3,Elisa Regina Simanjuntak,perempuan,18-25,2,3,0,tidak ada alergi,bahan alami,wangi,rata warna kulitmenghilangkan jerawatmenghidra...,14,60,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
444,67,Wilona,perempuan,18-25,1,2,1,pewangikandungan kimia,bahan alami,wangi,menghidrasiperlindungan matahari,435,315,4.0
445,67,Wilona,perempuan,18-25,1,2,1,pewangikandungan kimia,bahan alami,wangi,menghidrasiperlindungan matahari,436,166,4.0
446,67,Wilona,perempuan,18-25,1,2,1,pewangikandungan kimia,bahan alami,wangi,menghidrasiperlindungan matahari,437,168,5.0
447,67,Wilona,perempuan,18-25,1,2,1,pewangikandungan kimia,bahan alami,wangi,menghidrasiperlindungan matahari,438,262,4.0


In [9]:
# Fungsi untuk melakukan min-max scaling
def min_max_scaling(column):
    min_value = column.min()
    max_value = column.max()
    return (column - min_value) / (max_value - min_value)

# Melakukan min-max scaling pada kolom
dt_profiles_rating_df["skin_type_face"] = min_max_scaling(dt_profiles_rating_df["skin_type_face"])
dt_profiles_rating_df["hair_issue"] = min_max_scaling(dt_profiles_rating_df["hair_issue"])
dt_profiles_rating_df["skin_type_body"] = min_max_scaling(dt_profiles_rating_df["skin_type_body"])

#  Membuat nilai numerik 2 angka dibelakang koma
dt_profiles_rating_df = dt_profiles_rating_df.round(2)

# Menampilkan data
display(dt_profiles_rating_df)

Unnamed: 0,user_id,user_name,gender,age,skin_type_face,hair_issue,skin_type_body,allergy_history,preferred_products,avoided_products,specific_needs,review_id,product_id,rating
0,1,Operator,none,,0.00,0.0,0.00,none,none,none,none,0,0,0.0
1,2,tester1,lakilaki,18-25,0.25,0.4,0.33,tidak ada alergi,bahan alami wangi,vegan paraben sls,rata warna kulit hilang jerawat hidrasi lindun...,0,0,0.0
2,3,Elisa Regina Simanjuntak,perempuan,18-25,0.50,0.6,0.00,tidak ada alergi,bahan alami,wangi,rata warna kulitmenghilangkan jerawatmenghidra...,2,44,5.0
3,3,Elisa Regina Simanjuntak,perempuan,18-25,0.50,0.6,0.00,tidak ada alergi,bahan alami,wangi,rata warna kulitmenghilangkan jerawatmenghidra...,5,316,5.0
4,3,Elisa Regina Simanjuntak,perempuan,18-25,0.50,0.6,0.00,tidak ada alergi,bahan alami,wangi,rata warna kulitmenghilangkan jerawatmenghidra...,14,60,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
444,67,Wilona,perempuan,18-25,0.25,0.4,0.33,pewangikandungan kimia,bahan alami,wangi,menghidrasiperlindungan matahari,435,315,4.0
445,67,Wilona,perempuan,18-25,0.25,0.4,0.33,pewangikandungan kimia,bahan alami,wangi,menghidrasiperlindungan matahari,436,166,4.0
446,67,Wilona,perempuan,18-25,0.25,0.4,0.33,pewangikandungan kimia,bahan alami,wangi,menghidrasiperlindungan matahari,437,168,5.0
447,67,Wilona,perempuan,18-25,0.25,0.4,0.33,pewangikandungan kimia,bahan alami,wangi,menghidrasiperlindungan matahari,438,262,4.0


In [10]:
# Hitung rata rata dari user yang ada
user_vectors = dt_profiles_rating_df.groupby('user_id')[['skin_type_face', 'hair_issue', 'skin_type_body']].mean().round(2)

# Rset index matriks
user_vectors.reset_index(inplace=True)
display(user_vectors)

Unnamed: 0,user_id,skin_type_face,hair_issue,skin_type_body
0,1,0.00,0.0,0.00
1,2,0.25,0.4,0.33
2,3,0.50,0.6,0.00
3,4,0.00,0.0,0.00
4,5,0.00,0.0,0.00
...,...,...,...,...
63,64,0.75,0.8,0.33
64,65,1.00,0.4,0.33
65,66,0.00,0.8,0.33
66,67,0.25,0.4,0.33


In [11]:
# Filter hanya data user_id yang ada dalam file CSV
user_vectors = user_vectors[user_vectors['user_id'].isin(dt_profiles_rating_df['user_id'].unique())]

# Simpan user_id sebelum dihapus dari user_vectors
user_ids = user_vectors['user_id']

# Hapus kolom user_id sebelum menghitung similaritas
user_vectors = user_vectors.drop('user_id', axis=1)

# Hitung similaritas kosinus antara semua pasangan pengguna
user_similarities = cosine_similarity(user_vectors)

# Konversi matriks similaritas ke dataframe pandas
user_similarities = pd.DataFrame(user_similarities, index=user_ids, columns=user_ids).round(2)
display(user_similarities)

user_id,1,2,3,4,5,6,7,8,9,10,...,59,60,61,62,63,64,65,66,67,68
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.00,0.00,0.0,0.0,0.0,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0
2,0.0,1.00,0.81,0.0,0.0,0.0,0.69,0.87,0.56,0.78,...,0.81,0.99,0.82,0.77,0.69,0.94,0.80,0.86,1.00,0.0
3,0.0,0.81,1.00,0.0,0.0,0.0,0.77,0.86,0.78,0.65,...,1.00,0.83,0.42,0.98,0.77,0.96,0.84,0.71,0.81,0.0
4,0.0,0.00,0.00,0.0,0.0,0.0,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0
5,0.0,0.00,0.00,0.0,0.0,0.0,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,0.0,0.94,0.96,0.0,0.0,0.0,0.70,0.95,0.78,0.81,...,0.96,0.94,0.67,0.95,0.70,1.00,0.91,0.76,0.94,0.0
65,0.0,0.80,0.84,0.0,0.0,0.0,0.36,0.99,0.94,0.95,...,0.84,0.76,0.71,0.92,0.36,0.91,1.00,0.44,0.80,0.0
66,0.0,0.86,0.71,0.0,0.0,0.0,0.92,0.54,0.18,0.36,...,0.71,0.91,0.50,0.58,0.92,0.76,0.44,1.00,0.86,0.0
67,0.0,1.00,0.81,0.0,0.0,0.0,0.69,0.87,0.56,0.78,...,0.81,0.99,0.82,0.77,0.69,0.94,0.80,0.86,1.00,0.0


In [12]:
# Ambil semua data produk berdasarkan id
items = dt_profiles_rating_df['product_id'].unique()

# Ambil semuda data user berdasarkan id sebelum dilakukan proses data
unique_user_ids = dt_profiles_rating_df['user_id'].unique()
display(items)
print()
display(unique_user_ids)

array([  0,  44, 316,  60, 215,  22, 311, 286,  82, 140, 112,  63,  51,
        21,  18,   5,   8,  68,  92,  85, 216, 307, 280, 276, 236,  58,
        24, 300, 315,   7, 294, 134,  42,  43, 123, 167, 168, 199,  84,
       283,  16, 309,  47,  20, 263,  35, 135, 219,  61,  78,  93,  62,
        39, 282, 165, 169, 261, 191,   6, 295, 122, 314,  46, 312, 306,
        59, 188,  36, 138, 298,  81, 166, 310,  56,  23, 161, 207, 252,
        88,  32, 142,  40, 179,  95, 141, 163, 170, 120, 208,  94, 302,
       228, 193, 147,   4, 171,  69,  79,  87, 114, 274, 260, 160,   9,
       304, 234, 218, 285, 299,  14,  15,  37,  25,  83, 121, 156, 159,
        66, 164,  57, 211,  13, 305,  50, 116, 149,  33,  12, 175, 130,
       273, 217,  30,  48, 313, 259, 137,  73, 232,  45,  65,  75,  34,
       187,  67, 265, 255,  28, 233, 214,  64,  74,  96, 278, 267, 258,
       195, 229, 292, 254, 251, 227, 178, 139,  99,  98,  97,  71,  55,
        26, 146, 262])




array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68],
      dtype=int64)

In [13]:
def get_recommendations_for_new_user(user_id):
    # Check user profile and give recommendations based on profile similarity
    # Or provide default recommendations for new users
    if user_profile_similar_to_existing(user_id):
        return recommend_based_on_profile(user_id)
    else:
        return recommend_default_products()

def user_profile_similar_to_existing(user_id):
    # Misalnya, kita akan mengasumsikan bahwa profil pengguna baru dianggap
    # mirip dengan profil pengguna yang sudah ada jika mereka memiliki jenis kelamin yang sama
    existing_user_profiles = dt_profiles_rating_df[dt_profiles_rating_df['user_id'] != user_id]
    new_user_profile = dt_profiles_rating_df[dt_profiles_rating_df['user_id'] == user_id].iloc[0]
    
    # Cek apakah ada pengguna dengan jenis kelamin yang sama dengan pengguna baru
    similar_users = existing_user_profiles[existing_user_profiles['gender'] == new_user_profile['gender']]
    return not similar_users.empty

def recommend_based_on_profile(user_id):
    # Misalnya, kita akan memberikan rekomendasi berdasarkan produk yang sering dibeli oleh pengguna dengan profil serupa
    user_profile = dt_profiles_rating_df[dt_profiles_rating_df['user_id'] == user_id].iloc[0]
    similar_users = dt_profiles_rating_df[dt_profiles_rating_df['gender'] == user_profile['gender']]
    similar_users_ratings = similar_users.groupby('product_id')['rating'].mean().sort_values(ascending=False)
    return similar_users_ratings.index.tolist()

def recommend_default_products():
    # Misalnya, kita akan memberikan rekomendasi produk yang populer secara keseluruhan
    popular_products = dt_profiles_rating_df.groupby('product_id')['rating'].count().sort_values(ascending=False)
    return popular_products.index.tolist()

In [15]:
# Initialize a dictionary to store the recommendation values for each user
recommendations = {}

# Iterate over each user ID from the preprocessed data
for user_id in unique_user_ids:
    # Calculate the sum of similarities for the current user
    similarity_sum = user_similarities.loc[user_id].sum()
    predictions = {}

    if similarity_sum > 0:  # Check if similarity_sum is greater than 0 to avoid division by zero
        for item in items:
            # Filter ratings for the item from other users
            other_user_ratings = dt_profiles_rating_df[dt_profiles_rating_df['product_id'] == item]

            # Calculate the sum of ratings weighted by similarity values
            rating_sum = 0
            for other_user_id in other_user_ratings['user_id']:
                if other_user_id != user_id:
                    rating = other_user_ratings[other_user_ratings['user_id'] == other_user_id]['rating'].values[0]
                    similarity = user_similarities.loc[user_id, other_user_id]
                    rating_sum += rating * similarity

            # Calculate the predicted rating for the item
            predictions[item] = rating_sum / similarity_sum

    # Store the predictions for the current user
    recommendations[user_id] = predictions

# Initialize recommendations_df
recommendations_df = pd.DataFrame(recommendations)
recommendations_df = recommendations_df.T
recommendations_df = recommendations_df.round(2)

# Fill NaN values and 0 values with 0
recommendations_df = recommendations_df.fillna(0).replace(0, 0)

# Check if user is new or hasn't rated any products
new_users = [user_id for user_id in unique_user_ids if user_id not in recommendations_df.index]

# Iterate over new users or users who haven't rated any products
for user_id in new_users:
    # Give recommendations based on user profile or default recommendations
    recommendations[user_id] = get_recommendations_for_new_user(user_id)

# Convert the recommendations dictionary to a pandas dataframe
recommendations_df = pd.DataFrame(recommendations)
recommendations_df = recommendations_df.T
recommendations_df = recommendations_df.round(2)

# Fill NaN values and 0 values with 0
recommendations_df = recommendations_df.fillna(0).replace(0, 0)

display(recommendations_df)

Unnamed: 0,0,44,316,60,215,22,311,286,82,140,...,178,139,99,98,97,71,55,26,146,262
1,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
2,0.0,0.31,0.79,0.41,0.51,0.07,0.57,0.19,1.11,0.41,...,0.07,0.09,0.07,0.07,0.16,0.06,0.04,0.09,0.09,0.09
3,0.0,0.22,0.66,0.33,0.57,0.08,0.57,0.22,1.16,0.45,...,0.06,0.07,0.06,0.06,0.13,0.04,0.03,0.07,0.07,0.07
4,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
5,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,0.0,0.32,0.79,0.43,0.53,0.07,0.45,0.20,1.13,0.44,...,0.06,0.08,0.06,0.06,0.14,0.05,0.03,0.08,0.07,0.07
65,0.0,0.32,0.75,0.46,0.48,0.04,0.44,0.18,1.07,0.49,...,0.04,0.05,0.04,0.04,0.11,0.03,0.02,0.05,0.07,0.07
66,0.0,0.31,0.68,0.37,0.58,0.12,0.62,0.23,1.07,0.22,...,0.00,0.00,0.00,0.00,0.09,0.00,0.00,0.00,0.09,0.09
67,0.0,0.25,0.79,0.41,0.51,0.07,0.57,0.19,1.11,0.41,...,0.07,0.09,0.07,0.07,0.07,0.06,0.04,0.09,0.00,0.00


In [16]:
# Hitung MAE
absolute_errors = []
for user_id in unique_user_ids:
    user_actual_ratings = dt_profiles_rating_df.loc[dt_profiles_rating_df['user_id'] == user_id, ['product_id', 'rating']]
    user_actual_ratings = user_actual_ratings.dropna(subset=['product_id', 'rating'])  # Hapus baris dengan data kosong
    user_actual_ratings = user_actual_ratings.set_index('product_id')['rating'].to_dict()

    user_predicted_ratings = recommendations_df.loc[user_id].to_dict()

    for product_id in user_actual_ratings.keys():
        if product_id in user_predicted_ratings:
            actual_rating = user_actual_ratings[product_id]
            predicted_rating = user_predicted_ratings[product_id]
            absolute_error = abs(actual_rating - predicted_rating)
            absolute_errors.append(absolute_error)

# Hitung MAE hanya jika ada nilai dalam absolute_errors
if absolute_errors:
    mae = np.mean(absolute_errors)
    print(f"Mean Absolute Error (MAE): {mae:.3f}")
else:
    print("Tidak ada data untuk menghitung MAE.")

Mean Absolute Error (MAE): 4.087


In [18]:
# Iterate over each user ID
for user_id in unique_user_ids:
    # Look up the user's name in the data_df dataframe
    user_name = dt_profiles_rating_df.loc[dt_profiles_rating_df['user_id'] == user_id, 'user_name'].values[0]

    # Select the row for the specified user
    user_recs = recommendations_df.loc[user_id]

    # Sort the series in descending order and select the top 10 values
    top_recs = user_recs.sort_values(ascending=False).iloc[:10].index.tolist()

    # Print the recommended product IDs and names
    print("Top 10 recommended products for user_id = {} : ({})".format(user_id, user_name))
    print(f"Product IDs: {top_recs}")
    print()

Top 10 recommended products for user_id = 1 : (Operator)
Product IDs: [0, 164, 15, 37, 25, 83, 121, 156, 159, 66]

Top 10 recommended products for user_id = 2 : (tester1)
Product IDs: [82, 58, 316, 311, 168, 215, 42, 68, 62, 135]

Top 10 recommended products for user_id = 3 : (Elisa Regina Simanjuntak)
Product IDs: [82, 58, 316, 215, 311, 68, 42, 168, 135, 140]

Top 10 recommended products for user_id = 4 : (Hamada)
Product IDs: [0, 164, 15, 37, 25, 83, 121, 156, 159, 66]

Top 10 recommended products for user_id = 5 : (Elisa Regina Simanjuntak)
Product IDs: [0, 164, 15, 37, 25, 83, 121, 156, 159, 66]

Top 10 recommended products for user_id = 6 : (haruto)
Product IDs: [0, 164, 15, 37, 25, 83, 121, 156, 159, 66]

Top 10 recommended products for user_id = 7 : (Dini Sipahutar)
Product IDs: [82, 58, 311, 316, 215, 168, 42, 135, 57, 84]

Top 10 recommended products for user_id = 8 : (Gladys)
Product IDs: [82, 58, 316, 62, 42, 168, 68, 215, 311, 135]

Top 10 recommended products for user_id 