Evaluasi Model Algoritma Content-Based Filltering menggunakan MAE

In [4]:
# Import library yang dibutuhkan
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from preprocessing import dt_product_df

# Fungsi untuk menghitung prediksi rating
def calculate_predicted_rating(user_id, product_id):
    user_data = dt_product_df[dt_product_df['user_id'] == user_id]
    if product_id not in user_data['id'].values:
        return None

    user_rated_products = set(user_data['id'])
    product_info = dt_product_df[dt_product_df['id'] == product_id].iloc[0]
    group = dt_product_df[(dt_product_df['category_id'] == product_info['category_id']) & 
                          (dt_product_df['subcategory_id'] == product_info['subcategory_id'])]
    tfidf_vectorizer = TfidfVectorizer()
    attributes = group[['skin_type_face', 'hair_issue', 'skin_type_body']].astype(str).apply(lambda x: ' '.join(x), axis=1)
    tfidf_matrix = tfidf_vectorizer.fit_transform(attributes)
    rated_indices = [idx for idx, pid in enumerate(group['id']) if pid in user_rated_products]

    sum_similarities = 0
    weighted_sum = 0
    query = group[group['id'] == product_id]
    if query.empty:
        return None

    query_index = query.index[0]
    if query_index >= tfidf_matrix.shape[0]:
        return None

    query_tfidf = tfidf_matrix[query_index]
    cosine_similarities = cosine_similarity(query_tfidf, tfidf_matrix).flatten()

    for idx in rated_indices:
        similarity = cosine_similarities[idx]
        rating = group.iloc[idx]['rating']
        sum_similarities += similarity
        weighted_sum += similarity * rating

    if sum_similarities > 0:
        return weighted_sum / sum_similarities
    else:
        return None

# Fungsi untuk evaluasi model
def evaluate_model():
    user_ids = dt_product_df['user_id'].unique()

    actual_ratings = []
    predicted_ratings = []

    for user_id in user_ids:
        user_data = dt_product_df[dt_product_df['user_id'] == user_id]
        for product_id in user_data['id'].values:
            actual_rating = user_data[user_data['id'] == product_id]['rating'].values[0]
            predicted_rating = calculate_predicted_rating(user_id, product_id)
            if predicted_rating is not None:
                actual_ratings.append(actual_rating)
                predicted_ratings.append(predicted_rating)

    mae = mean_absolute_error(actual_ratings, predicted_ratings)
    return mae

# Hitung MAE
mae = evaluate_model()
print(f'Mean Absolute Error (MAE): {mae}')

Mean Absolute Error (MAE): 0.6655845259917988


1. Mengambil Nilai Rating Sebenarnya Pengguna untuk Item-Item Terkait:
    - Kode ini mengambil rating sebenarnya dari dt_product_df yang sudah diproses sebelumnya.
    - user_data mengandung semua data produk yang dirating oleh user tertentu.

2. Menghitung Nilai Rating Prediksi Menggunakan Cosine Similarity:
    - Fungsi calculate_predicted_rating bertanggung jawab untuk menghitung rating prediksi.
    - Pertama, kode memeriksa apakah product_id ada dalam user_data yang dirating oleh user.
    - Kode kemudian mengambil informasi produk (product_info) dan mengelompokkan produk berdasarkan category_id dan subcategory_id.
    - Kode menghitung TF-IDF dari atribut yang relevan.
    - Kode menghitung similaritas kosinus antara produk yang sedang dianalisis dengan semua produk lain dalam kelompok yang sama.

3. Menghitung Prediksi Rating:
    - Dalam fungsi calculate_predicted_rating, kode menghitung weighted sum dari rating yang sudah diberikan oleh user, menggunakan similaritas kosinus sebagai bobot.
    - Jika sum_similarities lebih besar dari 0, kode mengembalikan nilai rata-rata berbobot sebagai prediksi rating.

4. Mengumpulkan Rating Prediksi dan Rating Aktual:
    - Dalam fungsi evaluate_model, kode mengiterasi semua user dan produk yang telah dirating oleh user tersebut.
    - Kode mengumpulkan rating aktual dan prediksi dalam list actual_ratings dan predicted_ratings.

5. Menghitung MAE:
Kode menggunakan mean_absolute_error dari sklearn.metrics untuk menghitung MAE antara rating aktual dan prediksi.

Evaluasi Model Algoritma User-Based Filltering menggunakan MAE

In [5]:
from sklearn.metrics import mean_absolute_error
from user_based import user_similarities, dt_profiles_rating_df

def calculate_predicted_rating(user_id, product_id):
    predictions = {}
    similarity_sum = user_similarities.loc[user_id].sum()
    
    if similarity_sum > 0:
        other_user_ratings = dt_profiles_rating_df[dt_profiles_rating_df['product_id'] == product_id]
        rating_sum = 0
        weight_sum = 0
        for other_user_id in other_user_ratings['user_id']:
            if other_user_id != user_id:
                rating = other_user_ratings[other_user_ratings['user_id'] == other_user_id]['rating'].values[0]
                similarity = user_similarities.loc[user_id, other_user_id]
                rating_sum += rating * similarity
                weight_sum += similarity
        if weight_sum > 0:
            return rating_sum / weight_sum

    return None

def evaluate_model():
    user_ids = dt_profiles_rating_df['user_id'].unique()
    actual_ratings = []
    predicted_ratings = []

    for user_id in user_ids:
        user_data = dt_profiles_rating_df[dt_profiles_rating_df['user_id'] == user_id]
        for product_id in user_data['product_id'].values:
            actual_rating = user_data[user_data['product_id'] == product_id]['rating'].values[0]
            predicted_rating = calculate_predicted_rating(user_id, product_id)
            if predicted_rating is not None:
                actual_ratings.append(actual_rating)
                predicted_ratings.append(predicted_rating)

    mae = mean_absolute_error(actual_ratings, predicted_ratings)
    return mae

# Hitung MAE
mae = evaluate_model()
print(f'Mean Absolute Error (MAE): {mae}')

Mean Absolute Error (MAE): 0.6337755885852256


Evaluasi Model Algoritma User-Based Filltering menggunakan RMSE

In [6]:
from sklearn.metrics import mean_squared_error
import numpy as np
from user_based import user_similarities, dt_profiles_rating_df

def calculate_predicted_rating(user_id, product_id):
    predictions = {}
    similarity_sum = user_similarities.loc[user_id].sum()
    
    if similarity_sum > 0:
        other_user_ratings = dt_profiles_rating_df[dt_profiles_rating_df['product_id'] == product_id]
        rating_sum = 0
        weight_sum = 0
        for other_user_id in other_user_ratings['user_id']:
            if other_user_id != user_id:
                rating = other_user_ratings[other_user_ratings['user_id'] == other_user_id]['rating'].values[0]
                similarity = user_similarities.loc[user_id, other_user_id]
                rating_sum += rating * similarity
                weight_sum += similarity
        if weight_sum > 0:
            return rating_sum / weight_sum

    return None

def evaluate_model():
    user_ids = dt_profiles_rating_df['user_id'].unique()
    actual_ratings = []
    predicted_ratings = []

    for user_id in user_ids:
        user_data = dt_profiles_rating_df[dt_profiles_rating_df['user_id'] == user_id]
        for product_id in user_data['product_id'].values:
            actual_rating = user_data[user_data['product_id'] == product_id]['rating'].values[0]
            predicted_rating = calculate_predicted_rating(user_id, product_id)
            if predicted_rating is not None:
                actual_ratings.append(actual_rating)
                predicted_ratings.append(predicted_rating)

    mse = mean_squared_error(actual_ratings, predicted_ratings)
    rmse = np.sqrt(mse)
    return rmse

# Hitung RMSE
rmse = evaluate_model()
print(f'Root Mean Squared Error (RMSE): {rmse}')

Root Mean Squared Error (RMSE): 0.9447141705454563
