## 1. Environment Setup & Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from surprise import KNNBasic
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import accuracy
from surprise.model_selection import GridSearchCV
from sklearn.metrics import r2_score, precision_score, recall_score, f1_score
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# Location of the student-grade CSV, given relative to the notebook's working
# directory. It is read with pd.read_csv in the cells below.
data_path = '../datasets/student_grade.csv'


In [None]:
# Quick look at the raw (wide) table. Only the first rows are shown so the
# saved notebook does not carry a full dump of the student records.
df = pd.read_csv(data_path, low_memory=False)
df.head()

## 2. Data Loading & Preprocessing

In [None]:
# === 2.1 Load Data ===
df = pd.read_csv(data_path, low_memory=False)

# === 2.2 Transform Data (Wide to Long) ===
# One row per (student, course) pair; every column except student_id is
# treated as a course column.
id_vars = ['student_id']
df_long = df.melt(id_vars=id_vars, var_name='course', value_name='grade')

# === 2.3 Clean Data ===
# Anything that cannot be parsed as a number becomes NaN, then rows with a
# missing or non-positive grade are dropped.
df_long['grade'] = pd.to_numeric(df_long['grade'], errors='coerce')
has_valid_grade = df_long['grade'].notna() & (df_long['grade'] > 0.0)
df_long_cleaned = df_long.loc[has_valid_grade].copy()

# === 2.4 Filter for 'INT' Courses Only ===
# Keep only courses whose name starts with "INT" so the model learns from
# INT courses exclusively.
is_int_course = df_long_cleaned['course'].astype(str).str.startswith('INT')
df_long_filtered = df_long_cleaned.loc[is_int_course].copy()

print("--- Data Preparation Complete ---")
print(f"Total records after cleaning: {len(df_long_cleaned)}")
print(f"Filtered to INT courses only: {len(df_long_filtered)}")
display(df_long_filtered)


## 3. Split Data into Train and Test Sets

In [None]:
# === 3.1 Load Data into Surprise Dataset ===
# The frame handed to Surprise carries the columns in (user, item, rating)
# order. The rating scale assumes grades lie between 1.0 and 4.0.
rating_columns = ['student_id', 'course', 'grade']
reader = Reader(rating_scale=(1, 4))
data = Dataset.load_from_df(df_long_filtered[rating_columns], reader)

# === 3.2 Split Data ===
# 70 % train / 30 % test, with a fixed seed so the split is reproducible.
trainset, testset = train_test_split(data, random_state=42, test_size=0.30)

## 4. Model Training (KNNBasic)


In [None]:
# === 3.3 Train the Model ===
# Two KNNBasic models share the cosine similarity and differ only in whether
# neighbours are students (user-based) or courses (item-based).
sim_user = dict(name="cosine", user_based=True)
sim_item = dict(name="cosine", user_based=False)

print("--- Training KNN Model (User - User) ---")
algo_user = KNNBasic(sim_options=sim_user)
algo_user.fit(trainset)
print("Training KNN Model (User - User) complete.")

print("-----------------------------------------------------")

print("--- Training KNN Model (Item - Item) ---")
algo_item = KNNBasic(sim_options=sim_item)
algo_item.fit(trainset)
print("Training KNN Model (Item - Item) complete.")



In [None]:
def recommend_for_user(algo, user_id, n=5, exclude_seen=True):
    """Return the ``n`` courses with the highest predicted grade for a student.

    Parameters
    ----------
    algo : fitted Surprise algorithm
        Must expose ``trainset`` and ``predict(uid, iid)``. The candidate
        courses are taken from ``algo.trainset`` (the data the model was
        actually fitted on) rather than from a notebook-level global.
    user_id : raw user id
        Student to recommend for.
    n : int, default 5
        Maximum number of recommendations returned.
    exclude_seen : bool, default True
        When True, courses the student already has a grade for in the
        training data are left out, so only courses not yet graded are
        recommended. Pass False to rank every course in the training set.

    Returns
    -------
    list[tuple]
        ``(raw_course_id, predicted_grade)`` pairs, best first.
    """
    fitted = algo.trainset

    seen_inner_ids = set()
    if exclude_seen:
        try:
            inner_uid = fitted.to_inner_uid(user_id)
        except ValueError:
            # Student is not part of the training set: nothing to exclude.
            inner_uid = None
        if inner_uid is not None:
            # Trainset.ur maps inner user id -> [(inner item id, rating), ...]
            seen_inner_ids = {inner_iid for inner_iid, _ in fitted.ur[inner_uid]}

    predictions = []
    for inner_iid in fitted.all_items():
        if inner_iid in seen_inner_ids:
            continue
        raw_iid = fitted.to_raw_iid(inner_iid)
        predictions.append((raw_iid, algo.predict(user_id, raw_iid).est))

    predictions.sort(key=lambda pair: pair[1], reverse=True)
    return predictions[:n]

In [None]:
def similar_items(algo, item_raw_id, k=5):
    """Return the raw ids of the ``k`` nearest neighbours of a course.

    The raw id is translated to the inner id used by ``algo.trainset``,
    ``algo.get_neighbors`` is queried, and the resulting inner ids are
    translated back to raw ids.

    NOTE(review): presumably ``algo`` must have been fitted with
    ``user_based=False`` for the neighbours to be items - confirm.
    """
    fitted = algo.trainset
    neighbor_inner_ids = algo.get_neighbors(fitted.to_inner_iid(item_raw_id), k=k)
    return [fitted.to_raw_iid(inner_id) for inner_id in neighbor_inner_ids]

In [None]:
recommend_for_user(algo_user, "A246", n=10) # Recommend courses for student_id = A246

In [None]:
similar_items(algo_item, "INT102 WEB TECHNOLOGY") # Courses similar to this one

## 5. Test and Evaluate the Models

In [None]:
from surprise import accuracy

# Score the held-out test set with both fitted models.
pred_user = algo_user.test(testset)
pred_item = algo_item.test(testset)

# accuracy.rmse / accuracy.mae print their result, so each model only needs a
# banner followed by the two calls.
for banner, model_predictions in (
    ("=== USER‚ÄìUSER KNN PERFORMANCE ===", pred_user),
    ("\n=== ITEM‚ÄìITEM KNN PERFORMANCE ===", pred_item),
):
    print(banner)
    accuracy.rmse(model_predictions)
    accuracy.mae(model_predictions)

In [None]:
from surprise.model_selection import cross_validate

# cross_validate() fits the estimator it receives on every fold. With the
# default n_jobs=1 that happens in this process, so passing the already-fitted
# `algo_user` would leave it trained on the last CV fold (which can contain
# test-set ratings) and silently change every later use of `algo_user`.
# A fresh, identically configured estimator keeps `algo_user` untouched.
cv_algo_user = KNNBasic(sim_options=sim_user)
cross_validate(cv_algo_user, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

def evaluate_classification(predictions, threshold=2):
    """Score predictions as a binary "grade >= threshold" classification task.

    Each prediction's true rating (``r_ui``) and estimate (``est``) are turned
    into class 1 when they reach ``threshold`` and class 0 otherwise.

    Returns a dict holding the correlation between the two label vectors
    ("R-score") followed by macro- and weighted-averaged precision, recall
    and F1.
    """
    # Binarise truth and estimate together in a single pass.
    labelled = [
        (1 if p.r_ui >= threshold else 0, 1 if p.est >= threshold else 0)
        for p in predictions
    ]
    y_true = [truth for truth, _ in labelled]
    y_pred = [guess for _, guess in labelled]

    metrics = {"R-score": np.corrcoef(y_true, y_pred)[0, 1]}  # Correlation

    scorers = (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1", f1_score),
    )
    for averaging in ("macro", "weighted"):
        for label, scorer in scorers:
            metrics[f"{label}-{averaging.capitalize()}"] = scorer(
                y_true, y_pred, average=averaging
            )

    return metrics

In [None]:
# Classification view of both models, using grade >= 3 as the positive class.
metrics_user = evaluate_classification(pred_user, threshold=3)
metrics_item = evaluate_classification(pred_item, threshold=3)

# A cell renders only its last expression, so a bare `metrics_user` in the
# middle of the cell is never shown. Put both models side by side instead.
pd.DataFrame({"user-user": metrics_user, "item-item": metrics_item})

In [None]:
from collections import defaultdict

# === 4.1 Predict for Unknown Items ===
# Score every (student, course) pair returned by trainset.build_anti_testset()
# with the user-based model; these predictions feed the top-N lists below.
# NOTE(review): this relies on algo_user still being the model fitted on
# `trainset` - confirm no earlier cell has refitted it.
print("--- Generating Predictions for all missing pairs ---")
anti_testset = trainset.build_anti_testset()
all_predictions = algo_user.test(anti_testset)

# === 4.2 Helper Function for Top-N ===
def get_top_n(predictions, n=5):
    """Group predictions by user and keep each user's ``n`` best estimates.

    ``predictions`` is an iterable of 5-tuples
    ``(uid, iid, true_rating, estimate, details)``. The result is a
    ``defaultdict(list)`` mapping each uid to a list of ``(iid, estimate)``
    pairs sorted by estimate, highest first.
    """
    per_user = defaultdict(list)
    for uid, iid, _true_r, est, _details in predictions:
        per_user[uid].append((iid, est))

    # Re-assigning existing keys while iterating is safe: the key set is fixed.
    for uid in per_user:
        ranked = sorted(per_user[uid], key=lambda pair: pair[1], reverse=True)
        per_user[uid] = ranked[:n]

    return per_user

# === 4.3 Generate Top 5 Recommendations ===
# Each student's list is truncated to 5 entries here, so anything computed
# from top_n_recommendations can look at no more than 5 courses per student.
top_n_recommendations = get_top_n(all_predictions, n=5)
print(f"Generated recommendations for {len(top_n_recommendations)} students.")

In [None]:
# =========================================================
# === Calculate Precision@K and Recall@K ===================
# =========================================================

from collections import defaultdict

# Build ground truth from the test-set predictions of the user-based model:
#   actual_courses_all - every course a student has in the test set
#   actual_courses_B   - only those with a true grade of at least 3.0 (B)
actual_courses_all = defaultdict(list)
actual_courses_B = defaultdict(list)

for student, course, true_grade, _estimate, _details in pred_user:
    actual_courses_all[student].append(course)
    if true_grade >= 3.0:
        actual_courses_B[student].append(course)


def precision_recall_at_k(top_n, actual_dict, K=5):
    """Average Precision@K and Recall@K over users that have ground truth.

    Parameters
    ----------
    top_n : dict
        Maps user id -> ranked list of ``(item_id, score)`` pairs.
    actual_dict : dict
        Maps user id -> iterable of relevant item ids.
    K : int, default 5
        Cut-off: only the first ``K`` recommendations of each user count.

    Returns
    -------
    tuple[float, float]
        ``(mean precision, mean recall)``. Users without any relevant item are
        skipped. When no user can be evaluated at all, ``(0.0, 0.0)`` is
        returned instead of raising ``ZeroDivisionError``.

    Notes
    -----
    Precision always divides by ``K``, so a recommendation list shorter than
    ``K`` lowers the precision accordingly.
    """
    precisions = []
    recalls = []

    for uid, user_recs in top_n.items():
        # De-duplicate so a repeated ground-truth entry cannot inflate the
        # recall denominator.
        relevant_items = set(actual_dict.get(uid, []))
        if not relevant_items:
            continue

        recommended_items = {iid for iid, _ in user_recs[:K]}
        true_positives = len(recommended_items & relevant_items)

        precisions.append(true_positives / K)
        recalls.append(true_positives / len(relevant_items))

    if not precisions:
        # Nobody had ground truth: there is nothing to average.
        return 0.0, 0.0

    return sum(precisions) / len(precisions), sum(recalls) / len(recalls)


# --------- Calculate results for K = 5 and 10 -----------
K_values = [5, 10]

# Precision@K / Recall@K need at least K ranked courses per student.
# `top_n_recommendations` was truncated to 5 entries, which would cap
# Precision@10 at 0.5, so the ranking is rebuilt with the largest K requested.
top_n_for_eval = get_top_n(all_predictions, n=max(K_values))

print("\n====================== Precision & Recall ======================")

for K in K_values:
    precision_all, recall_all = precision_recall_at_k(top_n_for_eval, actual_courses_all, K)
    precision_B, recall_B = precision_recall_at_k(top_n_for_eval, actual_courses_B, K)

    print(f"\n========== K = {K} ==========")
    print("-- Using ALL enrolled courses --")
    print(f"Precision@{K}: {precision_all:.4f}")
    print(f"Recall@{K}: {recall_all:.4f}")

    print("\n-- Using ONLY courses with grade >= B --")
    print(f"Precision@{K}: {precision_B:.4f}")
    print(f"Recall@{K}: {recall_B:.4f}")

print("===================================================================")


In [None]:
from collections import defaultdict, Counter
import matplotlib.pyplot as plt

# === 5.1 Visualization Function ===
def visualize_topk_by_rank(top_n_recommendations, K=5, top_m=10):
    """Draw one bar chart per rank showing which courses occupy that rank most.

    top_n_recommendations: dict mapping student id -> ranked list of
        (course, score) pairs.
    K: number of ranks (recommendations per student) to look at.
    top_m: number of most frequent courses shown in each chart.

    Prints a message and returns without plotting when there is nothing to
    show.
    """
    # rank -> Counter of courses seen at that rank, filled in input order.
    rank_counts = {}
    for recs in top_n_recommendations.values():
        for position, (course, _score) in enumerate(recs[:K], start=1):
            rank_counts.setdefault(position, Counter())[course] += 1

    if not rank_counts:
        print("No recommendations to visualize.")
        return

    for position in range(1, K + 1):
        tally = rank_counts.get(position)
        if not tally:
            continue

        top_courses = tally.most_common(top_m)
        labels = [course for course, _ in top_courses]
        heights = [count for _, count in top_courses]

        fig, ax = plt.subplots(figsize=(10, 4))
        ax.bar(labels, heights, color='skyblue')
        ax.set_title(f"Top {top_m} Courses @ Rank {position}")
        ax.set_xlabel("Course")
        ax.set_ylabel("Frequency")
        plt.xticks(rotation=45, ha="right")
        fig.tight_layout()
        plt.show()

# === 5.2 Run Visualization ===
print("--- Visualization of Recommendations ---")
visualize_topk_by_rank(top_n_recommendations, top_m=10, K=5)

# === 5.3 Create Final DataFrame ===
# One row per (student, rank). The data was already restricted to INT courses
# in step 2, so the startswith check is only a safeguard.
rows = [
    {
        "student_id": student_id,
        "rank": rank,
        "course": course,
        "predicted_grade": predicted_grade,
    }
    for student_id, recs in top_n_recommendations.items()
    for rank, (course, predicted_grade) in enumerate(recs, start=1)
    if isinstance(course, str) and course.startswith("INT")
]

df_recommendations = pd.DataFrame(rows)

print("\n--- Final Recommendations Preview ---")
display(df_recommendations.head(15))

## 6. Confusion Matrix

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# 1. Define Mapping: Score -> Letter
# Listed from the lowest to the highest grade so the matrix axes read in order.
_grade_points = [0.0, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0]
_grade_letters = ['F', 'D', 'D+', 'C', 'C+', 'B', 'B+', 'A']
score_to_letter = dict(zip(_grade_points, _grade_letters))

# Every grade point in the mapping, ascending, and the matching letters.
# These are used as the label order for the confusion matrix and the report.
valid_scores = sorted(score_to_letter)
valid_labels = list(map(score_to_letter.get, valid_scores))

# 2. Helper function: snap a numeric score to the closest grade point
def get_nearest_grade_key(pred_score):
    """Return the entry of ``valid_scores`` closest to ``pred_score``.

    When two grade points are equally close, the lower one is returned.
    """
    nearest = valid_scores[0]
    for candidate in valid_scores[1:]:
        # Strict '<' keeps the earlier (lower) grade point on a tie.
        if abs(candidate - pred_score) < abs(nearest - pred_score):
            nearest = candidate
    return nearest

# 3. Prepare Data
# 3.1 Actual grade (r_ui) -> letter. The value is snapped to the nearest grade
#     point first, in case r_ui carries a small floating-point deviation.
y_true_letters = [
    score_to_letter[get_nearest_grade_key(p.r_ui)] for p in pred_user
]

# 3.2 Predicted grade (est) -> letter of the nearest grade point.
y_pred_letters = [
    score_to_letter[get_nearest_grade_key(p.est)] for p in pred_user
]

# 4. Generate Confusion Matrix
# Important: labels=valid_labels fixes the row/column order to the grade order
# instead of the alphabetical order of the letters.
cm = confusion_matrix(y_true_letters, y_pred_letters, labels=valid_labels)

# 5. Plot Heatmap
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=valid_labels,
    yticklabels=valid_labels,
    ax=ax,
)
ax.set_title('Confusion Matrix: Actual vs Predicted Grades')
ax.set_xlabel('Predicted Grade')
ax.set_ylabel('Actual Grade')
plt.show()

# 6. Classification Report
print("\n--- Detailed Classification Report ---")
report_text = classification_report(
    y_true_letters,
    y_pred_letters,
    labels=valid_labels,
    target_names=valid_labels,
    zero_division=0,
)
print(report_text)

In [None]:
from sklearn.metrics import r2_score

# 1. Collect the true grades and the estimates of the user-based model.
y_true_raw, y_pred_raw = [], []
for test_pred in pred_user:
    y_true_raw.append(test_pred.r_ui)
    y_pred_raw.append(test_pred.est)

# 2. Compute the R2 score of the raw (unrounded) estimates.
r2 = r2_score(y_true_raw, y_pred_raw)

print(f"R2 Score: {r2:.4f}")

In [None]:
from surprise.model_selection import GridSearchCV

# First, coarse grid: neighbourhood size x similarity measure x user/item.
# Surprise only knows the similarity names 'cosine', 'msd', 'pearson' and
# 'pearson_baseline'. The former 'euclidean' entry made the fit fail with
# "NameError: Wrong sim name euclidean", so it has been removed.
param_grid = {
    'k': [10, 20, 40],
    'sim_options': {
        'name': ['cosine', 'pearson', 'msd'],
        'user_based': [True, False]
    }
}

gs = GridSearchCV(KNNBasic, param_grid, measures=['rmse', 'mae'], cv=5)
gs.fit(data)

print(gs.best_score['rmse'])
print(gs.best_params['rmse'])

In [None]:
from surprise import KNNBasic
from surprise import Dataset
from surprise.model_selection import GridSearchCV

# Hyperparameters that KNNBasic accepts
param_grid = dict(
    k=[20, 40, 60],
    min_k=[1, 5],
    sim_options=dict(
        name=['cosine', 'pearson'],
        user_based=[True, False],  # True = user-user, False = item-item
    ),
)

# 5-fold search scored on RMSE and MAE; joblib_verbose=3 reports progress.
gs = GridSearchCV(KNNBasic, param_grid, measures=['rmse', 'mae'], cv=5, joblib_verbose=3)

print("üöÄ Start GridSearchCV (KNN)...")
gs.fit(data)
print("‚úÖ GridSearchCV Finished")

print("\nüéØ Best RMSE:", gs.best_score['rmse'])
print("üèÜ Best parameters:", gs.best_params['rmse'])


In [None]:
# Final grid, restricted to hyperparameters KNNBasic actually uses.
# The previous grid listed n_factors / n_epochs / reg_pu / reg_qi, which are
# matrix-factorisation (SVD) settings and not documented KNNBasic parameters.
# They multiplied the number of fits by 81 without changing the model, and the
# grid had no `k`, which the k-based plots further down require.
param_grid = {
    'k': [10, 20, 40, 60],
    'min_k': [1, 5],
    'sim_options': {
        'name': ['cosine', 'pearson', 'msd'],
        'user_based': [True, False]   # True = user-user, False = item-item
    }
}

gs = GridSearchCV(
    KNNBasic,
    param_grid,
    measures=['rmse', 'mae'],
    cv=5,    # 5-fold cross-validation
    joblib_verbose=3
)

print("üöÄ Start GridSearchCV (KNN)...")
gs.fit(data)
print("‚úÖ GridSearchCV Finished")

In [None]:
import pandas as pd

# Turn the full grid-search results into a DataFrame.
results_df = pd.DataFrame(gs.cv_results)

# Expand each params dict into one column per hyperparameter.
params_df = results_df['params'].apply(pd.Series)

# Put the hyperparameters next to the metrics of interest, then pull `name`
# and `user_based` out of the nested sim_options dict.
df = pd.concat(
    [params_df, results_df[['mean_test_rmse', 'mean_test_mae']]],
    axis=1,
).assign(
    sim_name=lambda frame: frame['sim_options'].map(lambda opts: opts['name']),
    user_based=lambda frame: frame['sim_options'].map(lambda opts: opts['user_based']),
)

# Best score and parameter set for each measure.
for lead, measure in (("", "rmse"), ("\n", "mae")):
    print(f"{lead}Best {measure.upper()} score:", gs.best_score[measure])
    print(f"Best params for {measure.upper()}:")
    print(gs.best_params[measure])

import matplotlib.pyplot as plt
import seaborn as sns

# RMSE as a function of k, one line per similarity measure.
fig, ax = plt.subplots(figsize=(10, 6))

sns.lineplot(
    data=df,
    x='k',
    y='mean_test_rmse',
    hue='sim_name',
    marker='o',
    palette='viridis',
    ax=ax,
)

ax.set_title('Effect of k on RMSE for KNN')
ax.set_xlabel('Number of Neighbors (k)')
ax.set_ylabel('RMSE (Lower is Better)')
ax.grid(True, alpha=0.3)
plt.show()


In [None]:
# 1. Turn the full grid-search results into a DataFrame for plotting.
results_df = pd.DataFrame.from_dict(gs.cv_results)

# -------------------------------------------------------
# Graph 1: Effect of Epochs (Learning Curve)
# Shows how the number of training epochs affects the error.
# -------------------------------------------------------
# n_epochs / n_factors are matrix-factorisation settings. They only appear in
# the results when the grid listed them, so the plot is skipped (instead of
# crashing on a missing column) when the grid holds KNN hyperparameters only.
epoch_columns = ['param_n_epochs', 'param_n_factors']
missing_columns = [col for col in epoch_columns if col not in results_df.columns]

if missing_columns:
    print(f"Skipping epochs plot: {missing_columns} not present in the grid-search results.")
else:
    plt.figure(figsize=(10, 6))

    # X axis: number of epochs
    # Y axis: error (RMSE)
    # Hue (line colour): number of factors
    sns.lineplot(data=results_df, x='param_n_epochs', y='mean_test_rmse',
                 hue='param_n_factors', marker='o', palette='viridis')

    plt.title('Effect of Epochs on RMSE (Learning Curve)')
    plt.xlabel('Number of Epochs')
    plt.ylabel('RMSE (Lower is Better)')
    plt.grid(True, alpha=0.3)
    plt.show()

In [None]:
import matplotlib.pyplot as plt

# `df` holds one row per hyperparameter combination, so plotting it directly
# joins rows of unrelated configurations into a zigzag. Average the RMSE over
# the other hyperparameters to get one point per k.
rmse_by_k = df.groupby('k')['mean_test_rmse'].mean()

rmse_by_k.plot(marker='o')
plt.xlabel("k")
plt.ylabel("RMSE")
plt.title("K vs RMSE")
plt.show()


In [None]:
# -------------------------------------------------------
# Graph 2: Heatmap (reg_pu vs reg_qi)
# Shows where RMSE is lowest across pairs of regularisation values.
# -------------------------------------------------------
# reg_pu / reg_qi are matrix-factorisation settings. The heatmap is skipped
# (instead of raising KeyError) when the grid-search results do not contain them.
reg_columns = ['param_reg_pu', 'param_reg_qi']
missing_columns = [col for col in reg_columns if col not in results_df.columns]

if missing_columns:
    print(f"Skipping regularization heatmap: {missing_columns} not present in the grid-search results.")
else:
    pivot_table = results_df.pivot_table(
        values='mean_test_rmse',
        index='param_reg_pu',    # vertical axis: reg_pu
        columns='param_reg_qi'   # horizontal axis: reg_qi
    )

    plt.figure(figsize=(8, 6))
    sns.heatmap(
        pivot_table,
        annot=True,
        fmt='.4f',
        cmap='Blues_r'  # darker = lower RMSE (better)
    )

    plt.title('RMSE Heatmap (KNN)')
    plt.xlabel('reg_qi (item regularization)')
    plt.ylabel('reg_pu (user regularization)')
    plt.show()

In [None]:
# -------------------------------------------------------
# Graph: Heatmap (k vs similarity metric)
# -------------------------------------------------------

# Grid-search results as a DataFrame.
results_df = pd.DataFrame.from_dict(gs.cv_results)

# param_sim_options holds dicts, so the similarity name has to be pulled out
# into its own column before pivoting.
results_df['metric'] = [opts['name'] for opts in results_df['param_sim_options']]

# Mean RMSE for every (k, similarity) combination.
pivot_table = results_df.pivot_table(
    index='param_k',          # rows: k
    columns='metric',         # columns: similarity metric
    values='mean_test_rmse',
)

fig, ax = plt.subplots(figsize=(9, 6))
sns.heatmap(
    pivot_table,
    annot=True,
    fmt='.4f',
    cmap='Blues_r',           # darker = better (lower RMSE)
    ax=ax,
)
ax.set_title('RMSE Heatmap (KNN) ‚Äî k vs Similarity Metric')
ax.set_xlabel('Similarity Metric')
ax.set_ylabel('k')
plt.show()


In [None]:
# Mean RMSE for every (k, min_k) combination of the grid search.
pivot_table = results_df.pivot_table(
    index='param_k',
    columns='param_min_k',
    values='mean_test_rmse',
)

fig, ax = plt.subplots(figsize=(9, 6))
sns.heatmap(pivot_table, annot=True, fmt='.4f', cmap='Blues_r', ax=ax)
ax.set_title('RMSE Heatmap (KNN) ‚Äî k vs min_k')
ax.set_xlabel('min_k')
ax.set_ylabel('k')
plt.show()
