### Importing libraries and required modules

In [1]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import json
import time

# Importing our models

from src.models.naive_bayes_model import NaiveBayesRecommender
from src.models.knn_model import KNNRecommender
from src.models.logistic_regression_model import LogisticRegressionRecommender


# Global plotting defaults: matplotlib style sheet and seaborn colour palette
PLOT_STYLE = 'seaborn-v0_8-darkgrid'
COLOR_PALETTE = 'husl'

plt.style.use(PLOT_STYLE)
sns.set_palette(COLOR_PALETTE)

### Loading Preprocessed Data

In [2]:
print("Loading processed data...")

# Single source of truth for the processed-data location (relative to the
# notebook's working directory) instead of repeating the prefix on every read.
PROCESSED_DIR = Path('../data/processed')

train = pd.read_csv(PROCESSED_DIR / 'processed_train.csv')
val = pd.read_csv(PROCESSED_DIR / 'processed_val.csv')
test = pd.read_csv(PROCESSED_DIR / 'processed_test.csv')

# Loading category mapping. The encoding is passed explicitly so the JSON is
# decoded the same way on every platform rather than with the locale default.
with open(PROCESSED_DIR / 'category_mapping.json', 'r', encoding='utf-8') as f:
    category_mapping = json.load(f)

# Quick sanity report: split sizes and the category names (mapping keys)
print(f"Training set: {len(train)} books")
print(f"Validation set: {len(val)} books")
print(f"Test set: {len(test)} books")
print(f"\nCategories: {list(category_mapping.keys())}")

Loading processed data...
Training set: 663 books
Validation set: 95 books
Test set: 190 books

Categories: ['Arts', 'Biography', 'Business', 'Fiction', 'General', 'History', 'Science', 'Technology', 'Young Adult']


### Prepare Features and Labels

In [3]:
# Each split exposes the same two columns: the feature column (X) passed to
# the models and the label column (y) they are scored against.
FEATURE_COLUMN = 'combined_text'
LABEL_COLUMN = 'category_encoded'

X_train, y_train = train[FEATURE_COLUMN], train[LABEL_COLUMN]
X_val, y_val = val[FEATURE_COLUMN], val[LABEL_COLUMN]
X_test, y_test = test[FEATURE_COLUMN], test[LABEL_COLUMN]

# Confirm the training features and labels line up
print("Features prepared!")
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")

Features prepared!
X_train shape: (663,)
y_train shape: (663,)


### Train Naive Bayes Model

In [4]:
print("="*60)
print("TRAINING NAIVE BAYES MODEL")
print("="*60)

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations. time.time() follows the wall clock, which can be adjusted
# mid-run and may be too coarse for a fit lasting a few hundredths of a second.
start_time = time.perf_counter()

nb_model = NaiveBayesRecommender(model_type='multinomial')
nb_model.train(X_train, y_train)

# Elapsed time covers model construction plus train()
nb_train_time = time.perf_counter() - start_time
print(f"\nTraining time: {nb_train_time:.2f} seconds")

# Evaluating on validation set (the returned metrics are kept for later use)
print("\n--- Validation Set Performance ---")
nb_val_metrics = nb_model.evaluate(X_val, y_val)

# Evaluating on test set
# NOTE(review): if the model choice is later driven by these numbers, the test
# score stops being an unbiased estimate; prefer selecting on validation.
print("\n--- Test Set Performance ---")
nb_test_metrics = nb_model.evaluate(X_test, y_test)

# Saving model (save_model() is called with its default destination)
nb_model.save_model()
print("\n✓ Naive Bayes model saved!")

TRAINING NAIVE BAYES MODEL
Training Naive Bayes model...
Training completed!

Training time: 0.02 seconds

--- Validation Set Performance ---

MULTINOMIAL Naive Bayes Evaluation:
Accuracy: 0.4211
Precision: 0.2971
Recall: 0.4211
F1-Score: 0.3055

--- Test Set Performance ---

MULTINOMIAL Naive Bayes Evaluation:
Accuracy: 0.4316
Precision: 0.2386
Recall: 0.4316
F1-Score: 0.3026
Model saved to /Users/nirdeshsubedi/Documents/Courseworks/book-rec-ai/ai-book-recommendation/trained_models/naive_bayes.pkl

✓ Naive Bayes model saved!


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


### Training Logistic Regression Model

In [5]:
print("\n" + "="*60)
print("TRAINING LOGISTIC REGRESSION MODEL")
print("="*60)

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations. time.time() follows the wall clock, which can be adjusted
# mid-run and would then distort the reported training time.
start_time = time.perf_counter()

lr_model = LogisticRegressionRecommender(max_iter=1000)
lr_model.train(X_train, y_train)

# Elapsed time covers model construction plus train()
lr_train_time = time.perf_counter() - start_time
print(f"\nTraining time: {lr_train_time:.2f} seconds")

# Evaluating on validation set (the returned metrics are kept for later use)
print("\n--- Validation Set Performance ---")
lr_val_metrics = lr_model.evaluate(X_val, y_val)

# Evaluating on test set
# NOTE(review): if the model choice is later driven by these numbers, the test
# score stops being an unbiased estimate; prefer selecting on validation.
print("\n--- Test Set Performance ---")
lr_test_metrics = lr_model.evaluate(X_test, y_test)

# Saving model (save_model() is called with its default destination)
lr_model.save_model()
print("\n✓ Logistic Regression model saved!")


TRAINING LOGISTIC REGRESSION MODEL
Training Logistic Regression model...




Training completed!

Training time: 1.25 seconds

--- Validation Set Performance ---

Logistic Regression Evaluation:
Accuracy: 0.4421
Precision: 0.3152
Recall: 0.4421
F1-Score: 0.3280

--- Test Set Performance ---

Logistic Regression Evaluation:
Accuracy: 0.4316
Precision: 0.3608
Recall: 0.4316
F1-Score: 0.3418

✓ Logistic Regression model saved!


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


### Training KNN Model

In [6]:
print("\n" + "="*60)
print("TRAINING KNN MODEL")
print("="*60)

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations. time.time() follows the wall clock, which can be adjusted
# mid-run and may be too coarse for a fit lasting a few hundredths of a second.
start_time = time.perf_counter()

knn_model = KNNRecommender(n_neighbors=5)
knn_model.train(X_train, y_train)

# Elapsed time covers model construction plus train()
knn_train_time = time.perf_counter() - start_time
print(f"\nTraining time: {knn_train_time:.2f} seconds")

# Evaluating on validation set (the returned metrics are kept for later use)
print("\n--- Validation Set Performance ---")
knn_val_metrics = knn_model.evaluate(X_val, y_val)

# Evaluating on test set
# NOTE(review): if the model choice is later driven by these numbers, the test
# score stops being an unbiased estimate; prefer selecting on validation.
print("\n--- Test Set Performance ---")
knn_test_metrics = knn_model.evaluate(X_test, y_test)

# Saving model (save_model() is called with its default destination)
knn_model.save_model()
print("\n✓ KNN model saved!")


TRAINING KNN MODEL
Training KNN model with 5 neighbors...
Training completed!

Training time: 0.02 seconds

--- Validation Set Performance ---

KNN Evaluation (k=5):
Accuracy: 0.5158
Precision: 0.5275
Recall: 0.5158
F1-Score: 0.4964

--- Test Set Performance ---

KNN Evaluation (k=5):
Accuracy: 0.4632
Precision: 0.4736
Recall: 0.4632
F1-Score: 0.4372
Model saved to /Users/nirdeshsubedi/Documents/Courseworks/book-rec-ai/ai-book-recommendation/trained_models/knn.pkl

✓ KNN model saved!


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
