# Chalchitra Talks Book Recommendations

## Library/Package Import

In [1]:
import numpy as np 
import pandas as pd 
import os

# Print the full path of every file found under the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

/kaggle/input/chalchitra-talks-book-recommendations/guests.csv
/kaggle/input/chalchitra-talks-book-recommendations/books.csv
/kaggle/input/chalchitra-talks-book-recommendations/recommendations.csv


## Business Understanding

**Tujuan**

Membuat sistem yang dapat merekomendasikan buku kepada pengguna (dalam konteks ini, "pengguna" bisa jadi adalah tamu lain, atau audiens Chalchitra Talks yang mencari rekomendasi).
Menyediakan dua jenis rekomendasi:
- **Content-Based Filtering**: Merekomendasikan buku berdasarkan kemiripan konten buku (deskripsi, genre, penulis) dengan buku yang disukai atau direkomendasikan sebelumnya.
- **Collaborative Filtering**: Merekomendasikan buku berdasarkan pola rekomendasi dari para tamu (misalnya, jika Tamu A dan Tamu B menyukai buku yang sama, buku lain yang disukai Tamu A mungkin juga disukai Tamu B).

**Target Keberhasilan**

Kemampuan sistem menghasilkan rekomendasi yang relevan.
Peningkatan engagement (misalnya, jika ini platform online, jumlah klik pada buku yang direkomendasikan).
Untuk evaluasi offline, kita bisa menggunakan metrik seperti presisi, recall, atau RMSE jika kita memodelkannya sebagai prediksi rating.

## Data Understanding

### Data Loading

In [2]:
# import os
# print(f"Current Working Directory: {os.getcwd()}")

In [3]:
# Directory that holds the three CSV files of the Kaggle dataset.
DATA_DIR = '/kaggle/input/chalchitra-talks-book-recommendations'

# Start from empty frames so the `.empty` guards used by the later cells keep
# working (instead of raising NameError) when a file cannot be read.
guests_df = pd.DataFrame()
books_df = pd.DataFrame()
recommendations_df = pd.DataFrame()

try:
    guests_df = pd.read_csv(f'{DATA_DIR}/guests.csv')
    books_df = pd.read_csv(f'{DATA_DIR}/books.csv')
    recommendations_df = pd.read_csv(f'{DATA_DIR}/recommendations.csv')
    print("File berhasil dimuat!")
except FileNotFoundError as e:
    # Frames that were not loaded stay empty; downstream cells skip them.
    print(f"Error: File tidak ditemukan. {e}")
    print("Periksa kembali nama dataset di Kaggle dan pastikan path sudah benar.")
    print("Path yang diharapkan: /kaggle/input/NAMA-DATASET-ANDA-DI-KAGGLE/nama_file.csv")

File berhasil dimuat!


### Data Inspection

#### Guest Data

In [4]:
# Overview of the guests table: first rows, dtypes, descriptive statistics,
# number of distinct values per column and number of missing values per column.
if not guests_df.empty:
    print(guests_df.head())
    print("\nInfo:")
    guests_df.info()
    print("\nDescriptive Statistics:")
    print(guests_df.describe(include='all'))
    print("\nUnique Values:")
    print(guests_df.nunique())
    # Was "\Missing values:" -- an invalid escape sequence that printed a
    # literal backslash instead of the intended blank line.
    print("\nMissing values:")
    print(guests_df.isnull().sum())
else:
    print("guests_df kosong atau tidak dapat dimuat.")

   guest_id         guest_name guest_profession_or_org  \
0         0  Ratna Pathak Shah                   Actor   
1         1        Peter Gould               Filmmaker   
2         2      Ashish Shakya                Comedian   
3         3    Gulshan Devaiah                   Actor   
4         4      Nikhil Taneja                   Yuvaa   

                                          guest_info  \
0  Ratna Pathak Shah is an actress and director b...   
1  Peter Gould is an American television writer, ...   
2  Ashish is an Indian comedian, writer, actor, p...   
3  Gulshan Devaiah is an Indian actor who appears...   
4  Nikhil Taneja is a Mumbai-based entrepreneur, ...   

                                    guest_page_url  
0  https://www.chalchitratalks.com/ratnapathakshah  
1       https://www.chalchitratalks.com/petergould  
2     https://www.chalchitratalks.com/ashishshakya  
3   https://www.chalchitratalks.com/gulshandevaiah  
4     https://www.chalchitratalks.com/nikhiltanej

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


#### Books Data

In [5]:
# Overview of the books table: first rows, dtypes, then one labelled summary
# per entry below (statistics, distinct values, missing values).
if books_df.empty:
    print("books_df kosong atau tidak dapat dimuat.")
else:
    print(books_df.head())
    print("\nInfo:")
    books_df.info()
    # Each summary is wrapped in a lambda so it is computed only after its
    # heading has been printed.
    for heading, summarise in (
        ("\nDeskripsi Statistik:", lambda: books_df.describe(include='all')),
        ("\nJumlah nilai unik per kolom:", lambda: books_df.nunique()),
        ("\nNilai yang hilang (missing values):", lambda: books_df.isnull().sum()),
    ):
        print(heading)
        print(summarise())

   book_id                            book_name  \
0        0  Charles Laughton: A Difficult Actor   
1        1                   Gone with the Wind   
2        2                             Hayvadan   
3        3               The London Jungle Book   
4        4           The Private Life of Plants   

                    book_description_from_chalchitra  \
0  This is the fullest biography yet on the actor...   
1           A historical novel by Margaret Mitchell.   
2  It is based on Kathasaritsagara and Thomas Man...   
3                A visual travelogue by Bhajju Shyam   
4  This book reveals hidden events and phenomena ...   

                                             authors               publisher  \
0                                   ['Simon Callow']     Fromm International   
1                              ['Margaret Mitchell']            Random House   
2                                  ['Girish Karnad']  Radhakrishna Prakashan   
3  ['Bhajju Shyam', 'Sirish Rao', 'G

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


#### Guest-Books Data

In [6]:
# Overview of the guest-book recommendation pairs.
if recommendations_df.empty:
    print("recommendations_df kosong atau tidak dapat dimuat.")
else:
    print(recommendations_df.head())
    print("\nInfo:")
    recommendations_df.info()
    # NOTE: this is the total number of rows; duplicates are not removed.
    print("\nJumlah rekomendasi unik:")
    print(len(recommendations_df))
    # Number of distinct guests and distinct books that appear in the pairs.
    for heading, id_column in (
        ("\nJumlah tamu unik yang memberi rekomendasi:", 'guest_id'),
        ("\nJumlah buku unik yang direkomendasikan:", 'book_id'),
    ):
        print(heading)
        print(recommendations_df[id_column].nunique())
    print("\nNilai yang hilang (missing values):")
    print(recommendations_df.isnull().sum())

   book_id  guest_id
0        0         0
1        1         0
2        2         0
3        3         0
4        4         0

Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 335 entries, 0 to 334
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   book_id   335 non-null    int64
 1   guest_id  335 non-null    int64
dtypes: int64(2)
memory usage: 5.4 KB

Jumlah rekomendasi unik:
335

Jumlah tamu unik yang memberi rekomendasi:
28

Jumlah buku unik yang direkomendasikan:
319

Nilai yang hilang (missing values):
book_id     0
guest_id    0
dtype: int64


In [7]:
# Count the recommendation rows of each guest and summarise that distribution.
if not recommendations_df.empty:
    recommendations_per_guest = recommendations_df.groupby('guest_id').size()
    print(
        "\nDistribusi jumlah rekomendasi per tamu:",
        recommendations_per_guest.describe(),
        sep="\n",
    )


Distribusi jumlah rekomendasi per tamu:
count    28.000000
mean     11.964286
std       8.582852
min       2.000000
25%       6.000000
50%       8.500000
75%      15.250000
max      32.000000
dtype: float64


In [8]:
# Show the ten most frequent raw values of the `categories` column.
if 'categories' in books_df.columns and not books_df.empty:
    top_categories = books_df['categories'].value_counts().head(10)
    print("\nContoh kategori buku (perlu parsing jika formatnya kompleks):")
    print(top_categories)


Contoh kategori buku (perlu parsing jika formatnya kompleks):
categories
['Fiction']                      101
['Biography & Autobiography']     24
['Business & Economics']          22
['History']                       12
['Juvenile Fiction']              10
['Comics & Graphic Novels']        8
['Humor']                          8
['Science']                        8
['Performing Arts']                5
['Young Adult Fiction']            5
Name: count, dtype: int64


## Data Preparation

#### Missing Values

In [9]:
# Fill missing values in the three tables.
# Every column is checked for existence first, so a dataset version that lacks
# one of them is skipped instead of raising KeyError (the numeric columns were
# already guarded this way; the text columns were not).
if not books_df.empty:
    # Free-text columns get an empty string, label-like columns get 'Unknown'.
    book_text_fill_values = {
        'book_description_from_chalchitra': '',
        'description': '',
        'synopsis': '',
        'categories': 'Unknown',
        'authors': 'Unknown',
        'publisher': 'Unknown',
    }
    for col, fill_value in book_text_fill_values.items():
        if col in books_df.columns:
            books_df[col] = books_df[col].fillna(fill_value)

    # Numeric columns: median for rating and page count, 0 for the ratings count.
    if 'averageRating' in books_df.columns:
        books_df['averageRating'] = books_df['averageRating'].fillna(books_df['averageRating'].median())
    if 'ratingsCount' in books_df.columns:
        books_df['ratingsCount'] = books_df['ratingsCount'].fillna(0)
    if 'pageCount' in books_df.columns:
        books_df['pageCount'] = books_df['pageCount'].fillna(books_df['pageCount'].median())

if not guests_df.empty:
    guest_text_fill_values = {'guest_profession_or_org': 'Unknown', 'guest_info': ''}
    for col, fill_value in guest_text_fill_values.items():
        if col in guests_df.columns:
            guests_df[col] = guests_df[col].fillna(fill_value)

# A recommendation row with a missing id cannot be used, so it is dropped.
if not recommendations_df.empty and recommendations_df.isnull().values.any():
    print("Menghapus baris dengan NaN di recommendations_df...")
    recommendations_df = recommendations_df.dropna()

print("Fillna applied using for missing values.")

Fillna applied using for missing values.


#### Feature Engineering

In [10]:
# Build `content_features`: one lower-cased text blob per book that concatenates
# the descriptive columns. It is the input of the TF-IDF model.
if not books_df.empty:
    # Column -> how its raw value is turned into plain text.
    cols_for_content = {
        'book_name': 'text',
        'book_description_from_chalchitra': 'text',
        'authors': 'list_like_text',
        'categories': 'list_like_text',
        'description': 'text',
        'synopsis': 'text',
        'publisher': 'text',
        'language': 'text'
    }
    # The missing-value cell writes the literal 'Unknown' into these columns.
    # Left in, it becomes a shared token that makes otherwise unrelated books
    # look similar, so it is blanked out before concatenation.
    placeholder_filled_cols = {'authors', 'categories', 'publisher'}

    books_df['content_features'] = ''

    for col, col_type in cols_for_content.items():
        if col not in books_df.columns:
            print(f"Peringatan: Kolom '{col}' tidak ditemukan di books_df untuk feature engineering.")
            continue

        # Work on a pure-string copy of the column.
        series_to_process = books_df[col].fillna('').astype(str)
        if col in placeholder_filled_cols:
            # Whole-cell match only: a real value that merely contains the word stays.
            series_to_process = series_to_process.replace('Unknown', '')

        if col_type == 'list_like_text':
            # Strip the list punctuation of stringified lists:
            # "['Author A', 'Author B']" -> "Author A Author B"
            series_to_process = series_to_process.str.replace(r"[\[\]',]", '', regex=True)

        books_df['content_features'] += series_to_process + ' '

    # Collapse whitespace runs, trim both ends and lower-case the result.
    books_df['content_features'] = (
        books_df['content_features']
        .str.replace(r'\s+', ' ', regex=True)
        .str.strip()
        .str.lower()
    )

    print("\nContoh content_features setelah perbaikan:")
    print(books_df[['book_name', 'content_features']].head())


Contoh content_features setelah perbaikan:
                             book_name  \
0  Charles Laughton: A Difficult Actor   
1                   Gone with the Wind   
2                             Hayvadan   
3               The London Jungle Book   
4           The Private Life of Plants   

                                    content_features  
0  charles laughton: a difficult actor this is th...  
1  gone with the wind a historical novel by marga...  
2  hayvadan it is based on kathasaritsagara and t...  
3  the london jungle book a visual travelogue by ...  
4  the private life of plants this book reveals h...  


In [11]:
# Display the books table, which now also carries the `content_features` column.
books_df

Unnamed: 0,book_id,book_name,book_description_from_chalchitra,authors,publisher,publishedDate,pageCount,categories,averageRating,ratingsCount,language,description,synopsis,content_features
0,0,Charles Laughton: A Difficult Actor,This is the fullest biography yet on the actor...,['Simon Callow'],Fromm International,1997,318.0,['Biography & Autobiography'],4.0,0.0,en,,,charles laughton: a difficult actor this is th...
1,1,Gone with the Wind,A historical novel by Margaret Mitchell.,['Margaret Mitchell'],Random House,2020-01-02,1072.0,['Fiction'],4.5,5.0,en,"'My dear, I don't give a damn.' Margaret Mitch...","'My dear, I don't give a damn.'\n\nMargaret Mi...",gone with the wind a historical novel by marga...
2,2,Hayvadan,It is based on Kathasaritsagara and Thomas Man...,['Girish Karnad'],Radhakrishna Prakashan,2021-07-29,132.0,Unknown,4.0,0.0,hi,,,hayvadan it is based on kathasaritsagara and t...
3,3,The London Jungle Book,A visual travelogue by Bhajju Shyam,"['Bhajju Shyam', 'Sirish Rao', 'Gita Wolf-Samp...",Tara Publishing,2004,48.0,['Art'],5.0,1.0,en,"Bhajju Shyam, of the Gond tribe of central Ind...",“A beautiful book. I would like to give it to ...,the london jungle book a visual travelogue by ...
4,4,The Private Life of Plants,This book reveals hidden events and phenomena ...,['David Attenborough'],Unknown,1995,320.0,['Botany'],4.0,3.0,en,"Without plants, there would be no food, no ani...","Without plants, there would be no food, no ani...",the private life of plants this book reveals h...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
314,314,Anathem,It is a science fiction novel by American writ...,['Neal Stephenson'],Harper Collins,2009-08-25,1008.0,['Fiction'],4.0,175.0,en,"For ten years Fraa Erasmas, a young avout, has...","For ten years Fraa Erasmas, a young avout, ha...",anathem it is a science fiction novel by ameri...
315,315,Cloud Atlas,"It is a work combining metafiction, historical...",['David Mitchell'],Vintage Canada,2010-07-16,528.0,['Fiction'],4.0,508.0,en,By the New York Times bestselling author of Th...,By the New York Times bestselling author of Th...,cloud atlas it is a work combining metafiction...
316,316,Watchmen,It is an American comic book maxiseries by Ala...,"['Alan Moore', 'Dave Gibbons', 'John Higgins']",Dc Comics,1987,413.0,['Comics & Graphic Novels'],4.0,3898.0,en,As former members of a disbanded group of supe...,"""Following two generations of masked superhero...",watchmen it is an american comic book maxiseri...
317,317,Matterhorn,An incredible publishing story written over th...,['Karl Marlantes'],"Grove/Atlantic, Inc.",2010-04-01,592.0,['Fiction'],4.5,93.0,en,"Intense, powerful, and compelling, Matterhorn ...","Intense, powerful, and compelling, Matterhorn ...",matterhorn an incredible publishing story writ...


#### Add Implicit Rating

In [12]:
# Add a `rating` column holding the constant 1 on every recommendation row
# (skipped when the frame is empty).
if not recommendations_df.empty:
    recommendations_df['rating'] = 1

# Display the resulting frame.
recommendations_df

Unnamed: 0,book_id,guest_id,rating
0,0,0,1
1,1,0,1
2,2,0,1
3,3,0,1
4,4,0,1
...,...,...,...
330,91,27,1
331,315,27,1
332,316,27,1
333,317,27,1


## Modelling

### Content-Based Filtering

#### TF-IDF

In [13]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Content-based model state, read by get_content_based_recommendations():
#   cosine_sim_books      - square book-to-book cosine similarity matrix
#   book_indices          - Series mapping book title -> row position in that matrix
#   books_df_for_indices  - the frame whose row order matches the matrix rows
cosine_sim_books = None
book_indices = None

if not books_df.empty and 'content_features' in books_df.columns and not books_df['content_features'].dropna().empty:
    print("Membangun model Content-Based Filtering...")
    try:
        # Filter ONCE and fit on that same frame, so matrix row i is always
        # books_df_for_indices row i. Filtering the TF-IDF input and the lookup
        # frame separately could shift the rows against each other.
        books_df_for_indices = books_df.dropna(subset=['book_name', 'content_features']).reset_index(drop=True)

        if books_df_for_indices.empty:
            print("Tidak ada buku yang valid untuk membuat book_indices pada Content-Based.")
        else:
            tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.85, min_df=2)
            tfidf_matrix = tfidf_vectorizer.fit_transform(books_df_for_indices['content_features'])
            cosine_sim_books = cosine_similarity(tfidf_matrix, tfidf_matrix)

            book_indices = pd.Series(books_df_for_indices.index, index=books_df_for_indices['book_name'])
            # Keep the first row of each title. `.drop_duplicates()` would compare
            # the positions (all unique) and leave duplicate titles in the index,
            # so a title lookup could return several rows.
            book_indices = book_indices[~book_indices.index.duplicated(keep='first')]

    except Exception as e:
        print(f"Error saat membangun model Content-Based: {e}")
else:
    print("Skipping Content-Based Filtering: books_df tidak siap atau kolom 'content_features' kosong/NaN semua.")

Membangun model Content-Based Filtering...


#### Recommendation Method

In [14]:
def get_content_based_recommendations(book_title, num_recommendations=5):
    """Recommend the books whose content is most similar to `book_title`.

    Reads the notebook globals `cosine_sim_books` (similarity matrix),
    `book_indices` (title -> matrix row) and `books_df_for_indices` /
    `books_df` (to translate rows back into titles).

    Args:
        book_title: Exact title to look up in `book_indices`.
        num_recommendations: Maximum number of titles to return.

    Returns:
        A pandas Series of book names ordered from most to least similar, or
        a str message when the model is not ready or the title is unknown.
    """
    if cosine_sim_books is None or book_indices is None or book_indices.empty:
        return "Model Content-Based belum siap atau tidak ada data."
    if book_title not in book_indices:
        return f"Buku '{book_title}' tidak ditemukan dalam indeks buku."

    idx = book_indices[book_title]
    # A title that occurs more than once yields a Series; use its first row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Guard against an index that points outside the similarity matrix.
    if idx >= cosine_sim_books.shape[0]:
        return f"Indeks buku '{book_title}' di luar jangkauan matriks similaritas."

    # Exclude the query book explicitly. Dropping "the first sorted entry" is
    # only right when the book is its own unique best match, which fails for
    # ties and for all-zero TF-IDF vectors (self-similarity 0).
    ranked = sorted(
        ((i, score) for i, score in enumerate(cosine_sim_books[idx]) if i != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )
    book_rec_indices = [i for i, _ in ranked[:num_recommendations]]

    # Prefer the frame the matrix was built from, since its rows match the matrix.
    if 'books_df_for_indices' in globals() and not books_df_for_indices.empty:
        recommended_books = books_df_for_indices['book_name'].iloc[book_rec_indices]
    elif not books_df.empty:
        # Fallback: books_df rows may not line up with the matrix rows.
        print("Peringatan: Menggunakan books_df.iloc untuk rekomendasi CB, indeks mungkin tidak sinkron jika ada NaN di content_features.")
        valid_rec_indices = [i for i in book_rec_indices if i < len(books_df)]
        recommended_books = books_df['book_name'].iloc[valid_rec_indices]
    else:
        return "DataFrame buku tidak tersedia untuk mengambil nama buku."

    return recommended_books

#### Example

In [15]:
# Example: content-based recommendations for one sampled book title.
if not books_df.empty and 'book_name' in books_df.columns and len(books_df['book_name'].dropna()) > 1 and book_indices is not None and not book_indices.empty:
    try:
        # Fixed random_state so re-running the notebook shows the same example
        # (the SVD model in this notebook is pinned to 42 as well).
        sample_book_title_cb = book_indices.index.to_series().sample(1, random_state=42).iloc[0]
        print(f"\n--- Rekomendasi Content-Based untuk '{sample_book_title_cb}' ---")
        recommendations_cb = get_content_based_recommendations(sample_book_title_cb, num_recommendations=3)
        # The function returns either a message (str) or a Series of titles.
        if isinstance(recommendations_cb, str) or not recommendations_cb.empty:
            print(recommendations_cb)
        else:
            print("Tidak ada rekomendasi content-based yang dapat dibuat.")
    except Exception as e:
        print(f"Error saat mengambil contoh rekomendasi Content-Based: {e}")
else:
    print("Skipping contoh Content-Based: 'book_name' column missing, tidak cukup buku, atau model belum siap.")



--- Rekomendasi Content-Based untuk 'The Godfather' ---
312         2001 Series
112    American Kingpin
16          Bleak House
Name: book_name, dtype: object


### Collaborative Filtering

CF > Model-Based > Matrix Factorization

#### Singular Value Decomposition (SVD)

In [16]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import cross_validate

# Collaborative filtering: fit a Surprise SVD on all (guest, book, rating) rows.
# NOTE(review): the implicit-rating cell assigns rating = 1 to every row, so the
# model only ever sees a single rating value -- confirm this is intended.
SURPRISE_AVAILABLE = True
algo_svd = None

if not SURPRISE_AVAILABLE:
    pass  # nothing is trained or printed when Surprise is flagged unavailable
elif (recommendations_df.empty
      or not {'guest_id', 'book_id', 'rating'}.issubset(recommendations_df.columns)
      or len(recommendations_df) <= 1):
    print("Skipping Collaborative Filtering: recommendations_df tidak siap, kosong, atau kurang dari 2 interaksi.")
else:
    print("\nMembangun model Collaborative Filtering (SVD)...")
    try:
        reader = Reader(rating_scale=(0, 1))
        data_cf = Dataset.load_from_df(
            recommendations_df[['guest_id', 'book_id', 'rating']],
            reader,
        )
        # Train on every available interaction (no hold-out split here).
        trainset_full = data_cf.build_full_trainset()

        algo_svd = SVD(n_factors=50, n_epochs=20, random_state=42, verbose=False)
        algo_svd.fit(trainset_full)
        print("Model SVD berhasil dilatih.")
    except Exception as e:
        print(f"Error saat membangun model SVD: {e}")
        # Leave the model unset so the later cells skip themselves.
        algo_svd = None


Membangun model Collaborative Filtering (SVD)...
Model SVD berhasil dilatih.


#### Recommendation Method

In [17]:
def get_collaborative_filtering_recommendations(guest_id_input, algo, n_recs=5):
    """Recommend books for one guest with a fitted collaborative-filtering model.

    Scores every book known to the model that the guest has not already
    recommended (according to the global `recommendations_df`) and returns the
    highest-scoring ones.

    Args:
        guest_id_input: Raw guest id, as it appears in `recommendations_df`.
        algo: Fitted model exposing `trainset` and `test()`; may be None.
        n_recs: Maximum number of books to return.

    Returns:
        A pandas Series of book names in ranking order (ids without a name in
        the global `books_df` are left out), a Series of book ids when
        `books_df` cannot supply names, or a str message when no recommendation
        can be produced or an error occurs.
    """
    if algo is None:
        return "Model Collaborative Filtering belum siap."
    if 'book_id' not in recommendations_df or 'guest_id' not in recommendations_df:
        return "Kolom 'book_id' atau 'guest_id' tidak ada di recommendations_df."

    try:
        # to_inner_uid raises ValueError for a guest the model was not trained on.
        try:
            algo.trainset.to_inner_uid(guest_id_input)
        except ValueError:
            return f"Guest ID '{guest_id_input}' tidak ditemukan dalam training set model CF."

        all_book_ids_in_model = [algo.trainset.to_raw_iid(inner_id) for inner_id in algo.trainset.all_items()]
        guest_recommended_books_raw_ids = recommendations_df.loc[
            recommendations_df['guest_id'] == guest_id_input, 'book_id'
        ].unique()

        books_to_predict = np.setdiff1d(all_book_ids_in_model, guest_recommended_books_raw_ids)

        # Test the SIZE, not `.any()`: `.any()` is False for an array that only
        # contains book id 0, which would wrongly report "nothing to recommend".
        if books_to_predict.size == 0:
            return "Tamu ini sudah merekomendasikan semua buku yang ada di model, atau tidak ada buku baru untuk direkomendasikan."

        # The third tuple element is a dummy "true" rating required by test().
        predictions = algo.test([(guest_id_input, book_id, 0) for book_id in books_to_predict])
        predictions.sort(key=lambda pred: pred.est, reverse=True)
        recommended_book_ids = [pred.iid for pred in predictions[:n_recs]]

        if not books_df.empty and 'book_id' in books_df.columns and 'book_name' in books_df.columns:
            names_by_id = books_df.drop_duplicates(subset='book_id').set_index('book_id')['book_name']
            # reindex keeps the ranking order and yields NaN (dropped) for ids
            # missing from books_df, where `.loc` would raise KeyError.
            return names_by_id.reindex(recommended_book_ids).dropna().reset_index(drop=True)
        # Fall back to raw ids when books_df cannot supply names.
        return pd.Series(recommended_book_ids, name="book_id")
    except Exception as e:
        return f"Error saat mendapatkan rekomendasi CF: {e}"

#### Example

In [18]:
# Example: collaborative-filtering recommendations for one guest known to the model.
if algo_svd is not None and not guests_df.empty and 'guest_id' in guests_df.columns and \
   not recommendations_df.empty and 'guest_id' in recommendations_df.columns and \
   len(recommendations_df['guest_id'].unique()) > 0:
    try:
        # Raw guest ids of every user present in the training set.
        svd_trainset = algo_svd.trainset
        available_guest_ids_in_model = [svd_trainset.to_raw_uid(inner_id) for inner_id in svd_trainset.all_users()]

        if available_guest_ids_in_model:
            # Seeded generator so re-running the notebook picks the same guest
            # (the SVD model itself is pinned to random_state=42 as well).
            sample_guest_id_cf = np.random.default_rng(42).choice(available_guest_ids_in_model)
            print(f"\n--- Rekomendasi Collaborative Filtering (SVD) untuk Guest ID '{sample_guest_id_cf}' ---")
            recommendations_svd = get_collaborative_filtering_recommendations(sample_guest_id_cf, algo_svd, n_recs=3)
            # The function returns either a message (str) or a Series.
            if isinstance(recommendations_svd, str):
                print(recommendations_svd)
            elif recommendations_svd is not None and not recommendations_svd.empty:
                print(recommendations_svd)
            else:
                print("Tidak ada rekomendasi collaborative filtering yang dapat dibuat (kemungkinan hasil kosong atau None).")
        else:
            print("Tidak ada user ID (inner) di model CF untuk dijadikan sampel.")
    except Exception as e:
        print(f"Error saat mengambil contoh rekomendasi Collaborative Filtering: {e}")
else:
    print("Skipping contoh Collaborative Filtering: model SVD belum siap atau data tidak cukup.")


--- Rekomendasi Collaborative Filtering (SVD) untuk Guest ID '17' ---
0    Charles Laughton: A Difficult Actor
1                     Gone with the Wind
2             The Private Life of Plants
Name: book_name, dtype: object


## Evaluation

In [19]:
# Offline evaluation: 3-fold cross-validation of a fresh SVD with the same
# hyper-parameters as the trained model, reporting mean RMSE and MAE.
# NOTE(review): the implicit-rating cell sets every rating to 1, so these errors
# measure how closely the model reproduces a constant -- confirm that RMSE/MAE
# are the intended metrics.
if not SURPRISE_AVAILABLE:
    pass  # nothing is evaluated or printed when Surprise is flagged unavailable
elif (algo_svd is None
      or recommendations_df.empty
      or not {'guest_id', 'book_id', 'rating'}.issubset(recommendations_df.columns)
      or len(recommendations_df) <= 1):
    print("Skipping Evaluasi CF: model SVD belum siap atau data tidak cukup.")
else:
    try:
        reader_eval = Reader(rating_scale=(0, 1))
        data_cf_eval = Dataset.load_from_df(
            recommendations_df[['guest_id', 'book_id', 'rating']],
            reader_eval,
        )
        algo_svd_eval = SVD(n_factors=50, n_epochs=20, random_state=42, verbose=False)
        cv_results = cross_validate(
            algo_svd_eval,
            data_cf_eval,
            measures=['RMSE', 'MAE'],
            cv=3,
            verbose=False,
        )
        print("\nHasil Cross-Validation SVD:")
        # Average each metric over the three folds.
        print(f"Rata-rata RMSE: {cv_results['test_rmse'].mean():.4f}")
        print(f"Rata-rata MAE: {cv_results['test_mae'].mean():.4f}")
    except Exception as e:
        print(f"Error saat melakukan cross-validation: {e}")


Hasil Cross-Validation SVD:
Rata-rata RMSE: 0.0126
Rata-rata MAE: 0.0056
