In [None]:
pip install tensorflow-recommenders



In [None]:
pip install requests



In [None]:
pip install pandas



In [None]:
pip install numpy



In [None]:
pip install scikit-learn



In [None]:
pip install scipy



In [None]:
pip install scikit-surprise



In [None]:
pip install matplotlib seaborn



In [None]:
pip install sqlalchemy



In [None]:
pip install jupyter



In [None]:
pip install implicit

Collecting implicit
  Downloading implicit-0.7.2-cp310-cp310-manylinux2014_x86_64.whl (8.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: implicit
Successfully installed implicit-0.7.2


In [None]:
import requests
import json
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from tensorflow.keras import layers
from surprise import Reader, Dataset
from surprise import SVD, model_selection
from surprise import SVD
from surprise.model_selection import cross_validate
import matplotlib.pyplot as plt
import seaborn as sns
from sqlalchemy import create_engine
import sqlite3
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

In [None]:
def fetch_books(query, api_key, max_results=40, timeout=10):
    """Send a search request to the Google Books API and return the decoded JSON.

    Args:
        query: Search string sent as the ``q`` parameter.
        api_key: API key sent as the ``key`` parameter.
        max_results: Value sent as ``maxResults``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed JSON response, or an empty dict if the request failed
        (the error is printed, not raised).
    """
    url = "https://www.googleapis.com/books/v1/volumes"
    params = {'q': query, 'key': api_key, 'maxResults': max_results}
    try:
        # Without a timeout a stalled connection would block the notebook forever;
        # timeout errors are RequestException subclasses and are handled below.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from Google Books API: {e}")
        return {}

def extract_full_book_info(json_data):
    """Flatten every entry under ``items`` of a Google Books response into a dict.

    Fields are read from the ``volumeInfo``, ``saleInfo`` and ``accessInfo``
    sub-objects of each item; anything missing is reported as ``None``.

    Returns:
        A list with one dict per item (empty when there is no ``items`` key).
    """
    volume_fields = (
        'title', 'authors', 'publisher', 'publishedDate', 'description',
        'industryIdentifiers', 'pageCount', 'categories', 'averageRating',
        'ratingsCount', 'maturityRating', 'imageLinks', 'language',
        'previewLink', 'infoLink', 'canonicalVolumeLink',
    )
    sale_fields = ('saleability', 'isEbook', 'listPrice', 'retailPrice', 'buyLink')
    access_fields = ('webReaderLink', 'accessViewStatus', 'quoteSharingAllowed')

    records = []
    for entry in json_data.get('items', []):
        volume = entry.get('volumeInfo', {})
        sale = entry.get('saleInfo', {})
        access = entry.get('accessInfo', {})

        record = {field: volume.get(field) for field in volume_fields}
        record.update((field, sale.get(field)) for field in sale_fields)
        # Format availability is nested one level deeper than the other access fields.
        record['epub'] = access.get('epub', {}).get('isAvailable')
        record['pdf'] = access.get('pdf', {}).get('isAvailable')
        record.update((field, access.get(field)) for field in access_fields)
        records.append(record)
    return records

def create_table(conn):
    """Create the ``books`` table on ``conn`` unless it already exists.

    Any ``sqlite3.Error`` raised while executing the statement is printed
    rather than propagated.
    """
    books_ddl = """ CREATE TABLE IF NOT EXISTS books (
                                        id integer PRIMARY KEY,
                                        title text,
                                        authors text,
                                        publisher text,
                                        publishedDate text,
                                        description text,
                                        industryIdentifiers text,
                                        pageCount integer,
                                        categories text,
                                        averageRating real,
                                        ratingsCount integer,
                                        maturityRating text,
                                        imageLinks text,
                                        language text,
                                        previewLink text,
                                        infoLink text,
                                        canonicalVolumeLink text,
                                        saleability text,
                                        isEbook boolean,
                                        listPrice real,
                                        retailPrice real,
                                        buyLink text,
                                        epub boolean,
                                        pdf boolean,
                                        webReaderLink text,
                                        accessViewStatus text,
                                        quoteSharingAllowed boolean
                                    ); """
    try:
        conn.cursor().execute(books_ddl)
    except sqlite3.Error as err:
        print(err)

def recreate_table(conn):
    """Drop the ``books`` table if present, then build it again via ``create_table``.

    A ``sqlite3.Error`` raised here is printed instead of propagated.
    """
    try:
        conn.cursor().execute("DROP TABLE IF EXISTS books")
        create_table(conn)
    except sqlite3.Error as err:
        print(err)

# Initialize database and table.
# The connection is opened with sqlite3 directly: no create_connection() helper
# is defined at this point of the notebook, so calling one here would raise
# NameError on a fresh kernel.
database = "books.db"
conn = None
try:
    conn = sqlite3.connect(database)
except sqlite3.Error as e:
    print(e)
if conn is not None:
    try:
        recreate_table(conn)  # Recreate the table to ensure schema is correct
    finally:
        # Release the file handle even if the schema reset fails.
        conn.close()

def main():
    """Ask for a search query, fetch matching books, then store and print them.

    The API key is read from the ``GOOGLE_BOOKS_API_KEY`` environment variable
    instead of being embedded in the notebook. When the variable is unset the
    key is ``None``; requests leaves ``None``-valued params out of the URL, so
    the call is then made without a key.
    """
    import os  # local import: keeps this cell runnable without touching the import cell

    api_key = os.environ.get("GOOGLE_BOOKS_API_KEY")
    query = input("Enter a search query: ")
    book_data = fetch_books(query, api_key)
    books = extract_full_book_info(book_data)
    # NOTE(review): save_books_to_db and display_books are not defined in this
    # cell or any earlier one; they must already exist in the kernel. `books` is
    # a list of dicts here — confirm the display_books in scope accepts that.
    save_books_to_db(books)
    display_books(books)

if __name__ == "__main__":
    main()

Enter a search query: Marketing
Title: Principles of Marketing
Authors: ['Gary Armstrong', 'Stewart Adam', 'Sara Denize', 'Philip Kotler']
Publisher: Pearson Australia
Published Date: 2014-10-01
Description: The 6th edition of Principles of Marketing makes the road to learning and teaching marketing more effective, easier and more enjoyable than ever. Today’s marketing is about creating customer value and building profitable customer relationships. With even more new Australian and international case studies, engaging real-world examples and up-to-date information, Principles of Marketing shows students how customer value–creating and capturing it–drives every effective marketing strategy. The 6th edition is a thorough revision, reflecting the latest trends in marketing, including new coverage of social media, mobile and other digital technologies. In addition, it covers the rapidly changing nature of customer relationships with both companies and brands, and the tools marketers use to

In [None]:
def create_connection(db_file):
    """Open a SQLite connection to ``db_file``.

    Returns:
        The connection, or ``None`` if ``sqlite3.connect`` raised a
        ``sqlite3.Error`` (the error is printed).
    """
    try:
        return sqlite3.connect(db_file)
    except sqlite3.Error as err:
        print(err)
        return None

def load_books_from_db(database):
    """Read the whole ``books`` table of ``database`` into a DataFrame.

    Raises:
        sqlite3.OperationalError: if the database could not be opened.
    """
    conn = create_connection(database)
    if conn is None:
        # create_connection only prints on failure; fail loudly here instead of
        # letting pandas crash on a None connection.
        raise sqlite3.OperationalError(f"could not open database {database!r}")
    try:
        return pd.read_sql_query("SELECT * FROM books", conn)
    finally:
        # Close the connection even when the query raises (e.g. missing table).
        conn.close()

def recommend_books(book_title, cosine_sim, df, top_n=10):
    """Return the rows of ``df`` most similar to the book titled ``book_title``.

    Args:
        book_title: Exact title to look up in ``df['title']``.
        cosine_sim: Square similarity matrix whose row/column ``i`` corresponds
            to the ``i``-th row (by position) of ``df``.
        df: Book table with a ``title`` column.
        top_n: Maximum number of recommendations to return.

    Returns:
        Up to ``top_n`` rows of ``df`` ordered by decreasing similarity, never
        including the query book itself. An empty DataFrame (after printing a
        message) when the title is not present.
    """
    # Work with positions, not index labels: cosine_sim and df.iloc are both
    # positional, so a non-default index must not leak into the lookup.
    positions = np.flatnonzero((df['title'] == book_title).to_numpy())
    if positions.size == 0:
        print(f"Book titled '{book_title}' not found in the database.")
        return pd.DataFrame()  # Return an empty DataFrame

    idx = int(positions[0])
    scores = np.asarray(cosine_sim[idx], dtype=float)
    # Stable sort keeps the original order among ties, as sorted(..., reverse=True) did.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the query book explicitly rather than assuming it sorts first; that
    # assumption breaks on ties (duplicate books, all-zero similarity rows).
    ranked = ranked[ranked != idx][:top_n]
    return df.iloc[ranked]

def display_books(books):
    """Print a fixed set of fields for every row of the ``books`` DataFrame.

    ``title`` is required (a missing column raises ``KeyError``); every other
    field is looked up with ``.get`` and printed as ``None`` when absent.
    Each book is followed by a line of 50 ``=`` characters.
    """
    optional_fields = (
        ("Authors", 'authors'),
        ("Publisher", 'publisher'),
        ("Published Date", 'publishedDate'),
        ("Description", 'description'),
        ("Page Count", 'pageCount'),
        ("Categories", 'categories'),
        ("Average Rating", 'averageRating'),
        ("Ratings Count", 'ratingsCount'),
        ("Language", 'language'),
        ("Preview Link", 'previewLink'),
    )
    separator = "=" * 50
    for _, row in books.iterrows():
        print(f"Title: {row['title']}")
        for label, column in optional_fields:
            print(f"{label}: {row.get(column)}")
        print(separator)

def main():
    """Content-based recommender: TF-IDF over book text, ranked by cosine similarity.

    Loads ``books.db``, builds one text field per book from title, authors,
    categories and description, asks for a title and prints the books most
    similar to it.
    """
    database = "books.db"
    df = load_books_from_db(database)
    if df.empty:
        # An empty corpus cannot be vectorized; stop with a clear message.
        print("No books in the database.")
        return
    df['description'] = df['description'].fillna('')

    # Combine features for better representation.
    # title is filled too: one NULL title would turn the whole concatenation
    # into NaN, which TfidfVectorizer rejects as an invalid document.
    df['combined_features'] = (df['title'].fillna('') + " " + df['authors'].fillna('') + " " +
                               df['categories'].fillna('') + " " + df['description'])

    # Feature extraction
    tfidf = TfidfVectorizer(stop_words='english', max_df=0.8, min_df=2, ngram_range=(1, 2))
    tfidf_matrix = tfidf.fit_transform(df['combined_features'])

    # TF-IDF rows are L2-normalised by default, so the linear kernel equals cosine similarity.
    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

    # Recommend books
    book_title = input("Enter a book title to get recommendations: ")
    recommended_books = recommend_books(book_title, cosine_sim, df)
    if not recommended_books.empty:
        display_books(recommended_books)

if __name__ == "__main__":
    main()

Enter a book title to get recommendations: Essentials of Marketing
Title: Essentials of Marketing Management
Authors: ["Geoffrey Lancaster", "Lester Massingham"]
Publisher: Routledge
Published Date: 2010-10
Description: The overall success of an organization is dependent on how marketing is able to inform strategy and maintain an operational focus on market needs. This title covers such topics as: consumer and organizational buyer behaviour; product and innovation strategies; direct marketing; and, e-marketing.
Page Count: 551
Categories: ["Business & Economics"]
Average Rating: nan
Ratings Count: nan
Language: en
Preview Link: http://books.google.com/books?id=WfctCgAAQBAJ&printsec=frontcover&dq=Marketing&hl=&cd=28&source=gbs_api
Title: Marketing
Authors: ["Juan M. Mart\u00ednez S\u00e1nchez"]
Publisher: Firmas Press
Published Date: 2010-01-01
Description: 
Page Count: 195
Categories: ["Business & Economics"]
Average Rating: nan
Ratings Count: nan
Language: es
Preview Link: http://book

In [None]:
def precision_at_k(recommended_books, relevant_books, k=3):
    """Fraction of ``k`` made up by distinct relevant titles among the first ``k`` recommendations.

    ``recommended_books`` is a DataFrame with a ``title`` column, in rank order;
    ``relevant_books`` is a collection of titles.
    """
    top_titles = set(recommended_books['title'].tolist()[:k])
    hits = top_titles.intersection(relevant_books)
    return len(hits) / k

def recall_at_k(recommended_books, relevant_books, k=3):
    """Share of the relevant titles that appear in the first ``k`` recommendations.

    Args:
        recommended_books: DataFrame with a ``title`` column, in rank order.
        relevant_books: Collection of relevant titles.
        k: Number of top recommendations to consider.

    Returns:
        A value in [0, 1]; ``0.0`` when ``relevant_books`` is empty.
    """
    # Deduplicate so a title listed twice does not inflate the denominator.
    relevant = set(relevant_books)
    if not relevant:
        # Nothing to retrieve: report 0 instead of dividing by zero.
        return 0.0
    top_titles = set(recommended_books['title'].tolist()[:k])
    return len(top_titles & relevant) / len(relevant)

def f1_score_at_k(precision, recall):
    """Harmonic mean of ``precision`` and ``recall``; 0 when both are zero."""
    total = precision + recall
    return 2 * (precision * recall) / total if total != 0 else 0

def mean_average_precision_at_k(recommended_books, relevant_books, k=3):
    """Average precision over the first ``k`` recommendations for one query.

    Args:
        recommended_books: DataFrame with a ``title`` column, in rank order.
        relevant_books: Collection of relevant titles.
        k: Number of top recommendations to consider.

    Returns:
        The sum of precision values at each rank holding a relevant title,
        divided by ``min(number of relevant titles, k)``. ``0.0`` when there
        are no relevant titles or ``k`` is not positive.
    """
    relevant = set(relevant_books)
    if not relevant or k <= 0:
        return 0.0

    top_titles = recommended_books['title'].tolist()[:k]
    hits = 0
    score = 0.0
    # Iterate over what was actually returned: there may be fewer than k
    # recommendations, and indexing range(k) would raise IndexError.
    for rank, title in enumerate(top_titles, start=1):
        if title in relevant:
            hits += 1
            score += hits / rank
    return score / min(len(relevant), k)

def reciprocal_rank_at_k(recommended_books, relevant_books):
    """Return 1 / rank of the first recommended title found in ``relevant_books``.

    Ranks are 1-based and the whole recommendation list is scanned; the result
    is 0 when no recommended title is relevant.
    """
    ranked_titles = recommended_books['title'].tolist()
    first_hit = next(
        (rank for rank, title in enumerate(ranked_titles, start=1) if title in relevant_books),
        None,
    )
    return 0 if first_hit is None else 1 / first_hit

def test_metrics():
    """Score the content-based recommender on hand-labelled test cases.

    For each test case the top recommendations for ``book_title`` are compared
    with ``relevant_books`` and Precision@3, Recall@3, F1@3, MAP@3 and the
    reciprocal rank are printed.
    """
    database = "books.db"
    df = load_books_from_db(database)
    df['description'] = df['description'].fillna('')

    # Combine features for better representation
    df['combined_features'] = (df['title'].fillna('') + " " + df['authors'].fillna('') + " " +
                               df['categories'].fillna('') + " " + df['description'])

    # Feature extraction.
    # Fit on combined_features with the same vectorizer settings the
    # recommender's main() uses; fitting on description alone would score a
    # different model from the one users actually query.
    tfidf = TfidfVectorizer(stop_words='english', max_df=0.8, min_df=2, ngram_range=(1, 2))
    tfidf_matrix = tfidf.fit_transform(df['combined_features'])

    # Compute cosine similarity
    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

    # Define test cases
    test_cases = [
        {
            "book_title": "Essentials of Marketing",
            "relevant_books": ["Marketing For Dummies", "The Marketing Book", "Internet Marketing"]
        },
    ]

    # Evaluate each test case
    for test in test_cases:
        recommended_books = recommend_books(test["book_title"], cosine_sim, df)
        if recommended_books.empty:
            print(f"Book titled '{test['book_title']}' not found in the database.")
            continue

        precision = precision_at_k(recommended_books, test["relevant_books"], k=3)
        recall = recall_at_k(recommended_books, test["relevant_books"], k=3)
        f1_score = f1_score_at_k(precision, recall)
        map_score = mean_average_precision_at_k(recommended_books, test["relevant_books"], k=3)
        rr_score = reciprocal_rank_at_k(recommended_books, test["relevant_books"])

        print(f"Testing book: {test['book_title']}")
        print(f"Precision@3: {precision}")
        print(f"Recall@3: {recall}")
        print(f"F1 Score@3: {f1_score}")
        print(f"Mean Average Precision@3: {map_score}")
        print(f"Reciprocal Rank: {rr_score}")
        print("="*50)

if __name__ == "__main__":
    test_metrics()

Testing book: Essentials of Marketing
Precision@3: 0.3333333333333333
Recall@3: 0.3333333333333333
F1 Score@3: 0.3333333333333333
Mean Average Precision@3: 0.3333333333333333
Reciprocal Rank: 1.0


Collaborative Filtering

In [None]:
import requests
import sqlite3
import pandas as pd
import numpy as np
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split

def fetch_books(query, api_key, max_results=40, timeout=10):
    """Search the Google Books volumes endpoint and return the decoded JSON.

    Args:
        query: Search string sent as ``q``.
        api_key: API key sent as ``key``.
        max_results: Value sent as ``maxResults``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed response (a notice is printed when it has no ``items``),
        or an empty dict if the request failed.
    """
    url = "https://www.googleapis.com/books/v1/volumes"
    params = {'q': query, 'key': api_key, 'maxResults': max_results}
    try:
        # A timeout keeps a stalled connection from hanging the notebook;
        # timeout errors are RequestException subclasses and land in the handler below.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()  # Ensure the response is successful
        books_data = response.json()
        if not books_data.get('items'):
            print("No books found for the query.")
        return books_data
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from Google Books API: {e}")
        return {}

def create_connection(db_file):
    """Open a SQLite connection to ``db_file`` and report the outcome on stdout.

    Returns:
        The open connection, or ``None`` when ``sqlite3.connect`` raised a
        ``sqlite3.Error``.
    """
    try:
        connection = sqlite3.connect(db_file)
    except sqlite3.Error as err:
        print(f"Error connecting to database: {err}")
        return None
    print("Connection to database successful")
    return connection

def simulate_user_ratings(conn, seed=None):
    """Generate synthetic ratings from five simulated users for the stored books.

    Each user skips a book with probability ~0.2; otherwise they contribute
    ``max(1, ratingsCount // 5)`` ratings drawn from a normal distribution
    centred on the book's ``averageRating`` (3 when missing), rounded and
    clipped to the 1-5 range.

    Args:
        conn: Open database connection holding a ``books`` table with
            ``id``, ``averageRating`` and ``ratingsCount`` columns.
        seed: Optional seed for reproducible output. ``None`` draws from
            NumPy's global random state.

    Returns:
        DataFrame with columns ``user_id``, ``book_id``, ``rating``; a
        column-less empty DataFrame when the table holds no books.
    """
    books_df = pd.read_sql_query("SELECT id, averageRating, ratingsCount FROM books", conn)
    if books_df.empty:
        print("No books in the database to rate.")
        return pd.DataFrame()  # Return empty DataFrame if no books available

    rng = np.random if seed is None else np.random.RandomState(seed)

    user_ids = range(1, 6)  # Simulate 5 users
    ratings = []
    for _, row in books_df.iterrows():
        book_id = row['id']
        avg_rating = row['averageRating'] if not pd.isna(row['averageRating']) else 3
        ratings_count = row['ratingsCount'] if not pd.isna(row['ratingsCount']) else 1
        # ratingsCount arrives as a float whenever the column contains NULLs;
        # cast first so range() below always receives an int.
        ratings_per_user = max(1, int(ratings_count) // len(user_ids))
        for user_id in user_ids:
            if rng.rand() > 0.2:  # Leave some books unrated by each user
                for _ in range(ratings_per_user):
                    rating = rng.normal(loc=avg_rating, scale=1.0)
                    rating = min(max(round(rating), 1), 5)  # Ensure the rating is between 1 and 5
                    ratings.append((user_id, book_id, rating))
    ratings_df = pd.DataFrame(ratings, columns=['user_id', 'book_id', 'rating'])
    if ratings_df.empty:
        print("No ratings generated.")
    return ratings_df

def get_collaborative_recommendations(user_id, svd_model, books_df, ratings_df, num_recommendations=5):
    """Recommend the unrated books with the highest predicted rating for a user.

    Args:
        user_id: User to recommend for.
        svd_model: Fitted model exposing ``predict(user_id, book_id).est``.
        books_df: DataFrame with ``id`` and ``title`` columns.
        ratings_df: DataFrame with ``user_id`` and ``book_id`` columns.
        num_recommendations: Maximum number of titles to return.

    Returns:
        List of book titles, best prediction first.
    """
    book_ids = books_df['id'].unique()
    print(f"Total books: {len(book_ids)}")  # Check total books available

    book_titles = books_df.set_index('id')['title'].to_dict()
    # Collect this user's rated books once; filtering ratings_df inside the loop
    # rescans the whole frame for every book.
    already_rated = set(ratings_df.loc[ratings_df['user_id'] == user_id, 'book_id'])

    predictions = []
    for book_id in book_ids:
        if book_id in already_rated:
            print(f"Book {book_id} already rated by user {user_id}")
            continue
        predicted_rating = svd_model.predict(user_id, book_id).est
        predictions.append((book_id, predicted_rating))

    print(f"Total predictions made: {len(predictions)}")  # Check how many predictions were made
    predictions.sort(key=lambda x: x[1], reverse=True)
    recommended_books = [book_titles.get(book_id, "Unknown Book Title") for book_id, _ in predictions[:num_recommendations]]
    return recommended_books

def extract_full_book_info(book_data):
    """Reduce a Google Books response to id, title and rating fields per volume.

    Each item must carry ``id`` and ``volumeInfo`` (a ``KeyError`` is raised
    otherwise). A missing title becomes ``'N/A'``; missing rating fields
    become ``None``.
    """
    return [
        {
            'id': item['id'],
            'title': item['volumeInfo'].get('title', 'N/A'),
            'averageRating': item['volumeInfo'].get('averageRating'),
            'ratingsCount': item['volumeInfo'].get('ratingsCount'),
        }
        for item in book_data.get('items', [])
    ]

def save_books_to_db(books):
    """Write ``books`` to the ``books`` table of ``books.db``, replacing the table.

    Args:
        books: Records accepted by ``pd.DataFrame`` (here: a list of dicts).
    """
    conn = create_connection('books.db')
    if conn is None:
        print("Failed to connect to the database.")
        return
    try:
        df = pd.DataFrame(books)
        df.to_sql('books', conn, if_exists='replace', index=False)
        print("Books saved to database.")
    finally:
        # The connection used to stay open after every call; always release it.
        conn.close()

def collaborative_filtering(ratings_df):
    """Fit a Surprise ``SVD`` model on ``ratings_df`` and return it.

    The ratings are loaded with a 1-5 rating scale and split with
    ``test_size=0.2``; only the training part is used for fitting.
    """
    rating_reader = Reader(rating_scale=(1, 5))
    dataset = Dataset.load_from_df(ratings_df, rating_reader)
    train_part, _held_out = train_test_split(dataset, test_size=0.2)
    model = SVD()
    model.fit(train_part)
    return model

def main():
    """Fetch books, simulate ratings, fit SVD and print recommendations for user 1.

    The API key is read from the ``GOOGLE_BOOKS_API_KEY`` environment variable
    rather than being stored in the notebook. When it is unset the key is
    ``None``; requests leaves ``None``-valued params out of the URL.
    """
    import os  # local import: keeps this cell independent of the import cell

    api_key = os.environ.get("GOOGLE_BOOKS_API_KEY")
    query = "Marketing Campaign Development"
    book_data = fetch_books(query, api_key)
    books = extract_full_book_info(book_data)
    save_books_to_db(books)

    conn = create_connection('books.db')
    try:
        ratings_df = simulate_user_ratings(conn)
        svd_model = collaborative_filtering(ratings_df)
        books_df = pd.read_sql_query("SELECT * FROM books", conn)
    finally:
        # Everything needed is in memory now; release the database handle.
        if conn is not None:
            conn.close()

    user_id = 1
    recommendations = get_collaborative_recommendations(user_id, svd_model, books_df, ratings_df)
    print(f"Recommendations for user {user_id}: {recommendations}")

if __name__ == "__main__":
    main()

Connection to database successful
Books saved to database.
Connection to database successful
Total books: 40
Book xFlXcScu-V8C already rated by user 1
Book gGgcSSr4d1IC already rated by user 1
Book ycRWDwAAQBAJ already rated by user 1
Book iZUeo2IM01kC already rated by user 1
Book l5OMJ6T53IQC already rated by user 1
Book bIN7AwAAQBAJ already rated by user 1
Book RhMvDAAAQBAJ already rated by user 1
Book mAuptAEACAAJ already rated by user 1
Book VEoa0AEACAAJ already rated by user 1
Book XoMt2xlBQAAC already rated by user 1
Book XDoYcqmFAx8C already rated by user 1
Book joEkP6e_MVsC already rated by user 1
Book xAvPKgd-jMsC already rated by user 1
Book cEItElOnRScC already rated by user 1
Book GmmdEAAAQBAJ already rated by user 1
Book aqo_EAAAQBAJ already rated by user 1
Book yfZyAwAAQBAJ already rated by user 1
Book vAe_yQEACAAJ already rated by user 1
Book A4hO-zwxXi8C already rated by user 1
Book yCkB3ruNe_0C already rated by user 1
Book -aB1AwAAQBAJ already rated by user 1
Book JL59

In [None]:
import requests
import sqlite3
import pandas as pd
import numpy as np
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from surprise.accuracy import mae, rmse
from collections import defaultdict

def fetch_books(query, api_key, max_results=40, timeout=10):
    """Call the Google Books volumes search and return its JSON payload.

    Args:
        query: Search string sent as ``q``.
        api_key: API key sent as ``key``.
        max_results: Value sent as ``maxResults``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The parsed response (a notice is printed when it has no ``items``),
        or an empty dict if the request failed.
    """
    url = "https://www.googleapis.com/books/v1/volumes"
    params = {'q': query, 'key': api_key, 'maxResults': max_results}
    try:
        # Bound the wait: requests has no default timeout and would otherwise
        # block indefinitely on an unresponsive server.
        response = requests.get(url, params=params, timeout=timeout)
        response.raise_for_status()  # Ensure the response is successful
        books_data = response.json()
        if not books_data.get('items'):
            print("No books found for the query.")
        return books_data
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data from Google Books API: {e}")
        return {}

def create_connection(db_file):
    """Connect to the SQLite database at ``db_file``, printing success or failure.

    Returns:
        The connection on success, ``None`` if a ``sqlite3.Error`` occurred.
    """
    try:
        handle = sqlite3.connect(db_file)
    except sqlite3.Error as exc:
        print(f"Error connecting to database: {exc}")
        handle = None
    else:
        print("Connection to database successful")
    return handle

def simulate_user_ratings(conn, seed=None):
    """Generate synthetic ratings from five simulated users for the stored books.

    Each user skips a book with probability ~0.2; otherwise they contribute
    ``max(1, ratingsCount // 5)`` ratings drawn from a normal distribution
    centred on the book's ``averageRating`` (3 when missing), rounded and
    clipped to the 1-5 range.

    Args:
        conn: Open database connection holding a ``books`` table with
            ``id``, ``averageRating`` and ``ratingsCount`` columns.
        seed: Optional seed for reproducible output. ``None`` draws from
            NumPy's global random state.

    Returns:
        DataFrame with columns ``user_id``, ``book_id``, ``rating``; a
        column-less empty DataFrame when the table holds no books.
    """
    books_df = pd.read_sql_query("SELECT id, averageRating, ratingsCount FROM books", conn)
    if books_df.empty:
        print("No books in the database to rate.")
        return pd.DataFrame()  # Return empty DataFrame if no books available

    rng = np.random if seed is None else np.random.RandomState(seed)

    user_ids = range(1, 6)  # Simulate 5 users
    n_users = len(user_ids)
    ratings = []
    for _, row in books_df.iterrows():
        book_id = row['id']
        avg_rating = row['averageRating'] if not pd.isna(row['averageRating']) else 3
        ratings_count = row['ratingsCount'] if not pd.isna(row['ratingsCount']) else 1
        # A NULL anywhere in ratingsCount makes the column float, and
        # range(2.0) raises TypeError; convert before the integer division.
        ratings_per_user = max(1, int(ratings_count) // n_users)
        for user_id in user_ids:
            if rng.rand() > 0.2:  # Leave some books unrated by each user
                for _ in range(ratings_per_user):
                    rating = rng.normal(loc=avg_rating, scale=1.0)
                    rating = min(max(round(rating), 1), 5)  # Ensure the rating is between 1 and 5
                    ratings.append((user_id, book_id, rating))
    ratings_df = pd.DataFrame(ratings, columns=['user_id', 'book_id', 'rating'])
    if ratings_df.empty:
        print("No ratings generated.")
    return ratings_df

def get_collaborative_recommendations(user_id, svd_model, books_df, ratings_df, num_recommendations=5):
    """Rank the books a user has not rated by the model's predicted rating.

    Args:
        user_id: User to recommend for.
        svd_model: Fitted model exposing ``predict(user_id, book_id).est``.
        books_df: DataFrame with ``id`` and ``title`` columns.
        ratings_df: DataFrame with ``user_id`` and ``book_id`` columns.
        num_recommendations: Maximum number of titles to return.

    Returns:
        List of book titles, highest predicted rating first.
    """
    book_ids = books_df['id'].unique()
    print(f"Total books: {len(book_ids)}")  # Check total books available

    book_titles = books_df.set_index('id')['title'].to_dict()
    # One pass over ratings_df instead of one boolean filter per book.
    seen_by_user = set(ratings_df.loc[ratings_df['user_id'] == user_id, 'book_id'])

    predictions = []
    for book_id in book_ids:
        if book_id in seen_by_user:
            print(f"Book {book_id} already rated by user {user_id}")
        else:
            predicted_rating = svd_model.predict(user_id, book_id).est
            predictions.append((book_id, predicted_rating))

    print(f"Total predictions made: {len(predictions)}")  # Check how many predictions were made
    predictions.sort(key=lambda x: x[1], reverse=True)
    recommended_books = [book_titles.get(book_id, "Unknown Book Title") for book_id, _ in predictions[:num_recommendations]]
    return recommended_books

def extract_full_book_info(book_data):
    """Pull id, title, averageRating and ratingsCount out of each response item.

    Items are read from ``book_data['items']`` (none when the key is absent).
    ``id`` and ``volumeInfo`` are required on every item; a missing title
    defaults to ``'N/A'`` and missing rating fields to ``None``.
    """
    def _summarise(item):
        # Index (not .get) so a malformed item still raises KeyError.
        item_id = item['id']
        volume = item['volumeInfo']
        return {
            'id': item_id,
            'title': volume.get('title', 'N/A'),
            'averageRating': volume.get('averageRating'),
            'ratingsCount': volume.get('ratingsCount'),
        }

    return list(map(_summarise, book_data.get('items', [])))

def save_books_to_db(books):
    """Store ``books`` in the ``books`` table of ``books.db``, replacing the table.

    Args:
        books: Records accepted by ``pd.DataFrame`` (here: a list of dicts).
    """
    conn = create_connection('books.db')
    if conn is None:
        print("Failed to connect to the database.")
        return
    try:
        pd.DataFrame(books).to_sql('books', conn, if_exists='replace', index=False)
        print("Books saved to database.")
    finally:
        # Close the handle on success and on failure; it was never closed before.
        conn.close()

def precision_recall_at_k(predictions, k=10, threshold=3.5):
    """Compute per-user precision@k and recall@k from rating predictions.

    Args:
        predictions: Iterable of 5-tuples ``(uid, iid, true_r, est, details)``.
        k: Number of top-estimated items considered per user.
        threshold: Rating at or above which an item counts as relevant
            (true rating) or recommended (estimated rating).

    Returns:
        ``(precisions, recalls)``: two dicts keyed by user id. A user with no
        recommended items in the top k gets precision 1; a user with no
        relevant items gets recall 1.
    """
    # Group (estimate, truth) pairs by user.
    pairs_by_user = defaultdict(list)
    for uid, _iid, true_r, est, _details in predictions:
        pairs_by_user[uid].append((est, true_r))

    precisions, recalls = {}, {}
    for uid, pairs in pairs_by_user.items():
        # Highest estimate first; the sort is stable, so ties keep input order.
        ranked = sorted(pairs, key=lambda pair: pair[0], reverse=True)
        top_k = ranked[:k]

        n_rel = sum(1 for _, true_r in ranked if true_r >= threshold)
        n_rec_k = sum(1 for est, _ in top_k if est >= threshold)
        n_rel_and_rec_k = sum(
            1 for est, true_r in top_k if true_r >= threshold and est >= threshold
        )

        precisions[uid] = 1 if n_rec_k == 0 else n_rel_and_rec_k / n_rec_k
        recalls[uid] = 1 if n_rel == 0 else n_rel_and_rec_k / n_rel

    return precisions, recalls

def collaborative_filtering(ratings_df):
    """Fit an SVD model on 80% of the ratings and report held-out metrics.

    Prints MAE, RMSE, and the mean Precision@5 / Recall@5 over users, all
    computed on the 20% test split.

    Returns:
        ``(svd, test_predictions)``: the fitted model and its predictions on
        the test split.
    """
    reader = Reader(rating_scale=(1, 5))
    data = Dataset.load_from_df(ratings_df, reader)
    trainset, testset = train_test_split(data, test_size=0.2)
    svd = SVD()
    svd.fit(trainset)

    # Calculate MAE and RMSE.
    # verbose=False: the accuracy helpers print their own line by default,
    # which made every metric appear twice in the output.
    test_predictions = svd.test(testset)
    print("MAE:", mae(test_predictions, verbose=False))
    print("RMSE:", rmse(test_predictions, verbose=False))

    # Calculate Precision@k and Recall@k
    k = 5
    precisions, recalls = precision_recall_at_k(test_predictions, k=k)
    print(f"Precision@{k}: {sum(precisions.values()) / len(precisions)}")
    print(f"Recall@{k}: {sum(recalls.values()) / len(recalls)}")

    return svd, test_predictions

def main():
    """Fetch books, simulate ratings, fit and evaluate SVD, then recommend for user 1.

    The API key is read from the ``GOOGLE_BOOKS_API_KEY`` environment variable
    rather than being stored in the notebook. When it is unset the key is
    ``None``; requests leaves ``None``-valued params out of the URL.
    """
    import os  # local import: keeps this cell independent of the import cell

    api_key = os.environ.get("GOOGLE_BOOKS_API_KEY")
    query = "Marketing Campaign Development"
    book_data = fetch_books(query, api_key)
    books = extract_full_book_info(book_data)
    save_books_to_db(books)

    conn = create_connection('books.db')
    try:
        ratings_df = simulate_user_ratings(conn)
        svd_model, test_predictions = collaborative_filtering(ratings_df)
        books_df = pd.read_sql_query("SELECT * FROM books", conn)
    finally:
        # Both frames are in memory; the database handle is no longer needed.
        if conn is not None:
            conn.close()

    user_id = 1
    recommendations = get_collaborative_recommendations(user_id, svd_model, books_df, ratings_df)
    print(f"Recommendations for user {user_id}: {recommendations}")

if __name__ == "__main__":
    main()


Connection to database successful
Books saved to database.
Connection to database successful
MAE:  0.9369
MAE: 0.9368740478365685
RMSE: 1.1017
RMSE: 1.1016753608982515
Precision@5: 0.9
Recall@5: 0.44000000000000006
Total books: 40
Book xFlXcScu-V8C already rated by user 1
Book gGgcSSr4d1IC already rated by user 1
Book ycRWDwAAQBAJ already rated by user 1
Book iZUeo2IM01kC already rated by user 1
Book l5OMJ6T53IQC already rated by user 1
Book bIN7AwAAQBAJ already rated by user 1
Book RhMvDAAAQBAJ already rated by user 1
Book mAuptAEACAAJ already rated by user 1
Book VEoa0AEACAAJ already rated by user 1
Book t2y8EAAAQBAJ already rated by user 1
Book XoMt2xlBQAAC already rated by user 1
Book XDoYcqmFAx8C already rated by user 1
Book joEkP6e_MVsC already rated by user 1
Book xAvPKgd-jMsC already rated by user 1
Book cEItElOnRScC already rated by user 1
Book GmmdEAAAQBAJ already rated by user 1
Book aqo_EAAAQBAJ already rated by user 1
Book yfZyAwAAQBAJ already rated by user 1
Book WB1OZRVe

K-Nearest Neighbors (KNN)


In [None]:
from surprise import KNNBasic
from surprise import accuracy
from surprise.model_selection import train_test_split

def knn_recommender(ratings_df):
    """Fit a user-based ``KNNBasic`` model and print its RMSE and MAE.

    The ``user_id``, ``book_id`` and ``rating`` columns are loaded with a 1-5
    rating scale and split with ``test_size=0.2``; the metrics are computed on
    the test part.

    Returns:
        The fitted ``KNNBasic`` model.
    """
    rating_columns = ['user_id', 'book_id', 'rating']
    dataset = Dataset.load_from_df(ratings_df[rating_columns], Reader(rating_scale=(1, 5)))
    train_part, test_part = train_test_split(dataset, test_size=0.2)

    # KNNBasic configured with cosine similarity between users.
    similarity = {'name': 'cosine', 'user_based': True}
    model = KNNBasic(sim_options=similarity)
    model.fit(train_part)

    held_out_predictions = model.test(test_part)
    accuracy.rmse(held_out_predictions)
    accuracy.mae(held_out_predictions)

    return model

def main_knn():
    """Fetch books, simulate ratings, fit the KNN model and recommend for user 1.

    The API key is read from the ``GOOGLE_BOOKS_API_KEY`` environment variable
    rather than being stored in the notebook. When it is unset the key is
    ``None``; requests leaves ``None``-valued params out of the URL.
    """
    import os  # local import: keeps this cell independent of the import cell

    api_key = os.environ.get("GOOGLE_BOOKS_API_KEY")
    query = "Marketing Campaign Development"
    book_data = fetch_books(query, api_key)
    books = extract_full_book_info(book_data)
    save_books_to_db(books)

    conn = create_connection('books.db')
    try:
        ratings_df = simulate_user_ratings(conn)
        knn_model = knn_recommender(ratings_df)
        books_df = pd.read_sql_query("SELECT * FROM books", conn)
    finally:
        # Both frames are in memory; the database handle is no longer needed.
        if conn is not None:
            conn.close()

    user_id = 1
    recommendations = get_collaborative_recommendations(user_id, knn_model, books_df, ratings_df)
    print(f"Recommendations for user {user_id}: {recommendations}")

if __name__ == "__main__":
    main_knn()

Connection to database successful
Books saved to database.
Connection to database successful
Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 1.1654
MAE:  0.8875
Total books: 40
Book xFlXcScu-V8C already rated by user 1
Book JOTtAAAAMAAJ already rated by user 1
Book gGgcSSr4d1IC already rated by user 1
Book ycRWDwAAQBAJ already rated by user 1
Book iZUeo2IM01kC already rated by user 1
Book l5OMJ6T53IQC already rated by user 1
Book bIN7AwAAQBAJ already rated by user 1
Book VEoa0AEACAAJ already rated by user 1
Book t2y8EAAAQBAJ already rated by user 1
Book XoMt2xlBQAAC already rated by user 1
Book XDoYcqmFAx8C already rated by user 1
Book joEkP6e_MVsC already rated by user 1
Book xAvPKgd-jMsC already rated by user 1
Book cEItElOnRScC already rated by user 1
Book GmmdEAAAQBAJ already rated by user 1
Book mc5CR1di40cC already rated by user 1
Book yfZyAwAAQBAJ already rated by user 1
Book vAe_yQEACAAJ already rated by user 1
Book UOT4DwAAQBAJ already rated b

Matrix Factorization using Alternating Least Squares (ALS)

In [None]:
import pandas as pd
import implicit
from scipy.sparse import csr_matrix

def als_recommender(ratings_df):
    """Fit an implicit ALS model on a user x book matrix built from ``ratings_df``.

    Args:
        ratings_df: DataFrame with ``user_id``, ``book_id`` and ``rating``
            columns. It is not modified.

    Returns:
        ``(model, user_item_matrix, user_id_map, book_id_map)`` where the maps
        translate stringified original ids to matrix row / column indices.
    """
    user_keys = ratings_df['user_id'].astype(str)
    book_keys = ratings_df['book_id'].astype(str)

    # Create mappings for user_id and book_id to integer indices
    user_id_map = {user_id: idx for idx, user_id in enumerate(user_keys.unique())}
    book_id_map = {book_id: idx for idx, book_id in enumerate(book_keys.unique())}

    # Build the index arrays locally rather than overwriting the caller's
    # columns: after an in-place rewrite a second call would map the already
    # re-indexed ids and produce maps that no longer match the real ids.
    row_idx = user_keys.map(user_id_map).to_numpy()
    col_idx = book_keys.map(book_id_map).to_numpy()

    # Convert data frame to sparse matrix (duplicate user/book pairs are summed).
    user_item_matrix = csr_matrix(
        (ratings_df['rating'].to_numpy(), (row_idx, col_idx)),
        shape=(len(user_id_map), len(book_id_map)),
    )

    model = implicit.als.AlternatingLeastSquares(factors=20, regularization=0.1, iterations=50)
    model.fit(user_item_matrix)

    return model, user_item_matrix, user_id_map, book_id_map

def get_als_recommendations(user_id, model, user_item_matrix, user_id_map, book_id_map, books_df, num_recommendations=5):
    """Translate the ALS model's recommendations for ``user_id`` into book titles.

    Args:
        user_id: Original user id; looked up in ``user_id_map`` as a string.
        model: Object whose ``recommend(row, user_row, N=...)`` returns
            ``(item_indices, scores)``.
        user_item_matrix: Matrix indexed by internal user row.
        user_id_map: Maps stringified user ids to matrix rows.
        book_id_map: Maps book ids to matrix columns.
        books_df: DataFrame with ``id`` and ``title`` columns.
        num_recommendations: Number of items requested from the model.

    Returns:
        List of titles in the order returned by the model; a book id without a
        title entry yields ``"Unknown Book Title"``.

    Raises:
        KeyError: if ``user_id`` is not present in ``user_id_map``.
    """
    user_key = str(user_id)
    if user_key not in user_id_map:
        raise KeyError(f"User ID {user_id} not found in user_id_map.")
    row = user_id_map[user_key]

    item_indices, _scores = model.recommend(row, user_item_matrix[row], N=num_recommendations)

    title_by_id = books_df.set_index('id')['title'].to_dict()
    # Invert the book map so matrix columns can be turned back into book ids.
    index_to_book_id = {index: book_id for book_id, index in book_id_map.items()}

    return [title_by_id.get(index_to_book_id[index], "Unknown Book Title") for index in item_indices]


def main_als():
    """Fetch books, simulate ratings, fit the ALS model and recommend for user 2.

    The API key is read from the ``GOOGLE_BOOKS_API_KEY`` environment variable
    rather than being stored in the notebook. When it is unset the key is
    ``None``; requests leaves ``None``-valued params out of the URL.
    """
    import os  # local import: keeps this cell independent of the import cell

    api_key = os.environ.get("GOOGLE_BOOKS_API_KEY")
    query = "Marketing Campaign Development"
    book_data = fetch_books(query, api_key)
    books = extract_full_book_info(book_data)
    save_books_to_db(books)

    conn = create_connection('books.db')
    try:
        ratings_df = simulate_user_ratings(conn)
        books_df = pd.read_sql_query("SELECT * FROM books", conn)
    finally:
        # Both frames are in memory; the database handle is no longer needed.
        if conn is not None:
            conn.close()

    # Debug: Print the initial ratings_df to check its content
    print("Initial ratings_df:")
    print(ratings_df.head())

    als_model, user_item_matrix, user_id_map, book_id_map = als_recommender(ratings_df)

    # Debug: Print user_id_map to check its content
    print("User ID Map:")
    print(user_id_map)

    user_id = 2  # Adjust this to match an existing user in your ratings_df
    recommendations = get_als_recommendations(user_id, als_model, user_item_matrix, user_id_map, book_id_map, books_df)
    print(f"Recommendations for user {user_id}: {recommendations}")

if __name__ == "__main__":
    main_als()


Connection to database successful
Books saved to database.
Connection to database successful
Initial ratings_df:
   user_id       book_id  rating
0        1  xFlXcScu-V8C       2
1        2  xFlXcScu-V8C       3
2        3  xFlXcScu-V8C       4
3        4  xFlXcScu-V8C       4
4        5  xFlXcScu-V8C       2


  0%|          | 0/50 [00:00<?, ?it/s]

User ID Map:
{'1': 0, '2': 1, '3': 2, '4': 3, '5': 4}
Recommendations for user 2: ['Advertising Campaign Planning', 'Buy Now', 'Hands-On Social Marketing', 'Social Media Marketing For Dummies', 'Critical Success Factors of Online Marketing Campaign']
