# Important

`make data/processed/similar_books.csv` has to be run before any cell in this notebook

# Imports

In [None]:
import pandas as pd

# Helper functions

In [None]:
def retrieve_similar_books(book_id, similar_books_data, book_data):
    """Print the title of a book followed by the titles of its similar books.

    Args:
        book_id: Identifier of the selected book; must be a label in the
            index of ``book_data``.
        similar_books_data: DataFrame with a ``similar_book_id`` column and
            ``book_id`` available either as a column or as a named index
            level.
        book_data: DataFrame indexed by book id with a ``title`` column.

    Raises:
        KeyError: If ``book_id`` or one of its similar book ids is not in
            the index of ``book_data``, or if ``similar_books_data`` has no
            ``book_id`` column or index level.
    """
    # Accept ``book_id`` as a column or as an index level. A boolean mask
    # avoids re-grouping the whole frame on every call and yields an empty
    # selection (instead of a KeyError) for a book without any similar-book
    # rows.
    if 'book_id' in similar_books_data.columns:
        owner_ids = similar_books_data['book_id'].values
    else:
        owner_ids = similar_books_data.index.get_level_values('book_id')
    similar_books = similar_books_data.loc[
        owner_ids == book_id, 'similar_book_id']

    selected_book_title = book_data.loc[book_id, 'title']

    print(f'Selected Book: {selected_book_title}:\n')
    print('\tSimilar Books:\n')

    if similar_books.empty:
        # Keep the listing going for the rest of a series rather than
        # aborting on the first book that has no entries.
        print('\t\t(no similar books found)')
        return

    similar_books_titles = book_data.loc[similar_books, 'title'].values
    for similar_book_title in similar_books_titles:
        print(f'\t\t{similar_book_title}')

In [None]:
def get_series_similar_books(series_ids, similar_books_data, book_data):
    """Print the similar-books listing for every book id in a series.

    Calls ``retrieve_similar_books`` once per id in ``series_ids`` (in
    iteration order) and prints a 90-character dashed separator line after
    each listing.

    Args:
        series_ids: Iterable of book ids to report on.
        similar_books_data: Passed through to ``retrieve_similar_books``.
        book_data: Passed through to ``retrieve_similar_books``.
    """
    divider = '-' * 90 + '\n'
    for current_id in series_ids:
        retrieve_similar_books(current_id, similar_books_data, book_data)
        print(divider)

# Analysis

In [None]:
# Load both processed tables with ``book_id`` as the index, so rows can be
# looked up by book id via ``.loc``.
similar_books_data = pd.read_csv(
    '../data/processed/similar_books.csv',
    index_col='book_id',
)
book_data = pd.read_csv(
    '../data/processed/book.csv',
    index_col='book_id',
)

In [None]:
# Preview the first five rows of the similar-books table.
similar_books_data.head(n=5)

# Popular book series

To check whether the data from the `similar books` tag is appropriate, popular book series from different genres are analysed manually. A proposed similar book is considered a valid entry if it belongs to the same series or is similar in terms of setting, plot, or genre.

## Harry Potter

In [None]:
# book_id values for the Harry Potter series
# (presumably labels in the index of book.csv; verify against its titles).
harry_potter_book_ids = [
    2,
    18,
    21,
    23,
    24,
    25,
    27,
]

In [None]:
# Print the similar-books listing for each Harry Potter book id.
get_series_similar_books(
    series_ids=harry_potter_book_ids,
    similar_books_data=similar_books_data,
    book_data=book_data,
)

## Twilight

In [None]:
# book_id values for the Twilight series
# (presumably labels in the index of book.csv; verify against its titles).
twilight_book_ids = [
    3,
    49,
    52,
]

In [None]:
# Print the similar-books listing for each Twilight book id.
get_series_similar_books(
    series_ids=twilight_book_ids,
    similar_books_data=similar_books_data,
    book_data=book_data,
)

## Robert Langdon series - Dan Brown

In [None]:
# book_id values for Dan Brown's Robert Langdon series
# (presumably labels in the index of book.csv; verify against its titles).
robert_langdon_book_ids = [
    9,
    26,
    201,
    240,
]

In [None]:
# Print the similar-books listing for each Robert Langdon book id.
get_series_similar_books(
    series_ids=robert_langdon_book_ids,
    similar_books_data=similar_books_data,
    book_data=book_data,
)

## Winnie the Pooh

In [None]:
# book_id values for the Winnie the Pooh books
# (presumably labels in the index of book.csv; verify against its titles).
winnie_the_pooh_book_ids = [
    444,
    1545,
    2648,
    7481,
    7696,
]

In [None]:
# Print the similar-books listing for each Winnie the Pooh book id.
get_series_similar_books(
    series_ids=winnie_the_pooh_book_ids,
    similar_books_data=similar_books_data,
    book_data=book_data,
)

# Summary

The `similar books` data usually does not contain books from the same series. However, the remaining books do seem to be similar in terms of setting or plot, e.g. books about `vampires` in the case of Twilight, or children's books in the case of Winnie the Pooh.