In [1]:
import matplotlib.pyplot as plt
import numpy as np
from sentence_transformers import SentenceTransformer
import pandas as pd
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import PCA

# Start from the ggplot theme, then layer the notebook-wide overrides on top.
plt.style.use('ggplot')
plt.rcParams.update({
    # --- fonts ---
    'font.family': 'sans-serif',
    # NOTE(review): the active family is 'sans-serif', so this serif entry
    # probably has no visible effect; 'font.sans-serif' may have been
    # intended -- confirm before changing.
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 14,
    # --- axes, ticks, legend and figure text ---
    'axes.labelsize': 12,
    'axes.labelweight': 'bold',
    'axes.titlesize': 12,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'legend.fontsize': 12,
    'figure.titlesize': 12,
    # --- images ---
    'image.cmap': 'jet',
    'image.interpolation': 'none',
    # --- figure geometry and line appearance ---
    'figure.figsize': (12, 10),
    'axes.grid': True,
    'lines.linewidth': 2,
    'lines.markersize': 8,
})
# Named xkcd colours available for plots in this notebook.
# Matplotlib expects the form 'xkcd:<name>' with no space after the colon;
# the goldenrod entry previously had one and would be rejected as a colour.
colors = [
    'xkcd:pale orange',
    'xkcd:sea blue',
    'xkcd:pale red',
    'xkcd:sage green',
    'xkcd:terra cotta',
    'xkcd:dull purple',
    'xkcd:teal',
    'xkcd:goldenrod',
    'xkcd:cadet blue',
    'xkcd:scarlet',
]

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the cleaned horror-books table and preview its first rows.
df = pd.read_csv(filepath_or_buffer='Cleaned/HorrorCleanedAndJoined.csv')
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Title,Author,Year,Rating,Raters,Genres,Description
0,0,The Shining (Paperback),Stephen King,1977,4.26,1342128,"Horror,Fiction,Thriller,Classics,Fantasy,Fanta...",Jack Torrance's new job at the Overlook Hotel ...
1,1,It (Paperback),Stephen King,1984,4.25,988428,"Horror,Fiction,Thriller,Fantasy,Audiobook,Clas...","Welcome to Derry, Maine ...It’s a small city, ..."
2,2,Dracula (Paperback),Bram Stoker,1897,4.01,1134432,"Classics,Horror,Fiction,Fantasy,Paranormal,Got...",You can find an alternative cover edition for ...
3,3,Carrie (Mass Market Paperback),Stephen King,1974,3.98,636575,"Horror,Fiction,Thriller,Fantasy,Fantasy,Classi...","A modern classic, Carrie introduced a distinct..."
4,4,Pet Sematary (Kindle Edition),Stephen King,1983,4.04,521288,"Horror,Fiction,Thriller,Fantasy,Audiobook,Fant...",'This is an alternate Cover Edition for ASIN: ...


In [3]:
# Drop the stray "Unnamed: 0" column (it appears to be a row index that was
# saved into the CSV -- confirm). errors="ignore" keeps this cell idempotent:
# re-running it after the column is already gone no longer raises KeyError.
df = df.drop(columns="Unnamed: 0", errors="ignore")
df.head()

Unnamed: 0,Title,Author,Year,Rating,Raters,Genres,Description
0,The Shining (Paperback),Stephen King,1977,4.26,1342128,"Horror,Fiction,Thriller,Classics,Fantasy,Fanta...",Jack Torrance's new job at the Overlook Hotel ...
1,It (Paperback),Stephen King,1984,4.25,988428,"Horror,Fiction,Thriller,Fantasy,Audiobook,Clas...","Welcome to Derry, Maine ...It’s a small city, ..."
2,Dracula (Paperback),Bram Stoker,1897,4.01,1134432,"Classics,Horror,Fiction,Fantasy,Paranormal,Got...",You can find an alternative cover edition for ...
3,Carrie (Mass Market Paperback),Stephen King,1974,3.98,636575,"Horror,Fiction,Thriller,Fantasy,Fantasy,Classi...","A modern classic, Carrie introduced a distinct..."
4,Pet Sematary (Kindle Edition),Stephen King,1983,4.04,521288,"Horror,Fiction,Thriller,Fantasy,Audiobook,Fant...",'This is an alternate Cover Edition for ASIN: ...


In [4]:
# Book descriptions as a NumPy array; this is the text that gets embedded.
X = np.array(df['Description'])

In [5]:
# Keep only the columns that give_recommendations looks up.
data = df.loc[:, ['Title', 'Description', 'Genres']]
data.head(n=5)

Unnamed: 0,Title,Description,Genres
0,The Shining (Paperback),Jack Torrance's new job at the Overlook Hotel ...,"Horror,Fiction,Thriller,Classics,Fantasy,Fanta..."
1,It (Paperback),"Welcome to Derry, Maine ...It’s a small city, ...","Horror,Fiction,Thriller,Fantasy,Audiobook,Clas..."
2,Dracula (Paperback),You can find an alternative cover edition for ...,"Classics,Horror,Fiction,Fantasy,Paranormal,Got..."
3,Carrie (Mass Market Paperback),"A modern classic, Carrie introduced a distinct...","Horror,Fiction,Thriller,Fantasy,Fantasy,Classi..."
4,Pet Sematary (Kindle Edition),'This is an alternate Cover Edition for ASIN: ...,"Horror,Fiction,Thriller,Fantasy,Audiobook,Fant..."


In [6]:
# Sentence-embedding model used to vectorise the book descriptions.
model = SentenceTransformer('distilbert-base-nli-mean-tokens')

# Encode the descriptions held in X, showing a progress bar while batches run.
text_data = X
embeddings = model.encode(text_data, show_progress_bar=True)

Batches: 100%|██████████| 40/40 [00:54<00:00,  1.35s/it]


In [7]:
# Alias for the description embeddings; the similarity matrix is built from this name.
embed_data = embeddings

In [8]:
# Pairwise cosine similarity between the rows of embed_data, wrapped in a
# DataFrame with default integer labels so give_recommendations can look up a
# book's scores with .loc[index].
# NOTE(review): this assumes `data` also uses default 0..n-1 row labels -- confirm.
cos_sim_data = pd.DataFrame(cosine_similarity(embed_data))
def give_recommendations(index, print_recommendation=False, print_recommendation_plots=False,
                         print_genres=False, n_recommendations=5):
    """Recommend the books whose descriptions are most similar to a given book.

    Similarity scores are read from the module-level ``cos_sim_data`` frame;
    titles, descriptions and genres come from the module-level ``data`` frame.
    Both are looked up by row label.

    Parameters
    ----------
    index : label
        Row label of the book that was read.
    print_recommendation : bool, default False
        Print the title of the read book and of every recommended book.
    print_recommendation_plots : bool, default False
        Print the description of the read book and of every recommended book.
    print_genres : bool, default False
        Print the genres of the read book and of every recommended book.
    n_recommendations : int, default 5
        Maximum number of books to recommend.

    Returns
    -------
    dict
        ``{'Books': array of titles, 'Index': list of row labels}``, ordered
        from most to least similar.

    Raises
    ------
    KeyError
        If ``index`` is not a label of ``cos_sim_data``.
    """
    # Remove the query book by label instead of slicing off position 0: when
    # another book ties with it at the top score (e.g. an identical
    # description), the query book is not guaranteed to sort first and would
    # otherwise be recommended to itself.
    scores = cos_sim_data.loc[index].drop(index)
    index_recomm = scores.sort_values(ascending=False).head(n_recommendations).index.tolist()
    books_recomm = data['Title'].loc[index_recomm].values
    result = {'Books': books_recomm, 'Index': index_recomm}

    if print_recommendation:
        print('The read book is this one: %s \n' % (data['Title'].loc[index]))
        # enumerate supplies the rank; the old hand-kept counter was never
        # incremented here, so every line was printed as "number 1".
        for rank, book in enumerate(books_recomm, start=1):
            print('The number %i recommended book is this one: %s \n' % (rank, book))

    if print_recommendation_plots:
        print('The plot of the read book is this one:\n %s \n' % (data['Description'].loc[index]))
        for rank, label in enumerate(index_recomm, start=1):
            plot_q = data['Description'].loc[label]
            print('The plot of the number %i recommended book is this one:\n %s \n' % (rank, plot_q))

    if print_genres:
        print('The genres of the read book is this one:\n %s \n' % (data['Genres'].loc[index]))
        for rank, label in enumerate(index_recomm, start=1):
            genres_q = data['Genres'].loc[label]
            # This line used to be labelled "plot" although it prints genres.
            print('The genres of the number %i recommended book is this one:\n %s \n' % (rank, genres_q))

    return result

In [9]:
# Titles of the five books closest to the book at row 5.
give_recommendations(5, print_recommendation=True)

The read book is this one: Frankenstein: The 1818 Text (Paperback) 

The number 1 recommended book is this one: The Complete Fiction (Leather Bound) 

The number 1 recommended book is this one: The Imago Sequence and Other Stories (Hardcover) 

The number 1 recommended book is this one: The Thing on the Doorstep and Other Weird Stories (Paperback) 

The number 1 recommended book is this one: City Infernal (City Infernal, #1) 

The number 1 recommended book is this one: The Lighthouse Witches (Paperback) 



{'Books': array(['The Complete Fiction (Leather Bound)',
        'The Imago Sequence and Other Stories (Hardcover)',
        'The Thing on the Doorstep and Other Weird Stories (Paperback)',
        'City Infernal (City Infernal, #1)',
        'The Lighthouse Witches (Paperback)'], dtype=object),
 'Index': [388, 525, 657, 1139, 841]}

In [10]:
# Same query, printing the descriptions instead of the titles.
give_recommendations(5, print_recommendation_plots=True)

The plot of the watched book is this one:
 This is a previously-published edition of ISBN 9780143131847.Mary Shelley's seminal novel of the scientist whose creation becomes a monsterThis edition is the original 1818 text, which preserves the hard-hitting and politically charged aspects of Shelley's original writing, as well as her unflinching wit and strong female voice. This edition also includes a new introduction and suggestions for further reading by author and Shelley expert Charlotte Gordon, literary excerpts and reviews selected by Gordon and a chronology and essay by preeminent Shelley scholar Charles E. Robinson. 

The plot of the number 1 recommended book is this one:
 Another excellent edition in the Knickerbocker Classic series, The Complete Fiction of H.P. Lovecraft collects the author's novel, four novellas, and fifty-three short stories. Written between the years 1917 and 1935, this collection features Lovecraft's trademark fantastical creatures and supernatural thrills,

{'Books': array(['The Complete Fiction (Leather Bound)',
        'The Imago Sequence and Other Stories (Hardcover)',
        'The Thing on the Doorstep and Other Weird Stories (Paperback)',
        'City Infernal (City Infernal, #1)',
        'The Lighthouse Witches (Paperback)'], dtype=object),
 'Index': [388, 525, 657, 1139, 841]}

In [11]:
# Same query, printing the genres of the read and recommended books.
give_recommendations(5, print_genres=True)

The genres of the read book is this one:
 Classics,Fiction,Horror,Science Fiction,Gothic,Fantasy,Academic,Literature,Novels,Academic 

The plot of the number 1 recommended book is this one:
 Horror,Fiction,Classics,Fantasy,Short Stories,Science Fiction,Anthologies,Anthologies,Fantasy,Horror 

The plot of the number 2 recommended book is this one:
 Horror,Short Stories,Fiction,Fantasy,Fantasy,Horror,Anthologies,Anthologies,Speculative Fiction,Short Stories 

The plot of the number 3 recommended book is this one:
 Horror,Short Stories,Fiction,Classics,Fantasy,Science Fiction,Fantasy,Gothic,Horror,Literature 

The plot of the number 4 recommended book is this one:
 Horror,Fiction,Fantasy,Horror,Paranormal,Fantasy,Fantasy,Fantasy,Fantasy,Death 

The plot of the number 5 recommended book is this one:
 Fantasy,Horror,Mystery,Fiction,Paranormal,Gothic,Thriller,Fantasy,Thriller,Audiobook 



{'Books': array(['The Complete Fiction (Leather Bound)',
        'The Imago Sequence and Other Stories (Hardcover)',
        'The Thing on the Doorstep and Other Weird Stories (Paperback)',
        'City Infernal (City Infernal, #1)',
        'The Lighthouse Witches (Paperback)'], dtype=object),
 'Index': [388, 525, 657, 1139, 841]}