In [2]:
import matplotlib.pyplot as plt
import numpy as np
from sentence_transformers import SentenceTransformer
import pandas as pd
import seaborn as sns
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import PCA

# Global matplotlib look-and-feel: start from the 'ggplot' style sheet, then
# override individual rcParams in one mapping (applied in the listed order).
plt.style.use('ggplot')
plt.rcParams.update({
    # Fonts.
    # NOTE(review): the family is 'sans-serif' but the typeface is assigned to
    # 'font.serif' -- confirm whether 'font.sans-serif' was intended.
    'font.family': 'sans-serif',
    'font.serif': 'Ubuntu',
    'font.monospace': 'Ubuntu Mono',
    'font.size': 14,
    # Axis labels, titles and tick labels.
    'axes.labelsize': 12,
    'axes.labelweight': 'bold',
    'axes.titlesize': 12,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'legend.fontsize': 12,
    'figure.titlesize': 12,
    # Image defaults.
    'image.cmap': 'jet',
    'image.interpolation': 'none',
    # Figure, grid and line defaults.
    'figure.figsize': (12, 10),
    'axes.grid': True,
    'lines.linewidth': 2,
    'lines.markersize': 8,
})
# Named colours from matplotlib's XKCD palette, for use in later plots.
# XKCD names are written 'xkcd:<name>' with no space after the colon; the
# goldenrod entry previously had a stray space, which made it an invalid name.
colors = [
    'xkcd:pale orange',
    'xkcd:sea blue',
    'xkcd:pale red',
    'xkcd:sage green',
    'xkcd:terra cotta',
    'xkcd:dull purple',
    'xkcd:teal',
    'xkcd:goldenrod',
    'xkcd:cadet blue',
    'xkcd:scarlet',
]

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load the book table from a CSV given by relative path (resolved against the
# current working directory), then preview the first five rows.
df = pd.read_csv(filepath_or_buffer='HorrorCleanedAndJoined.csv')
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Title,Author,Year,Rating,Raters,Genres,Description
0,0,The Shining (Paperback),Stephen King,1977,4.26,1342128,"Horror,Fiction,Thriller,Classics,Fantasy,Fanta...",Jack Torrance's new job at the Overlook Hotel ...
1,1,It (Paperback),Stephen King,1984,4.25,988428,"Horror,Fiction,Thriller,Fantasy,Audiobook,Clas...","Welcome to Derry, Maine ...It’s a small city, ..."
2,2,Dracula (Paperback),Bram Stoker,1897,4.01,1134432,"Classics,Horror,Fiction,Fantasy,Paranormal,Got...",You can find an alternative cover edition for ...
3,3,Carrie (Mass Market Paperback),Stephen King,1974,3.98,636575,"Horror,Fiction,Thriller,Fantasy,Fantasy,Classi...","A modern classic, Carrie introduced a distinct..."
4,4,Pet Sematary (Kindle Edition),Stephen King,1983,4.04,521288,"Horror,Fiction,Thriller,Fantasy,Audiobook,Fant...",'This is an alternate Cover Edition for ASIN: ...


In [5]:
# Remove the "Unnamed: 0" column (presumably a row index saved by an earlier
# export -- its previewed values mirror the row numbers).
# errors="ignore" makes the cell safe to re-run: once the column is gone a
# second execution is a no-op instead of raising KeyError.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")
df.head()

Unnamed: 0,Title,Author,Year,Rating,Raters,Genres,Description
0,The Shining (Paperback),Stephen King,1977,4.26,1342128,"Horror,Fiction,Thriller,Classics,Fantasy,Fanta...",Jack Torrance's new job at the Overlook Hotel ...
1,It (Paperback),Stephen King,1984,4.25,988428,"Horror,Fiction,Thriller,Fantasy,Audiobook,Clas...","Welcome to Derry, Maine ...It’s a small city, ..."
2,Dracula (Paperback),Bram Stoker,1897,4.01,1134432,"Classics,Horror,Fiction,Fantasy,Paranormal,Got...",You can find an alternative cover edition for ...
3,Carrie (Mass Market Paperback),Stephen King,1974,3.98,636575,"Horror,Fiction,Thriller,Fantasy,Fantasy,Classi...","A modern classic, Carrie introduced a distinct..."
4,Pet Sematary (Kindle Edition),Stephen King,1983,4.04,521288,"Horror,Fiction,Thriller,Fantasy,Audiobook,Fant...",'This is an alternate Cover Edition for ASIN: ...


In [11]:
# Copy the 'Description' column into a NumPy array; this is the text that gets embedded.
X = np.array(df['Description'])

In [6]:
# Keep only the columns that the recommendation helper reads.
data = df.loc[:, ['Title', 'Description', 'Genres']]
data.head()

Unnamed: 0,Title,Description,Genres
0,The Shining (Paperback),Jack Torrance's new job at the Overlook Hotel ...,"Horror,Fiction,Thriller,Classics,Fantasy,Fanta..."
1,It (Paperback),"Welcome to Derry, Maine ...It’s a small city, ...","Horror,Fiction,Thriller,Fantasy,Audiobook,Clas..."
2,Dracula (Paperback),You can find an alternative cover edition for ...,"Classics,Horror,Fiction,Fantasy,Paranormal,Got..."
3,Carrie (Mass Market Paperback),"A modern classic, Carrie introduced a distinct...","Horror,Fiction,Thriller,Fantasy,Fantasy,Classi..."
4,Pet Sematary (Kindle Edition),'This is an alternate Cover Edition for ASIN: ...,"Horror,Fiction,Thriller,Fantasy,Audiobook,Fant..."


In [12]:
# Build the sentence encoder from the named pretrained checkpoint
# (the recorded output shows it being downloaded on first use).
model = SentenceTransformer('distilbert-base-nli-mean-tokens')

# Encode every description; `text_data` is just another name for `X`.
text_data = X
embeddings = model.encode(text_data, show_progress_bar=True)

Downloading: 100%|██████████| 690/690 [00:00<00:00, 691kB/s]
Downloading: 100%|██████████| 190/190 [00:00<00:00, 190kB/s]
Downloading: 100%|██████████| 3.99k/3.99k [00:00<00:00, 3.99MB/s]
Downloading: 100%|██████████| 550/550 [00:00<00:00, 542kB/s]
Downloading: 100%|██████████| 122/122 [00:00<00:00, 115kB/s]
Downloading: 100%|██████████| 265M/265M [23:05<00:00, 192kB/s]    
Downloading: 100%|██████████| 53.0/53.0 [00:00<00:00, 51.0kB/s]
Downloading: 100%|██████████| 112/112 [00:00<00:00, 109kB/s]
Downloading: 100%|██████████| 466k/466k [00:02<00:00, 199kB/s]  
Downloading: 100%|██████████| 450/450 [00:00<00:00, 450kB/s]
Downloading: 100%|██████████| 232k/232k [00:01<00:00, 116kB/s]  
Downloading: 100%|██████████| 229/229 [00:00<00:00, 115kB/s]
Batches: 100%|██████████| 40/40 [00:53<00:00,  1.33s/it]


In [14]:
# Alias for the encoder output; the similarity step below reads `embed_data`.
embed_data = embeddings

In [16]:
# Pairwise cosine similarity between all embeddings, wrapped in a DataFrame so
# rows can be looked up by label. The frame gets pandas' default integer
# labels -- presumably these line up with `data`'s index; verify if `df` is
# ever filtered or re-indexed before this point.
cos_sim_data = pd.DataFrame(
    data=cosine_similarity(embed_data),
)
def give_recommendations(index,print_recommendation = False,print_recommendation_plots= False,print_genres =False,top_n = 5):
  """Return the books whose description embeddings are most similar to one book.

  Reads two notebook-level objects: `cos_sim_data` (the pairwise similarity
  table) and `data` (a frame with 'Title', 'Description' and 'Genres' columns).

  Parameters
  ----------
  index
      Label of the query book; looked up with `.loc` in both `cos_sim_data`
      and `data`.
  print_recommendation : bool, default False
      If truthy, print the query title followed by the ranked recommended titles.
  print_recommendation_plots : bool, default False
      If truthy, print the query description followed by the ranked
      recommended descriptions.
  print_genres : bool, default False
      If truthy, print the query genres followed by the ranked recommended genres.
  top_n : int, default 5
      Number of recommendations to return.

  Returns
  -------
  dict
      {'Books': array of recommended titles, 'Index': list of their labels},
      both ordered from most to least similar.
  """
  # Exclude the query book by label rather than by slicing off the first
  # sorted entry: when another row ties with it (e.g. an identical
  # description), the query book is not guaranteed to sort first.
  similarities = cos_sim_data.loc[index].drop(index)
  index_recomm = similarities.sort_values(ascending=False).index.tolist()[:top_n]
  books_recomm = data['Title'].loc[index_recomm].values
  result = {'Books':books_recomm,'Index':index_recomm}
  if print_recommendation:
    print('The read book is this one: %s \n'%(data['Title'].loc[index]))
    # enumerate supplies the rank; the previous hand-kept counter was never
    # incremented, so every line was printed as "number 1".
    for rank, book in enumerate(books_recomm, start=1):
      print('The number %i recommended book is this one: %s \n'%(rank,book))
  if print_recommendation_plots:
    print('The plot of the read book is this one:\n %s \n'%(data['Description'].loc[index]))
    for rank, rec_index in enumerate(index_recomm, start=1):
      plot_q = data['Description'].loc[rec_index]
      print('The plot of the number %i recommended book is this one:\n %s \n'%(rank,plot_q))
  if print_genres:
    print('The genres of the read book is this one:\n %s \n'%(data['Genres'].loc[index]))
    for rank, rec_index in enumerate(index_recomm, start=1):
      genres_q = data['Genres'].loc[rec_index]
      # Label fixed: this branch prints genres, not plots.
      print('The genres of the number %i recommended book is this one:\n %s \n'%(rank,genres_q))
  return result

In [21]:
# Recommendations for the book at label 5, printing the recommended titles.
give_recommendations(5, print_recommendation=True)

The read book is this one: It (Paperback) 

The number 1 recommended book is this one: The Ghost Tree (Paperback) 

The number 1 recommended book is this one: Seed (ebook) 

The number 1 recommended book is this one: Black Mouth (Paperback) 

The number 1 recommended book is this one: Storm of the Century (Paperback) 

The number 1 recommended book is this one: The Pallbearers Club (Hardcover) 



{'Books': array(['The Ghost Tree (Paperback)', 'Seed (ebook)',
        'Black Mouth (Paperback)', 'Storm of the Century (Paperback)',
        'The Pallbearers Club (Hardcover)'], dtype=object),
 'Index': [624, 337, 1158, 569, 528]}

In [22]:
# Same query, this time printing the descriptions of the recommended books.
give_recommendations(5, print_recommendation_plots=True)

The plot of the watched book is this one:
 Welcome to Derry, Maine ...It’s a small city, a place as hauntingly familiar as your own hometown. Only in Derry the haunting is real ...They were seven teenagers when they first stumbled upon the horror. Now they are grown-up men and women who have gone out into the big world to gain success and happiness. But none of them can withstand the force that has drawn them back to Derry to face the nightmare without an end, and the evil without a name. 

The plot of the number 1 recommended book is this one:
 When people go missing in the sleepy town of Smith's Hollow, the only clue to their fate comes when a teenager starts having terrifying visions, in a chilling horror novel from national bestselling author Christina Henry.When the bodies of two girls are found torn apart in the town of Smiths Hollow, Lauren is surprised, but she also expects that the police won't find the killer. After all, the year before her father's body was found with his he

{'Books': array(['The Ghost Tree (Paperback)', 'Seed (ebook)',
        'Black Mouth (Paperback)', 'Storm of the Century (Paperback)',
        'The Pallbearers Club (Hardcover)'], dtype=object),
 'Index': [624, 337, 1158, 569, 528]}

In [23]:
# Same query, this time printing the genre lists of the recommended books.
give_recommendations(5, print_genres=True)

The genres of the read book is this one:
 Horror,Fiction,Thriller,Fantasy,Audiobook,Classics,Mystery,Adult,Fantasy,Fantasy 

The plot of the number 1 recommended book is this one:
 Horror,Mystery,Fantasy,Fiction,Young Adult,Fantasy,Thriller,Paranormal,Fantasy,Adult 

The plot of the number 2 recommended book is this one:
 Horror,Fiction,Fantasy,Fantasy,Thriller,Audiobook,Fantasy,Paranormal,Mystery,Thriller 

The plot of the number 3 recommended book is this one:
 Horror,Fiction,Fantasy,Thriller,Fantasy,Audiobook,Mystery,Adult,Young Adult,Fantasy 

The plot of the number 4 recommended book is this one:
 Horror,Fiction,Thriller,Fantasy,Plays,Suspense,Drama,Fantasy,Mystery,Fantasy 

The plot of the number 5 recommended book is this one:
 Horror,Fiction,Thriller,Mystery,Adult,Thriller,Audiobook,Paranormal,Fantasy,Fantasy 



{'Books': array(['The Ghost Tree (Paperback)', 'Seed (ebook)',
        'Black Mouth (Paperback)', 'Storm of the Century (Paperback)',
        'The Pallbearers Club (Hardcover)'], dtype=object),
 'Index': [624, 337, 1158, 569, 528]}