In [1]:
import pandas as pd
import numpy as np
import math
import nltk
from nltk.corpus import stopwords
import re
import string
from collections import Counter
from PIL import Image
import requests
from io import BytesIO
import matplotlib.pyplot as plt
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
from dash.exceptions import PreventUpdate



In [2]:
# Load the book table from a CSV in the working directory. Later cells read the
# columns book_title, book_desc, genres, book_rating, book_pages, book_authors
# and image_url from this frame.
df = pd.read_csv('books_cleaned.csv')

In [3]:
# Map of row label -> title, kept as a module-level name alongside `opt`.
book_titles_dict = df['book_title'].to_dict()

# Dropdown options: each title serves as both the displayed label and the
# value handed to the callback.
opt = [{"label": title, "value": title} for title in book_titles_dict.values()]
    

In [4]:
from collections import Counter
from sklearn.feature_extraction.text import CountVectorizer
from scipy.spatial.distance import cosine
from sklearn.metrics.pairwise import cosine_similarity
def get_cosine_sim(*strs):
    """Return the cosine similarity between the bag-of-words vectors of the inputs.

    The strings are vectorised by ``get_vectors`` and the resulting rows are
    passed positionally to ``scipy.spatial.distance.cosine``.  That function
    returns a *distance* (0 for identical directions), so it is converted to a
    similarity here: higher means more alike, which is what the arg-max based
    selection in ``recommend_desc`` expects.

    Parameters
    ----------
    *strs : str
        The texts to compare.  The intended use is exactly two strings.
        NOTE(review): a third string would be forwarded to SciPy as the
        ``w`` (weights) argument -- confirm nobody relies on that.

    Returns
    -------
    float
        ``1 - cosine_distance``.  May be NaN when SciPy yields NaN (for
        example for an all-zero vector in some SciPy versions).
    """
    vectors = list(get_vectors(*strs))
    return 1.0 - cosine(*vectors)

def get_jaccard_sim(str1, str2):
    """Return the Jaccard similarity of the whitespace-separated tokens of two strings.

    Parameters
    ----------
    str1, str2 : str
        Texts to compare; each is split on whitespace and de-duplicated.

    Returns
    -------
    float
        ``|A & B| / |A | B|`` in the range [0, 1].  When neither string
        contains a token the union is empty; 0.0 is returned in that case
        instead of raising ``ZeroDivisionError``.
    """
    a = set(str1.split())
    b = set(str2.split())
    union_size = len(a | b)
    if union_size == 0:
        # Two empty token sets: nothing in common to measure.
        return 0.0
    return len(a & b) / union_size
    
def get_vectors(*strs):
    """Turn the given strings into aligned bag-of-words count vectors.

    Each string has ASCII punctuation removed and English stop words dropped,
    then all strings are vectorised together so every row shares the same
    vocabulary.

    Parameters
    ----------
    *strs : str
        The texts to vectorise.

    Returns
    -------
    numpy.ndarray
        One row of token counts per input string, in input order.
    """
    # Build the lookups once per call rather than once per string.
    stop_words = set(stopwords.words('english'))
    strip_punctuation = str.maketrans('', '', string.punctuation)

    text = []
    for t in strs:
        words = t.translate(strip_punctuation).split()
        # CountVectorizer lowercases tokens by default, so the stop-word check
        # must be case-insensitive too or "The" would slip through as "the".
        text.append(' '.join(word for word in words if word.lower() not in stop_words))

    # The corpus must go to fit/transform, not to the constructor: the first
    # constructor parameter is `input` (a mode string), not the documents.
    vectorizer = CountVectorizer()
    return vectorizer.fit_transform(text).toarray()

In [5]:
def _split_genres(genres):
    """Return the set of '|'-separated genre labels, or an empty set for non-string input."""
    return set(genres.split('|')) if isinstance(genres, str) else set()


def recommend_desc(book, genre_overlap_threshold=2):
    """Recommend the book whose description scores highest against ``book``'s.

    The title is looked up in ``df['book_title']`` (exact match on the
    lower-cased input, otherwise the best ``fuzz.ratio`` match).  Every other
    row that shares more than ``genre_overlap_threshold`` genres with the
    matched book and has a string description is scored with
    ``get_cosine_sim``; the row with the highest score is returned.

    Unlike earlier revisions, the cover image is not downloaded here: the
    response was never used and only ``image_url`` is returned.

    Parameters
    ----------
    book : str
        Title typed or selected by the user.
    genre_overlap_threshold : int, optional
        A candidate must share strictly more than this many genres with the
        matched book (2 for fast, 0 for in depth).

    Returns
    -------
    list
        ``[match, book_title, book_desc, book_rating, book_pages,
        book_authors, image_url]`` where ``match`` is the title that was
        matched and the remaining fields describe the recommended row.

    Raises
    ------
    ValueError
        If the matched book has no text description, or if no other row
        passes the genre filter with a usable score.
    """
    query = book.lower()
    titles = df['book_title'].tolist()

    exact_positions = np.flatnonzero((df['book_title'] == query).to_numpy())
    if exact_positions.size > 0:
        match_pos = int(exact_positions[0])
        match = book
        print('Found match: ', book, '\n')
    else:
        # Score every row (non-string titles get -1) so that the arg-max is a
        # valid positional index into df rather than into a filtered list.
        title_scores = [fuzz.ratio(query, title) if isinstance(title, str) else -1
                        for title in titles]
        match_pos = int(np.argmax(title_scores))
        match = titles[match_pos]
        print('Found closest match: ', match, '\n')

    matched_row = df.iloc[match_pos]
    desc = matched_row['book_desc']
    if not isinstance(desc, str):
        raise ValueError('Matched book {!r} has no text description to compare.'.format(match))
    # Use the matched book's own genres for the candidate filter.
    genre_input = _split_genres(matched_row['genres'])

    # NaN marks "not a candidate"; np.nanargmax skips those rows, and keeping
    # one slot per row means the winning position indexes df directly.
    scores = np.full(len(df), np.nan)
    for pos, (cand_desc, cand_genres) in enumerate(zip(df['book_desc'], df['genres'])):
        if pos == match_pos or not isinstance(cand_desc, str):
            continue
        if len(genre_input & _split_genres(cand_genres)) > genre_overlap_threshold:
            scores[pos] = get_cosine_sim(desc, cand_desc)

    if np.isnan(scores).all():
        raise ValueError('No recommendation candidates found for {!r}.'.format(match))

    final_index = int(np.nanargmax(scores))
    best = df.iloc[final_index]

    return [match, best['book_title'], best['book_desc'],
            best['book_rating'], best['book_pages'], best['book_authors'], best['image_url']]


In [6]:
import warnings
# Install an "ignore" filter with no category or module restriction, so all
# warnings raised after this cell runs are suppressed.
# NOTE(review): this also hides deprecation warnings from the libraries used
# above -- consider narrowing the filter to the specific warning that is noisy.
warnings.filterwarnings('ignore')

In [131]:

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']


app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Page layout: title, title dropdown, cover image slot, a summary table for the
# top recommendation, and the description underneath.  The component ids are
# the targets of the callback outputs.
# Style dictionaries use camelCased CSS property names, as Dash expects
# (e.g. backgroundColor rather than background-color).
app.layout = html.Div([
    html.H1(
        'Keyword Recommendation',
        style={"backgroundColor": "coral", "marginLeft": "auto", "marginRight": "auto",
               "display": "flex", "justifyContent": "center"},
    ),
    html.Br(),
    dcc.Dropdown(
        id='demo-dropdown',
        options=opt,
        value='',
        style={"width": "1300px"},
    ),
    html.Br(),
    # Filled with an html.Img of the recommended book's cover.
    html.Div(html.Center(id='url')),
    html.Div(
        html.Table(
            [
                html.Tr(
                    [html.Th(html.H4('Top Recommendation:', style={"color": "white", "width": "300px"}),
                             style={"backgroundColor": "grey"}),
                     html.Td(id='top-rec', style={"width": "500px"})],
                    style={"border": "2px solid grey"},
                ),
                html.Tr(
                    [html.Th(html.H6('Author:', style={"color": "white"}),
                             style={"backgroundColor": "grey"}),
                     html.Td(id='author')],
                    style={"border": "2px solid grey"},
                ),
                html.Tr(
                    [html.Th(html.H6('Rating:', style={"color": "white"}),
                             style={"backgroundColor": "grey"}),
                     html.Td(id='rating')],
                    style={"border": "2px solid grey"},
                ),
                html.Tr(
                    [html.Th(html.H6('Length:', style={"color": "white"}),
                             style={"backgroundColor": "grey"}),
                     html.Td(id='length')],
                    style={"border": "2px solid grey"},
                ),
            ],
            style={"borderSpacing": "60px", "border": "2px solid grey",
                   "marginLeft": "500px", "marginRight": "auto",
                   "backgroundColor": "coral", "opacity": "0.9"},
        )
    ),
    html.Div(
        html.Table(
            [html.Tr([html.Td(html.H6('Description')), html.Td(id='desc')])],
            style={"width": "83%"},
        )
    ),
])


@app.callback(
    Output('top-rec', 'children'),
    Output('author', 'children'),
    Output('desc', 'children'),
    Output('rating', 'children'),
    Output('length', 'children'),
    Output('url', 'children'),
    [Input('demo-dropdown', 'value')])
def update_output(value):
    """Fill the recommendation panel for the title chosen in the dropdown.

    Parameters
    ----------
    value : str or None
        Current dropdown value.  It is the empty string on first load and may
        be ``None`` when the selection is cleared.

    Returns
    -------
    tuple
        ``(title, author, description, rating, length_text, cover_img)`` in
        the same order as the declared outputs.

    Raises
    ------
    dash.exceptions.PreventUpdate
        When nothing is selected, so the page is left untouched instead of
        recommending a book for an empty query.
    """
    if not value:
        raise PreventUpdate

    # rec = [match, title, description, rating, pages, authors, image_url]
    rec = recommend_desc(value)

    # The browser loads the cover straight from its URL.
    img = html.Img(src=rec[6], style={"float": "left", "width": "300px", "height": "400px"})

    try:
        length_text = '{} pages.'.format(str(int(rec[4])))
    except (TypeError, ValueError):
        # Missing (NaN/None) or non-numeric page count.
        length_text = 'Unknown'

    return rec[1], rec[5], rec[2], rec[3], length_text, img


# Start the Dash server on port 4000 when this code runs as the main module.
# The captured output below shows it serving on http://127.0.0.1:4000/.
if __name__ =='__main__':
    app.run_server(port=4000)

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is running on http://127.0.0.1:4000/

Dash is run

 * Running on http://127.0.0.1:4000/ (Press CTRL+C to quit)
127.0.0.1 - - [26/May/2021 21:57:27] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [26/May/2021 21:57:28] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [26/May/2021 21:57:28] "GET /_favicon.ico?v=1.20.0 HTTP/1.1" 200 -
127.0.0.1 - - [26/May/2021 21:57:28] "GET /_dash-layout HTTP/1.1" 200 -


Found closest match:  harry potter and the order of the phoenix 



127.0.0.1 - - [26/May/2021 21:57:33] "POST /_dash-update-component HTTP/1.1" 200 -
