In [10]:
import os

import mysql.connector
import pandas as pd
from IPython.display import Markdown, display
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

#!pip install mysql-connector-python

## Book Recommender System 

This project builds on my previous book recommender system by adding over 200K rows of book data from Google Books and 3 million book reviews from Amazon. The data was first cleaned in Python and then loaded into a SQL database for safekeeping and secure access. 

I've adapted the code by adding new inputs to my model. We look at book description data as well as title name, average book review score and review sentiment. This recommender is a big improvement from my first attempt and does a better job at recommending similar books based on user input.

### User Input
This user input will be used to filter our database down to the most relevant results.

In [5]:
# Free-text query; it is compared with book titles and descriptions later on.
user_input = input("Enter a title of a book or a topic that you want to learn about: ").strip()

# Minimum average rating. It is converted to a number immediately so that a typo
# fails here with a clear error instead of reaching the SQL query as raw text.
score_above = float(input("Enter a rating from 1-5. 1 being the lowest and 5 being the highest: "))
if not 1 <= score_above <= 5:
    raise ValueError(f"Rating must be between 1 and 5, got {score_above}")

# Genre used to filter book categories; surrounding whitespace would break the match.
user_category = input("Enter a book genre: ").strip()

Enter a title of a book or a topic that you want to learn about: Learning how to draw
Enter a rating from 1-5. 1 being the lowest and 5 being the highest: 4.0
Enter a book genre: art


In [None]:
# Non-interactive alternative to the input() prompts: uncomment these lines to
# hard-code the search text, minimum rating and genre instead of typing them in.
#user_input = 'Learning how to draw'
#score_above = 4.0
#user_category= 'art'

### Connecting to MySQL
This cell connects to MySQL and executes a query.

In [6]:
# Establish a connection to the MySQL database.
# Settings are read from environment variables so real credentials never have to
# be written into the notebook; the fallbacks are the local development values
# this notebook used before.
conn = mysql.connector.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    password=os.environ.get("MYSQL_PASSWORD", "password"),
    database=os.environ.get("MYSQL_DATABASE", "goodreads"),
)

# Reviews (joined with book metadata) for every title whose average score is at
# least the requested rating and whose category matches the requested genre.
# The two %s placeholders are filled in by the driver, so user-typed text is
# never spliced into the SQL string.
# To match the genre as a substring instead of the whole category, pass
# f"%{user_category}%" as the second parameter.
query = """
    SELECT reviews.Title, Description, Authors, review_score, review_text, Categories
    FROM reviews
    INNER JOIN book_data ON reviews.Title = book_data.Title
    WHERE reviews.Title IN (
        SELECT book_avg_score.Title
        FROM book_avg_score
        INNER JOIN book_categories ON book_avg_score.Title = book_categories.Title
        WHERE avg_review_score >= %s AND Categories LIKE %s
    );
"""

# try/finally guarantees the cursor and connection are released even if the
# query fails.
try:
    cursor = conn.cursor()
    try:
        # float() rejects a non-numeric rating before it reaches the database.
        cursor.execute(query, (float(score_above), user_category))

        # Fetch all rows and remember the column names reported by the cursor.
        results = cursor.fetchall()
        column_names = [desc[0] for desc in cursor.description]
    finally:
        cursor.close()
finally:
    conn.close()

# Create a DataFrame from the fetched rows.
book_data_df = pd.DataFrame(results, columns=column_names)

### Add SQL Data into a Dataframe

In [7]:
# Persist the query result to disk, then reload it from the CSV file.
book_data_df.to_csv('book_query_data.csv')
book_data_df = (
    pd.read_csv("book_query_data.csv")
    # to_csv wrote the index as an unnamed first column; discard it again.
    .drop(columns=['Unnamed: 0'])
)

# Replace missing descriptions with the placeholder 'NA'. The filled column is
# assigned back explicitly: calling fillna(inplace=True) on a column selection
# is not guaranteed to update the parent DataFrame.
book_data_df['Description'] = book_data_df['Description'].fillna('NA')

In [8]:
# Print a summary of the loaded DataFrame: index range, columns, non-null counts and dtypes.
book_data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11537 entries, 0 to 11536
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Title         11537 non-null  object 
 1   Description   11537 non-null  object 
 2   Authors       11104 non-null  object 
 3   review_score  11537 non-null  float64
 4   review_text   11537 non-null  object 
 5   Categories    11247 non-null  object 
dtypes: float64(1), object(5)
memory usage: 540.9+ KB


### Main Book Recommender Function

#### Cleaning User Input
1. The code defines a function called get_recommendations that takes the user's search text, the book DataFrame, and the maximum number of rows to return as input.
2. The function prepares the user's input by cleaning it up (lowercasing, removing leading and trailing spaces).

#### Calculate Similarity Score
3. It compares the user's title with all the book titles and calculates a similarity score for each book.
4. It does the same for the book descriptions, comparing the user's input with all the descriptions.
5. The code combines the similarity scores from the title and description to get an overall similarity score for each book.

#### Sort Top Scores
6. It sorts the books based on the combined similarity scores, so the most similar books are at the top.
7. The function selects the top recommended books based on the number of recommendations specified.
8. It retrieves additional information about the recommended books, such as their description, categories, authors, review text, and review score.

#### Book Recommendation Output
9. The recommended books, along with the selected information, are returned from the function.
10. Finally, the code calls the get_recommendations function with appropriate input and stores the result in a variable called book_recommendations.
11. The book_recommendations variable represents the recommended books based on the user's input, and it can be used further in the program.

In [19]:
def get_recommendations(title,  df, num_recommendations=10000):
    """Rank the rows of ``df`` by textual similarity to ``title``.

    The search text is lowercased, stripped and turned into a bag-of-words
    vector (English stop words removed). Each row's ``Title`` and
    ``Description`` are vectorized with the same vocabulary, and the two cosine
    similarities to the search text are added to form the row's score.

    Parameters
    ----------
    title : str
        Search text entered by the user (a book title or a topic).
    df : pandas.DataFrame
        Must contain the columns ``Title``, ``Description``, ``Categories``,
        ``Authors``, ``review_text`` and ``review_score``. It is not modified.
    num_recommendations : int, default 10000
        Maximum number of rows to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``num_recommendations`` rows of ``df`` (original index labels
        kept), ordered by descending combined score and restricted to the six
        columns listed above. Empty if ``df`` is empty.

    Raises
    ------
    ValueError
        If the search text yields no usable terms, e.g. it is empty or
        consists only of stop words.
    """
    selected_columns = ['Title', 'Description', 'Categories', 'Authors', 'review_text', 'review_score']

    # Nothing to rank: cosine_similarity rejects a matrix with zero rows, so
    # return the empty selection directly.
    if df.empty:
        return df[selected_columns].copy()

    # Normalise the user input: lowercase and remove surrounding whitespace.
    user_input = title.lower().strip()

    # One vectorizer is enough: the vocabulary comes from the user input only,
    # so titles and descriptions are projected onto exactly the same terms.
    text_vectorizer = CountVectorizer(stop_words='english')
    try:
        user_matrix = text_vectorizer.fit_transform([user_input])
    except ValueError as err:
        raise ValueError(
            "The search text contains no usable terms (it may be empty or "
            f"consist only of stop words): {title!r}"
        ) from err

    # Missing values would make the vectorizer fail; treat them as empty text.
    titles = df['Title'].fillna('').astype(str)
    descriptions = df['Description'].fillna('').astype(str)

    # Cosine similarity between the user input and every title / description.
    title_sim_scores = cosine_similarity(user_matrix, text_vectorizer.transform(titles))
    description_sim_scores = cosine_similarity(user_matrix, text_vectorizer.transform(descriptions))

    # Combined score per row, flattened from shape (1, n_rows) to (n_rows,).
    combined_sim_scores = (title_sim_scores + description_sim_scores).flatten()

    # Positions of the rows from highest to lowest combined score.
    sim_indices = combined_sim_scores.argsort()[::-1]
    top_indices = sim_indices[:num_recommendations]

    # Copy so the caller can add columns without touching the original DataFrame.
    recommendations = df.iloc[top_indices].copy()

    return recommendations[selected_columns]

# Rank the queried review rows against the user's search text; num_recommendations
# is left at its default.
book_recommendations = get_recommendations(user_input, book_data_df)
# Last expression of the cell: displays the resulting DataFrame.
book_recommendations

Unnamed: 0,Title,Description,Categories,Authors,review_text,review_score
10372,Learning by Heart Teachings To Free The Creati...,Tap into your natural ability to create Engag...,Art,"Corita Kent, Jan Steward",After being out of print it is pure joy to hav...,5.0
10365,Learning by Heart Teachings To Free The Creati...,Tap into your natural ability to create Engag...,Art,"Corita Kent, Jan Steward",Rarely have I met a book that invites me to jo...,5.0
10360,Learning by Heart Teachings To Free The Creati...,Tap into your natural ability to create Engag...,Art,"Corita Kent, Jan Steward",A Jewish Proverb states A wise teacher makes l...,5.0
10361,Learning by Heart Teachings To Free The Creati...,Tap into your natural ability to create Engag...,Art,"Corita Kent, Jan Steward",I was hoping for a book that would zing me wit...,3.0
10362,Learning by Heart Teachings To Free The Creati...,Tap into your natural ability to create Engag...,Art,"Corita Kent, Jan Steward",Fantastic beautiful inspiring book for art and...,5.0
...,...,...,...,...,...,...
6065,Human Anatomy Physiology th Edition,The study of anatomy has long been essential t...,Art,Valerie L. Winslow,I purchased this tome as a refresher volume I ...,5.0
6064,Human Anatomy Physiology th Edition,The study of anatomy has long been essential t...,Art,Valerie L. Winslow,I recently read this wonderfully written text ...,5.0
6063,Human Anatomy Physiology th Edition,The study of anatomy has long been essential t...,Art,Valerie L. Winslow,I teach human anatomy and phyiology at Merritt...,5.0
6062,Human Anatomy Physiology th Edition,The study of anatomy has long been essential t...,Art,Valerie L. Winslow,I used this book as a student and refer to it ...,5.0


### Calculating Review Sentiment Score
This score is calculated for each individual book review and ranges from -1 to 1. The more negative the number, the more negative the review.

In [15]:
# Create the VADER sentiment analyzer
sia = SentimentIntensityAnalyzer()

# Score every review in row order, keeping only the 'compound' value
sentiment_scores = [
    sia.polarity_scores(review_text)['compound']
    for review_text in book_recommendations['review_text']
]

# Store the per-review scores as a new column of the DataFrame
book_recommendations['review_sentiment_score'] = sentiment_scores

# Display the DataFrame including the new column
book_recommendations

Unnamed: 0,Title,Description,Categories,Authors,review_text,review_score,review_sentiment_score
10372,Learning by Heart Teachings To Free The Creati...,Tap into your natural ability to create Engag...,Art,"Corita Kent, Jan Steward",After being out of print it is pure joy to hav...,5.0,0.9831
10365,Learning by Heart Teachings To Free The Creati...,Tap into your natural ability to create Engag...,Art,"Corita Kent, Jan Steward",Rarely have I met a book that invites me to jo...,5.0,0.9892
10360,Learning by Heart Teachings To Free The Creati...,Tap into your natural ability to create Engag...,Art,"Corita Kent, Jan Steward",A Jewish Proverb states A wise teacher makes l...,5.0,0.8381
10361,Learning by Heart Teachings To Free The Creati...,Tap into your natural ability to create Engag...,Art,"Corita Kent, Jan Steward",I was hoping for a book that would zing me wit...,3.0,0.9274
10362,Learning by Heart Teachings To Free The Creati...,Tap into your natural ability to create Engag...,Art,"Corita Kent, Jan Steward",Fantastic beautiful inspiring book for art and...,5.0,0.8807
...,...,...,...,...,...,...,...
6065,Human Anatomy Physiology th Edition,The study of anatomy has long been essential t...,Art,Valerie L. Winslow,I purchased this tome as a refresher volume I ...,5.0,0.9730
6064,Human Anatomy Physiology th Edition,The study of anatomy has long been essential t...,Art,Valerie L. Winslow,I recently read this wonderfully written text ...,5.0,0.8564
6063,Human Anatomy Physiology th Edition,The study of anatomy has long been essential t...,Art,Valerie L. Winslow,I teach human anatomy and phyiology at Merritt...,5.0,0.9670
6062,Human Anatomy Physiology th Edition,The study of anatomy has long been essential t...,Art,Valerie L. Winslow,I used this book as a student and refer to it ...,5.0,0.9709


### Joining Dataframes and Cleaning 
The final list of book recommendations was created by joining and grouping different dataframes together. We clean the data for the last time. 

In [16]:
# Per-title averages of the two numeric review columns. The columns are selected
# BEFORE aggregating so that mean() is never applied to the text columns
# (recent pandas versions raise a TypeError instead of silently skipping them).
book_groupby = (
    book_recommendations
    .groupby('Title')[['review_sentiment_score', 'review_score']]
    .mean()
    .rename(columns={'review_sentiment_score': 'average_review_sentiment_score', 'review_score': 'avg_review_score'})
)

book_recommendations = (
    book_recommendations
    # Attach the averages to every review row; a left merge keeps the existing row order.
    .merge(book_groupby, on='Title', how='left')
    # Keep one row per book (the first occurrence of each title).
    .drop_duplicates(subset='Title')
    # The per-review columns are no longer needed once the averages are attached.
    .drop(columns=['review_text', 'review_score', 'review_sentiment_score'])
    .assign(
        # Express the average sentiment as a whole number (average * 100, truncated).
        average_review_sentiment_score=lambda df: (df['average_review_sentiment_score'] * 100).astype(int),
        # Round the average star rating to two decimals.
        avg_review_score=lambda df: df['avg_review_score'].astype(float).round(2),
    )
)
book_recommendations

Unnamed: 0,Title,Description,Categories,Authors,average_review_sentiment_score,avg_review_score
0,Learning by Heart Teachings To Free The Creati...,Tap into your natural ability to create Engag...,Art,"Corita Kent, Jan Steward",90,4.79
14,Lets Draw Manga All About Fighting,LETS DRAW MANGAALL ABOUT FIGHTING Urban street...,Art,"Makoto Nakajima, Big Mouth Factory",30,4.43
15,Lets Draw Manga,LETS DRAW MANGAALL ABOUT FIGHTING Urban street...,Art,"Makoto Nakajima, Big Mouth Factory",79,4.11
30,Draw Real People Discover Drawing,Having trouble drawing a nose that looks like ...,Art,Lee Hammond,80,4.60
35,How to Draw Spiderman Walter Foster How to Dra...,With his spider powers and highflying acrobati...,Art,Walter Foster Creative Team,84,4.50
...,...,...,...,...,...,...
9984,Woodtli,Martin Woodtlis perfectionist generous approac...,Art,Martin Woodtli,15,5.00
9985,LAST OF THE BOHEMIANS,First complete study of these two major Scotti...,Art,"ANTIQUE COLLECTO, Roger Bristow",57,5.00
9986,The Eastern Gate An Invitation to the Arts of ...,An illustrated introduction to the sculpture p...,Art,Janet Gaylord Moore,91,5.00
9987,Fashion Surrealism,Drawing on interviews with designers and fashi...,Art,Fred Davis,70,5.00


### Final Output

In [17]:
# Keep only the first five rows for the final report.
book_recommendations = book_recommendations.head(5)

# (column, Markdown template) pairs, rendered in this order for every book.
markdown_fields = [
    ('Title', "**Book**: {}"),
    ('Authors', "**Authors**: {}"),
    ('Categories', "**Categories**: {}"),
    ('avg_review_score', "**Average Review Score**: {} "),
    ('average_review_sentiment_score', "**Review Sentiment Score**: {} "),
]

for _, book in book_recommendations.iterrows():
    for column, template in markdown_fields:
        display(Markdown(template.format(book[column])))
    # The description is printed as plain text rather than rendered as Markdown.
    print(book['Description'])
    # Horizontal rule between books.
    display(Markdown("---"))


**Book**: Learning by Heart Teachings To Free The Creative Spirit

**Authors**: Corita Kent, Jan Steward

**Categories**: Art

**Average Review Score**: 4.79 

**Review Sentiment Score**: 90 

Tap into your natural ability to create  Engaging proven exercises for developing creativity  Priceless resource for teachers artists actors everyone Artist and educator Corita Kent inspired generations of artists and the truth of her words We can all talk we can all write and if the blocks are removed we can all draw and paint and make things still shines through This revised edition of her classic work Learning by Heart features a new foreword and a chart of curriculum standards Kents original projects and exercises developed through more than  years as an art teacher and richly illustrated with  thoughtprovoking images are as inspiring and as freeing today as they were during her lifetime Learn how to challenge fears be open to new directions recognize connections between objects and ideas and much more in this remarkable indispensable guide to freeing the creative spirit within all of us With new material by art world heavyweights Susan Friel and Barbara Loste Learning by Heart bri

---

**Book**: Lets Draw Manga All About Fighting

**Authors**: Makoto Nakajima, Big Mouth Factory

**Categories**: Art

**Average Review Score**: 4.43 

**Review Sentiment Score**: 30 

LETS DRAW MANGAALL ABOUT FIGHTING Urban street rumbles battling maids destructive fantasy creatures and futuristic battle sequencesits all covered here Everything from learning how to draw gun combat down to the most pedestrian and domestic husband and wife disputes this unique instructional guide presents to readers stepbystep drawing techniques and the hows and whys of effective manga storytelling from the perspective of a manga artist This book is sure to pack a big punch for those wanting to learn how to draw dynamic battle royals with the unique manga flavor


---

**Book**: Lets Draw Manga

**Authors**: Makoto Nakajima, Big Mouth Factory

**Categories**: Art

**Average Review Score**: 4.11 

**Review Sentiment Score**: 79 

LETS DRAW MANGAALL ABOUT FIGHTING Urban street rumbles battling maids destructive fantasy creatures and futuristic battle sequencesits all covered here Everything from learning how to draw gun combat down to the most pedestrian and domestic husband and wife disputes this unique instructional guide presents to readers stepbystep drawing techniques and the hows and whys of effective manga storytelling from the perspective of a manga artist This book is sure to pack a big punch for those wanting to learn how to draw dynamic battle royals with the unique manga flavor


---

**Book**: Draw Real People Discover Drawing

**Authors**: Lee Hammond

**Categories**: Art

**Average Review Score**: 4.6 

**Review Sentiment Score**: 80 

Having trouble drawing a nose that looks like a nose In this stepbystep guide Lee Hammond will teach you how to draw realisticlooking portraits of your favorite peoplemore easily than you ever thought possible Really The secret is in the blending With pencil and paper Lee shows you how to create gradual smooth shadings of light and dark to replicate the subtle contours of skinand how to use these simple shading techniques to make any shape look threedimensional After youve got the basics down youll see how to draw every part of the face Its made easy by looking at each feature as simple interlocking shapes then adding the right highlights and shadows In no time youll be drawing realistic noses mouths eyes earseven facial expressions Then youll see how to put all those features together to create an expressive portrait that actually looks like your subject Stepbystep demonstrations guide you all the way


---

**Book**: How to Draw Spiderman Walter Foster How to Draw Series

**Authors**: Walter Foster Creative Team

**Categories**: Art

**Average Review Score**: 4.5 

**Review Sentiment Score**: 84 

With his spider powers and highflying acrobatics SpiderMan is one of the most famous and lasting Super Heroes of all time In Learn to Draw Marvels SpiderMan Marvel artist Cory Hamscher provides stepbystep instructions for drawing SpiderMan characters and villains including series favorites Carnage Venom and Doctor Octopus Learn to Draw Marvels SpiderMan helps both beginning and advanced artists develop stepbystep renderings from beginning sketches to finished product With bonus character descriptions and artist tips it teaches professional technique and gives some of the rich backstory behind every character This truly is an amazing SpiderMan experience


---