## Loading the Book Dataset

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np
import ast

In [20]:
# Set the maximum column width to 100 characters
pd.set_option('display.max_colwidth', 500)

In [21]:
books_df = pd.read_csv('books.csv', index_col=[0])

In [22]:
books_df.head(10)

Unnamed: 0,book_name,summaries,categories
0,The Highly Sensitive Person,"is a self-assessment guide and how-to-live template for people who feel, relate, process, and notice more deeply than others, and who frequently suffer from overstimulation as a result.",science
1,Why Has Nobody Told Me This Before?,"is a collection of a clinical psychologist’s best practical advice to combat anxiety and depression and improve our mental health in small increments, collected from over a decade of 1-on-1 work with patients.",science
2,The Midnight Library,"tells the story of Nora, a depressed woman in her 30s, who, on the day she decides to die, finds herself in a library full of lives she could have lived, where she discovers there’s a lot more to life, even her current one, than she had ever imagined.",science
3,Brave New World,"presents a futuristic society engineered perfectly around capitalism and scientific efficiency, in which everyone is happy, conform, and content — but only at first glance.",science
4,1984,is the story of a man questioning the system that keeps his futuristic but dystopian society afloat and the chaos that quickly ensues once he gives in to his natural curiosity and desire to be free.,science
5,Stolen Focus,"explains why our attention spans have been dwindling for decades, how technology accelerates this worrying trend, and what we can do to reclaim our focus and thus our capacity to live meaningful lives.",science
6,The Life-Changing Science of Detecting Bullshit,teaches its readers how to avoid falling for the lies and false information that other people spread by helping them build essential thinking skills through examples from the real world.,science
7,Dopamine Nation,"talks about the importance of living a balanced life in relation to all the pleasure and stimuli we’re surrounded with on a daily basis, such as drugs, devices, porn, gambling facilities, showing us how to avoid becoming dopamine addicts by restricting our access to them.",science
8,The Art of Statistics,"is a non-technical book that shows how statistics is helping humans everywhere get a new hold of data, interpret numbers, fact-check information, and reveal valuable insights, all while keeping the world as we know it afloat.",science
9,No Self No Problem,"is a provocative read about the implications of Buddhism in neuroscience, and more specifically about the idea that the self is only a product of the mind, meaning that there is no “I”.",science


In [23]:
books_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1227 entries, 0 to 1226
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   book_name   1227 non-null   object
 1   summaries   1227 non-null   object
 2   categories  1227 non-null   object
dtypes: object(3)
memory usage: 38.3+ KB


In [24]:
books_df.shape

(1227, 3)

In [25]:
books_df.categories.value_counts()

Unnamed: 0_level_0,count
categories,Unnamed: 1_level_1
happiness,218
relationships,204
science,199
productivity,168
politics,80
biography,69
money,62
psychology,43
economics,39
marketing,37


## Vectorizing the Summaries

In [26]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [97]:
# Create tf-idf vectors using the overview column
tfidf_vectorizer = TfidfVectorizer(stop_words='english',
                                   min_df = 5,
                                   max_df = 0.6)

In [98]:
tfidf_matrix = tfidf_vectorizer.fit_transform(books_df['summaries'])

In [99]:
tfidf_matrix.shape

(1227, 900)

In [100]:
tfidf_df = pd.DataFrame(tfidf_matrix.toarray())

In [101]:
vocabs = list(tfidf_vectorizer.vocabulary_.keys())

In [102]:
tfidf_df.columns = vocabs

In [103]:
from random import sample

sample_vocabs = sample(vocabs,10)

In [104]:
tfidf_df.sample(10)[sample_vocabs]

Unnamed: 0,present,ancient,sports,experiences,important,knowledge,shocking,balance,online,doesn
1009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
798,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.300025
772,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1113,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
535,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
180,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
968,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
179,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.356123
165,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Computing Pairwise Distances

In [105]:
from sklearn.metrics import pairwise_distances
from scipy.spatial.distance import cosine, correlation

In [106]:
books_sim = 1 - pairwise_distances( tfidf_matrix, metric="cosine" )

In [107]:
books_sim_df = pd.DataFrame( books_sim )

In [108]:
books_sim_df.head( 10 )

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1217,1218,1219,1220,1221,1222,1223,1224,1225,1226
0,1.0,0.0,0.0,0.0,0.0,0.077582,0.037083,0.0,0.0,0.07213,...,0.0,0.0,0.043648,0.0,0.0,0.0,0.0,0.0,0.105632,0.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.081296,0.0
2,0.0,0.0,1.0,0.0,0.076252,0.078215,0.0,0.023923,0.0,0.0,...,0.0,0.0,0.0,0.07951,0.0,0.0,0.022141,0.16425,0.034685,0.0
3,0.0,0.0,0.0,1.0,0.126309,0.0,0.0,0.0,0.0,0.0,...,0.097214,0.0,0.0,0.0,0.0,0.0,0.226319,0.0,0.0,0.0
4,0.0,0.0,0.076252,0.126309,1.0,0.0,0.0,0.0,0.0,0.0,...,0.140918,0.0,0.0,0.0,0.126487,0.0,0.084076,0.059337,0.0,0.0
5,0.077582,0.0,0.078215,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.123349,0.0,0.195044,0.0,0.0,0.0,0.0,0.100842,0.0
6,0.037083,0.0,0.0,0.0,0.0,0.0,1.0,0.078384,0.157998,0.0,...,0.078421,0.104375,0.079798,0.086492,0.0,0.0,0.169283,0.044592,0.0,0.111226
7,0.0,0.0,0.023923,0.0,0.0,0.0,0.078384,1.0,0.0,0.0,...,0.0,0.0,0.0,0.271572,0.0,0.0,0.020419,0.018616,0.031989,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.157998,0.0,1.0,0.0,...,0.083361,0.110949,0.030791,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.07213,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Finding TopN Similar Books

In [109]:
def get_similar_books( title, topN = 5 ):
    book_idx = books_df[books_df['book_name'] == title].index[0]
    books_df['similarity'] = books_sim_df.iloc[book_idx]
    top_n = books_df.sort_values( ["similarity"], ascending = False )[0:topN+1]
    #print( "Similar Movies to: ", )
    return top_n

### Finding Similar Books

 - The Bitcoin Standard
 - Measure What Matters
 - The Happiness Hypothesis

In [110]:
books_df[books_df.book_name.str.contains("Mindfulness")]

Unnamed: 0,book_name,summaries,categories,similarity
731,The Miracle of Mindfulness,teaches the ancient Buddhist practice of mindfulness and how living in the present will make you happier.,happiness,0.0


In [111]:
get_similar_books( 'The Miracle of Mindfulness' )

Unnamed: 0,book_name,summaries,categories,similarity
731,The Miracle of Mindfulness,teaches the ancient Buddhist practice of mindfulness and how living in the present will make you happier.,happiness,1.0
129,Aware,"is a comprehensive overview of the far-reaching benefits of meditation, rooted in both science and practice, enriched with actionable advice on how to practice mindfulness.",science,0.349732
796,10% Happier,"gives skeptics an easy “in” to meditation, by taking a very non-fluffy approach to the science behind this mindfulness practice and showing you how and why letting go of your ego is important for living a stress-free life.",happiness,0.281242
641,Journey of Awakening,"explains the basics of meditation using ideas from multiple spiritual sources, including how to avoid the mental traps that make it difficult so you can practice frequently and make mindfulness, and the many benefits that come with it, part of your daily life.",happiness,0.273187
797,The Power of Now,"shows you that every minute you spend worrying about the future or regretting the past is a minute lost, because the only place you can truly live in is the present, the now, which is why the book offers actionable strategies to start living every minute as it occurs and becoming 100% present in and for your life.",happiness,0.268207
546,How Successful People Think,"lays out eleven specific ways of thinking you can practice to live a better, happier, more successful life.",relationships,0.256036


In [112]:
get_similar_books( 'The Bitcoin Standard' )

Unnamed: 0,book_name,summaries,categories,similarity
366,The Bitcoin Standard,uses the history of money and gold to explain why Bitcoin is the way to go if the world wants to stick to having sound money and why it’s the only cryptocurrency to be focusing on right now.,economics,1.0
370,The Age Of Cryptocurrency,"explains the past, present, and future of Bitcoin, including its benefits and drawbacks, how it aligns with the definition of money well enough to be its own currency, how it and other cryptocurrencies will change our economy and the entire world.",economics,0.283172
829,Blockchain Revolution,"explains how the power of this new technology behind Bitcoin can transform our world financially by improving the way we store our money and do business to make it more fair, transparent, equal, and free from corruption.",money,0.278353
851,You Are A Badass At Making Money,will help you stop making excuses and get over your bad relationship with money to become a money-making machine.,money,0.276384
859,The Power Of Broke,"shows you how to leverage having no money into an advantage in business by compensating it with creativity, passion and authenticity.",money,0.271401
777,Daring Greatly,"is a book about having the courage to be vulnerable in a world where everyone wants to appear strong, confident and like they know what they’re doing.",happiness,0.255553


## Let's sample some books

In [94]:
books_df.sample(10, random_state = 100)

Unnamed: 0,book_name,summaries,categories,similarity
531,No-Drama Discipline,is a refreshing approach to parenting that looks at the neuroscience of a developing child’s brain to understand how to best discipline and teach kids while making them feel loved.,relationships,0.0
715,Q,uit Like A Millionaire,happiness,0.0
530,Executive Presence,"is an actionable guide to the essential components of a strong leader’s charisma, including and teaching you elements like gravitas, communication, appearance, and others.",relationships,0.0
629,The Comfort Book,"explores how depression feels like and its effects on our mind and body, and how we can overcome it by taking small, but significant steps in that direction, starting with finding hope, being more present at the moment, and acknowledging that we’re enough.",happiness,0.0
520,The Secret Life of Pronouns,"is a collection of research and case studies explaining what our use of pronouns, articles, and other style words can reveal about ourselves.",relationships,0.0
830,Get A Financial Life,shows those that are new to managing money how to do it confidently by explaining everything from debt and savings to insurance and investing.,money,0.272066
560,Unlimited Power,"is a self-help classic, which breaks down how Tony Robbins has helped top performers achieve at their highest level and how you can use the same mental and physical tactics to accomplish your biggest goals in life.",relationships,0.0
634,The Burnout Fix,"delivers practical advice on how to thrive in the dynamic working environment we revolve around every day by setting healthy boundaries, keeping a work-life balance, and prioritizing our well-being.",happiness,0.0
482,Battle Hymn Of The Tiger Mother,opens your eyes to the potential benefits of tough love by sharing the traditionally Chinese parenting style and experiences of Amy Chua.,relationships,0.0
985,Mind Gym,"explains why the performance of world-class athletes isn’t only a result of their physical training, but just as much due to their mentally fit minds and shows you how you can cultivate the mindset of a top performer yourself.",productivity,0.036022


In [95]:
get_similar_books( 'Mind Gym' )

Unnamed: 0,book_name,summaries,categories,similarity
985,Mind Gym,"explains why the performance of world-class athletes isn’t only a result of their physical training, but just as much due to their mentally fit minds and shows you how you can cultivate the mindset of a top performer yourself.",productivity,1.0
154,Peak,"accumulates everything the pioneer researcher on deliberate practice has learned about expert performance through decades of exploration and analysis of what separates those, who are average, from those, who are world-class at what they do.",science,0.371478
83,Boost!,is a guide for becoming more productive at work by using the preparation and performance techniques that world-class athletes use to win gold medals.,science,0.280183
1027,Rework,"shows you that you need less than you think to start a business – way less – by explaining why plans are actually harmful, how productivity isn’t a result from working long hours and why hiring and seeking investors should be your absolute last resort.",productivity,0.246811
958,The Inner Game Of Tennis,"is about the mental state required to deliver peak performance and how you can cultivate that state in sports, work, and life.",productivity,0.239288
600,The Speed Of Trust,"not only explains the economics of trust, but also shows you how to cultivate great trust in yourself, your relationships, and the three kinds of stakeholders you’ll deal with when you’re running a company.",relationships,0.23318


In [96]:
get_similar_books( 'Get A Financial Life' )

Unnamed: 0,book_name,summaries,categories,similarity
830,Get A Financial Life,shows those that are new to managing money how to do it confidently by explaining everything from debt and savings to insurance and investing.,money,1.0
871,The Little Book of Common Sense Investing,"shows you an alternative to actively, poorly managed, overpaid funds by introducing you to low-cost, passive index funds as a sustainable investing strategy, which gets you the retirement savings you need without the usual hassle of stock investing.",money,0.496979
604,The Financial Diet,"is a compendium of clever money tips for beginners, offering thrifty spending advice and sound money strategies in a wide range of areas, such as budgeting, investing, work, food, home, and even love.",happiness,0.402943
317,Chasing The Scream,"is a scathing review of the failed war on drugs, explaining its history with surprising statistics and identifying new ways that we can think about addiction, recovery, and drug laws.",politics,0.349489
865,Rule #1,"hands you the reins of personal investing, even if you’ve never held them before, by using a few simple rules from Warren Buffett’s value investing approach to guide you towards financial independence.",money,0.326968
605,Just Keep Buying,"will help you answer the big questions about saving and investing money with clever stories and interesting data, all while acknowledging that your needs and desires will change throughout life and that, therefore, your financial behavior will have to do the same.",happiness,0.303254
