In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('top2k_book_descriptions.csv')

In [3]:
df.columns

Index(['Unnamed: 0', 'id', 'book_id', 'best_book_id', 'work_id', 'books_count',
       'isbn', 'isbn13', 'authors', 'original_publication_year',
       'original_title', 'title', 'language_code', 'average_rating',
       'ratings_count', 'work_ratings_count', 'work_text_reviews_count',
       'ratings_1', 'ratings_2', 'ratings_3', 'ratings_4', 'ratings_5',
       'image_url', 'small_image_url', 'goodreads_book_id', 'tag_name',
       'description'],
      dtype='object')

In [20]:
df.language_code.value_counts()

language_code
eng      1390
en-US     455
en-GB      46
en-CA      16
spa         4
fre         4
nl          1
ara         1
ger         1
Name: count, dtype: int64

In [24]:
df = df[(df['language_code'] == 'eng') | (df['language_code'] == 'en-US')]

In [69]:
data  = df[['original_title','description','authors','image_url']]

In [26]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1845 entries, 0 to 1999
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   original_title  1808 non-null   object
 1   description     1834 non-null   object
 2   authors         1845 non-null   object
dtypes: object(3)
memory usage: 57.7+ KB


In [27]:
data = data.dropna(subset=['description']).reset_index()

In [71]:
data.iloc[0].image_url

'https://images.gr-assets.com/books/1447303603m/2767052.jpg'

## pre-process descriptions
- lowercasing
- remove punctuation
- remove special characters
- handle contractions
- remove stop words

In [39]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

def preprocess_text(text):
    # Lowercasing
    text = text.lower()
    # Removing punctuation
    text = text.translate(str.maketrans(' ', ' ', string.punctuation))
    # Tokenization
    tokens = word_tokenize(text)
    # Removing stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    tokens = [token for token in tokens if token != 'isbn']
    tokens = [token for token in tokens if token != 'isbn13']
    tokens = [token for token in tokens if token != 'isbn10']
    # Remove numerical digits
    tokens = [token for token in tokens if not token.isdigit()]
    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return ' '.join(tokens)


# Preprocess the description
data['processed_text'] = data['description'].apply(preprocess_text)


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/padmaprabagaran/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/padmaprabagaran/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/padmaprabagaran/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [40]:
data

Unnamed: 0,index,original_title,description,authors,processed_text
0,0,The Hunger Games,WINNING MEANS FAME AND FORTUNE.LOSING MEANS CE...,Suzanne Collins,winning mean fame fortunelosing mean certain d...
1,1,Harry Potter and the Philosopher's Stone,Harry Potter's life is miserable. His parents ...,"J.K. Rowling, Mary GrandPré",harry potter life miserable parent dead he stu...
2,2,Twilight,About three things I was absolutely positive.F...,Stephenie Meyer,three thing absolutely positivefirst edward va...
3,3,To Kill a Mockingbird,The unforgettable novel of a childhood in a sl...,Harper Lee,unforgettable novel childhood sleepy southern ...
4,4,The Great Gatsby,Alternate Cover Edition ISBN: 0743273567 (ISBN...,F. Scott Fitzgerald,alternate cover edition 9780743273565the great...
...,...,...,...,...,...
1829,1995,Simon vs. the Homo Sapiens Agenda,Sixteen-year-old and not-so-openly gay Simon S...,Becky Albertalli,sixteenyearold notsoopenly gay simon spier pre...
1830,1996,Perfection,Zoe is used to taking care of herself and has ...,R.L. Mathewson,zoe used taking care long ago accepted anythin...
1831,1997,Come Away with Me,An alternate cover edition can be found here.B...,Kristen Proby,alternate cover edition found herebeing confro...
1832,1998,"Ms. Marvel, Vol. 1: No Normal","Marvel Comics presents the new Ms. Marvel, the...","G. Willow Wilson, Adrian Alphona",marvel comic present new m marvel groundbreaki...


## Calculate similarity of books based on its description

In [46]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

doc1 = data.iloc[1833]
doc2 = data.iloc[4]
# corpus = [doc1.description,doc2.description]
corpus = [doc1.processed_text,doc2.processed_text]

vectorizer = TfidfVectorizer()

trsfm=vectorizer.fit_transform(corpus)
# print(trsfm.toarray())
pd.DataFrame(trsfm.toarray(),columns=vectorizer.get_feature_names_out(),index=['Document 1','Document 2'])

Unnamed: 0,1920sthe,9780743273565the,acclaimed,achievement,age,ally,alternate,america,amongst,army,...,trust,twentiethcentury,unexpected,upon,volume,war,wealthy,wellas,world,york
Document 1,0.0,0.0,0.0,0.0,0.0,0.103345,0.0,0.0,0.103345,0.103345,...,0.103345,0.0,0.103345,0.103345,0.103345,0.20669,0.0,0.103345,0.103345,0.147062
Document 2,0.116981,0.116981,0.116981,0.116981,0.116981,0.0,0.116981,0.116981,0.0,0.0,...,0.0,0.116981,0.0,0.0,0.0,0.0,0.116981,0.0,0.0,0.083233


In [45]:
cosine_similarity(trsfm[0:1], trsfm)

array([[1.        , 0.03596528]])

In [47]:
cosine_similarity(trsfm[0:1], trsfm)

array([[1.        , 0.09180299]])

## create a similarity matrix with the data

In [63]:
vectorizer = TfidfVectorizer()

# Fit and transform the corpus (processed_text column)
tfidf_matrix = vectorizer.fit_transform(data['processed_text'])

# Convert the TF-IDF matrix to DataFrame
tfidf_df = pd.DataFrame(tfidf_matrix.toarray(), columns=vectorizer.get_feature_names_out(), index=data.index)


In [64]:
# Compute the cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Function to get recommendations based on cosine similarity
def get_recommendations(book_index, cosine_sim=cosine_sim, df=df, top_n=5):
    # Get the index of the book
    idx = book_index if isinstance(book_index, int) else df[df['original_title'] == book_index].index[0]
    
    # Get pairwise similarity scores with other books
    sim_scores = list(enumerate(cosine_sim[idx]))
    
    # Sort books based on similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Get the top N similar books (excluding itself)
    sim_scores = sim_scores[1:top_n+1]
    
    # Get book indices
    book_indices = [i[0] for i in sim_scores]
    
    # Return recommended books
    return df.iloc[book_indices]

# Example: Get recommendations for a book by index or title
book_index_or_title = 'The Hunger Games'  # Change this to the book index or title you want recommendations for
recommendations = get_recommendations(book_index_or_title)
print(recommendations[['original_title', 'authors']])


           original_title          authors
19             Mockingjay  Suzanne Collins
16          Catching Fire  Suzanne Collins
1339  Horton Hears a Who!        Dr. Seuss
505               The One       Kiera Cass
715            The Choice  Nicholas Sparks


In [73]:
data['original_title'].sample(20)

766                                            Seabiscuit
118                                   The Handmaid's Tale
1348                                             One Shot
976                                       Shadow of Night
732                                Snow Falling on Cedars
1381    Sex, Drugs, and Cocoa Puffs: A Low Culture Man...
60                                  The Girl on the Train
935                                   Wolves of the Calla
1921                                         Bloody Bones
633                                  I've Got Your Number
1048                                     Handle with Care
1259                                         To the Nines
159                                    Great Expectations
469     I Am Malala: The Girl Who Stood Up for Educati...
1122                                        Summer Knight
377                                          The Namesake
1802                                             Faefever
1498          

In [68]:
recommendations = get_recommendations('The Bourne Supremacy')
print(recommendations[['original_title', 'authors']])

                original_title                      authors
1106                  Mr Maybe                   Jane Green
416        The Thirteenth Tale            Diane Setterfield
881               Storm Island                  Ken Follett
1745  A Bear Called Paddington  Michael Bond, Peggy Fortnum
968                 Steelheart            Brandon Sanderson


In [7]:
df = pd.read_csv('data/scraped_data.csv')
print(len(df))
df2 = pd.read_csv('data/scraped_data2.csv')
print(len(df2))


9900
4900


In [9]:
df3 = pd.concat([df,df2])
df3 = df3['URL']
df4 = df3.drop_duplicates()

In [13]:
print(df4)

0       https://www.goodreads.com/book/show/2657.To_Ki...
1       https://www.goodreads.com/book/show/72193.Harr...
2       https://www.goodreads.com/book/show/1885.Pride...
3       https://www.goodreads.com/book/show/48855.The_...
4       https://www.goodreads.com/book/show/170448.Ani...
                              ...                        
4894    https://www.goodreads.com/book/show/18158535-r...
4895    https://www.goodreads.com/book/show/21569527-t...
4897    https://www.goodreads.com/book/show/26085520-e...
4898    https://www.goodreads.com/book/show/56732449-t...
4899    https://www.goodreads.com/book/show/5598960-se...
Name: URL, Length: 12775, dtype: object


In [15]:
df4.to_csv('data/urls.csv',index=False)

In [16]:
df4

0       https://www.goodreads.com/book/show/2657.To_Ki...
1       https://www.goodreads.com/book/show/72193.Harr...
2       https://www.goodreads.com/book/show/1885.Pride...
3       https://www.goodreads.com/book/show/48855.The_...
4       https://www.goodreads.com/book/show/170448.Ani...
                              ...                        
4894    https://www.goodreads.com/book/show/18158535-r...
4895    https://www.goodreads.com/book/show/21569527-t...
4897    https://www.goodreads.com/book/show/26085520-e...
4898    https://www.goodreads.com/book/show/56732449-t...
4899    https://www.goodreads.com/book/show/5598960-se...
Name: URL, Length: 12775, dtype: object

In [32]:
# df = pd.read_csv('data/urls.csv')
df2 = pd.read_csv('data/scraped_data6.csv')
df2

Unnamed: 0.1,Unnamed: 0,URL
0,0,https://www.goodreads.com/book/show/11505797-b...
1,1,https://www.goodreads.com/book/show/10818853-f...
2,2,https://www.goodreads.com/book/show/62967897-h...
3,3,https://www.goodreads.com/book/show/11870085-t...
4,4,https://www.goodreads.com/book/show/13335037-d...
...,...,...
9895,9895,https://www.goodreads.com/book/show/13074628-p...
9896,9896,https://www.goodreads.com/book/show/833923.Eng...
9897,9897,https://www.goodreads.com/book/show/22515727-t...
9898,9898,https://www.goodreads.com/book/show/26312131-f...


In [33]:
df3 = pd.concat([df4,df2['URL']])
# df3 = df3['URL']
df4 = df3.drop_duplicates()
df4

0       https://www.goodreads.com/book/show/2657.To_Ki...
1       https://www.goodreads.com/book/show/72193.Harr...
2       https://www.goodreads.com/book/show/1885.Pride...
3       https://www.goodreads.com/book/show/48855.The_...
4       https://www.goodreads.com/book/show/170448.Ani...
                              ...                        
9895    https://www.goodreads.com/book/show/13074628-p...
9896    https://www.goodreads.com/book/show/833923.Eng...
9897    https://www.goodreads.com/book/show/22515727-t...
9898    https://www.goodreads.com/book/show/26312131-f...
9899    https://www.goodreads.com/book/show/248584.Jun...
Name: URL, Length: 29133, dtype: object

In [35]:
df4.to_csv('data/book_urls.csv',index=False)

In [36]:
pd.read_csv('data/book_urls.csv')

Unnamed: 0,URL
0,https://www.goodreads.com/book/show/2657.To_Ki...
1,https://www.goodreads.com/book/show/72193.Harr...
2,https://www.goodreads.com/book/show/1885.Pride...
3,https://www.goodreads.com/book/show/48855.The_...
4,https://www.goodreads.com/book/show/170448.Ani...
...,...
29128,https://www.goodreads.com/book/show/13074628-p...
29129,https://www.goodreads.com/book/show/833923.Eng...
29130,https://www.goodreads.com/book/show/22515727-t...
29131,https://www.goodreads.com/book/show/26312131-f...


In [3]:
import csv

In [2]:
df = pd.read_csv('data/data.csv', on_bad_lines='warn')
df2 = pd.read_csv('data/data2.csv',on_bad_lines='warn')
df3 = pd.read_csv('data/data3.csv',on_bad_lines='warn')

Skipping line 4929: expected 8 fields, saw 25
Skipping line 5581: expected 8 fields, saw 11



In [15]:
df.drop_duplicates()

Unnamed: 0,image_url,book_title,book_authors,book_rating,book_rating_count,book_review_count,book_desc,genres
0,https://images-na.ssl-images-amazon.com/images...,To Kill a Mockingbird,Harper Lee,4.26,5967122,114803,The unforgettable novel of a childhood in a sl...,"['Classics', 'Fiction', 'Historical Fiction', ..."
1,https://images-na.ssl-images-amazon.com/images...,Harry Potter and the Philosopher’s Stone,J.K. Rowling,4.47,9774007,157105,Harry Potter thinks he is an ordinary boy - un...,"['Fantasy', 'Fiction', 'Young Adult', 'Magic',..."
2,https://images-na.ssl-images-amazon.com/images...,Pride and Prejudice,Jane Austen,4.28,4156818,109653,"Since its immediate success in 1813, Pride and...","['Classics', 'Fiction', 'Romance', 'Historical..."
3,https://images-na.ssl-images-amazon.com/images...,The Diary of a Young Girl,Anne Frank,4.19,3643442,40341,Discovered in the attic in which she spent the...,"['Classics', 'Nonfiction', 'Biography', 'Memoi..."
4,https://images-na.ssl-images-amazon.com/images...,Animal Farm,George Orwell,3.98,3776903,92003,Librarian's note: There is an Alternate Cover ...,"['Classics', 'Fiction', 'Dystopia', 'Fantasy',..."
...,...,...,...,...,...,...,...,...
5579,https://images-na.ssl-images-amazon.com/images...,Kabul Beauty School: An American Woman Goes Be...,Deborah Rodriguez,3.66,25063,2943,"Soon after the fall of the Taliban, in 2001, D...","['Nonfiction', 'Memoir', 'Biography', 'Womens'..."
5580,https://images-na.ssl-images-amazon.com/images...,How to Be Good,Nick Hornby,3.22,56032,3212,"Die Ärztin Katie hat alles, was sie sich wünsc...","['Fiction', 'Contemporary', 'Humor', 'Novels',..."
5581,https://images-na.ssl-images-amazon.com/images...,Dreamland,Sarah Dessen,3.90,81362,4653,There is an alternate cover edition for this I...,"['Young Adult', 'Romance', 'Contemporary', 'Fi..."
5582,https://images-na.ssl-images-amazon.com/images...,So B. It,Sarah Weeks,4.11,34687,3078,Now a major motion picture starring Alfre Wood...,"['Young Adult', 'Realistic Fiction', 'Fiction'..."


In [9]:
df2

Unnamed: 0,image_url,book_title,book_authors,book_rating,book_rating_count,book_review_count,book_desc,genres
0,https://images-na.ssl-images-amazon.com/images...,To Kill a Mockingbird,Harper Lee,4.26,5967122,114803,The unforgettable novel of a childhood in a sl...,"['Classics', 'Fiction', 'Historical Fiction', ..."
1,https://images-na.ssl-images-amazon.com/images...,Harry Potter and the Philosopher’s Stone,J.K. Rowling,4.47,9774007,157105,Harry Potter thinks he is an ordinary boy - un...,"['Fantasy', 'Fiction', 'Young Adult', 'Magic',..."
2,https://images-na.ssl-images-amazon.com/images...,Pride and Prejudice,Jane Austen,4.28,4156818,109653,"Since its immediate success in 1813, Pride and...","['Classics', 'Fiction', 'Romance', 'Historical..."
3,https://images-na.ssl-images-amazon.com/images...,The Diary of a Young Girl,Anne Frank,4.19,3643442,40341,Discovered in the attic in which she spent the...,"['Classics', 'Nonfiction', 'Biography', 'Memoi..."
4,https://images-na.ssl-images-amazon.com/images...,Animal Farm,George Orwell,3.98,3776744,92000,Librarian's note: There is an Alternate Cover ...,"['Classics', 'Fiction', 'Dystopia', 'Fantasy',..."
...,...,...,...,...,...,...,...,...
5463,https://images-na.ssl-images-amazon.com/images...,The Witch's Dream,Victoria Danann,4.16,3804,308,Winner BEST PARANORMAL ROMANCE Series four yea...,"['Paranormal', 'Paranormal Romance', 'Romance'..."
5464,https://images-na.ssl-images-amazon.com/images...,Confessions of a Tween Superheroine,J.M. Guy,4.15,166,6,"I’M JAZMIN SKYLYN GILLETTE, AND I HAVE A CONFE...","['Young Adult', 'Fiction', 'Contemporary', 'Ne..."
5465,https://images-na.ssl-images-amazon.com/images...,The Book of Ivy,Amy Engel,4.14,23961,3120,"After a brutal nuclear war, the United States ...","['Dystopia', 'Young Adult', 'Romance', 'Fantas..."
5466,https://images-na.ssl-images-amazon.com/images...,A Beautiful Dark,Jocelyn Davies,3.89,17864,1327,"On the night of Skye’s seventeenth birthday, s...","['Angels', 'Young Adult', 'Paranormal', 'Fanta..."


In [6]:
data = pd.concat([df,df2])
data = pd.concat([data,df3])


In [7]:
d = data.drop_duplicates(subset=['book_title', 'book_authors', 'book_rating'],keep='first')

In [8]:
d

Unnamed: 0,image_url,book_title,book_authors,book_rating,book_rating_count,book_review_count,book_desc,genres
0,https://images-na.ssl-images-amazon.com/images...,To Kill a Mockingbird,Harper Lee,4.26,5967122,114803,The unforgettable novel of a childhood in a sl...,"['Classics', 'Fiction', 'Historical Fiction', ..."
1,https://images-na.ssl-images-amazon.com/images...,Harry Potter and the Philosopher’s Stone,J.K. Rowling,4.47,9774007,157105,Harry Potter thinks he is an ordinary boy - un...,"['Fantasy', 'Fiction', 'Young Adult', 'Magic',..."
2,https://images-na.ssl-images-amazon.com/images...,Pride and Prejudice,Jane Austen,4.28,4156818,109653,"Since its immediate success in 1813, Pride and...","['Classics', 'Fiction', 'Romance', 'Historical..."
3,https://images-na.ssl-images-amazon.com/images...,The Diary of a Young Girl,Anne Frank,4.19,3643442,40341,Discovered in the attic in which she spent the...,"['Classics', 'Nonfiction', 'Biography', 'Memoi..."
4,https://images-na.ssl-images-amazon.com/images...,Animal Farm,George Orwell,3.98,3776903,92003,Librarian's note: There is an Alternate Cover ...,"['Classics', 'Fiction', 'Dystopia', 'Fantasy',..."
...,...,...,...,...,...,...,...,...
16437,https://images-na.ssl-images-amazon.com/images...,Forever & Always,Jasinda Wilder,3.98,9517,951,"Ever,These letters are often all that get me t...","['Romance', 'New Adult', 'Contemporary', 'Cont..."
16438,https://images-na.ssl-images-amazon.com/images...,Stroke of Fear,Alla Kar,3.78,979,48,From the Amazon Best-Selling Author of the For...,"['New Adult', 'Romance', 'Contemporary', 'Youn..."
16439,https://images-na.ssl-images-amazon.com/images...,Unexpected,Lori Foster,3.86,3249,142,Eli Conners expected hired mercenary Ray Verek...,"['Romance', 'Contemporary Romance', 'Contempor..."
16440,https://images-na.ssl-images-amazon.com/images...,Eternity,Sonny Daise,4.25,16,1 review,Scarlett and Dante have had their share of obs...,[]


In [60]:
d[d['book_title'] == 'Throne of Glass']

Unnamed: 0,image_url,book_title,book_authors,book_rating,book_rating_count,book_review_count,book_desc,genres
908,https://images-na.ssl-images-amazon.com/images...,Throne of Glass,Sarah J. Maas,4.19,1284092,86589,Meet Celaena Sardothien.Beautiful. Deadly. Des...,"['Fantasy', 'Young Adult', 'Romance', 'Fiction..."


In [10]:
d = d.dropna()

In [17]:
df = d[d['image_url'].str.startswith('https')]

In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 26106 entries, 0 to 16441
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   image_url          26106 non-null  object 
 1   book_title         26106 non-null  object 
 2   book_authors       26106 non-null  object 
 3   book_rating        26106 non-null  float64
 4   book_rating_count  7689 non-null   float64
 5   book_review_count  25789 non-null  float64
 6   book_desc          26106 non-null  object 
 7   genres             26106 non-null  object 
dtypes: float64(3), object(5)
memory usage: 1.8+ MB


In [23]:
df['book_review_count'] = pd.to_numeric(df['book_review_count'], errors='coerce')
df['book_rating_count'] = df['book_rating_count'].str.replace(',', '').astype(float)
# df['book_rating_count'] = pd.to_numeric(df['book_rating_count'], errors='coerce')

In [28]:
df.dropna()

Unnamed: 0,image_url,book_title,book_authors,book_rating,book_rating_count,book_review_count,book_desc,genres
0,https://images-na.ssl-images-amazon.com/images...,To Kill a Mockingbird,Harper Lee,4.26,5967122.0,114803.0,The unforgettable novel of a childhood in a sl...,"['Classics', 'Fiction', 'Historical Fiction', ..."
1,https://images-na.ssl-images-amazon.com/images...,Harry Potter and the Philosopher’s Stone,J.K. Rowling,4.47,9774007.0,157105.0,Harry Potter thinks he is an ordinary boy - un...,"['Fantasy', 'Fiction', 'Young Adult', 'Magic',..."
2,https://images-na.ssl-images-amazon.com/images...,Pride and Prejudice,Jane Austen,4.28,4156818.0,109653.0,"Since its immediate success in 1813, Pride and...","['Classics', 'Fiction', 'Romance', 'Historical..."
3,https://images-na.ssl-images-amazon.com/images...,The Diary of a Young Girl,Anne Frank,4.19,3643442.0,40341.0,Discovered in the attic in which she spent the...,"['Classics', 'Nonfiction', 'Biography', 'Memoi..."
4,https://images-na.ssl-images-amazon.com/images...,Animal Farm,George Orwell,3.98,3776903.0,92003.0,Librarian's note: There is an Alternate Cover ...,"['Classics', 'Fiction', 'Dystopia', 'Fantasy',..."
...,...,...,...,...,...,...,...,...
16436,https://images-na.ssl-images-amazon.com/images...,Fire and Rain,Lauren Dane,3.91,1959.0,86.0,"Sequel to ""Ascension.""Rain is a waitress by da...","['Paranormal', 'Romance', 'Shapeshifters', 'Pa..."
16437,https://images-na.ssl-images-amazon.com/images...,Forever & Always,Jasinda Wilder,3.98,9517.0,951.0,"Ever,These letters are often all that get me t...","['Romance', 'New Adult', 'Contemporary', 'Cont..."
16438,https://images-na.ssl-images-amazon.com/images...,Stroke of Fear,Alla Kar,3.78,979.0,48.0,From the Amazon Best-Selling Author of the For...,"['New Adult', 'Romance', 'Contemporary', 'Youn..."
16439,https://images-na.ssl-images-amazon.com/images...,Unexpected,Lori Foster,3.86,3249.0,142.0,Eli Conners expected hired mercenary Ray Verek...,"['Romance', 'Contemporary Romance', 'Contempor..."


In [26]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 26106 entries, 0 to 16441
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   image_url          26106 non-null  object 
 1   book_title         26106 non-null  object 
 2   book_authors       26106 non-null  object 
 3   book_rating        26106 non-null  float64
 4   book_rating_count  26106 non-null  float64
 5   book_review_count  25789 non-null  float64
 6   book_desc          26106 non-null  object 
 7   genres             26106 non-null  object 
dtypes: float64(3), object(5)
memory usage: 1.8+ MB


In [27]:
df.to_csv('data/book_data.csv',index=False)