In [2]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [2]:
# Creates a new Chrome webdriver.
# The 'headless' argument is handed to Chrome so it runs without a visible window.
# `driver` is a module-level name: get_book_info() below reads it as a global.
options = webdriver.ChromeOptions()
options.add_argument('headless')
driver = webdriver.Chrome(options=options)

# Trying Selenium

In [3]:
def _label_xpath(label):
    """Build the XPath for the value <span> that follows a <span> whose text contains ``label``."""
    return f'//span[contains(text(), "{label}")]/following-sibling::span'


def get_book_info(book_id):
    """Scrape the metadata shown on a Google Books edition page.

    Loads ``https://www.google.com/books/edition/_/<book_id>?hl=en`` in the
    module-level ``driver``, tries to click the "More" expander, and then reads
    each known field from the page.

    Parameters
    ----------
    book_id
        Identifier interpolated into the edition URL.

    Returns
    -------
    dict
        Always contains ``'id'`` (the ``book_id`` passed in). Every other key
        (``'title'``, ``'sub_title'``, ``'isbn'``, ``'page_count'``, ...,
        ``'description'``) is present only when its element was located; the
        value is the element's text with surrounding whitespace stripped.

    Notes
    -----
    Field lookups are best-effort: a Selenium error while locating a field
    simply leaves that key out of the result. Only ``WebDriverException`` is
    caught, so ``KeyboardInterrupt`` (stopping a long scrape) and programming
    errors propagate instead of being silently discarded. Errors raised by
    ``driver.get`` itself are not caught.
    """
    # Local import: keeps the cell self-contained without touching the
    # notebook's import cell. selenium is already a dependency of this file.
    from selenium.common.exceptions import WebDriverException

    url = f'https://www.google.com/books/edition/_/{book_id}?hl=en'

    # Pulls data
    driver.get(url)

    # Clicks on the "More" button to expand the subject field. Pages without
    # the button make the wait time out (up to 10 s); that is not an error.
    try:
        more_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, 'span[data-t="kno-fv-exp"]'))
        )
        more_button.click()
    except WebDriverException:
        pass

    # (result key, locator strategy, selector), in the order the keys are
    # inserted into the result dict. That order determines the column order
    # of a DataFrame built from these dicts.
    field_locators = (
        ('title', By.CSS_SELECTOR, 'div.zNLTKd'),
        ('sub_title', By.CSS_SELECTOR, 'div.Cxh5Uc'),
        ('isbn', By.XPATH,
         '//span[contains(@class, "isbn") or contains(text(), "ISBN")]/following-sibling::span'),
        ('page_count', By.XPATH, _label_xpath('Page count')),
        ('publishing_date', By.XPATH, _label_xpath('Published')),
        ('form', By.XPATH, _label_xpath('Form')),
        ('publisher', By.XPATH, _label_xpath('Publisher')),
        ('language', By.XPATH, _label_xpath('Language')),
        ('author', By.XPATH, _label_xpath('Author')),
        ('illustrator', By.XPATH, _label_xpath('Illustrator')),
        ('originally_published', By.XPATH, _label_xpath('Originally published')),
        ('genres', By.XPATH, _label_xpath('Genres')),
        ('subject', By.XPATH, _label_xpath('Subject')),
        ('awards', By.XPATH, _label_xpath('Awards')),
        ('nominations', By.XPATH, _label_xpath('Nominations')),
        ('characters', By.XPATH, _label_xpath('Characters')),
        ('description', By.CSS_SELECTOR, 'div.Y0Qrof'),
    )

    # Extracts the book information
    book_info = {'id': book_id}
    for key, strategy, selector in field_locators:
        try:
            book_info[key] = driver.find_element(strategy, selector).text.strip()
        except WebDriverException:
            # Field not shown for this edition (or not readable): omit the key.
            continue

    # Returns dictionary
    return book_info


# Pulling books from the final_books_ids.csv list (isbn13 ids)

In [4]:
# Reading in the book_ids; the 'isbn13' column holds the ids that the
# scraping loop passes to get_book_info()
book_ids = pd.read_csv('../Data/final_books_ids.csv')
# `testing` is a second name bound to the same DataFrame object (no copy is made)
testing = book_ids
testing

Unnamed: 0,isbn13
0,h2Y-PgAACAAJ
1,FBXRzgEACAAJ
2,DAAAAAAACAAJ
3,LH5C9q83T6wC
4,62CEzQEACAAJ
...,...
2685,tcWMPAAACAAJ
2686,O2JfAAAAMAAJ
2687,y4kgSgAACAAJ
2688,TaQZzgEACAAJ


In [5]:
# Accumulator: one scraped-metadata dict per book id
book_info_list = []

# Scrape each id in the 'isbn13' column; results are appended one at a time,
# so whatever was gathered stays in the list if a later page raises
for book_id in testing['isbn13']:
    book_info = get_book_info(book_id)
    if book_info is None:
        continue
    book_info_list.append(book_info)

In [6]:
# Create a dataframe from the list of dictionaries: one row per book, one
# column per key; a key missing from a book's dict is left empty (NaN) in its row
testing_df = pd.DataFrame(book_info_list)

# Display the dataframe (last expression of the cell)
testing_df

Unnamed: 0,id,title,isbn,page_count,publishing_date,form,publisher,language,author,illustrator,originally_published,genres,subject,awards,nominations,characters,description,sub_title
0,h2Y-PgAACAAJ,Harry Potter and the Chamber of Secrets,"9780439554893, 0439554896",341,1999,Hardcover,Scholastic Press,English,J. K. Rowling,Mary GrandPré,"July 2, 1998","Novel, Fantasy Fiction, Bildungsroman, High fa...",,Nestlé Smarties Book Prize for 9 to 11 years,Guardian Children's Fiction Prize,"Harry Potter, Hermione Granger, Lord Voldemort...",When the Chamber of Secrets is opened again at...,
1,FBXRzgEACAAJ,Harry Potter and the Prisoner of Azkaban,"9780439655484, 043965548X",560,May 2004,Trade paperback,Arthur A. Levine Books,English,J. K. Rowling,Mary GrandPré,"July 8, 1999","Novel, Fantasy Fiction",,"Locus Award for Best Fantasy Novel, Nestlé Sma...","Carnegie Medal for Writing, Hugo Award for Bes...","Harry Potter, Hermione Granger, Sirius Black, ...",The third book in J.K. Rowling's bestselling s...,
2,DAAAAAAACAAJ,Harry Potter,"9780439682589, 0439682584",2000,October 2004,Trade paperback,Scholastic (Us),English,J. K. Rowling,,,,,,,,,"5 Years of Magic, Adventure, and Mystery at Ho..."
3,LH5C9q83T6wC,7,"9780976540601, 0976540606",152,2005,Paperback,Nimble Books LLC,English,W. Frederick Zimmerman,,December 2005,,"Children's stories, English, Fantasy fiction, ...",,,,Through the magic of print-on-demand technolog...,"Unauthorized Harry Potter Book Seven News ; ""H..."
4,62CEzQEACAAJ,Harry Potter and the Prisoner of Azkaban,,435,1999,,Arthur A. Levine Books,English,J. K. Rowling,Mary GrandPré,"July 8, 1999","Novel, Fantasy Fiction",,"Locus Award for Best Fantasy Novel, Nestlé Sma...","Carnegie Medal for Writing, Hugo Award for Bes...","Harry Potter, Hermione Granger, Sirius Black, ...","""During his third year at Hogwarts School for ...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2685,tcWMPAAACAAJ,Bella Y Oscura / Beautiful And Dark (Novela (B...,"9788432217289, 843221728X",204,2006,Paperback,Seix Barral,Spanish,Rosa Montero,,1993,"Allegory, Domestic Fiction","Domestic fiction, Children -- Fiction -- Spain...",,,,"La historia de la infancia, vivida y soñada, d...",
2686,O2JfAAAAMAAJ,La tía Julia y el escribidor,"9788432203237, 8432203238",447,1977,Hardcover,Seix Barral,Spanish,Mario Vargas Llosa,,1977,"Novel, Humorous Fiction","Aunts -- Fiction, Courtship -- Fiction, Love s...",,,,La tía Julia y el escribidor es una novela sem...,
2687,y4kgSgAACAAJ,O cavalo e o seu rapaz,"9789722330558, 9722330551",,,,Editorial Presença,Portuguese,"C. S. Lewis, Pauline Baynes",Pauline Baynes,"September 6, 1954","Novel, Fantasy, Fantasy Fiction, Children's li...","Children -- Juvenile fiction, Fantasy fiction,...",,,"Shasta, Aravis, Bree, Rabadash, Hwin, Aslan, S...",The Horse and His Boy is a novel for children ...,
2688,TaQZzgEACAAJ,As crónicas de Nárnia,"9789722329989, 9722329987",,,,Editorial Presença,Portuguese,"C. S. Lewis, A. Gonçalves Rodrigues, Ana Falcã...",Pauline Baynes,1956,,"Children -- Juvenile fiction, Fantasy fiction,...",,,,,


In [13]:
# Quit the driver: ends the WebDriver session created in the setup cell.
# `driver` cannot be used for scraping again until a new one is created.
driver.quit()

In [8]:
# Export testing_df to CSV so the scraped data is not lost if the variable is
# overwritten; index=False leaves the row index out of the file
testing_df.to_csv('All_books_isbn13.csv', index=False)

# Pulling books from isbn_book_ids.csv list

In [14]:
# Creates a new Chrome webdriver (the earlier session was ended with driver.quit()).
# Rebinding the module-level `driver` is what lets get_book_info() work again.
options = webdriver.ChromeOptions()
options.add_argument('headless')
driver = webdriver.Chrome(options=options)

In [15]:
# Reading in the book_ids; the 'isbn' column holds the ids that the
# scraping loop passes to get_book_info()
isbn_book_ids = pd.read_csv('../Data/isbn_book_ids.csv')
isbn_book_ids

Unnamed: 0,isbn
0,FBXRzgEACAAJ
1,yyxXzQEACAAJ
2,YjAnfhsAQ8wC
3,xb4wSmJLnhAC
4,Qq9nQgAACAAJ
...,...
794,98-cPQAACAAJ
795,RZbQPAAACAAJ
796,4tuEuAAACAAJ
797,dmqguAAACAAJ


In [16]:
# Accumulator: one scraped-metadata dict per book id
book_info_list_2 = []

# Scrape each id in the 'isbn' column; results are appended one at a time,
# so whatever was gathered stays in the list if a later page raises
for book_id in isbn_book_ids['isbn']:
    book_info = get_book_info(book_id)
    if book_info is None:
        continue
    book_info_list_2.append(book_info)

In [17]:
# Create a dataframe from the list of dictionaries: one row per book, one
# column per key; a key missing from a book's dict is left empty (NaN) in its row
isbn_book_df = pd.DataFrame(book_info_list_2)

# Display the dataframe (last expression of the cell)
isbn_book_df

Unnamed: 0,id,title,isbn,page_count,publishing_date,form,publisher,language,author,illustrator,originally_published,genres,subject,awards,nominations,characters,description,sub_title
0,FBXRzgEACAAJ,Harry Potter and the Prisoner of Azkaban,"9780439655484, 043965548X",560,May 2004,Trade paperback,Arthur A. Levine Books,English,J. K. Rowling,Mary GrandPré,"July 8, 1999","Novel, Fantasy Fiction",,"Locus Award for Best Fantasy Novel, Nestlé Sma...","Carnegie Medal for Writing, Hugo Award for Bes...","Harry Potter, Hermione Granger, Sirius Black, ...",The third book in J.K. Rowling's bestselling s...,
1,yyxXzQEACAAJ,The Hitchhiker's Guide to the Galaxy,"9781563892714, 1563892715",133,1979,,Ballantine Books,English,Douglas Adams,Steve Leialoha,,"Comics, Graphic novel","Humorous stories, Science fiction comic books,...",,,,,The Authorized Collection
2,YjAnfhsAQ8wC,A Short History of Nearly Everything,"9780767908184, 076790818X",544,"September 14, 2004",Paperback,Crown,English,Bill Bryson,,"February 4, 2003","Popular science, Non-fiction","History / World, Science / Essays, Science / N...",,Baillie Gifford Prize,,One of the world’s most beloved writers and Ne...,
3,xb4wSmJLnhAC,I'm a Stranger Here Myself,"9780767903820, 076790382X",304,"June 6, 2000",Paperback,Crown,English,Bill Bryson,,"November 5, 1998","Humor, Travel literature, Autobiography, Memoir","Biography & Autobiography / Personal Memoirs, ...",,,,A classic from the New York Times bestselling ...,Notes on Returning to America After 20 Years Away
4,Qq9nQgAACAAJ,Agile Web Development with Rails,"9780976694007, 097669400X",558,"February 16, 2006",Paperback,Pragmatic Bookshelf,English,"David Thomas, David Heinemeier Hansson, Leon B...",,2005,,"Internet programming, Ruby (Computer program l...",,,,"Rails is a full-stack, open source web framewo...",A Pragmatic Guide
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
794,98-cPQAACAAJ,La conspiración de los alquimistas,"9780307274113, 030727411X",484,2005,Paperback,Debolsillo,Spanish,Hania Czajkowski,,2001,"Fiction, Historical romance",Spanish language materials -- Fiction,,,,"La protagonista de esta historia es Ana, una c...",
795,RZbQPAAACAAJ,El Superzorro,"9789681907198, 9681907191",96,June 2000,Paperback,"Santillana USA Publishing Company, Incorporated",Spanish,"Roald Dahl, Horacio Elena",Horacio Elena,"June 1, 1970","Novel, Fiction, Children's literature","Farmers -- Juvenile fiction, Foxes -- Fiction,...",,,"Mr. Fox, Mrs. Fox, Walter Boggis, Nathan Bunce...","Había una vez un valle, y en el valle, tres gr...",
796,4tuEuAAACAAJ,Fantastic Mr. Fox (Cover to Cover),"9781855495098, 1855495090",90,1995,,Cover to Cover Cassettes,English,Roald Dahl,Quentin Blake,"June 1, 1970","Novel, Fiction, Children's literature",,,,"Mr. Fox, Mrs. Fox, Walter Boggis, Nathan Bunce...",Fantastic Mr Fox is a children's novel written...,
797,dmqguAAACAAJ,Narraciones Extraordinarias,"9789583006401, 9583006408",316,2003,Hardcover,Panamericana Editorial,Spanish,Edgar Allan Poe,,"February 15, 1845",,"Horror tales, American, Short stories, American",,,,,


In [18]:
# Export isbn_book_df to CSV so the scraped data is not lost if the variable is
# overwritten; index=False leaves the row index out of the file
isbn_book_df.to_csv('All_books_isbn.csv', index=False)

# Combining all books

In [33]:
# Reload the two scrape exports from disk.
# The file names are spelled exactly as the export cells write them
# ('All_books_...'): a lowercase 'all_books_...' only resolves on
# case-insensitive filesystems and raises FileNotFoundError elsewhere.
df1 = pd.read_csv('All_books_isbn.csv')
df2 = pd.read_csv('All_books_isbn13.csv')
df1

Unnamed: 0,id,title,isbn,page_count,publishing_date,form,publisher,language,author,illustrator,originally_published,genres,subject,awards,nominations,characters,description,sub_title
0,FBXRzgEACAAJ,Harry Potter and the Prisoner of Azkaban,"9780439655484, 043965548X",560,May 2004,Trade paperback,Arthur A. Levine Books,English,J. K. Rowling,Mary GrandPré,"July 8, 1999","Novel, Fantasy Fiction",,"Locus Award for Best Fantasy Novel, Nestlé Sma...","Carnegie Medal for Writing, Hugo Award for Bes...","Harry Potter, Hermione Granger, Sirius Black, ...",The third book in J.K. Rowling's bestselling s...,
1,yyxXzQEACAAJ,The Hitchhiker's Guide to the Galaxy,"9781563892714, 1563892715",133,1979,,Ballantine Books,English,Douglas Adams,Steve Leialoha,,"Comics, Graphic novel","Humorous stories, Science fiction comic books,...",,,,,The Authorized Collection
2,YjAnfhsAQ8wC,A Short History of Nearly Everything,"9780767908184, 076790818X",544,"September 14, 2004",Paperback,Crown,English,Bill Bryson,,"February 4, 2003","Popular science, Non-fiction","History / World, Science / Essays, Science / N...",,Baillie Gifford Prize,,One of the world’s most beloved writers and Ne...,
3,xb4wSmJLnhAC,I'm a Stranger Here Myself,"9780767903820, 076790382X",304,"June 6, 2000",Paperback,Crown,English,Bill Bryson,,"November 5, 1998","Humor, Travel literature, Autobiography, Memoir","Biography & Autobiography / Personal Memoirs, ...",,,,A classic from the New York Times bestselling ...,Notes on Returning to America After 20 Years Away
4,Qq9nQgAACAAJ,Agile Web Development with Rails,"9780976694007, 097669400X",558,"February 16, 2006",Paperback,Pragmatic Bookshelf,English,"David Thomas, David Heinemeier Hansson, Leon B...",,2005,,"Internet programming, Ruby (Computer program l...",,,,"Rails is a full-stack, open source web framewo...",A Pragmatic Guide
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
794,98-cPQAACAAJ,La conspiración de los alquimistas,"9780307274113, 030727411X",484,2005,Paperback,Debolsillo,Spanish,Hania Czajkowski,,2001,"Fiction, Historical romance",Spanish language materials -- Fiction,,,,"La protagonista de esta historia es Ana, una c...",
795,RZbQPAAACAAJ,El Superzorro,"9789681907198, 9681907191",96,June 2000,Paperback,"Santillana USA Publishing Company, Incorporated",Spanish,"Roald Dahl, Horacio Elena",Horacio Elena,"June 1, 1970","Novel, Fiction, Children's literature","Farmers -- Juvenile fiction, Foxes -- Fiction,...",,,"Mr. Fox, Mrs. Fox, Walter Boggis, Nathan Bunce...","Había una vez un valle, y en el valle, tres gr...",
796,4tuEuAAACAAJ,Fantastic Mr. Fox (Cover to Cover),"9781855495098, 1855495090",90,1995,,Cover to Cover Cassettes,English,Roald Dahl,Quentin Blake,"June 1, 1970","Novel, Fiction, Children's literature",,,,"Mr. Fox, Mrs. Fox, Walter Boggis, Nathan Bunce...",Fantastic Mr Fox is a children's novel written...,
797,dmqguAAACAAJ,Narraciones Extraordinarias,"9789583006401, 9583006408",316,2003,Hardcover,Panamericana Editorial,Spanish,Edgar Allan Poe,,"February 15, 1845",,"Horror tales, American, Short stories, American",,,,,


In [34]:
# Display the isbn13 scrape results that were reloaded from CSV
df2

Unnamed: 0,id,title,isbn,page_count,publishing_date,form,publisher,language,author,illustrator,originally_published,genres,subject,awards,nominations,characters,description,sub_title
0,h2Y-PgAACAAJ,Harry Potter and the Chamber of Secrets,"9780439554893, 0439554896",341,1999,Hardcover,Scholastic Press,English,J. K. Rowling,Mary GrandPré,"July 2, 1998","Novel, Fantasy Fiction, Bildungsroman, High fa...",,Nestlé Smarties Book Prize for 9 to 11 years,Guardian Children's Fiction Prize,"Harry Potter, Hermione Granger, Lord Voldemort...",When the Chamber of Secrets is opened again at...,
1,FBXRzgEACAAJ,Harry Potter and the Prisoner of Azkaban,"9780439655484, 043965548X",560,May 2004,Trade paperback,Arthur A. Levine Books,English,J. K. Rowling,Mary GrandPré,"July 8, 1999","Novel, Fantasy Fiction",,"Locus Award for Best Fantasy Novel, Nestlé Sma...","Carnegie Medal for Writing, Hugo Award for Bes...","Harry Potter, Hermione Granger, Sirius Black, ...",The third book in J.K. Rowling's bestselling s...,
2,DAAAAAAACAAJ,Harry Potter,"9780439682589, 0439682584",2000,October 2004,Trade paperback,Scholastic (Us),English,J. K. Rowling,,,,,,,,,"5 Years of Magic, Adventure, and Mystery at Ho..."
3,LH5C9q83T6wC,7,"9780976540601, 0976540606",152,2005,Paperback,Nimble Books LLC,English,W. Frederick Zimmerman,,December 2005,,"Children's stories, English, Fantasy fiction, ...",,,,Through the magic of print-on-demand technolog...,"Unauthorized Harry Potter Book Seven News ; ""H..."
4,62CEzQEACAAJ,Harry Potter and the Prisoner of Azkaban,,435,1999,,Arthur A. Levine Books,English,J. K. Rowling,Mary GrandPré,"July 8, 1999","Novel, Fantasy Fiction",,"Locus Award for Best Fantasy Novel, Nestlé Sma...","Carnegie Medal for Writing, Hugo Award for Bes...","Harry Potter, Hermione Granger, Sirius Black, ...","""During his third year at Hogwarts School for ...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2685,tcWMPAAACAAJ,Bella Y Oscura / Beautiful And Dark (Novela (B...,"9788432217289, 843221728X",204,2006,Paperback,Seix Barral,Spanish,Rosa Montero,,1993,"Allegory, Domestic Fiction","Domestic fiction, Children -- Fiction -- Spain...",,,,"La historia de la infancia, vivida y soñada, d...",
2686,O2JfAAAAMAAJ,La tía Julia y el escribidor,"9788432203237, 8432203238",447,1977,Hardcover,Seix Barral,Spanish,Mario Vargas Llosa,,1977,"Novel, Humorous Fiction","Aunts -- Fiction, Courtship -- Fiction, Love s...",,,,La tía Julia y el escribidor es una novela sem...,
2687,y4kgSgAACAAJ,O cavalo e o seu rapaz,"9789722330558, 9722330551",,,,Editorial Presença,Portuguese,"C. S. Lewis, Pauline Baynes",Pauline Baynes,"September 6, 1954","Novel, Fantasy, Fantasy Fiction, Children's li...","Children -- Juvenile fiction, Fantasy fiction,...",,,"Shasta, Aravis, Bree, Rabadash, Hwin, Aslan, S...",The Horse and His Boy is a novel for children ...,
2688,TaQZzgEACAAJ,As crónicas de Nárnia,"9789722329989, 9722329987",,,,Editorial Presença,Portuguese,"C. S. Lewis, A. Gonçalves Rodrigues, Ana Falcã...",Pauline Baynes,1956,,"Children -- Juvenile fiction, Fantasy fiction,...",,,,,


In [37]:
# Right join on 'id': every df2 row is kept and df1's values are attached where
# the id matches. All other column names exist in both frames, so they come
# back suffixed _x (from df1) and _y (from df2).
# Note: merged_df is reassigned by the concat cell that follows.
merged_df = df1.merge(df2, how='right', on='id')
merged_df

Unnamed: 0,id,title_x,isbn_x,page_count_x,publishing_date_x,form_x,publisher_x,language_x,author_x,illustrator_x,...,author_y,illustrator_y,originally_published_y,genres_y,subject_y,awards_y,nominations_y,characters_y,description_y,sub_title_y
0,h2Y-PgAACAAJ,,,,,,,,,,...,J. K. Rowling,Mary GrandPré,"July 2, 1998","Novel, Fantasy Fiction, Bildungsroman, High fa...",,Nestlé Smarties Book Prize for 9 to 11 years,Guardian Children's Fiction Prize,"Harry Potter, Hermione Granger, Lord Voldemort...",When the Chamber of Secrets is opened again at...,
1,FBXRzgEACAAJ,Harry Potter and the Prisoner of Azkaban,"9780439655484, 043965548X",560,May 2004,Trade paperback,Arthur A. Levine Books,English,J. K. Rowling,Mary GrandPré,...,J. K. Rowling,Mary GrandPré,"July 8, 1999","Novel, Fantasy Fiction",,"Locus Award for Best Fantasy Novel, Nestlé Sma...","Carnegie Medal for Writing, Hugo Award for Bes...","Harry Potter, Hermione Granger, Sirius Black, ...",The third book in J.K. Rowling's bestselling s...,
2,DAAAAAAACAAJ,,,,,,,,,,...,J. K. Rowling,,,,,,,,,"5 Years of Magic, Adventure, and Mystery at Ho..."
3,LH5C9q83T6wC,,,,,,,,,,...,W. Frederick Zimmerman,,December 2005,,"Children's stories, English, Fantasy fiction, ...",,,,Through the magic of print-on-demand technolog...,"Unauthorized Harry Potter Book Seven News ; ""H..."
4,62CEzQEACAAJ,,,,,,,,,,...,J. K. Rowling,Mary GrandPré,"July 8, 1999","Novel, Fantasy Fiction",,"Locus Award for Best Fantasy Novel, Nestlé Sma...","Carnegie Medal for Writing, Hugo Award for Bes...","Harry Potter, Hermione Granger, Sirius Black, ...","""During his third year at Hogwarts School for ...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2685,tcWMPAAACAAJ,,,,,,,,,,...,Rosa Montero,,1993,"Allegory, Domestic Fiction","Domestic fiction, Children -- Fiction -- Spain...",,,,"La historia de la infancia, vivida y soñada, d...",
2686,O2JfAAAAMAAJ,,,,,,,,,,...,Mario Vargas Llosa,,1977,"Novel, Humorous Fiction","Aunts -- Fiction, Courtship -- Fiction, Love s...",,,,La tía Julia y el escribidor es una novela sem...,
2687,y4kgSgAACAAJ,,,,,,,,,,...,"C. S. Lewis, Pauline Baynes",Pauline Baynes,"September 6, 1954","Novel, Fantasy, Fantasy Fiction, Children's li...","Children -- Juvenile fiction, Fantasy fiction,...",,,"Shasta, Aravis, Bree, Rabadash, Hwin, Aslan, S...",The Horse and His Boy is a novel for children ...,
2688,TaQZzgEACAAJ,,,,,,,,,,...,"C. S. Lewis, A. Gonçalves Rodrigues, Ana Falcã...",Pauline Baynes,1956,,"Children -- Juvenile fiction, Fantasy fiction,...",,,,,


In [46]:
# Stack both scrapes (df1 rows first, then df2), keep only the last occurrence
# of each id -- so df2's copy is kept when an id is in both frames -- and
# renumber the rows from 0.
merged_df = (
    pd.concat([df1, df2])
    .drop_duplicates(subset=['id'], keep='last')
    .reset_index(drop=True)
)
merged_df

Unnamed: 0,id,title,isbn,page_count,publishing_date,form,publisher,language,author,illustrator,originally_published,genres,subject,awards,nominations,characters,description,sub_title
0,ilINAQAAMAAJ,Kahlil Gibran,"9781566562492, 156656249X",456,1998,Paperback,Interlink Books,English,"Jean Gibran, Kahlil Gibran",,,,"Authors, Arab -- Biography -- Lebanon -- Unite...",,,,Drawing on masses of new and rediscovered mate...,His Life and World
1,cOpWAQAACAAJ,LogoLounge,,191,September 2004,Paperback,Rockport Publishers,English,"Bill Gardner, Catharine Fishel",,2003,,"Design / General, Design / Graphic Arts / Comm...",,,,Logos -- for everything from food and fashion ...,"2,000 International Identities by Leading Desi..."
2,JJWMEAAAQBAJ,The Westing Game,"9780142401200, 014240120X",192,2004,Paperback,Penguin Young Readers Group,English,Ellen Raskin,,"May 1, 1978","Novel, Mystery, Humor, Young adult fiction, Ch...",Juvenile Fiction / Family / Marriage & Divorce...,John Newbery Medal,"John Newbery Medal, National Book Award for Ch...","Samuel W. Westing, Berthe Erica Crow, Catherin...","A Newbery Medal Winner\n\n""A supersharp myster...",
3,cGxOHgpsgX8C,Tyler's Ultimate,"9781400052387, 1400052386",254,2006,Hardcover,Clarkson Potter/Publishers,English,Tyler Florence,,2006,,"Cooking, Cooking / Courses & Dishes / General,...",,,,As his millions of fans know from watching him...,Brilliant Simple Food to Make Any Time
4,nF2PcyCJb0UC,Eat this Book,"9781400052370, 1400052378",287,2005,Hardcover,Clarkson Potter/Publishers,English,Tyler Florence,,2005,,"Cookery, Cooking, Cooking / Methods / General,...",,,,While traveling the globe as the host of Food ...,Cooking with Global Fresh Flavors
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3282,tcWMPAAACAAJ,Bella Y Oscura / Beautiful And Dark (Novela (B...,"9788432217289, 843221728X",204,2006,Paperback,Seix Barral,Spanish,Rosa Montero,,1993,"Allegory, Domestic Fiction","Domestic fiction, Children -- Fiction -- Spain...",,,,"La historia de la infancia, vivida y soñada, d...",
3283,O2JfAAAAMAAJ,La tía Julia y el escribidor,"9788432203237, 8432203238",447,1977,Hardcover,Seix Barral,Spanish,Mario Vargas Llosa,,1977,"Novel, Humorous Fiction","Aunts -- Fiction, Courtship -- Fiction, Love s...",,,,La tía Julia y el escribidor es una novela sem...,
3284,y4kgSgAACAAJ,O cavalo e o seu rapaz,"9789722330558, 9722330551",,,,Editorial Presença,Portuguese,"C. S. Lewis, Pauline Baynes",Pauline Baynes,"September 6, 1954","Novel, Fantasy, Fantasy Fiction, Children's li...","Children -- Juvenile fiction, Fantasy fiction,...",,,"Shasta, Aravis, Bree, Rabadash, Hwin, Aslan, S...",The Horse and His Boy is a novel for children ...,
3285,TaQZzgEACAAJ,As crónicas de Nárnia,"9789722329989, 9722329987",,,,Editorial Presença,Portuguese,"C. S. Lewis, A. Gonçalves Rodrigues, Ana Falcã...",Pauline Baynes,1956,,"Children -- Juvenile fiction, Fantasy fiction,...",,,,,


In [None]:
# Exporting merged_df (the combined, de-duplicated books table) so i dont
# accidentally erase the data; index=False leaves the row index out of the file
merged_df.to_csv('books_data_scraped.csv', index=False)

## Merging books with ratings

In [3]:
# Load the ratings table and the books table that are joined below.
# NOTE(review): no visible cell writes "all_books.csv" -- the combined books
# frame is exported as 'books_data_scraped.csv'. Confirm which file is intended.
ratings = pd.read_csv("../Data/all_books_ratings.csv")
books = pd.read_csv("all_books.csv")

In [6]:
# Display the ratings table (columns: book_id, average_rating, ratings_count)
ratings

Unnamed: 0,book_id,average_rating,ratings_count
0,FBXRzgEACAAJ,,
1,yyxXzQEACAAJ,,
2,YjAnfhsAQ8wC,4.0,2.0
3,xb4wSmJLnhAC,,
4,Qq9nQgAACAAJ,4.0,46.0
...,...,...,...
3282,tcWMPAAACAAJ,,
3283,O2JfAAAAMAAJ,,
3284,y4kgSgAACAAJ,,
3285,TaQZzgEACAAJ,,


In [5]:
# Display the books table (keyed by the 'id' column)
books

Unnamed: 0,id,title,isbn,page_count,publishing_date,form,publisher,language,author,illustrator,originally_published,genres,subject,awards,nominations,characters,description,sub_title
0,ilINAQAAMAAJ,Kahlil Gibran,"9781566562492, 156656249X",456,1998,Paperback,Interlink Books,English,"Jean Gibran, Kahlil Gibran",,,,"Authors, Arab -- Biography -- Lebanon -- Unite...",,,,Drawing on masses of new and rediscovered mate...,His Life and World
1,cOpWAQAACAAJ,LogoLounge,,191,September 2004,Paperback,Rockport Publishers,English,"Bill Gardner, Catharine Fishel",,2003,,"Design / General, Design / Graphic Arts / Comm...",,,,Logos -- for everything from food and fashion ...,"2,000 International Identities by Leading Desi..."
2,JJWMEAAAQBAJ,The Westing Game,"9780142401200, 014240120X",192,2004,Paperback,Penguin Young Readers Group,English,Ellen Raskin,,"May 1, 1978","Novel, Mystery, Humor, Young adult fiction, Ch...",Juvenile Fiction / Family / Marriage & Divorce...,John Newbery Medal,"John Newbery Medal, National Book Award for Ch...","Samuel W. Westing, Berthe Erica Crow, Catherin...","A Newbery Medal Winner\n\n""A supersharp myster...",
3,cGxOHgpsgX8C,Tyler's Ultimate,"9781400052387, 1400052386",254,2006,Hardcover,Clarkson Potter/Publishers,English,Tyler Florence,,2006,,"Cooking, Cooking / Courses & Dishes / General,...",,,,As his millions of fans know from watching him...,Brilliant Simple Food to Make Any Time
4,nF2PcyCJb0UC,Eat this Book,"9781400052370, 1400052378",287,2005,Hardcover,Clarkson Potter/Publishers,English,Tyler Florence,,2005,,"Cookery, Cooking, Cooking / Methods / General,...",,,,While traveling the globe as the host of Food ...,Cooking with Global Fresh Flavors
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3282,tcWMPAAACAAJ,Bella Y Oscura / Beautiful And Dark (Novela (B...,"9788432217289, 843221728X",204,2006,Paperback,Seix Barral,Spanish,Rosa Montero,,1993,"Allegory, Domestic Fiction","Domestic fiction, Children -- Fiction -- Spain...",,,,"La historia de la infancia, vivida y soñada, d...",
3283,O2JfAAAAMAAJ,La tía Julia y el escribidor,"9788432203237, 8432203238",447,1977,Hardcover,Seix Barral,Spanish,Mario Vargas Llosa,,1977,"Novel, Humorous Fiction","Aunts -- Fiction, Courtship -- Fiction, Love s...",,,,La tía Julia y el escribidor es una novela sem...,
3284,y4kgSgAACAAJ,O cavalo e o seu rapaz,"9789722330558, 9722330551",,,,Editorial Presença,Portuguese,"C. S. Lewis, Pauline Baynes",Pauline Baynes,"September 6, 1954","Novel, Fantasy, Fantasy Fiction, Children's li...","Children -- Juvenile fiction, Fantasy fiction,...",,,"Shasta, Aravis, Bree, Rabadash, Hwin, Aslan, S...",The Horse and His Boy is a novel for children ...,
3285,TaQZzgEACAAJ,As crónicas de Nárnia,"9789722329989, 9722329987",,,,Editorial Presença,Portuguese,"C. S. Lewis, A. Gonçalves Rodrigues, Ana Falcã...",Pauline Baynes,1956,,"Children -- Juvenile fiction, Fantasy fiction,...",,,,,


In [8]:
# Left join: every row of `books` is kept and the rating columns are attached
# where books.id equals ratings.book_id; books without a match get NaN ratings.
# Both key columns ('id' and 'book_id') remain in the result.
merged_df = books.merge(ratings, how="left", left_on="id", right_on="book_id")
merged_df

Unnamed: 0,id,title,isbn,page_count,publishing_date,form,publisher,language,author,illustrator,...,genres,subject,awards,nominations,characters,description,sub_title,book_id,average_rating,ratings_count
0,ilINAQAAMAAJ,Kahlil Gibran,"9781566562492, 156656249X",456,1998,Paperback,Interlink Books,English,"Jean Gibran, Kahlil Gibran",,...,,"Authors, Arab -- Biography -- Lebanon -- Unite...",,,,Drawing on masses of new and rediscovered mate...,His Life and World,ilINAQAAMAAJ,,
1,cOpWAQAACAAJ,LogoLounge,,191,September 2004,Paperback,Rockport Publishers,English,"Bill Gardner, Catharine Fishel",,...,,"Design / General, Design / Graphic Arts / Comm...",,,,Logos -- for everything from food and fashion ...,"2,000 International Identities by Leading Desi...",cOpWAQAACAAJ,,
2,JJWMEAAAQBAJ,The Westing Game,"9780142401200, 014240120X",192,2004,Paperback,Penguin Young Readers Group,English,Ellen Raskin,,...,"Novel, Mystery, Humor, Young adult fiction, Ch...",Juvenile Fiction / Family / Marriage & Divorce...,John Newbery Medal,"John Newbery Medal, National Book Award for Ch...","Samuel W. Westing, Berthe Erica Crow, Catherin...","A Newbery Medal Winner\n\n""A supersharp myster...",,JJWMEAAAQBAJ,4.0,1.0
3,cGxOHgpsgX8C,Tyler's Ultimate,"9781400052387, 1400052386",254,2006,Hardcover,Clarkson Potter/Publishers,English,Tyler Florence,,...,,"Cooking, Cooking / Courses & Dishes / General,...",,,,As his millions of fans know from watching him...,Brilliant Simple Food to Make Any Time,cGxOHgpsgX8C,,
4,nF2PcyCJb0UC,Eat this Book,"9781400052370, 1400052378",287,2005,Hardcover,Clarkson Potter/Publishers,English,Tyler Florence,,...,,"Cookery, Cooking, Cooking / Methods / General,...",,,,While traveling the globe as the host of Food ...,Cooking with Global Fresh Flavors,nF2PcyCJb0UC,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3282,tcWMPAAACAAJ,Bella Y Oscura / Beautiful And Dark (Novela (B...,"9788432217289, 843221728X",204,2006,Paperback,Seix Barral,Spanish,Rosa Montero,,...,"Allegory, Domestic Fiction","Domestic fiction, Children -- Fiction -- Spain...",,,,"La historia de la infancia, vivida y soñada, d...",,tcWMPAAACAAJ,,
3283,O2JfAAAAMAAJ,La tía Julia y el escribidor,"9788432203237, 8432203238",447,1977,Hardcover,Seix Barral,Spanish,Mario Vargas Llosa,,...,"Novel, Humorous Fiction","Aunts -- Fiction, Courtship -- Fiction, Love s...",,,,La tía Julia y el escribidor es una novela sem...,,O2JfAAAAMAAJ,,
3284,y4kgSgAACAAJ,O cavalo e o seu rapaz,"9789722330558, 9722330551",,,,Editorial Presença,Portuguese,"C. S. Lewis, Pauline Baynes",Pauline Baynes,...,"Novel, Fantasy, Fantasy Fiction, Children's li...","Children -- Juvenile fiction, Fantasy fiction,...",,,"Shasta, Aravis, Bree, Rabadash, Hwin, Aslan, S...",The Horse and His Boy is a novel for children ...,,y4kgSgAACAAJ,,
3285,TaQZzgEACAAJ,As crónicas de Nárnia,"9789722329989, 9722329987",,,,Editorial Presença,Portuguese,"C. S. Lewis, A. Gonçalves Rodrigues, Ana Falcã...",Pauline Baynes,...,,"Children -- Juvenile fiction, Fantasy fiction,...",,,,,,TaQZzgEACAAJ,,


In [9]:
# List the merged frame's column labels (includes both 'id' and 'book_id')
merged_df.columns

Index(['id', 'title', 'isbn', 'page_count', 'publishing_date', 'form',
       'publisher', 'language', 'author', 'illustrator',
       'originally_published', 'genres', 'subject', 'awards', 'nominations',
       'characters', 'description', 'sub_title', 'book_id', 'average_rating',
       'ratings_count'],
      dtype='object')

In [10]:
# Export merged_df (books joined with ratings) so i dont accidentally erase the
# data; index=False leaves the row index out of the file
merged_df.to_csv('final_books.csv', index=False)