In [110]:
#Import dependencies
import os
import pandas as pd
import numpy as np

#Import API key
from config import api_key,secret_api

#API
import requests
import json
import rauth
from pprint import pprint

#Goodreads API library
import goodreads
from goodreads import client

#Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, func
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Float

import plotly.express as px

ModuleNotFoundError: No module named 'plotly'

# Original CSV data 

### Read CSV into dataframe

In [2]:
csv = "Resources/book_data.csv"
book_data_df = pd.read_csv(csv)

book_data_df.dtypes

Title               object
Series              object
Volume               int64
Author              object
Language            object
Fiction/Non         object
Genre               object
Pgs                  int64
Publication_date     int64
year_read            int64
dtype: object

In [3]:
#Remove decimals
book_data_df["Volume"] = book_data_df["Volume"].astype(str).str.replace(".0","",regex=False)
book_data_df["Publication_date"] = book_data_df["Publication_date"].astype(str).str.replace(".0","",regex=False)
book_data_df["year_read"] = book_data_df["year_read"].astype(str).str.replace(".0","",regex=False)

#Rename columns
rename_bd_df = book_data_df.rename(columns={"Title":"book_title",
                                            "Series":"series_name", 
                                            "Volume":"volume", 
                                            "Author":"author_name",
                                            "Language":"language",
                                            "Fiction/Non":"fiction_non",
                                            "Genre":"genre",
                                            "Pgs":"number_of_pages", 
                                            "Publication_date":"date_of_publication"})

In [51]:
#Select relevant columns
final_book_df = rename_bd_df[["book_title",
                                  "series_name",
                                  "author_name",
                                  "language",
                                  "fiction_non",
                                  "genre", 
                                  "number_of_pages",
                                  "year_read"]]

final_book_df

Unnamed: 0,book_title,series_name,author_name,language,fiction_non,genre,number_of_pages,year_read
0,21,0,Jeremy Iversen,English,Fiction,Young Adult,261,0
1,", Said the Shotgun to the Head",0,Saul Williams,English,Non-Fiction,Poetry,182,0
2,13 Little Blue Envelopes,0,Maureen Johnson,English,Fiction,Young Adult,320,0
3,A Bite To Remember,Argeneu Novel,Lynsay Sands,English,Fiction,Paranormal Romance,362,0
4,A Clash of Kings,A Song of Ice and Fire,George R. R. Martin,English,Fiction,Fantasy,824,2012
...,...,...,...,...,...,...,...,...
715,You Slay Me,Green Dragons novel,Katie MacAlister,English,Fiction,Paranormal Romance,334,0
716,You Suck,A Love Story,Christopher Moore,English,Fiction,Fantasy,328,0
717,"You, Maybe",0,Rachel Vail,English,Fiction,Young Adult,199,0
718,You're the One That I Want,Gossip Girl,Cecily von Ziegesar,English,Fiction,Young Adult,227,0


In [109]:
#Generate HTML table
html_books= final_book_df.to_html("Pages/Book_List/html_books_table.html")

# Request information from Goodreads API

In [6]:
#Create Goodreads client instance
gc = client.GoodreadsClient(api_key,secret_api)

### Author hometown API request

In [8]:
#Prepare list of authors for API and unique author table
authors = rename_bd_df["author_name"]
single_authors = authors.drop_duplicates(keep="first")
index_authors = single_authors.reset_index(drop=True)

In [9]:
#Empty list to hold API data 
hometowns = []
works_count = []

#Loop for iterative API requests
for author in range(len(index_authors)):
    #Create a print log of each author
    print(f"Retrieving hometown{author} | {index_authors[author]}")
    
    #Hometown and number of works request
    author = gc.find_author(index_authors[author])
    cities = author.hometown
    works = author.works_count
    
    #Save to list
    hometowns.append(cities)
    works_count.append(works)

Retrieving hometown0 | Jeremy Iversen
Retrieving hometown1 | Saul Williams
Retrieving hometown2 | Maureen Johnson
Retrieving hometown3 | Lynsay Sands
Retrieving hometown4 | George R. R. Martin
Retrieving hometown5 | Christopher Rice
Retrieving hometown6 | Christopher Moore
Retrieving hometown7 | Kim Harrison
Retrieving hometown8 | Nick Hornby
Retrieving hometown9 | Wendy Mass
Retrieving hometown10 | William Shakespeare
Retrieving hometown11 | Ernest Hemingway
Retrieving hometown12 | Cupcake Brown
Retrieving hometown13 | Brian Sloan
Retrieving hometown14 | Alan Bradley
Retrieving hometown15 | Robert Goolrick
Retrieving hometown16 | Terry Pratchett
Retrieving hometown17 | Tennessee Williams
Retrieving hometown18 | Sir Arthur Conan Doyle
Retrieving hometown19 | Diane Duane
Retrieving hometown20 | Jeff Vandermeer
Retrieving hometown21 | Guillermo Martínez
Retrieving hometown22 | Sherrilyn Kenyon
Retrieving hometown23 |  Amelie Nothomb
Retrieving hometown24 | Hailey Abbott
Retrieving hometo

Retrieving hometown209 |  Agatha Christie
Retrieving hometown210 | Mistress Nan
Retrieving hometown211 | Mary Janice Davidson
Retrieving hometown212 |  Supervert
Retrieving hometown213 |  Santiago Gamboa
Retrieving hometown214 | Thomas Perry 
Retrieving hometown215 | Noelle Stevenson
Retrieving hometown216 | Miranda July
Retrieving hometown217 | Roberto Bolaño
Retrieving hometown218 | Jane Austen
Retrieving hometown219 | Joe Hill
Retrieving hometown220 | John Steinbeck
Retrieving hometown221 | Helen Fielding
Retrieving hometown222 | Harry G. Frankfurt
Retrieving hometown223 | Jean Ferris
Retrieving hometown224 |  Sylvain Neuvel
Retrieving hometown225 |  Truman Capote
Retrieving hometown226 | Christopher Morley
Retrieving hometown227 | Jodi Lynn Anderson
Retrieving hometown228 | Natasha Friends
Retrieving hometown229 |  Miranda Austin
Retrieving hometown230 | J. R. Jiménez
Retrieving hometown231 | Gail Giles
Retrieving hometown232 | A. S. King
Retrieving hometown233 | Curtis Sittenfield

In [44]:
#Create author and hometown dataframe
hometown_df = pd.DataFrame({"author_name":index_authors, "hometown":hometowns, "books_written":works_count})
drop_hometown = hometown_df.drop_duplicates(subset="author_name",keep="first")
index_hometown = drop_hometown.reset_index(drop=True).copy()

index_hometown

Unnamed: 0,author_name,hometown,books_written
0,Jeremy Iversen,New York City,2
1,Saul Williams,"Newburgh, New York",14
2,Maureen Johnson,"Philadelphia, PA",55
3,Lynsay Sands,Leamington,102
4,George R. R. Martin,"Bayonne, New Jersey",988
...,...,...,...
353,Marguerite Duras,Gia-Dinh,202
354,Emily Brontë,"Thornton, Yorkshire, England",536
355,Brian K. Vaughn,"Milwaukee, Wisconsin",91
356,Brian K. Vaughan,"Cleveland, Ohio",597


In [11]:
#Look for missing values in the data 
index_hometown.isna().any()

author_name      False
hometown          True
books_written    False
dtype: bool

In [50]:
#Remove authors with missing hometown values 
final_authors_df = index_hometown.dropna(how="any")

final_authors_df

Unnamed: 0,author_name,hometown,books_written
0,Jeremy Iversen,New York City,2
1,Saul Williams,"Newburgh, New York",14
2,Maureen Johnson,"Philadelphia, PA",55
3,Lynsay Sands,Leamington,102
4,George R. R. Martin,"Bayonne, New Jersey",988
...,...,...,...
353,Marguerite Duras,Gia-Dinh,202
354,Emily Brontë,"Thornton, Yorkshire, England",536
355,Brian K. Vaughn,"Milwaukee, Wisconsin",91
356,Brian K. Vaughan,"Cleveland, Ohio",597


# SQL and Flask API

In [101]:
#Create engine and connection to database
connection = "postgres:postgres@localhost:5432/books"
engine = create_engine(f'postgresql://{connection}')

In [102]:
#Check for tables
engine.table_names()

['authors', 'books', 'hometowns']

In [103]:
#Load Author dataframe into database
index_authors.to_sql("authors", con=engine, if_exists="append", index=False)

In [104]:
#Load Hometown dataframe into database
final_authors_df.to_sql("hometowns", con=engine, if_exists="append", index=False)

In [105]:
#Load Book dataframe into database
final_book_df.to_sql("books", con=engine, if_exists="append", index=False)

In [106]:
#Confirm data has been added to the authors database
pd.read_sql_query('SELECT * FROM authors', con=engine).head()

Unnamed: 0,author_id,author_name
0,1,Jeremy Iversen
1,2,Saul Williams
2,3,Maureen Johnson
3,4,Lynsay Sands
4,5,George R. R. Martin


In [107]:
#Confirm data has been added to the hometowns database
pd.read_sql_query('SELECT * FROM hometowns', con=engine).head()

Unnamed: 0,author_id,author_name,hometown,books_written
0,1,Jeremy Iversen,New York City,2
1,2,Saul Williams,"Newburgh, New York",14
2,3,Maureen Johnson,"Philadelphia, PA",55
3,4,Lynsay Sands,Leamington,102
4,5,George R. R. Martin,"Bayonne, New Jersey",988


In [108]:
#Confirm data has been added to the books database
pd.read_sql_query('SELECT * FROM books', con=engine).head()

Unnamed: 0,book_id,book_title,series_name,language,author_name,fiction_non,genre,number_of_pages,year_read
0,1,21,0,English,Jeremy Iversen,Fiction,Young Adult,261,0
1,2,", Said the Shotgun to the Head",0,English,Saul Williams,Non-Fiction,Poetry,182,0
2,3,13 Little Blue Envelopes,0,English,Maureen Johnson,Fiction,Young Adult,320,0
3,4,A Bite To Remember,Argeneu Novel,English,Lynsay Sands,Fiction,Paranormal Romance,362,0
4,5,A Clash of Kings,A Song of Ice and Fire,English,George R. R. Martin,Fiction,Fantasy,824,2012
