## Basic Data collection for children's authors and books

In [1]:
from openai import OpenAI
import os
# Shared API client used by the helper functions below. No credentials are
# passed explicitly; presumably the key is picked up from the environment
# (e.g. OPENAI_API_KEY) — confirm before running.
client = OpenAI()

In [3]:
import json


def getGPTResponse(question, model="gpt-3.5-turbo-0125"):
  """Ask the chat model a question and return its reply as a JSON string.

  The request is sent through the module-level `client` with
  `response_format={"type": "json_object"}` and a system message telling the
  model to output JSON. The reply is returned unparsed; callers are expected
  to run `json.loads` on it themselves.

  Args:
    question: Text sent as the user message.
    model: Name of the chat model to use. Defaults to the model this
      notebook was originally written against.

  Returns:
    The `content` of the first choice's message in the response.
  """
  response = client.chat.completions.create(
    model=model,
    response_format={"type": "json_object"},
    messages=[
      {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
      {"role": "user", "content": question},
    ],
  )
  # Only the first choice is used; no `n` is passed in the request above.
  return response.choices[0].message.content





### Generate a list of the most popular children's book authors
#### Use GPT-3.5 Turbo's JSON response format to get the list

In [12]:
# The prompt is a fixed string with no placeholders, so a plain literal is used.
question = "Suggest exactly 100 most famous children's authors. The response should have the key as 'authors_list'"

authors_list_json = getGPTResponse(question)
authors_list = json.loads(authors_list_json)

# The key name is only requested in the prompt, so check the reply's shape
# here; the collection loop later iterates over authors_list['authors_list'].
if not isinstance(authors_list, dict) or not isinstance(authors_list.get('authors_list'), list):
    raise ValueError(
        "Expected a JSON object with a list under 'authors_list', got: "
        f"{authors_list_json[:200]!r}"
    )
print(authors_list['authors_list'])


### Collect famous books by each author
#### For each author, another API call is made to request the following data:
* 10 book titles
* Genre of each book
* Target age group of each book
* Detailed summary of each book


In [33]:
# Maps each author name to the parsed JSON object returned for that author.
combined_author_book_data = {}
# Authors whose reply could not be parsed, kept so they can be retried later.
failed_authors = []
for author in authors_list['authors_list']:
    print(f"Current Author is {author}")
    book_data_generation_query = f"Suggest 10 titles by {author} with their genre, target age group and a detailed summary for each book."
    try:
        author_book_data = json.loads(getGPTResponse(book_data_generation_query))
    except (json.JSONDecodeError, TypeError) as error:
        # One API call is made per author, so a single unparseable reply
        # (JSONDecodeError) or empty reply (json.loads(None) -> TypeError)
        # should not abort the whole run. Record it and move on.
        print(f"Skipping {author}: could not parse response ({error})")
        failed_authors.append(author)
        continue
    combined_author_book_data[author] = author_book_data
    

Current Author is Dr. Seuss
Current Author is J.K. Rowling
Current Author is Roald Dahl
Current Author is Enid Blyton
Current Author is Shel Silverstein
Current Author is Beatrix Potter
Current Author is C.S. Lewis
Current Author is Maurice Sendak
Current Author is Eric Carle
Current Author is Margaret Wise Brown
Current Author is Lewis Carroll
Current Author is Laura Ingalls Wilder
Current Author is Hans Christian Andersen
Current Author is E.B. White
Current Author is A.A. Milne
Current Author is Chris Van Allsburg
Current Author is Beverly Cleary
Current Author is R.L. Stine
Current Author is Kevin Henkes
Current Author is Judith Kerr
Current Author is Jeff Kinney
Current Author is Mercer Mayer
Current Author is Dav Pilkey
Current Author is Jon Klassen
Current Author is Norton Juster
Current Author is P.D. Eastman
Current Author is Jan Brett
Current Author is Lemony Snicket
Current Author is Munro Leaf
Current Author is Kate DiCamillo
Current Author is Dick King-Smith
Current Author

### Save Author/book metadata to JSON

In [36]:
# Write the collected author -> book metadata to disk as JSON.
# The encoding is given explicitly so the file does not depend on the
# platform's default locale encoding.
with open("author_book_data.json", "w", encoding="utf-8") as file:
    json.dump(combined_author_book_data, file)

In [37]:
# Reload the saved metadata from disk. The encoding is given explicitly so
# the read does not depend on the platform's default locale encoding.
with open("author_book_data.json", "r", encoding="utf-8") as file:
    data = json.load(file)

In [41]:
# Show the top-level keys of the reloaded data (one entry per author).
data.keys()

dict_keys(['Dr. Seuss', 'J.K. Rowling', 'Roald Dahl', 'Enid Blyton', 'Shel Silverstein', 'Beatrix Potter', 'C.S. Lewis', 'Maurice Sendak', 'Eric Carle', 'Margaret Wise Brown', 'Lewis Carroll', 'Laura Ingalls Wilder', 'Hans Christian Andersen', 'E.B. White', 'A.A. Milne', 'Chris Van Allsburg', 'Beverly Cleary', 'R.L. Stine', 'Kevin Henkes', 'Judith Kerr', 'Jeff Kinney', 'Mercer Mayer', 'Dav Pilkey', 'Jon Klassen', 'Norton Juster', 'P.D. Eastman', 'Jan Brett', 'Lemony Snicket', 'Munro Leaf', 'Kate DiCamillo', 'Dick King-Smith', 'Jerry Spinelli', 'Ezra Jack Keats', 'Brian Jacques', 'Francine Pascal', 'Taro Gomi', 'Mo Willems', 'Linda Sue Park', 'Ruth Stiles Gannett', "Madeleine L'Engle", 'Margaret Mahy', 'L.M. Montgomery', 'Diana Wynne Jones', 'Nancy Yi Fan', 'Albert Uderzo', 'Anna Sewell', 'Helen Beatrix Potter', 'Quentin Blake', 'Paul Jennings', 'Joan Aiken', 'Arthur Ransome', 'Lyman Frank Baum', 'E. Nesbit', 'William Steig', 'Sharon Creech', 'Jill Tomlinson', 'Ann M. Martin', 'Phyllis 