In [1]:
import pandas as pd
import os
import google.generativeai as genai
import requests
import ast
import re
from time import sleep

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Gemini client setup: the API key is taken from the GOOGLE_API_KEY
# environment variable so that it never appears in the notebook itself.
GEMINI_MODEL_NAME = 'gemini-1.5-pro-latest'

genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
model = genai.GenerativeModel(GEMINI_MODEL_NAME)

In [3]:
def read_txt_from_url(url, timeout=30):
  """Reads the content of a text file from a URL and returns it as a string.

  Args:
    url: The URL of the text file.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout, requests waits indefinitely, so a single stalled download
      would hang the caller.

  Returns:
    The content of the text file as a string, or None if an error occurs
    (connection failure, timeout, or a non-2xx HTTP status).
  """
  try:
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Raise an error for bad status codes

    return response.text
  except requests.exceptions.RequestException as e:
    # Timeouts are RequestException subclasses, so they are reported here too.
    print(f"Error fetching URL: {e}")
    return None

In [4]:
# Load the Gutenberg book table; later cells read and update `df`.
books_parquet = "../streamlit/data/gutenberg_books.parquet"
df = pd.read_parquet(books_parquet)

# Preview the first five rows (head() defaults to 5).
df.head()

Unnamed: 0,id,title,author,author_birth,author_death,txt_link,emotions,emotion_scores,summaries
0,1,The Declaration of Independence of the United ...,"Jefferson, Thomas",1743.0,1826.0,https://www.gutenberg.org/ebooks/1.txt.utf-8,"['anger', 'anger', 'anger', 'anger', 'anger', ...","[-0.8, -0.7, -0.9, -0.8, -0.9, -0.7, -0.8, -0....",['The King has refused to pass laws that are g...
1,2,The United States Bill of Rights: The Ten Orig...,United States,,,https://www.gutenberg.org/ebooks/2.txt.utf-8,"['Contentment', 'Empowerment', 'Relief', 'Secu...","[0.7, 0.8, 0.6, 0.9, 0.7, 0.8, 0.6, 0.7, 0.6, ...",['The Bill of Rights ensures freedom of religi...
2,3,John F. Kennedy's Inaugural Address,"Kennedy, John F. (John Fitzgerald)",1917.0,1963.0,https://www.gutenberg.org/ebooks/3.txt.utf-8,"['Hope', 'Pride', 'Determination', 'Loyalty', ...","[0.8, 0.6, 0.9, 0.7, -0.3, 0.5, 0.4, 0.5, 0.8,...","['Celebration of freedom and renewal', 'Americ..."
3,4,Lincoln's Gettysburg Address: Given November 1...,"Lincoln, Abraham",1809.0,1865.0,https://www.gutenberg.org/ebooks/4.txt.utf-8,"['Pride', 'Determination', 'Respect', 'Reveren...","[0.8, 0.9, 0.7, 1.0, 0.3, 0.9, 0.8, 0.6, 0.9, ...",['The founding fathers created a nation concei...
4,5,The United States Constitution,United States,,,https://www.gutenberg.org/ebooks/5.txt.utf-8,"['Pride', 'Hope', 'Determination', 'Liberty', ...","[0.5, 0.7, 0.6, 0.9, 0.4, 0.8, 0.5, 0.3, 0.4, ...",['The establishment of the United States Const...


In [5]:
def _extract_list_literal(raw_text):
  """Returns the outermost `[...]` span found in a model reply.

  Uses the first '[' and the LAST ']' so that brackets appearing inside a
  summary string (or a nested list) do not truncate the literal.

  Raises:
    ValueError: if the text contains no bracketed list.
  """
  start = raw_text.find('[')
  end = raw_text.rfind(']')
  if start == -1 or end <= start:
    raise ValueError("no python list literal found in model response")
  return raw_text[start:end + 1]


# The prompt does not depend on the book, so it is built once outside the loop.
prompt = "You are an english literature professor. Please read this book and find near forty most important events. From those events, find the main emotion portrayed. Given the events and emotions, can you create a python list of tuples. The first element in each tuple is the emotion as a string enclosed in double-quotation marks, the second element is your ranking of that emotion on a scale of -1 to +1, with +1 being the most positive emotion and -1 being the most negative emotion, and the third element being a short utf8-encoded string summary of the event enclosed in double-quotation marks. This list of tuples should be assignable to a python list using: ast.literal_eval(). Only this list should be returned."

for _, row in df.loc[0:90].iterrows():
  # Boolean mask selecting this book's row; reused for the check and the writes.
  row_mask = df['id'] == row['id']

  # An empty string in 'emotions' marks a book that has not been processed yet.
  if df.loc[row_mask, 'emotions'].item() != '':
    print(f"{row['title']} already processed.")
    continue

  print(f"Processing {row['id']} - {row['title']} by {row['author']}")

  # Get the text of the book
  text = read_txt_from_url(row['txt_link'])
  if text is None:
    continue

  # Pause between model calls (presumably to stay under the API rate limit).
  print("sleeping for 30 seconds")
  sleep(30)

  # Reset per iteration so the error handler never sees a stale or unbound
  # response when generate_content itself raises.
  response = None
  try:
    response = model.generate_content([prompt, text])
    # Drop typographic apostrophes, which can break the quoted literal.
    ret_emotions = response.text.replace('’', '')

    lstLiteral = _extract_list_literal(ret_emotions)
    res = ast.literal_eval(lstLiteral)
    print(res)
    print(row['id'])

    # Build all three columns before writing any of them, so a malformed
    # tuple cannot leave a half-updated row that later looks "processed".
    emotions = str([i[0] for i in res])
    emotion_scores = str([i[1] for i in res])
    summaries = str([i[2] for i in res])

    df.loc[row_mask, 'emotions'] = emotions
    df.loc[row_mask, 'emotion_scores'] = emotion_scores
    df.loc[row_mask, 'summaries'] = summaries

  except Exception as e:
    # Best effort: report the failure and move on to the next book.
    print("ERROR - id:", row['id'], row['title'], e)
    # Membership must be tested on the message text; `"..." in e` raises
    # TypeError because exception objects are not iterable.
    if response is not None and "response.parts" in str(e):
      print(response.prompt_feedback)





The Declaration of Independence of the United States of America already processed.
The United States Bill of Rights: The Ten Original Amendments to the Constitution of the United States already processed.
John F. Kennedy's Inaugural Address already processed.
Lincoln's Gettysburg Address: Given November 19, 1863 on the battlefield near Gettysburg, Pennsylvania, USA already processed.
The United States Constitution already processed.
Give Me Liberty or Give Me Death already processed.
Abraham Lincoln's Second Inaugural Address already processed.
Abraham Lincoln's First Inaugural Address already processed.
Alice's Adventures in Wonderland already processed.
Through the Looking-Glass already processed.
The Hunting of the Snark: An Agony in Eight Fits already processed.
Processing 14 - The 1990 CIA World Factbook by United States. Central Intelligence Agency
sleeping for 30 seconds
ERROR - id: 14 The 1990 CIA World Factbook 504 Deadline Exceeded
Processing 15 - Moby-Dick; or, The Whale by 

KeyboardInterrupt: 

In [None]:
# Write the updated table back over the source parquet file, leaving the
# DataFrame index out of the stored columns.
output_parquet = "../streamlit/data/gutenberg_books.parquet"
df.to_parquet(output_parquet, index=False)

# Show the first five rows of what was written.
df.head(5)

Unnamed: 0,id,title,author,author_birth,author_death,txt_link,emotions,emotion_scores,summaries
0,1,The Declaration of Independence of the United ...,"Jefferson, Thomas",1743.0,1826.0,https://www.gutenberg.org/ebooks/1.txt.utf-8,"['anger', 'anger', 'anger', 'anger', 'anger', ...","[-0.8, -0.7, -0.9, -0.8, -0.9, -0.7, -0.8, -0....",['The King has refused to pass laws that are g...
1,2,The United States Bill of Rights: The Ten Orig...,United States,,,https://www.gutenberg.org/ebooks/2.txt.utf-8,"['Contentment', 'Empowerment', 'Relief', 'Secu...","[0.7, 0.8, 0.6, 0.9, 0.7, 0.8, 0.6, 0.7, 0.6, ...",['The Bill of Rights ensures freedom of religi...
2,3,John F. Kennedy's Inaugural Address,"Kennedy, John F. (John Fitzgerald)",1917.0,1963.0,https://www.gutenberg.org/ebooks/3.txt.utf-8,"['Hope', 'Pride', 'Determination', 'Loyalty', ...","[0.8, 0.6, 0.9, 0.7, -0.3, 0.5, 0.4, 0.5, 0.8,...","['Celebration of freedom and renewal', 'Americ..."
3,4,Lincoln's Gettysburg Address: Given November 1...,"Lincoln, Abraham",1809.0,1865.0,https://www.gutenberg.org/ebooks/4.txt.utf-8,"['Pride', 'Determination', 'Respect', 'Reveren...","[0.8, 0.9, 0.7, 1.0, 0.3, 0.9, 0.8, 0.6, 0.9, ...",['The founding fathers created a nation concei...
4,5,The United States Constitution,United States,,,https://www.gutenberg.org/ebooks/5.txt.utf-8,"['Pride', 'Hope', 'Determination', 'Liberty', ...","[0.5, 0.7, 0.6, 0.9, 0.4, 0.8, 0.5, 0.3, 0.4, ...",['The establishment of the United States Const...


In [None]:
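# Reset helper: uncomment and run to blank the three result columns, so that
# the processing loop treats every book as not yet processed.
# df['emotions'] = ""
# df['emotion_scores'] = ""
# df['summaries'] = ""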

In [None]:
# Inspect the first fifteen rows to check which books have results.
df.head(n=15)

Unnamed: 0,id,title,author,author_birth,author_death,txt_link,emotions,emotion_scores,summaries
0,1,The Declaration of Independence of the United ...,"Jefferson, Thomas",1743.0,1826.0,https://www.gutenberg.org/ebooks/1.txt.utf-8,"['anger', 'anger', 'anger', 'anger', 'anger', ...","[-0.8, -0.7, -0.9, -0.8, -0.9, -0.7, -0.8, -0....",['The King has refused to pass laws that are g...
1,2,The United States Bill of Rights: The Ten Orig...,United States,,,https://www.gutenberg.org/ebooks/2.txt.utf-8,"['Contentment', 'Empowerment', 'Relief', 'Secu...","[0.7, 0.8, 0.6, 0.9, 0.7, 0.8, 0.6, 0.7, 0.6, ...",['The Bill of Rights ensures freedom of religi...
2,3,John F. Kennedy's Inaugural Address,"Kennedy, John F. (John Fitzgerald)",1917.0,1963.0,https://www.gutenberg.org/ebooks/3.txt.utf-8,"['Hope', 'Pride', 'Determination', 'Loyalty', ...","[0.8, 0.6, 0.9, 0.7, -0.3, 0.5, 0.4, 0.5, 0.8,...","['Celebration of freedom and renewal', 'Americ..."
3,4,Lincoln's Gettysburg Address: Given November 1...,"Lincoln, Abraham",1809.0,1865.0,https://www.gutenberg.org/ebooks/4.txt.utf-8,"['Pride', 'Determination', 'Respect', 'Reveren...","[0.8, 0.9, 0.7, 1.0, 0.3, 0.9, 0.8, 0.6, 0.9, ...",['The founding fathers created a nation concei...
4,5,The United States Constitution,United States,,,https://www.gutenberg.org/ebooks/5.txt.utf-8,"['Pride', 'Hope', 'Determination', 'Liberty', ...","[0.5, 0.7, 0.6, 0.9, 0.4, 0.8, 0.5, 0.3, 0.4, ...",['The establishment of the United States Const...
5,6,Give Me Liberty or Give Me Death,"Henry, Patrick",1736.0,1799.0,https://www.gutenberg.org/ebooks/6.txt.utf-8,"['Pride', 'Urgency', 'Fear', 'Hope', 'Distrust...","[0.7, 0.8, -0.6, 0.4, -0.8, -0.7, 0.9, -0.5, 0...",['Patrick Henry expresses pride in the patriot...
6,8,Abraham Lincoln's Second Inaugural Address,"Lincoln, Abraham",1809.0,1865.0,https://www.gutenberg.org/ebooks/8.txt.utf-8,"['anticipation', 'confidence', 'hope', 'anxiet...","[0.2, 0.5, 0.8, -0.4, -0.6, 0.7, -0.8, 0.3, -0...","[""Lincoln's second inaugural address"", 'Progre..."
7,9,Abraham Lincoln's First Inaugural Address,"Lincoln, Abraham",1809.0,1865.0,https://www.gutenberg.org/ebooks/9.txt.utf-8,"['Anticipation', 'Reassurance', 'Clarity', 'Pr...","[0.5, 0.75, 0.6, 0.75, -0.5, 0.4, 0.5, 0.6, 0....",['The incoming administration is aware of anxi...
8,11,Alice's Adventures in Wonderland,"Carroll, Lewis",1832.0,1898.0,https://www.gutenberg.org/ebooks/11.txt.utf-8,"['curiosity', 'confusion', 'fear', 'sadness', ...","[0.5, -0.2, -0.7, -0.5, 0.4, -0.4, 0.3, 0.6, -...",['Alice follows the White Rabbit down the rabb...
9,12,Through the Looking-Glass,"Carroll, Lewis",1832.0,1898.0,https://www.gutenberg.org/ebooks/12.txt.utf-8,"['Joy', 'Curiosity', 'Confusion', 'Frustration...","[0.7, 0.5, -0.3, -0.4, 0.6, -0.2, 0.4, -0.5, -...","['Alice enters the looking-glass world.', 'Ali..."


In [None]:
# Number of rows whose 'emotion_scores' entry is not the empty string.
has_scores = df['emotion_scores'] != ''
int(has_scores.sum())

37