In [12]:
from langchain_google_genai.llms import GoogleGenerativeAI
import pandas as pd
import json

from tqdm import tqdm
from IPython.display import display, Markdown

import os
import time
from dotenv import load_dotenv

In [13]:
# Load environment variables from a .env file, if one is found.
# NOTE(review): presumably this supplies the Google API key for the LLM client created below — confirm.
load_dotenv()

True

In [14]:
# Create the Gemini client through LangChain; every llm.invoke() call below goes to this model.
llm = GoogleGenerativeAI(model="gemini-2.5-flash")

In [15]:
# Smoke test: send one hand-written prompt to check the client works before
# running the full dataset through it.
sample_prompt = (
    "Summarise the following Tolkien quote in simple English in a paragraph: "
    "'Not all those who wander are lost.'"
)
response = llm.invoke(sample_prompt)

In [16]:
# Render the model's reply as Markdown rather than as a plain string.
display(Markdown(response))

This famous quote suggests that just because someone appears to be moving without a clear or obvious destination, or taking an indirect path, it doesn't mean they are confused, without purpose, or truly lost. Instead, they might be exploring, seeking new experiences, finding their own unique way, or simply enjoying the journey. Their unconventional path could be exactly where they need to be, leading them to something important or meaningful, even if others don't immediately understand their direction.

In [17]:
# Each line of the file is parsed as one record; transposing turns the record's
# keys into rows, and the single resulting column (labelled 0) is renamed "info".
quotes_df = (
    pd.read_json("quotes_with_prompt.json", lines=True)
    .T
    .rename(columns={0: "info"})
)
quotes_df

Unnamed: 0,info
0,"{'quote': 'All that is gold does not glitter, ..."
1,{'quote': 'Not all those who wander are lost. ...
2,{'quote': 'I wish it need not have happened in...
3,{'quote': 'I don't know half of you half as we...
4,{'quote': 'All we have to decide is what to do...
...,...
2989,{'quote': 'He that breaks a thing to find out ...
2990,"{'quote': 'Home is behind, the world ahead, An..."
2991,{'quote': 'There is more in you of good than y...
2992,"{'quote': 'Good Morning!"" said Bilbo, and he m..."


In [18]:
# lookup order: column -> row label -> key inside the stored dict
first_record = quotes_df["info"][0]
first_record["prompt_template"]

'Summarize the following quote in simple English in a paragraph: All that is gold does not glitter,\nNot all those who wander are lost;\nThe old that is strong does not wither,\nDeep roots are not reached by the frost.\n\nFrom the ashes a fire shall be woken,\nA light from the shadows shall spring;\nRenewed shall be blade that was broken,\nThe crownless again shall be king.\n'

In [19]:
# constants
output_dir = "summaries/"  # folder that receives one JSON file per processed batch
batch_size = 100  # number of quotes handled between two saves to disk

In [20]:
# make sure summaries folder exists; exist_ok=True keeps re-running this cell harmless
os.makedirs(output_dir, exist_ok=True)

In [21]:
# Summarise every quote with the LLM, one batch at a time.
#
# Finished summaries are checkpointed to quotes_with_summary.json even when a
# batch is cut short by an exception (e.g. the API's daily-quota 429 error) or
# a kernel interrupt, and a re-run restores them first so that no request is
# spent on a quote that already has a summary.
summary_file = "quotes_with_summary.json"
request_pause_s = 2  # pause after each API call (presumably rate limiting)


def _write_json(payload, path):
    """Write ``payload`` to ``path`` as UTF-8 JSON, replacing the file atomically."""
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False)
    # os.replace swaps the file in a single step, so an interrupted write
    # cannot leave a truncated checkpoint behind.
    os.replace(tmp_path, path)


with open("quotes_with_prompt.json", "r", encoding="utf-8") as f:
    quotes = json.load(f)

# Restore summaries from an earlier run. The checkpoint is a list written in
# index order, so position k corresponds to key str(k); the prompt comparison
# guards against a checkpoint produced from a different input file.
if os.path.exists(summary_file):
    with open(summary_file, "r", encoding="utf-8") as f:
        previous = json.load(f)
    restored = 0
    if isinstance(previous, list):
        for idx, saved in enumerate(previous):
            key = str(idx)
            if (
                isinstance(saved, dict)
                and "summary" in saved
                and key in quotes
                and saved.get("prompt_template") == quotes[key].get("prompt_template")
            ):
                quotes[key]["summary"] = saved["summary"]
                restored += 1
    print(f"Restored {restored} summaries from {summary_file}")

quotes_with_summary = []

for start in range(0, len(quotes), batch_size):
    end = min(start + batch_size, len(quotes))
    print(f"Processing batch {start} to {end - 1}")

    batch = []

    try:
        for i in tqdm(range(start, end)):
            key = str(i)
            quote = quotes[key]

            # skip already generated summaries (no API call, so no pause)
            if "summary" in quote:
                batch.append(quote)
                quotes_with_summary.append(quote)
                continue

            # generate summary
            summary = llm.invoke(quote["prompt_template"])
            quote["summary"] = summary

            # Append the updated quote
            batch.append(quote)
            quotes_with_summary.append(quote)

            time.sleep(request_pause_s)

        # Save the current batch; only reached when the whole batch finished,
        # so a batch file never holds a partial batch.
        batch_file = os.path.join(output_dir, f"summary_{start}_{end-1}.json")
        _write_json(batch, batch_file)
        print(f"Saved batch to {batch_file}")
    finally:
        # Save everything summarised so far, whether the batch completed or
        # was cut short; the exception (if any) still propagates afterwards.
        _write_json(quotes_with_summary, summary_file)
        print("Updated quotes_with_summary.json")

Processing batch 0 to 99


100%|██████████| 100/100 [13:19<00:00,  8.00s/it]


Saved batch to summaries/summary_0_99.json
Updated quotes_with_summary.json
Processing batch 100 to 199


 36%|███▌      | 36/100 [04:51<08:30,  7.98s/it]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 250
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 45
}
].
 38%|███▊      | 38/100 [05:08<08:29,  8.21s/it]Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it rai

ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. [violations {
  quota_metric: "generativelanguage.googleapis.com/generate_content_free_tier_requests"
  quota_id: "GenerateRequestsPerDayPerProjectPerModel-FreeTier"
  quota_dimensions {
    key: "model"
    value: "gemini-2.5-flash"
  }
  quota_dimensions {
    key: "location"
    value: "global"
  }
  quota_value: 250
}
, links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, retry_delay {
  seconds: 25
}
]