In [1]:
import os
import time
from collections import Counter
from dotenv import load_dotenv
load_dotenv()

#reload the api key if you want to use a different api key
#load_dotenv(override=True)

from pydantic import BaseModel
from typing import Literal

import pandas as pd
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the proposal sheet. The `id` column is forced to str so the long
# proposal ids are handled as text from the start
# (NOTE(review): presumably to avoid numeric precision issues - confirm).
proposal_df = pd.read_excel(
    'data/pycon_2024_proposal.xlsx',
    dtype={'id': str},
)

  warn("Workbook contains no default style, apply openpyxl's default")


In [3]:
# Sanity check: the review loop keys its results by proposal id,
# so every id must appear exactly once.
proposal_df['id'].is_unique

True

# Set the prompt file and model

In [4]:
# Prompt templates that can be used for the review.
simple_prompt_file = "prompt/simple_prompt.txt"
full_prompt_file = "prompt/full_prompt.txt"

# Gemini model identifiers that can be used for the review.
flash_model = "gemini-2.0-flash-exp"
pro_model = "gemini-2.0-pro-exp-02-05"

# The prompt/model pair actually used by the cells below; edit this line to switch.
exec_prompt, exec_model = full_prompt_file, flash_model

In [5]:
# Proposal fields handed to the LLM for each review, in this order.
proposal_info_columns = [
    'title',
    'abstract',
    'detailed_description',
    'outline',
    'objective',
]

class ProposalReview(BaseModel):
    """Review for a PyCon proposal"""
    # NOTE(review): pydantic exposes the class docstring as the schema
    # description, and this model is handed to `with_structured_output`, so
    # rewording the docstring may change what the LLM is shown - confirm
    # before editing it.

    # Free-text fields filled in by the model.
    # NOTE(review): presumably `summary` recaps the proposal and `comment`
    # carries the reviewer's feedback - confirm against the prompt file.
    summary: str
    comment: str
    # Only these four strings pass validation; any other value is rejected.
    vote: Literal['+1', '+0', '-0', '-1']

# Read the selected prompt template from disk.
# The encoding is pinned to UTF-8 so the prompt text is decoded the same way on
# every machine instead of depending on the platform's locale default.
with open(exec_prompt, "r", encoding="utf-8") as f:
    prompt_template = f.read()

# The template is expected to contain a single {PROPOSAL_INFO} placeholder,
# which the review loop fills with the stringified proposal fields.
prompt = PromptTemplate(input_variables=['PROPOSAL_INFO'], template=prompt_template)

# temperature=0 is requested to keep the reviews as repeatable as the model allows.
llm = ChatGoogleGenerativeAI(model=exec_model, temperature=0)

# Ask the model to answer with a ProposalReview object instead of free text.
structured_llm = llm.with_structured_output(ProposalReview)

# prompt -> structured LLM; invoke with {"PROPOSAL_INFO": ...}.
chain = prompt | structured_llm

# Initialize the result list and the retry delay

In [6]:
# Seconds to wait after a failed LLM call before the next attempt.
sleep_time = 20

# Collected review dicts. Re-running this cell discards every review gathered
# so far, because the review loop skips only proposals still present here.
result = list()

In [7]:
# Review every proposal once and append the outcome to `result`.
#
# The cell is resumable: proposals whose id is already in `result` are skipped,
# so after an interruption (or after the retry budget is exhausted and the cell
# raises) it can simply be run again without repeating finished LLM calls.
max_retries = 6  # attempts per proposal before the cell gives up and raises
processed_proposals = set(r.get('proposal_id') for r in result)

# Build each row's prompt payload in a single pass instead of filtering the
# whole frame once per proposal id inside the loop.
proposal_records = proposal_df[proposal_info_columns].to_dict(orient='records')

for proposal_id, proposal_info in zip(proposal_df.id, proposal_records):
    if proposal_id in processed_proposals:
        print(f"Skipping already processed proposal: {proposal_id}")
        continue

    print(f"Processing proposal: {proposal_id}")
    # Timer starts only for proposals that are actually sent to the model.
    start_time = time.time()

    for attempt in range(max_retries):
        try:
            review = chain.invoke({"PROPOSAL_INFO": str(proposal_info)})
            # Kept inside the try block on purpose: if the model returns nothing
            # usable, model_dump() fails and the proposal is retried.
            review_dict = review.model_dump()
            review_dict['proposal_id'] = proposal_id
            result.append(review_dict)
            # Track the id immediately so it cannot be reviewed twice in one run.
            processed_proposals.add(proposal_id)
            break
        except Exception as e:
            print(f"LLM invoke failed for proposal {proposal_id} (Attempt {attempt + 1}/{max_retries}): {e}")
            if attempt == max_retries - 1:
                # Chain the last failure explicitly so its traceback is preserved.
                raise RuntimeError(f"Max retries ({max_retries}) exceeded for proposal {proposal_id}") from e
            # Pause before retrying (e.g. to let a rate limit / quota window recover).
            time.sleep(sleep_time)

    exec_time = time.time() - start_time
    print(f"Execution time for proposal {proposal_id}: {exec_time:.2f} seconds\n")

Processing proposal: 2178936233474917120
Execution time for proposal 2178936233474917120: 3.79 seconds

Processing proposal: 2178979854999880448
Execution time for proposal 2178979854999880448: 2.45 seconds

Processing proposal: 2178981707884004096
Execution time for proposal 2178981707884004096: 3.00 seconds

Processing proposal: 2179613228097602304
Execution time for proposal 2179613228097602304: 2.34 seconds

Processing proposal: 2180814504890204928
Execution time for proposal 2180814504890204928: 2.65 seconds

Processing proposal: 2181661100230050560
Execution time for proposal 2181661100230050560: 2.87 seconds

Processing proposal: 2181767915890541312
Execution time for proposal 2181767915890541312: 3.48 seconds

Processing proposal: 2183455404749488896
Execution time for proposal 2183455404749488896: 1.94 seconds

Processing proposal: 2184258951825064704
Execution time for proposal 2184258951825064704: 2.57 seconds

Processing proposal: 2184265081062163200
Execution time for prop

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


LLM invoke failed for proposal 2185844604283126528 (Attempt 1/6): 429 Resource has been exhausted (e.g. check quota).
Execution time for proposal 2185844604283126528: 25.08 seconds

Processing proposal: 2185845802830660352
Execution time for proposal 2185845802830660352: 2.46 seconds

Processing proposal: 2185846315039064832
Execution time for proposal 2185846315039064832: 2.46 seconds

Processing proposal: 2186438925994689280
Execution time for proposal 2186438925994689280: 2.36 seconds

Processing proposal: 2186443284254032640
Execution time for proposal 2186443284254032640: 2.46 seconds

Processing proposal: 2186467836912730880
Execution time for proposal 2186467836912730880: 2.56 seconds

Processing proposal: 2188433923833332480
Execution time for proposal 2188433923833332480: 2.05 seconds

Processing proposal: 2188525315913941760
Execution time for proposal 2188525315913941760: 2.56 seconds

Processing proposal: 2189264747189240576
Execution time for proposal 2189264747189240576: 

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


LLM invoke failed for proposal 2192450893792674560 (Attempt 1/6): 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Execution time for proposal 2192450893792674560: 27.15 seconds

Processing proposal: 2192916629635990272
Execution time for proposal 2192916629635990272: 2.28 seconds

Processing proposal: 2192921804903809792
Execution time for proposal 2192921804903809792: 2.54 seconds

Processing proposal: 2194950561579664128
Execution time for proposal 2194950561579664128: 2.81 seconds

Processing proposal: 2195629600740999936
Execution time for proposal 2195629600740999936: 2.95 seconds

Processing proposal: 2195682525014131456
Execution time for proposal 2195682525014131456: 2.97 seconds

Processing proposal: 2195793247056429824
Execution time for proposal 2195793247056429824: 2.77 seconds

Processing proposal: 2197219917860700928
Execution time for proposal 2197219917860700928: 2.66 seconds

Processing proposal: 2197262136667800320
Execution time for proposal 2197262136667800320: 2.44 seconds

Processing proposal: 2197952451796009728
Execution time for proposal 2197952451796009728: 2.26 seconds



Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


LLM invoke failed for proposal 2198581276154266368 (Attempt 1/6): 429 Resource has been exhausted (e.g. check quota).


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


Execution time for proposal 2198581276154266368: 27.65 seconds

Processing proposal: 2201589261063422720
Execution time for proposal 2201589261063422720: 2.46 seconds

Processing proposal: 2201815482904871680


Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


LLM invoke failed for proposal 2201815482904871680 (Attempt 1/6): 429 Resource has been exhausted (e.g. check quota).
Execution time for proposal 2201815482904871680: 26.21 seconds

Processing proposal: 2202583607111844608
Execution time for proposal 2202583607111844608: 2.26 seconds

Processing proposal: 2202977686752592640
Execution time for proposal 2202977686752592640: 2.56 seconds

Processing proposal: 2203075043099935488
Execution time for proposal 2203075043099935488: 2.15 seconds

Processing proposal: 2204392707969778432
Execution time for proposal 2204392707969778432: 2.76 seconds

Processing proposal: 2205027665625219840
Execution time for proposal 2205027665625219840: 3.69 seconds

Processing proposal: 2205136900467983104
Execution time for proposal 2205136900467983104: 3.02 seconds

Processing proposal: 2205248731861746432
Execution time for proposal 2205248731861746432: 2.71 seconds

Processing proposal: 2205337041422516992
Execution time for proposal 2205337041422516992: 

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


LLM invoke failed for proposal 2206984221174530816 (Attempt 1/6): 429 Resource has been exhausted (e.g. check quota).
Execution time for proposal 2206984221174530816: 25.24 seconds

Processing proposal: 2207288307711214336
Execution time for proposal 2207288307711214336: 2.66 seconds

Processing proposal: 2208409881428361984
Execution time for proposal 2208409881428361984: 2.94 seconds

Processing proposal: 2208578622766187264
Execution time for proposal 2208578622766187264: 2.15 seconds

Processing proposal: 2208789199635612416
Execution time for proposal 2208789199635612416: 3.07 seconds

Processing proposal: 2208992563552060160
Execution time for proposal 2208992563552060160: 3.38 seconds

Processing proposal: 2209033186929804032
Execution time for proposal 2209033186929804032: 2.26 seconds

Processing proposal: 2209040357629362944
Execution time for proposal 2209040357629362944: 2.66 seconds

Processing proposal: 2209120431347073792
Execution time for proposal 2209120431347073792: 

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


LLM invoke failed for proposal 2209459737420890880 (Attempt 1/6): 429 Resource has been exhausted (e.g. check quota).
Execution time for proposal 2209459737420890880: 25.01 seconds

Processing proposal: 2209507247787082496
Execution time for proposal 2209507247787082496: 2.04 seconds

Processing proposal: 2209587078117720832
Execution time for proposal 2209587078117720832: 3.84 seconds

Processing proposal: 2209677975304012544
Execution time for proposal 2209677975304012544: 3.02 seconds

Processing proposal: 2210220044032410368
Execution time for proposal 2210220044032410368: 2.56 seconds

Processing proposal: 2210288503076422400
Execution time for proposal 2210288503076422400: 2.66 seconds

Processing proposal: 2210341851536294656
Execution time for proposal 2210341851536294656: 2.40 seconds

Processing proposal: 2210394752925303552
Execution time for proposal 2210394752925303552: 2.61 seconds

Processing proposal: 2210405674750313216
Execution time for proposal 2210405674750313216: 

Retrying langchain_google_genai.chat_models._chat_with_retry.<locals>._chat_with_retry in 2.0 seconds as it raised ResourceExhausted: 429 Resource has been exhausted (e.g. check quota)..


LLM invoke failed for proposal 2210443378934416128 (Attempt 1/6): 429 Resource has been exhausted (e.g. check quota).
Execution time for proposal 2210443378934416128: 24.59 seconds

Processing proposal: 2210457031310050048
Execution time for proposal 2210457031310050048: 2.88 seconds

Processing proposal: 2210461538760786688
Execution time for proposal 2210461538760786688: 3.47 seconds

Processing proposal: 2210513372791702272
Execution time for proposal 2210513372791702272: 2.73 seconds

Processing proposal: 2210517591254893312
Execution time for proposal 2210517591254893312: 2.74 seconds

Processing proposal: 2210697815674323712
Execution time for proposal 2210697815674323712: 2.93 seconds

Processing proposal: 2210750583986455296
Execution time for proposal 2210750583986455296: 2.67 seconds

Processing proposal: 2210778103544808192
Execution time for proposal 2210778103544808192: 2.35 seconds

Processing proposal: 2211064418454733568
Execution time for proposal 2211064418454733568: 

In [8]:
# Tally how many reviews ended with each vote value.
Counter([review.get('vote') for review in result])


Counter({'+0': 66, '+1': 8, '-0': 3})

In [9]:
# Export the collected reviews to an Excel workbook.
#
# The file name is derived from the prompt and model selected in the config
# cell, so switching `exec_prompt` / `exec_model` no longer requires editing
# this cell. With the notebook's two prompts and the flash model this yields
# data/simple_prompt_gemini_flash_0216.xlsx or data/full_prompt_gemini_flash_0216.xlsx.
if not result:
    raise ValueError("No reviews to export: `result` is empty. Run the review loop first.")

prompt_tag = os.path.splitext(os.path.basename(exec_prompt))[0]  # e.g. "full_prompt"
model_tag = {flash_model: "gemini_flash", pro_model: "gemini_pro"}.get(exec_model, exec_model)
run_tag = "0216"  # run label embedded in the file name
output_file = f"data/{prompt_tag}_{model_tag}_{run_tag}.xlsx"

complete_df = pd.DataFrame(result)
# Notice: proposal ids are 19-digit values. They are written as text because a
# numeric cell would not reliably keep that many digits (the original note
# here warned that "int may cause overflow").
complete_df['proposal_id'] = complete_df['proposal_id'].astype(str)
complete_df.to_excel(output_file, index=False)