In [59]:
from dotenv import load_dotenv
load_dotenv()

import pandas as pd
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI

In [108]:
#"gemini-2.0-flash-thinking-exp-01-21"
#config = {'thinking_config': {'include_thoughts': True}}

#"gemini-2.0-flash-exp"

# Gemini chat model (temperature 0) used by the LangChain pipeline.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-exp",
    temperature=0,
)

# Load the exported query result, then keep only the rows that have an abstract.
raw_df = pd.read_excel("query_result_2025-01-25T07_49_01.047868Z.xlsx")
df = raw_df[raw_df.abstract.notna()]

  warn("Workbook contains no default style, apply openpyxl's default")


In [109]:
# Read the prompt template text from disk.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used — confirm it matches how prompt.txt was saved.
with open("prompt.txt", "r") as prompt_file:
    prompt_text = prompt_file.read()

# The template declares a single input variable, PROPOSAL_INFO.
prompt = PromptTemplate(template=prompt_text, input_variables=['PROPOSAL_INFO'])

# Pipeline: fill the template, call the model, parse the reply as JSON.
chain = prompt | llm | JsonOutputParser()

# Proposal fields that are rendered into the prompt, in this order.
proposal_info_columns = ['title', 'abstract', 'detailed_description', 'outline', 'objective']

In [129]:
# Accumulator for the parsed model replies (one dict per proposal).
# Re-running this cell discards everything collected so far.
result = list()

In [132]:
# Rate limit: 10 RPM (requests per minute), hence one call every 6 seconds.
import time

SECONDS_BETWEEN_CALLS = 6

# Resume support: proposals whose reply is already stored in `result` are
# skipped, so this cell can be re-run after an interruption without
# repeating calls that already succeeded.
processed_proposals = {r.get('proposal_id') for r in result}
print(processed_proposals)

# Build the pipeline once for the whole run; it does not depend on the
# proposal, so rebuilding it inside the loop was redundant work.
chain = prompt | llm | JsonOutputParser()

for proposal_id in df.proposal_id.unique():
    # Skip if already processed
    if proposal_id in processed_proposals:
        print(f"Skipping already processed proposal: {proposal_id}")
        continue

    print(f"Processing proposal: {proposal_id}")
    # Only the first row of each proposal is used for the prompt fields.
    proposal_info = df[df.proposal_id == proposal_id][proposal_info_columns][:1].to_dict(orient='records')[0]
    # NOTE(review): despite the name this is a list of "field: value" strings,
    # so the template is filled with the list's repr — confirm this is the
    # intended prompt format before changing it (it would alter model input).
    proposal_info_str = [f'{x}: {proposal_info[x]}' for x in proposal_info_columns]
    executive_method = chain.invoke({"PROPOSAL_INFO": proposal_info_str})

    # Add proposal_id to the result for tracking
    executive_method['proposal_id'] = proposal_id
    result.append(executive_method)
    time.sleep(SECONDS_BETWEEN_CALLS)

{2185846315039064832, 2185844604283126528, 2185842091785978624, 2205136900467983104, 2208409881428361984, 2185845802830660352, 2197952451796009728, 2181661100230050560, 2209040357629362944, 2188525315913941760, 2183455404749488896, 2180814504890204928, 2203075043099935488, 2202583607111844608, 2201815482904871680, 2197262136667800320, 2209120431347073792}
Skipping already processed proposal: 2185846315039064832
Skipping already processed proposal: 2185844604283126528
Skipping already processed proposal: 2205136900467983104
Skipping already processed proposal: 2208409881428361984
Skipping already processed proposal: 2185842091785978624
Skipping already processed proposal: 2185845802830660352
Skipping already processed proposal: 2197952451796009728
Skipping already processed proposal: 2181661100230050560
Skipping already processed proposal: 2209040357629362944
Skipping already processed proposal: 2188525315913941760
Skipping already processed proposal: 2197262136667800320
Skipping alread

In [177]:
# Counter is imported in this cell because the notebook's only other import
# of it sits in a later cell, which made this cell fail with NameError when
# the notebook was run top to bottom on a fresh kernel.
from collections import Counter

# Tally of the 'vote' values returned by the regular model run.
Counter(x['vote'] for x in result)


Counter({'+0': 10, '-0': 4, '+1': 2, '-1': 1})

# Flash Thinking model (gemini-2.0-flash-thinking-exp)

In [160]:
import os
from google import genai

# Gemini API client: the key is read from the environment and the v1alpha
# API version is requested.
client = genai.Client(
    api_key=os.environ["GOOGLE_API_KEY"],
    http_options=dict(api_version='v1alpha'),
)

# Generation settings: include the model's thoughts, temperature 0.
config = dict(
    thinking_config=dict(include_thoughts=True),
    temperature=0,
)


In [162]:
# Raw responses from the thinking model, one per proposal, in iteration order.
thinking_result = []

for proposal_id in df.proposal_id.unique():
    print(proposal_id)

    # First row of this proposal, restricted to the prompt fields.
    proposal_info = (
        df.loc[df.proposal_id == proposal_id, proposal_info_columns]
        .head(1)
        .to_dict(orient='records')[0]
    )
    proposal_info_str = [
        f'{field}: {proposal_info[field]}' for field in proposal_info_columns
    ]

    # Render the template to plain text before handing it to the client.
    rendered_prompt = prompt.invoke({"PROPOSAL_INFO": proposal_info_str}).text

    response = client.models.generate_content(
        model='gemini-2.0-flash-thinking-exp',
        contents=rendered_prompt,
        config=config,
    )
    thinking_result.append(response)

2185846315039064832
2185844604283126528
2205136900467983104
2208409881428361984
2185842091785978624
2185845802830660352
2197952451796009728
2181661100230050560
2209040357629362944
2188525315913941760
2197262136667800320
2209120431347073792
2201815482904871680
2203075043099935488
2183455404749488896
2180814504890204928
2202583607111844608


In [169]:
# Inspect the raw text of the first thinking-model response.
first_response = thinking_result[0]
print(first_response.text)

```json
{
  "comment": "提案主題關於 LLM 評估很有趣，但與 Python 的關聯性較弱。建議明確說明如何使用 Python 進行 LLM 實驗、數據分析或模型互動。例如，可以展示 Python 程式碼範例，介紹相關的 Python 函式庫，並深入探討 Python 在 LLM 評估中的技術細節。加強 Python 技術深度將使提案更符合 PyCon TW 主題。",
  "vote": "-0"
}
```


In [171]:
import json


def extract_json_payload(text):
    """Return the JSON text inside an optional Markdown code fence.

    The model replies look like a fenced block that opens with three
    backticks plus ``json`` and closes with three backticks.  The previous
    implementation used ``str.strip('```json')``, but ``str.strip`` treats
    its argument as a *set of characters* and does not remove whitespace
    first, so a reply with a newline before the opening fence or after the
    closing fence kept its fence and made ``json.loads`` fail.

    Args:
        text: Raw reply text, with or without a surrounding code fence.

    Returns:
        The reply with surrounding whitespace, the opening fence (and an
        optional ``json`` language tag) and the closing fence removed.
    """
    fence = "```"
    payload = text.strip()
    if payload.startswith(fence):
        payload = payload[len(fence):]
        # Drop the optional language tag that follows the opening fence.
        if payload[:4].lower() == "json":
            payload = payload[4:]
        payload = payload.rstrip()
        if payload.endswith(fence):
            payload = payload[:-len(fence)]
    return payload.strip()


# One parsed JSON value per thinking-model response, in the same order.
parsed_result = [json.loads(extract_json_payload(x.text)) for x in thinking_result]

In [176]:
from collections import Counter

# Tally of the 'vote' values parsed from the thinking-model replies.
vote_counts = Counter(entry['vote'] for entry in parsed_result)
vote_counts



Counter({'+0': 9, '-0': 4, '+1': 3, '-1': 1})

In [None]:
# Vote tallies copied from the two runs above, kept side by side for comparison.
Counter({'+0': 10, '-0': 4, '+1': 2, '-1': 1})  # regular model (gemini-2.0-flash-exp)
Counter({'+0': 9, '-0': 4, '+1': 3, '-1': 1}) # thinking model (gemini-2.0-flash-thinking-exp)

In [None]:
# Running 0-based position of each row within its proposal; used as the
# column key so every comment of a proposal lands in its own column.
comment_position = df.groupby('proposal_id').cumcount()

pivot_df = df.pivot_table(
    index='proposal_id',
    columns=comment_position,
    values='comment',
    aggfunc='first',
)

# Name the pivoted columns comment_1, comment_2, ... by position.
pivot_df.columns = [f'comment_{n}' for n in range(1, pivot_df.shape[1] + 1)]

# Turn proposal_id back into a regular column.
pivot_df = pivot_df.reset_index()


In [212]:
# Tabulate the regular-model replies and prefix every column with LLM_
# to distinguish them from the proposal columns after merging.
regular_result_df = pd.DataFrame(result).add_prefix('LLM_')

# One row per proposal with the prompt fields, sorted by proposal_id descending.
proposal_columns = ['proposal_id'] + proposal_info_columns
proposal_df = (
    df[proposal_columns]
    .drop_duplicates(subset=['proposal_id'])
    .sort_values(by='proposal_id', ascending=False)
)

# Attach the model replies first, then the pivoted comment columns.
proposals_with_llm_df = proposal_df.merge(
    regular_result_df,
    left_on='proposal_id',
    right_on='LLM_proposal_id',
    how='left',
)
final_df = (
    proposals_with_llm_df
    .merge(pivot_df, on='proposal_id', how='left')
    .sort_values(by='proposal_id', ascending=False)
)


In [214]:
# Add an empty human_eval column, then write the sheet without the index.
final_df = final_df.assign(human_eval='')
final_df.to_excel(
    'final_df_0127.xlsx',
    index=False,
)

In [217]:
# Display the merged frame for a visual check.
final_df

Unnamed: 0,proposal_id,title,abstract,detailed_description,outline,objective,LLM_comment,LLM_vote,LLM_proposal_id,comment_1,comment_2,comment_3,comment_4,comment_5,comment_6,comment_7,human_eval
0,2209120431347073792,Cracking the Code: Decoding Anti-Bot Systems!,Join me for a masterclass where I share the my...,Join me for a masterclass where I share the my...,"The presentation spans 35 minutes, followed by...","Web scraping is gaining momentum, particularly...",這個提案的主題非常有趣且實用，特別是在當前網路爬蟲技術日益重要的背景下。提案詳細介紹了反爬蟲...,0,2209120431347073792,Thanks for this comprehensive proposal. I also...,I did look into the video talk. And together w...,,,,,,
1,2209040357629362944,自動化電話錄音評測系統：提升超過 30 倍生產力,公司每天有上百通電話錄音需要進行稽核，以往僅能透過抽查的方式進行人工稽核，於是利用 STT(...,STT 工具：\r\nhttps://github.com/openai/whisper\r...,"5min 既有流程簡介\r\n5min 簡介 STT, LLM\r\n10min 程式碼分享...",幫助對於在客服或是電銷團隊能夠加速稽核流程，透過 Python 開源套件能在短時間建構出適合...,"這個提案展示了如何利用 Python 開源工具 (STT, LLM) 解決實際業務問題，具備...",1,2209040357629362944,The outline is quite sparse and lacks substant...,這份提案介紹了一個自動化電話錄音評測系統，利用STT（Speech To Text）和LLM...,,,,,,
2,2208409881428361984,Eco Smart Guide: 透過 Python 打造一站式 ESG 資訊查詢及報告生成平台,隨著企業越來越重視環境、社會和治理（ESG）的可持續發展目標，對於準確且全面的 ESG 資訊...,### Domain: ESG (環境、社會和治理)\r\nESG 指的是評估企業在環境保護...,* 引言：ESG的重要性、了解LLM與RAG [5 mins]\r\n * ESG對現...,本次演講目標是向以下受眾展示 LLM 和 RAG 技術在 ESG 資訊查詢和報告生成中的創新...,這個提案的主題非常有趣且具有現實意義，將 LLM 和 RAG 技術應用於 ESG 資訊查詢和...,0,2208409881428361984,看起來是在介紹LLM應用，但對於python主題看不出實際有何特殊處,The talk is undoubtedly fascinating and insigh...,,,,,,
3,2205136900467983104,oh my gai - 你的 GAI 資訊助手,生成式人工智慧(Generative Artificial Intelligence，後文簡...,### 提案大綱詳細說明\r\n1. GAI 簡介: 介紹 GAI 目前的應用，及開發過程可...,### 30 分鐘 \r\n1. 簡介生成式 AI [5min] \r\n2. 蒐集資料與使...,對於 GAI 有興趣或者是在工作中有使用到 GAI 的人來說，本演講將分享一個運用 GAI ...,這個提案的主題非常熱門且實用，利用 GAI 來處理 GAI 相關資訊，解決了資訊爆炸的問題。...,0,2205136900467983104,感覺上和python的關係較低，更像是在介紹AI應用而已,"While your platform ""oh my gai"" is an interest...",,,,,,
4,2203075043099935488,Query your structured data with a LangChain AI...,"In this talk, we will explore the capabilities...",LangChain: https://python.langchain.com/docs/g...,1. Intro to LLMs and prompt [5 min]\r\n2. What...,The intended audience of this talk are Python ...,這個提案的主題明確，聚焦於使用 LangChain 處理 Text-to-SQL 的問題，並...,0,2203075043099935488,LangChain 已經問世一年左右了，有好也有壞處。\r\n提案中未涉及LangChain...,,,,,,,
5,2202583607111844608,Driving Efficiency through Automation: Leverag...,Efficiently tackling some of the resource prov...,"During this session, I'll provide a comprehens...",1. Introduction: (2 minutes)\r\n . Briefly ...,By codifying our infrastructure configurations...,這個提案的主題明確，聚焦於使用 Python CDK 在 AWS 上進行基礎設施自動化，與 ...,0,2202583607111844608,I like the topic of infrastructure as code and...,,,,,,,
6,2201815482904871680,Building Retrieval Augmented Generation (RAG) ...,Retrieval Augmented Generation (RAG) has emerg...,What is RAG: https://medium.com/building-the-o...,1. Intro to LLMs and prompt [10 min]\r\n2. Wha...,The intended audience of this talk are Python ...,這個提案的主題非常熱門且與當前趨勢相關，RAG 和 LangChain 都是目前 AI 領域...,0,2201815482904871680,1. 技術創新度：\r\n雖然涉及使用 LangChain 構建RAG應用，這是目前在自然語...,,,,,,,
7,2197952451796009728,"Introduction to Threads, Processes, Coroutines...",We typically execute our Python programs seque...,- [Threads](https://docs.python.org/3/library/...,- Introduction\r\n- Concurrency and Parallelis...,The python developers want to learn more about...,"這個提案涵蓋了 Python 中並行和並發的重要概念，包括 threads, process...",1,2197952451796009728,round 2 update: 提案者沒有改善提案。\r\n\r\n\r\n==\r\n1....,,,,,,,
8,2197262136667800320,利用 LangChain 與 FastAPI 實現 LLM 數位孿生系統,聊天機器人是大語言模型產品（如 ChatGPT 等）的常見使用界面，被視為一種對話式的使用者...,講者將以自己開發的開源框架 (https://github.com/towningtek/L...,- 對話式的使用者介面 (Conversational User Interface (CU...,Python 開發者在面對大型語言模型（LLM）等生成式 AI 專案開發時，經常面臨提示工程...,這個提案的主題非常有趣且實用，結合了 LangChain 和 FastAPI 來構建 LLM...,0,2197262136667800320,While the topic of integrating LangChain and F...,,,,,,,
9,2188525315913941760,Meeting-itis,We no longer know how to read and write. Inste...,"In this talk, I will try to bring your attenti...","1. meeting-itis - 2m\r\n2. Bible, Iliad and ...",Bring attention that the lack of written commu...,這個提案的主題雖然有趣且具社會意義，但與 PyCon 的主題關聯性薄弱。它主要探討的是溝通模...,-1,2188525315913941760,While the impact of video meetings on focus an...,此演講提案的主題與Python並無直接關聯，而是著重在討論現代溝通方式對專注力與知識保留的影...,,,,,,
