In [12]:
import pandas as pd
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langfuse.callback import CallbackHandler
import os

# --- Configuration -----------------------------------------------------------
# Input: per-screen summaries to condense. Output: one combined summary per screen.
S2W_SAMPLE = "../data/s2w_sample_improved.csv"
S2W_SUMMARIZED = "../data/s2w_summarized.csv"

# Completion model name handed to the OpenAI LLM wrapper.
MODEL = "gpt-3.5-turbo-instruct"

# Populate os.environ from a .env file before any key is read below.
load_dotenv()

# Langfuse callback handler, built from the public/secret key pair in the environment.
# os.getenv yields None for a missing variable, so a missing key is not detected here.
handler = CallbackHandler(
    os.getenv("LANGFUSE_PUBLIC_KEY"),
    os.getenv("LANGFUSE_SECRET_KEY"),
)

In [13]:
# Load both tables and discard the 'Unnamed: 0' column each file carries
# (presumably the index written by an earlier to_csv call — the save cell of this
# notebook writes s2w_summarized.csv with its index).
# A missing 'Unnamed: 0' column raises KeyError.
s2w_sample = pd.read_csv(S2W_SAMPLE).drop(columns=['Unnamed: 0'])

# Previously generated summaries; new rows are appended to this frame.
s2w_complex = pd.read_csv(S2W_SUMMARIZED).drop(columns=['Unnamed: 0'])


In [14]:
from notebooks.prompts import SUMMARY_PROMPT_FS

def create_summary(id, prompt=SUMMARY_PROMPT_FS, temp=0.7):
    """Condense all summaries recorded for one screen into a single LLM response.

    Selects every row of the module-level ``s2w_sample`` frame whose ``screenId``
    equals ``id``, numbers their ``summary`` values ("Summary 1: ...", one per
    line) and passes that text to an ``LLMChain`` as the ``summaries`` input.

    Args:
        id: ``screenId`` value to look up. The name shadows the ``id`` builtin
            inside this function; it is kept so keyword callers keep working.
        prompt: Prompt object given to the chain; it is invoked with a single
            ``summaries`` variable. Defaults to ``SUMMARY_PROMPT_FS``.
        temp: Temperature passed to the OpenAI LLM wrapper.

    Returns:
        The value returned by ``chain.run`` for this screen, or ``None`` when
        the screen has no summaries or the LLM call raised. Failures are
        printed rather than propagated so a batch run can continue.
    """
    summaries = s2w_sample[s2w_sample["screenId"] == id]["summary"].values
    if len(summaries) == 0:
        # Nothing to condense: skip the API call instead of prompting with empty input.
        print(f"create_summary: no summaries found for screenId {id}")
        return None

    summaries_str = "\n".join(
        f"Summary {i+1}: {summary}" for i, summary in enumerate(summaries)
    )

    llm = OpenAI(model_name=MODEL, temperature=temp)
    # NOTE(review): `handler` is attached to the chain and also passed to run();
    # confirm this does not record each call twice in Langfuse.
    chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])

    try:
        return chain.run(summaries=summaries_str, callbacks=[handler])
    except Exception as e:
        # Report and return None explicitly; falling through to a `return` of a
        # never-assigned local would raise UnboundLocalError and hide the real error.
        print(f"create_summary: LLM call failed for screenId {id}: {e}")
        return None

In [15]:
# Generate one combined summary per distinct screen id.
screens = s2w_sample["screenId"].unique()
for screen_id in screens:
    result = create_summary(screen_id)
    # Append immediately (rather than batching) so summaries already produced
    # stay in s2w_complex if a later call fails. The positional row assumes
    # s2w_complex has exactly two columns, (screen id, summary) — TODO confirm.
    next_label = len(s2w_complex)
    s2w_complex.loc[next_label] = [screen_id, result]
    print(f"{screen_id}: DONE")


300: DONE
33: DONE
486: DONE
494: DONE
498: DONE
549: DONE
70: DONE
761: DONE
596: DONE
495: DONE


In [16]:
# Write the accumulated summaries back to the file they were loaded from.
# Uses the S2W_SUMMARIZED constant so the load and save paths cannot drift apart.
# The index is written on purpose: the load cell drops it again as 'Unnamed: 0'.
s2w_complex.to_csv(S2W_SUMMARIZED)