In [1]:
import pandas as pd
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langfuse.callback import CallbackHandler
import os

# --- Notebook configuration ---------------------------------------------------
# CSV files used by this notebook (paths are relative to the notebook's folder).
S2W_SAMPLE = "../data/s2w_sample_improved.csv"
S2W_FULL = "../data/Screen2Words.csv"
S2W_SUMMARIZED = "../data/s2w_summarized.csv"

# Model name passed to the OpenAI wrapper in create_summary.
MODEL = "gpt-3.5-turbo-instruct"

# Call load_dotenv() before the environment lookups below so that keys kept in
# a local .env file are visible to them.
load_dotenv()

# Module-level Langfuse callback handler built from the two environment keys.
# Either lookup yields None when the variable is not set.
handler = CallbackHandler(
    os.getenv("LANGFUSE_PUBLIC_KEY"),
    os.getenv("LANGFUSE_SECRET_KEY"),
)

In [2]:
def _read_csv_without_saved_index(path):
    """Read a CSV and discard the 'Unnamed: 0' column, if it is present.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A new DataFrame without the 'Unnamed: 0' column.
    """
    frame = pd.read_csv(path)
    # errors="ignore" keeps the cell runnable when a file was written without
    # its index column; returning a new frame avoids in-place mutation.
    return frame.drop(columns=["Unnamed: 0"], errors="ignore")


s2w_sample = _read_csv_without_saved_index(S2W_SAMPLE)
s2w_full = _read_csv_without_saved_index(S2W_FULL)
s2w_complex = _read_csv_without_saved_index(S2W_SUMMARIZED)


In [3]:
from notebooks.prompts import SUMMARY_PROMPT_FS_FILTERED, SUMMARY_PROMPT_FS_FULL

def create_summary(id, handler, prompt=None, temp=0.7, complete_s2w = False):
    """Ask the LLM to merge all reference summaries of one screen into one text.

    Args:
        id: Value matched against the "screenId" column to select the rows
            whose "summary" values are sent to the model.
        handler: Callback handler attached to the chain and to the run call.
        prompt: Prompt template for the chain. When None (the default) the
            template is chosen from ``complete_s2w``: SUMMARY_PROMPT_FS_FULL
            if it is true, SUMMARY_PROMPT_FS_FILTERED otherwise. An explicitly
            passed prompt is now honoured; previously it was always
            overwritten by one of those two templates.
        temp: Sampling temperature passed to the OpenAI wrapper.
        complete_s2w: If true, read the summaries from ``s2w_full``; otherwise
            read them from ``s2w_sample``.

    Returns:
        The value returned by ``chain.run``, or None when the call raised
        (the exception is printed). Previously a failed call ended in an
        UnboundLocalError that hid the real error.
    """
    if complete_s2w:
        source, default_prompt = s2w_full, SUMMARY_PROMPT_FS_FULL
    else:
        source, default_prompt = s2w_sample, SUMMARY_PROMPT_FS_FILTERED

    if prompt is None:
        prompt = default_prompt

    llm = OpenAI(model_name=MODEL, temperature=temp)
    chain = LLMChain(llm=llm, prompt=prompt, callbacks=[handler])

    # One numbered line per reference summary, counted from 1.
    summaries = source.loc[source["screenId"] == id, "summary"].values
    summaries_str = "\n".join(
        f"Summary {number}: {summary}"
        for number, summary in enumerate(summaries, start=1)
    )

    try:
        return chain.run(summaries=summaries_str, callbacks=[handler])
    except Exception as e:
        # Best effort: report the failure and let the caller decide what to do
        # with a missing summary.
        print(e)
        return None

In [4]:
# Display the table loaded from S2W_SUMMARIZED; summaries_pipeline() appends its rows to this frame.
s2w_complex

Unnamed: 0,screenId,s2w_summary,llm_summary,s2w_full,s2w_single
0,300,The mobile screen is a pop-up in a learning ap...,"Based on the screenshot provided, the mobile p...",The mobile screen is part of a learning applic...,page showing quiz in an learning application
1,33,The mobile screen displays different buttons f...,"Based on the screenshot provided, the mobile p...",The Screen is the home page of an application ...,page showing variety of numbers
2,486,The Screen displays a pop-up with multiple sha...,The mobile page is displaying a popup with the...,The screen is a pop-up in a social app that di...,page displaying multiple share options
3,494,The mobile screen is part of a music app and a...,The mobile page is a playlist creation interfa...,The mobile screen is part of a music app and d...,page displaying the create a playlist
4,498,The screen is a pop-up in a podcast applicatio...,The screenshot shows a podcast application wit...,The screen is a pop-up alert message in a podc...,pop up to delete podcast
5,549,The mobile screen is a settings page for a rel...,The mobile page is a settings screen of an app...,The mobile screen is part of a religious app a...,multiple options shown in settings
6,70,The screen is part of a scientific app and dis...,The mobile page displayed appears to be a vide...,The mobile screen is part of an app that displ...,menu of videos in the nasa app
7,761,The mobile screen is part of a settings app an...,The screenshot appears to be from a voice reco...,"The Screen is part of a settings app, displayi...",page showing different setting options on an app
8,596,The mobile screen is part of a printer app and...,The mobile page shown appears to be a settings...,The screen is part of a printing application a...,display shows advanced categories page in prin...
9,495,The screen is part of a podcast application th...,"Based on the screenshot provided, the mobile p...",The screen is part of an entertainment applica...,displaying episodes i like screen of a podcast...


In [5]:
import random
from langfuse.client import Langfuse


def summaries_pipeline(skip_existing=False, seed=None):
    """Create summaries for every screen in ``s2w_sample`` and append them to ``s2w_complex``.

    For each unique "screenId" two LLM summaries are requested through
    ``create_summary`` (one with ``complete_s2w=False``, one with
    ``complete_s2w=True``), each under its own Langfuse trace, and one of the
    screen's sample summaries is picked at random. The results are appended as
    a new row to the global ``s2w_complex`` frame, so completed rows are kept
    even if a later screen fails.

    Args:
        skip_existing: When true, screens whose id is already present in
            ``s2w_complex["screenId"]`` are skipped, which avoids duplicate
            rows and repeated LLM calls on a re-run. Defaults to False, which
            processes every screen.
        seed: Optional seed for the random single-summary pick. When None the
            module-level ``random`` state is used.

    Returns:
        None. The function mutates ``s2w_complex`` and prints progress.
    """
    picker = random.Random(seed) if seed is not None else random
    # One client serves all traces; it does not need to be rebuilt per screen.
    client = Langfuse()
    already_done = set(s2w_complex["screenId"]) if skip_existing else set()

    for screen_id in s2w_sample["screenId"].unique():
        if screen_id in already_done:
            print(f"{screen_id}: SKIPPED")
            continue

        filtered_trace = client.trace(name=f"summary_creation_{screen_id}_filtered")
        filtered_result = create_summary(
            screen_id,
            complete_s2w=False,
            handler=filtered_trace.get_langchain_handler(),
        )

        full_trace = client.trace(name=f"summary_creation_{screen_id}_full")
        full_result = create_summary(
            screen_id,
            complete_s2w=True,
            handler=full_trace.get_langchain_handler(),
        )

        candidates = s2w_sample.loc[s2w_sample["screenId"] == screen_id, "summary"].values
        single_result = picker.choice(candidates)

        # "LLM_SUMMARY" is stored as a literal value here; presumably a
        # placeholder that is filled in elsewhere - TODO confirm.
        new_row = [screen_id, filtered_result, "LLM_SUMMARY", full_result, single_result]
        # Using len() as the new label assumes the index is 0..n-1.
        s2w_complex.loc[len(s2w_complex)] = new_row
        print(f"{screen_id}: DONE")


In [6]:
# Run the pipeline: appends one row per unique s2w_sample screenId to s2w_complex and prints "<screenId>: DONE" for each.
summaries_pipeline()

11317: DONE
11131: DONE
25415: DONE
15711: DONE
32607: DONE
25749: DONE
30627: DONE
18880: DONE
14453: DONE
22180: DONE
17148: DONE
13612: DONE
34797: DONE
20559: DONE
27228: DONE
18719: DONE
35725: DONE
33177: DONE
31189: DONE
15074: DONE
24218: DONE
12878: DONE
12810: DONE
17908: DONE
13296: DONE
17651: DONE
23663: DONE
20950: DONE
21268: DONE
20145: DONE
20807: DONE
22996: DONE
29222: DONE
2181: DONE
25174: DONE
17896: DONE
3456: DONE
300: DONE
3501: DONE
27595: DONE
22605: DONE
12179: DONE
18877: DONE
19984: DONE
27064: DONE
10535: DONE
19704: DONE
23501: DONE
3546: DONE
26080: DONE
27104: DONE
33250: DONE
32740: DONE
34702: DONE
28001: DONE
10307: DONE
33: DONE
13658: DONE
28847: DONE
21654: DONE
21292: DONE
3332: DONE
2509: DONE
15316: DONE
32861: DONE
27744: DONE
29664: DONE
13354: DONE
31080: DONE
28528: DONE
34381: DONE
30193: DONE
24692: DONE
30998: DONE
31715: DONE
13495: DONE
23200: DONE
32739: DONE
1002: DONE
23673: DONE
20123: DONE
15019: DONE
22896: DONE
30986: DONE
1368

In [7]:
# Write the table back to the file it was loaded from, using the shared path
# constant instead of a second copy of the literal. The index is written too
# (pandas default), which is why the loading cell drops 'Unnamed: 0' on read.
s2w_complex.to_csv(S2W_SUMMARIZED)