In [100]:
import pandas as pd
from langfuse.callback import CallbackHandler
import os
from dotenv import load_dotenv
from langchain.prompts import PromptTemplate
import base64
import requests


load_dotenv()
handler = CallbackHandler(os.environ.get("LANGFUSE_PUBLIC_KEY"), os.environ.get("LANGFUSE_SECRET_KEY"))

MODEL = "gpt-4-vision-preview"

S2W_SAMPLE = "../data/s2w_sample.csv"
S2W = "../data/Screen2Words_tfidf.csv"
IMAGE_INPUT_FOLDER = "../data/s2w_sample1/"
CORRECT_SUMMARIES = "../data/s2w_sample_correct_summaries.csv"
S2W_IMPROVED = "../data/s2w_sample_improved.csv"


PROMPT = PromptTemplate(
    input_variables=["summaries"],
    template="""Based on the provided Screenshot of a mobile page and a set of summaries, that describe the given mobile screen, classify which summaries contain correct inforamation and which summaries incorrectly describe the screen: 

    {summaries}

    provide a python List element with the indexes of the correct summaries."""
)


In [101]:
s2w = pd.read_csv(S2W)
s2w.drop(columns=["Unnamed: 0"], inplace=True)

s2w_sample = pd.read_csv(S2W_SAMPLE)
s2w_sample.drop(columns=["Unnamed: 0"], inplace=True)

In [102]:
def encode_image(id):
    image_path = IMAGE_INPUT_FOLDER + f"{id}.jpg"
    print(image_path)
    with open(image_path,"rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')    

In [103]:


def detect_incorrect_labels(id):
    api_key = os.getenv("OPENAI_API_KEY")
    base64_image = encode_image(id)
    
    summaries = s2w[s2w["screenId"]==id]["summary"].values
    row_indexes = s2w.index[s2w['screenId'] == id].tolist()

    summaries_str = "\n".join([f"Summary {row_indexes[i]}: {summary}" for i, summary in enumerate(summaries)])


    print(base64_image)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "system",
                "content": [
                {
                    "type": "text",
                    "text": "Given a screenshot of a mobile screen, you will receive five brief descriptions about it's content. Your task is to evaluate these descriptions for accuracy and compile a Python list containing the indexes of the summaries that correctly depict the mobile screen's details. Please distinguish between the summaries that accurately describe the screen and those that do not."
                }]
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"{summaries_str}. "
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    },
                    {   "type": "text",
                        "text": "make sure to provide only the pyton list object. Nothing else"
                    }
                ]
            },
        ],
        "max_tokens": 150
    }
    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    return response

In [104]:
correct_summaries = pd.read_csv(CORRECT_SUMMARIES)
correct_summaries.drop(columns=["Unnamed: 0"], inplace=True)
 
screens = s2w_sample["screenId"].iloc[499:500].values
screens

array([72069], dtype=int64)

In [96]:
def quality_improvement_pipeline():
    screens = s2w_sample["screenId"].iloc[499:500].values
    for screen_id in screens:
        result = detect_incorrect_labels(screen_id)
        new_row = [screen_id, result.json().get("choices")[0].get("message").get("content")]
        correct_summaries.loc[len(correct_summaries)] = new_row
        print(f"{screen_id}: DONE")

In [97]:
quality_improvement_pipeline()

../data/s2w_sample1/72069.jpg
/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAeABDgDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD5/ooooAKKKKACirmnadd6vqMGn2MTTXVw4SNB1JNe/eHv2c7EWccniHU53uWGWhtcKiH03EEt9eKAPnWivdfGX7Pzafp8t94avJboxqWa0nA3kDrtYYyfYivDGUqSrAgg4IPagBtFFF

In [105]:
import ast

s2w_improved = pd.read_csv(S2W_IMPROVED)
s2w_improved.drop(columns=["Unnamed: 0"], inplace=True)

correct_summaries_s2w = s2w[s2w["screenId"].isin(correct_summaries["screenId"])]

correct_summaries_s2w = s2w[s2w["screenId"].isin(correct_summaries["screenId"])]
for value in correct_summaries_s2w["screenId"].unique():
    df = s2w[s2w.index.isin(ast.literal_eval(correct_summaries[correct_summaries["screenId"]==value]["summaries"].iloc[0]))]
    s2w_improved = pd.concat([s2w_improved, df], ignore_index=True)

s2w_improved


  s2w_improved = pd.concat([s2w_improved, df], ignore_index=True)


Unnamed: 0,screenId,summary,split,tfidf_sum,tfidf_grouped_sum
0,11317,display of screen shows a login page,train,2.117984,10.245288
1,11317,login and sign up page for the application wit...,train,2.118153,10.245288
2,11317,page displays to login into an app,train,1.808303,10.245288
3,11317,welcome displaying to sign in to an menstruati...,train,1.961512,10.245288
4,11131,pop up alert to choose country to search in,train,2.178230,10.631040
...,...,...,...,...,...
1652,39184,page shows some text in an music application,train,2.055975,10.279091
1653,42071,display page showing few options in a translat...,test,2.121762,10.080606
1654,42071,screen page of a language translator application,test,1.974318,10.080606
1655,42071,screen showing page of an translator application,test,1.917233,10.080606


In [106]:
#correct_summaries.to_csv("../data/s2w_sample_correct_summaries.csv")
s2w_improved.to_csv("../data/s2w_sample_improved.csv")