In [7]:
import base64
import json
import os
import re

import pandas as pd
import requests
from dotenv import load_dotenv

In [8]:
# Load variables from a local .env file into the process environment
# (describe_image reads OPENAI_API_KEY from there via os.getenv).
load_dotenv()

# Folder holding the screenshots; encode_image opens "<screenId>.jpg" inside it.
IMAGE_INPUT_FOLDER = "../data/s2w_sample1/"

# CSV with one row per screen; the summaries are written back to this same file.
S2W = "../data/s2w_summarized.csv"

s2w_raw = pd.read_csv(S2W)

# Saving with to_csv() writes the row index as an unnamed column, which read_csv
# brings back as "Unnamed: 0" (and "Unnamed: 0.1", ... after repeated round-trips).
# How many of these exist depends on how often the file was saved, so drop whichever
# are present instead of hard-coding names that may be missing on the next run.
index_leftovers = [col for col in s2w_raw.columns if str(col).startswith("Unnamed:")]
s2w = s2w_raw.drop(columns=index_leftovers)

In [9]:
# Few-shot prompt sent to the vision model together with a screenshot.
# It contains three worked question/answer pairs followed by a fourth, unanswered
# question that the model is expected to complete for the attached image.
# Each example answer is a fenced JSON object with the keys "typeOfApp", "isPopUp"
# and "requirements" (split into "popUp" and "mobilePage" lists); the parsing cell
# further down strips those fences before calling json.loads.
# NOTE(review): the text is passed to the model verbatim, so it is left untouched here.
# It contains typos ("betweeen", "fittnes") and what looks like an unfinished example
# item ("Selected Date is "); fixing them may change model output - confirm before editing.
PROMPT = """
Q: 
Given a screenshot of a mobile page. Provide a text in the form of a list, that describes the functional requirements of the given mobile page. Focus solely on the required functionality. Ignore Layout and design characteristics. Include only information, that is visible in the screenshot, by Ignoring the android status bar at the top of the screenshot and the android navigation bar at the bottom. If a popup is visible, split the Requirements betweeen the pop up and the underlying mobile page.

A:
```JSON
{
    "typeOfApp": "language-learning app",
    "isPopUp": false,
    "requirements": {
        "popUp": [],
        "mobilePage": [
            "Display a sentence in a foreign language that the user is expected to translate",
            "Provide audio playback of the sentence for the user to hear the pronunciation",
            "Allow the user to input their translation into a text field",
            "Submit the translation for verification by clicking a 'CHECK' button",
            "Offer an option to skip the current sentence and move to the next one with a 'SKIP' button",
            "Display a progress bar or indicator to show the user's progress through the lesson or activity"
        ]
    }
}
```
Q: 
Given a screenshot of a mobile page. Provide a text in the form of a list, that describes the functional requirements of the given mobile page. Focus solely on the required functionality. Ignore Layout and design characteristics. Include only information, that is visible in the screenshot, by Ignoring the android status bar at the top of the screenshot and the android navigation bar at the bottom. If a popup is visible, split the Requirements betweeen the pop up and the underlying mobile page.

A:
```JSON
{
    "typeOfApp": "fittnes app",
    "isPopUp": false,
    "requirements": 
    {
        "popUp": [],
        "mobilePage":
        [
            "Select a Date in a calendar",
            "Selected Date is ",
            "Choose the number of circuits the user wishes to perform during the workout",
            "Select an instructor or possibly a type of guidance for the workout",
            "Provide access to a navigation menu at the bottom, with tabs for: 'Workout' (where the user is currently) to presumably start and configure workouts. 'Learn' to offer educational content or instructions. 'Achievements' to show the user's progress or rewards. 'Track' to presumably monitor the user's workout activity or progress over time"
        ]
    }
}
``` 
Q: Given a screenshot of a mobile page. Provide a text in the form of a list, that describes the functional requirements of the given mobile page. Focus solely on the required functionality. Ignore Layout and design characteristics. Include only information, that is visible in the screenshot, by Ignoring the android status bar at the top of the screenshot and the android navigation bar at the bottom. If a popup is visible, split the Requirements betweeen the pop up and the underlying mobile page.

A:    
```JSON
{
    "typeOfApp": "Birthday notification app",
    "isPopUp": true,
    "requirements": {
        "popUp":
        [
            "Display a calendar view for selecting a date",
            "Highlight the current selected date",
            "Show the month for the currently displayed days in the calendar",
            "Provide buttons to navigate to the previous or next month",
            "Display a confirmation button labeled 'DONE' for finalizing the date selection"
        ],
        "mobilePage": []
    }
}
```           
Q: Given a screenshot of a mobile page. Provide a text in the form of a list, that describes the functional requirements of the given mobile page. Focus solely on the required functionality. Ignore Layout and design characteristics. Include only information, that is visible in the screenshot, by Ignoring the android status bar at the top of the screenshot and the android navigation bar at the bottom. If a popup is visible, split the Requirements betweeen the pop up and the underlying mobile page.

A:    
"""

In [10]:
def encode_image(id, folder=None):
    """Read a screenshot from disk and return it as a base64 string.

    Args:
        id: Screen identifier; the file opened is "<id>.jpg".
        folder: Directory containing the image. Defaults to IMAGE_INPUT_FOLDER
            when omitted, which keeps existing single-argument calls working.

    Returns:
        The file's bytes, base64-encoded and decoded to a UTF-8 ``str``.

    Raises:
        FileNotFoundError: If no "<id>.jpg" exists in the folder.
    """
    if folder is None:
        folder = IMAGE_INPUT_FOLDER
    # os.path.join works whether or not the folder ends with a path separator;
    # plain string concatenation silently built a wrong path without one.
    image_path = os.path.join(folder, f"{id}.jpg")
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")
   

In [11]:
def describe_image(id, timeout=60):
    """Send one screenshot plus PROMPT to the OpenAI chat completions endpoint.

    The request carries two user messages: the text prompt, then the
    screenshot as a base64 ``data:image/jpeg`` URL.

    Args:
        id: Screen identifier, forwarded to encode_image() to load the image.
        timeout: Seconds passed to ``requests.post`` as its timeout, so a stalled
            connection cannot block the notebook indefinitely.

    Returns:
        The raw ``requests.Response``. The HTTP status is not checked here;
        callers must inspect it before trusting the JSON body.

    Raises:
        RuntimeError: If the OPENAI_API_KEY environment variable is not set.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        # Without this check the request would go out as "Bearer None" and only
        # fail later with a confusing response.
        raise RuntimeError("OPENAI_API_KEY is not set; add it to the environment or .env file")

    base64_image = encode_image(id)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    text_message = {
        "role": "user",
        "content": [{"type": "text", "text": PROMPT}],
    }
    image_message = {
        "role": "user",
        "content": [
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
            }
        ],
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [text_message, image_message],
        # NOTE(review): a reply longer than this limit would presumably be cut off,
        # leaving incomplete JSON - confirm the limit is sufficient.
        "max_tokens": 300,
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return response

In [78]:
# Preview the screen ids at positions 37-49, the batch processed in the next cell.
s2w.iloc[37:50]["screenId"]

37      300
38     3501
39    27595
40    22605
41    12179
42    18877
43    19984
44    27064
45    10535
46    19704
47    23501
48     3546
49    26080
Name: screenId, dtype: int64

In [79]:
# Summarise one batch of screenshots and store each reply in the 'llm_summary' column.
# A failed request is reported and skipped so that one bad response does not abort
# the rest of the batch with an opaque TypeError.
screens = s2w["screenId"].iloc[37:50]
for screen_id in screens:
    result = describe_image(screen_id)

    if not result.ok:
        print(f"{screen_id}: FAILED (HTTP {result.status_code})")
        continue

    choices = result.json().get("choices") or []
    if not choices:
        print(f"{screen_id}: FAILED (response contains no choices)")
        continue

    summary = choices[0].get("message", {}).get("content")
    s2w.loc[s2w['screenId'] == screen_id, 'llm_summary'] = summary
    print(f"{screen_id}: DONE")

300: DONE
3501: DONE
27595: DONE
22605: DONE
12179: DONE
18877: DONE
19984: DONE
27064: DONE
10535: DONE
19704: DONE
23501: DONE
3546: DONE
26080: DONE


In [73]:
# Show the stored model reply for row label 9.
s2w.loc[9, "llm_summary"]

"I'm sorry, but the image you've provided is too dark and lacks visible content which makes it difficult to determine the functional requirements of the mobile page. It appears to be a camera interface possibly in a dark environment or with the lens covered. If you have an image with visible elements, I would be happy to provide the functional requirements for that page."

In [80]:
# Write the summaries back to the CSV they were loaded from; S2W is the single
# definition of that path, so load and save cannot drift apart.
# NOTE(review): to_csv also writes the row index by default, which shows up as an
# "Unnamed: 0" column on the next read_csv. Consider index=False once the load cell
# no longer expects that column.
s2w.to_csv(S2W)

In [28]:
# Use the stored model reply for row label 0 as the sample to parse below.
text = s2w.loc[0, "llm_summary"]

In [29]:
# Remove the Markdown code-fence markers around the reply. The match is
# case-insensitive so "```json" is stripped as well as "```JSON".
cleaned_string = re.sub(r"```(?:json)?", "", text, flags=re.IGNORECASE).strip()

# Convert the cleaned string to a Python object. Replies are not always JSON
# (the model sometimes answers in plain prose), so report that case instead of crashing.
try:
    python_object = json.loads(cleaned_string)
except json.JSONDecodeError as err:
    python_object = None
    print(f"Reply is not valid JSON ({err}): {cleaned_string!r}")
else:
    # Print the Python object
    print(python_object)

{'typeOfApp': 'health tracking app', 'isPopUp': False, 'requirements': {'popUp': [], 'mobilePage': ['Provide an option for users to sign up for a free account', 'Offer a secure and automatic data backup service on a monthly basis for signed-up users', 'Include an option for users who already have an account to log in and restore their data', 'Present an option to skip the account creation or login process']}}
