# imports

In [None]:
%pip install google-generativeai
%pip install pillow
%pip install python-dotenv

In [103]:
import google.generativeai as genai
from dotenv import load_dotenv
import os
from PIL import Image
import json

# Load environment variables via python-dotenv (presumably from a local .env
# file -- confirm where the key is actually stored).
load_dotenv()
# Hand the API key to the SDK. Indexing os.environ raises KeyError when
# GOOGLE_API_KEY is not set, so a missing key fails here rather than on the
# first request.
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])


In [116]:
# Schema describing ONE program/window visible in a screenshot.
# `program_name`, `program_description` and `program_type` are mandatory;
# `is_active` and `program_info` are optional extras.
programs = genai.protos.Schema(
    type=genai.protos.Type.OBJECT,
    properties=dict(
        program_name=genai.protos.Schema(type=genai.protos.Type.STRING),
        program_description=genai.protos.Schema(
            description="Description of the program",
            type=genai.protos.Type.STRING,
        ),
        is_active=genai.protos.Schema(
            description="Whether the program is currently active",
            type=genai.protos.Type.BOOLEAN,
        ),
        # Closed set of program categories the model may choose from.
        program_type=genai.protos.Schema(
            description="Type of the program",
            type=genai.protos.Type.STRING,
            enum=["browser", "email", "chat", "calendar"],
        ),
        # Free-form JSON encoded as a string; its shape depends on program_type.
        program_info=genai.protos.Schema(
            description="valid json information about the program based on the program type, e.g email client with email showing should have subject, body text, content, sender, etc. if it is a browser it will be the url, page title, etc. if it is a calendar it will be the date, time, highlighted, conflict",
            type=genai.protos.Type.STRING,
        ),
    ),
    required=['program_name', 'program_description', 'program_type'],
)

In [123]:
# Top-level schema for everything extracted from one screenshot: a prose
# description of the scene, the list of programs on screen (each shaped by
# the `programs` schema), and what the mouse is doing. All three are required.
screenshot_data_schema = genai.protos.Schema(
    type=genai.protos.Type.OBJECT,
    properties=dict(
        general_scene_description=genai.protos.Schema(
            description="Description of screenshot and user activity in the scene. Including text in Email body",
            type=genai.protos.Type.STRING,
        ),
        programs_in_scene=genai.protos.Schema(
            description="An array of all programs running in the screenshot it may only be one",
            type=genai.protos.Type.ARRAY,
            items=programs,
        ),
        mouse_action=genai.protos.Schema(
            description="what the user is doing with the mouse, hovering a button, clicking a button, etc.",
            type=genai.protos.Type.STRING,
        ),
    ),
    required=['general_scene_description', 'programs_in_scene', 'mouse_action'],
)

In [124]:
# Function declaration offered to the model as a tool; its arguments follow
# `screenshot_data_schema`, so a function call carries the structured
# screenshot description.
screenshot_tools = genai.protos.FunctionDeclaration(
    parameters=screenshot_data_schema,
    name="screenshot_data",
    description="Get data about the screenshot and what the user is doing with the mouse",
)

In [125]:
# Vision model used for screenshot analysis; the screenshot tool is attached
# so responses can arrive as `screenshot_data` function calls.
model = genai.GenerativeModel(
    model_name="gemini-1.5-pro-latest",
    tools=[screenshot_tools],
)

# Images to text

In [135]:

# Screenshots are expected in ./final_images, named output_<N>.<ext>.
current_dir = os.getcwd()
image_dir = os.path.join(current_dir, "final_images")


def _frame_number(filename):
    """Return the integer N from ``output_<N>.<ext>``, or None if it does not parse."""
    try:
        return int(filename.replace('output_', '').split('.')[0])
    except ValueError:
        return None


# Keep only entries whose name yields a frame number, so stray entries
# (.DS_Store, .ipynb_checkpoints, ...) no longer crash the sort key, then
# order the frames numerically rather than lexicographically.
files = sorted(
    (name for name in os.listdir(image_dir) if _frame_number(name) is not None),
    key=_frame_number,
)

results = []
for file in files:  # slice here (e.g. files[21:22]) to re-run a single frame
    print(f"{file}")
    # The context manager closes the image file as soon as the request
    # returns instead of leaking one open handle per screenshot.
    with Image.open(os.path.join(image_dir, file)) as image:
        result = model.generate_content(
            ["describe the screenshot and what the user is doing with the mouse", image]
        )
    results.append(result)

output_01.png
output_02.png
output_03.png
output_04.png
output_05.png
output_06.png
output_07.png
output_08.png
output_09.png
output_10.png
output_11.png
output_12.png
output_13.png
output_14.png
output_15.png
output_16.png
output_17.png
output_18.png
output_19.png
output_20.png
output_21.png
output_22.png
output_23.png
output_24.png
output_25.png
output_26.png
output_27.png
output_28.png
output_29.png
output_30.png


In [136]:
def get_text_from_result(result):
    """Flatten the function-call arguments of a model response into a dict.

    The first part of the first candidate that carries non-empty
    function-call arguments is used. String-valued arguments (plain ``str``
    or objects exposing ``text_value``) are copied as-is. For
    ``programs_in_scene`` only the FIRST program is kept and its
    ``program_name``, ``program_description`` and ``program_type`` are lifted
    to the top level, defaulting to ``'Unknown'`` when missing. All other
    non-string arguments are dropped.

    Args:
        result: A response object exposing
            ``candidates[0].content.parts[*].function_call.args``.

    Returns:
        dict: Flattened key -> string mapping; empty when no part contains a
        function call with arguments.

    Raises:
        IndexError: If ``result.candidates`` is empty.
    """
    # The function call is not guaranteed to be the first part (a text part
    # may precede it), so scan all parts instead of assuming parts[0].
    args = None
    for part in result.candidates[0].content.parts:
        call = getattr(part, 'function_call', None)
        call_args = getattr(call, 'args', None) if call else None
        if call_args:
            args = call_args
            break
    if not args:
        return {}

    program_keys = ('program_name', 'program_description', 'program_type')
    text_data = {}
    # dict() converts the SDK's map wrapper into a regular dictionary.
    for key, value in dict(args).items():
        if hasattr(value, 'text_value'):
            text_data[key] = value.text_value
        elif isinstance(value, str):
            text_data[key] = value
        elif key == 'programs_in_scene':
            # An empty program list used to raise IndexError on value[0];
            # fall back to the 'Unknown' defaults instead.
            first_program = dict(value[0]) if len(value) else {}
            for k in program_keys:
                text_data[k] = first_program.get(k, 'Unknown')

    return text_data

# One record per screenshot: the flattened model output stored under "action".
json_data_from_images = []
for result in results:
    json_data_from_images.append({"action": get_text_from_result(result)})


In [137]:
# Persist every record as json_data_from_images/NNN.json (zero-padded index).
output_dir = 'json_data_from_images'
# Creating the directory is loop-invariant, so do it once up front.
os.makedirs(output_dir, exist_ok=True)
for i, j in enumerate(json_data_from_images):
    print(f"{i}: {j}")
    with open(os.path.join(output_dir, f'{i:03d}.json'), 'w', encoding='utf-8') as f:
        json.dump(j, f)

0: {'action': {'mouse_action': 'The mouse cursor is hovering over the empty inbox.', 'program_name': 'Email Client', 'program_description': 'Email client', 'program_type': 'email', 'general_scene_description': 'The screenshot shows an email client. The inbox is empty and displays a hot air balloon image with the text "What a productive day! You\\\'ve accomplished a lot". The bottom left corner shows "Syncing...".'}}
1: {'action': {'general_scene_description': 'The screenshot shows an email client. The inbox is currently empty, and a message displays "What a productive day! You\\\'ve accomplished a lot." The client is syncing.', 'mouse_action': 'The mouse cursor is hovering over the email list.', 'program_name': 'Email Client', 'program_description': 'Email client', 'program_type': 'email'}}
2: {'action': {'general_scene_description': 'The screenshot shows an email client. The inbox is empty and displays a hot air balloon image with the text "What a productive day! You\\\'ve accomplishe

# knowledge_base: generate the "what and why" database

In [138]:
# Plain text model (no tools attached) used for reasoning over the extracted data.
llm = genai.GenerativeModel(model_name="gemini-1.5-pro-latest")

In [161]:
# Reload the per-screenshot records written earlier (NNN.json, zero-padded).
json_dir = 'json_data_from_images'
# Count only .json entries so stray files in the folder do not inflate the
# range and trigger a FileNotFoundError.
file_count_in_folder = sum(
    1 for name in os.listdir(json_dir) if name.endswith('.json')
)
json_data_from_images = []
for i in range(file_count_in_folder):
    # Use a context manager so each file handle is closed deterministically.
    with open(os.path.join(json_dir, f'{i:03d}.json'), encoding='utf-8') as f:
        json_data_from_images.append(json.load(f))

# For each pair of consecutive records, ask the model what the user did and why.
knowledge_base = []
# NOTE(review): this range skips the first pair (0, 1) and the last pair
# (n-2, n-1); kept as-is because it may be intentional -- confirm.
for i in range(1, len(json_data_from_images) - 2):
    data = json_data_from_images[i:i + 2]
    response = llm.generate_content([f"Describe in details what the user is doing and why the user took the last action. Return the results in json format whith two keys: 'what' and 'why'. Here is the situation data: {data}"])
    # Strip the Markdown code fences the model tends to wrap around its JSON.
    answer = response.candidates[0].content.parts[0].text
    knowledge_base.append(answer.replace("```json", "").replace("```", ""))
    print(f"{i}: data {data}: {knowledge_base[-1]}")

1: data [{'action': {'general_scene_description': 'The screenshot shows an email client. The inbox is currently empty, and a message displays "What a productive day! You\\\'ve accomplished a lot." The client is syncing.', 'mouse_action': 'The mouse cursor is hovering over the email list.', 'program_name': 'Email Client', 'program_description': 'Email client', 'program_type': 'email'}}, {'action': {'general_scene_description': 'The screenshot shows an email client. The inbox is empty and displays a hot air balloon image with the text "What a productive day! You\\\'ve accomplished a lot".', 'program_name': 'Email Client', 'program_description': 'Email client', 'program_type': 'email', 'mouse_action': 'The mouse is not interacting with the email client.'}}]: 
{
  "what": "The user was checking their email inbox.",
  "why": "Although the inbox was empty and displayed a congratulatory message about a productive day, the user's initial action of hovering the mouse over the email list suggest

# Ask Joshu

In [163]:
# lessons = """# Lessons
# * **Client Prioritization:** This interaction highlights the importance of prioritizing client needs, especially when dealing with key clients. The employee recognizes the potential conflict and immediately seeks guidance from their boss on how to best accommodate the client.
# * **Communication is Key:**  The employee effectively communicates the scheduling conflict to their boss and asks for clear instructions.  This proactive communication ensures everyone is aware of the situation and prevents any misunderstandings.
# * **Time Management and Scheduling:**  The scenario emphasizes the need for effective time management and scheduling practices.  Using a calendar application and checking it before rescheduling meetings helps prevent conflicts.
# * **Following Chain of Command:** The employee appropriately consults their boss before making a decision that could impact other important meetings, demonstrating a good understanding of workplace hierarchy.
# * **Importance of Documentation (Indirect):** While not explicitly shown, the detailed "Action History" underlines the importance of documenting actions, especially when multiple parties are involved. This record can be useful for reviewing the decision-making process and for training purposes."""

# Preamble that injects the accumulated what/why knowledge base into the prompt.
context = f"""Your objective as an assistant is to utilize the lessons learnt, avoid the same mistakes, promote independence and autonomy, including avoiding unnecessary actions where possible. 
The following are actions with reasons of what happened in the past. {knowledge_base}. """

# Full question: preamble followed by the rescheduling scenario.
question_prompt = f"""
{context}
I have just received an email from an important client, asking to reschedule a meeting. 
The desired reschedulling time conflicts with an internal meeting. given the lessons wwee have learned what shoudl i do?"""

response = llm.generate_content([question_prompt])
answer_text = response.candidates[0].content.parts[0].text
print(answer_text)



You've encountered this situation before, so let's apply what you've learned to be efficient and autonomous:

1. **Review the conflict:** Check your calendar to confirm the exact overlap between the client meeting and the internal meeting.  Note the duration of both meetings.

2. **Assess Importance:**  Prioritize the client meeting given past lessons about client importance.

3. **Propose an Alternative (if possible):** Before involving your boss, see if you can propose an alternative time to the client that works for you.  Check your calendar for any free slots on the same day or a nearby day.  Even a slightly earlier or later time might work.

4. **Communicate Clearly with Client:**  Reply promptly to the client acknowledging their request and proposing your alternative time, if you found one. If not, explain the conflict and ask for their availability for a different time slot. Be professional and courteous.

5. **Consult your Boss (if necessary):**  If you can't find a suitable al

In [157]:

# Boss's reply used as the only context for this variant of the question
# (the knowledge base is not included here).
email_from_boss = """
Sure,
Fujitsu is a very important client, we should always put Fujitsu first.
Feel free to reschedule our Very Important Meeting.
For future reference, you do not need to ask my permission to reschedule a meeting with a client. Clients always come first.
Just reschedule the meeting with the client, without the need to ask my permission, nor to contact me.

"""

# The prompt text, including its indentation, is sent to the model verbatim.
boss_prompt = f"""
                                Your objective as an assistant is to utilize the lessons learnt, avoid the same mistakes, including repeating teh same actions if they can be avoided. 
                                This is an email from a boss to an employee asking to reschedule an internal meeting: {email_from_boss}
                                    I have just received an email from an important client, asking to reschedule a meeting. The desired reschedulling time conflicts with a very important internal meeting. Should I reschedule the meeting or contact my boss first?"""

response = llm.generate_content([boss_prompt])
answer_text = response.candidates[0].content.parts[0].text
print(answer_text)


Reschedule the meeting with the client. Your boss has explicitly stated that client meetings take precedence and you do not need their permission to reschedule internal meetings in such cases.  Do so without contacting your boss.



Contact your boss first.  Previous examples show that attempting to reschedule directly before consulting your boss can lead to further complications and the need for more rescheduling.  Getting your boss's input on prioritizing meetings and potential solutions will likely save time and effort in the long run.

