In [26]:
import os  # local import: this cell runs before the notebook's main imports cell

# Read the Gemini API key from the GOOGLE_API_KEY environment variable so the
# real secret never has to be saved inside the notebook. The placeholder is
# kept as the fallback, so the name is always defined for later cells.
API_KEY = os.environ.get('GOOGLE_API_KEY', 'REDACTED')

# Imports

In [27]:
import pandas as pd
import os
import google.generativeai as genai
from datetime import datetime
import csv
import re

# Prompt

In [33]:
def get_prompt(CATEGORY: str, SUBCATEGORY: str) -> str:
    """Build the task-generation prompt for one category/subcategory pair.

    Args:
        CATEGORY: Top-level category name; interpolated into the prompt.
        SUBCATEGORY: Subcategory name; interpolated into the prompt.

    Returns:
        The full prompt text. It lists the expected table columns and asks
        for a "|"-delimited table called TABLE and nothing else.
    """
    # The instructions refer to the same columns that COLUMNS declares
    # (Difficulty and Frequency); there is no time column in the table.
    return f"""
    CATEGORY
    {CATEGORY}
    
    SUBCATEGORY
    {SUBCATEGORY}
    
    COLUMNS
    Index, Category, Subcategory, Task, App, Difficulty (1-10), Frequency (1-10)
    
    INSTRUCTION
    generate a table called TABLE, an exhaustive and comprehensive list of tasks that would be done on a computer in the given category: {CATEGORY} and subcategory: {SUBCATEGORY}
    
    each task needs to be specific - i need to be able to give this table to someone and get them to do screen recordings of themselves doing each task. 
    
    the tasks cannot be too complicated or vague or require additional information beyond the task description.
    
    Lean towards over-estimating the task difficulty. 

    The frequency should be estimating how often a computer user would need to do that task

    Do not give choice in the Task e.g. "Enable or disable the system mouse scroll wheel" is bad and should rather be something like "Enable and then disable the system mouse scroll wheel"
    
    make sure to create a new row in the table for each app / website that can be used to accomplish each task
    
    a reasonable average person should agree with the difficulty and frequency estimates.
    
    make the examples as specific as possible without revealing personal information. 

    do not use any ambiguity e.g. do not say "Various Forums" for the app, rather pick specific forums, for example.

    specify the platform / os if there is any ambiguity e.g. "Finder" -> "MacOS Finder"

    for broad subcategories like "Application Launch", do not return a row for every possible application but rather pick a few representative examples and make sure all platforms are covered

    start by reflecting on the apps / websites that a desktop computer user would use in the given category and subcategory then write all the tasks that can be completed so that the final TABLE contains all tasks across all apps/websites that apply to the category and subcategory

    be sure to make the table contain rows for all common apps / operating systems / websites in the given category/subcategory
    
    return nothing else but TABLE with "|" delimiters.
    """

# Setup Gemini

In [34]:
# Hand the API key to the google.generativeai client before any model is created.
genai.configure(api_key=API_KEY)

# Generation parameters passed to the model when it is constructed.
generation_config = dict(
    temperature=1,
    top_p=0.95,
    top_k=64,
    max_output_tokens=8192,
    response_mime_type="text/plain",
)

# One entry per harm category, all sharing the same blocking threshold.
safety_settings = [
    {"category": harm_category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

# Load Jobs

In [40]:
# Read the list of category/subcategory jobs from disk.
file_path = 'categories.csv'
categories = pd.read_csv(file_path)

# Preview the first and last rows, with an ellipsis line between them.
print(categories.head(), "...", categories.tail(), sep="\n")

   Index           Category              Subcategory
0      1  Operating Systems          File Navigation
1      2  Operating Systems       Application Launch
2      3  Operating Systems          System Settings
3      4  Operating Systems          Task Management
4      5  Operating Systems  User Account Management
...
     Index             Category                                  Subcategory
228    229   Security & Privacy                             Password Manager
229    230   Security & Privacy                              Data Encryption
230    231  Finance and Banking                        Cryptocurrency Basics
231    232  Finance and Banking  Cryptocurrency Decentralized (e.g. Uniswap)
232    233         Social Media                                        x.com


# Use Gemini to get tasks for each job

In [41]:
def _safe_name(value):
    """Return ``value`` as text with path separators replaced by underscores."""
    text = str(value)
    for separator in (os.sep, os.altsep):
        if separator:
            text = text.replace(separator, '_')
    return text


def _split_table_row(line):
    """Split one pipe-delimited table line into a list of trimmed cells."""
    text = line.strip()
    # Remove at most one bordering pipe per side so empty edge cells survive.
    if text.startswith('|'):
        text = text[1:]
    if text.endswith('|'):
        text = text[:-1]
    return [cell.strip() for cell in text.split('|')]


def _parse_pipe_table(text):
    """Extract ``(header, rows)`` from a pipe-delimited table in ``text``.

    Lines without a "|" (headings, code fences, blank lines) and separator
    rows made only of pipes, dashes, colons and whitespace are ignored.

    Raises:
        ValueError: if ``text`` contains no table line at all.
    """
    table_lines = [
        line for line in text.splitlines()
        if '|' in line and not re.fullmatch(r'[\s|:-]+', line)
    ]
    if not table_lines:
        raise ValueError("response did not contain a '|'-delimited table")
    header, *rows = (_split_table_row(line) for line in table_lines)
    return header, rows


# Make sure the output directory exists up front; otherwise every result would
# be lost at write time, after the API request has already been made.
os.makedirs('TASKS', exist_ok=True)

# Built on first use so a fully cached run never constructs a model.
model = None

for index, row in categories.iterrows():
    CATEGORY = row['Category']
    SUBCATEGORY = row['Subcategory']

    # output csv of tasks for this job (path separators in names are replaced
    # so the file always lands directly inside TASKS/)
    FILENAME = f'TASKS/{index}_{_safe_name(CATEGORY)}_{_safe_name(SUBCATEGORY)}.csv'

    PROMPT = get_prompt(CATEGORY, SUBCATEGORY)

    # skip if already done
    if os.path.exists(FILENAME):
        print(f"[loading saved data] {index+1}/{len(categories)} CATEGORY: {CATEGORY}, SUBCATEGORY: {SUBCATEGORY}")
        continue

    print(f"{index+1}/{len(categories)} CATEGORY: {CATEGORY}, SUBCATEGORY: {SUBCATEGORY}")

    try:
        if model is None:
            # get jobs CSV using gemini
            model = genai.GenerativeModel(
                model_name="gemini-1.5-flash",
                safety_settings=safety_settings,
                generation_config=generation_config,
            )

        start_time = datetime.now()
        print(f"Start time: {start_time}")

        chat_session = model.start_chat(
            history=[
                {
                    "role": "user",
                    "parts": [
                        PROMPT,
                    ],
                }
            ]
        )

        response = chat_session.send_message("Produce a TABLE as per the instructions above.")
        end_time = datetime.now()
        duration = end_time - start_time

        print(f"End time: {end_time}")
        print(f"Gemini API request duration: {duration}")

        # Parse before opening the file: an unusable response raises here, so
        # no file is created and the job is retried on the next run instead of
        # being treated as already done.
        header, rows = _parse_pipe_table(response.text)

        # Write to CSV (explicit encoding so non-ASCII task text is portable)
        with open(FILENAME, 'w', newline='', encoding='utf-8') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(header)
            csvwriter.writerows(rows)

        print("Saving file")
    except Exception as e:
        # Best effort: report the failure and carry on with the next job.
        print(f"[ERROR] {index+1}/{len(categories)} CATEGORY: {CATEGORY}, SUBCATEGORY: {SUBCATEGORY}")
        print(e)

[loading saved data] 1/233 CATEGORY: Operating Systems, SUBCATEGORY: File Navigation
[loading saved data] 2/233 CATEGORY: Operating Systems, SUBCATEGORY: Application Launch
[loading saved data] 3/233 CATEGORY: Operating Systems, SUBCATEGORY: System Settings
[loading saved data] 4/233 CATEGORY: Operating Systems, SUBCATEGORY: Task Management
[loading saved data] 5/233 CATEGORY: Operating Systems, SUBCATEGORY: User Account Management
[loading saved data] 6/233 CATEGORY: Operating Systems, SUBCATEGORY: Security Settings
[loading saved data] 7/233 CATEGORY: Operating Systems, SUBCATEGORY: Updates or Patches
[loading saved data] 8/233 CATEGORY: Productivity Software, SUBCATEGORY: Document Creation
[loading saved data] 9/233 CATEGORY: Productivity Software, SUBCATEGORY: Spreadsheet Manipulation
[loading saved data] 10/233 CATEGORY: Productivity Software, SUBCATEGORY: Presentation Design
[loading saved data] 11/233 CATEGORY: Productivity Software, SUBCATEGORY: Email Composition
[loading saved