In [1]:
import requests
import json
import time

class OpenAIAssistant:
    """Small client for the OpenAI Assistants (v2 beta) REST endpoints.

    Wraps the calls needed for one conversational turn: create a thread, post
    a user message, start a run for the configured assistant, poll the run
    until it finishes and read the thread's messages back.
    """

    # Run states from which the run can no longer reach 'completed'.
    _FAILED_STATUSES = ('failed', 'cancelled', 'expired', 'incomplete')

    def __init__(self, api_key, assistant_id, request_timeout=60):
        """Store credentials and build the headers shared by every request.

        Args:
            api_key: OpenAI API key, sent as a Bearer token.
            assistant_id: Id of the assistant that runs are started for.
            request_timeout: Seconds to wait on each HTTP call before
                ``requests`` gives up (without it a stalled connection would
                block the notebook forever).
        """
        self.api_key = api_key
        self.assistant_id = assistant_id
        self.request_timeout = request_timeout
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}",
            "OpenAI-Beta": "assistants=v2",
        }

    def create_thread(self):
        """Create an empty thread and return its id.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        response = requests.post(
            "https://api.openai.com/v1/threads",
            headers=self.headers,
            json={},
            timeout=self.request_timeout,
        )
        response.raise_for_status()  # Ensure request was successful
        return response.json()['id']

    def send_message(self, thread_id, message_content):
        """Append a user message to the thread and return the decoded API response."""
        url = f"https://api.openai.com/v1/threads/{thread_id}/messages"
        payload = {"role": "user", "content": message_content}
        response = requests.post(url, headers=self.headers, json=payload, timeout=self.request_timeout)
        response.raise_for_status()
        return response.json()

    def start_run(self, thread_id):
        """Start a run of the configured assistant on the thread and return the run id."""
        url = f"https://api.openai.com/v1/threads/{thread_id}/runs"
        payload = {"assistant_id": self.assistant_id}
        response = requests.post(url, headers=self.headers, json=payload, timeout=self.request_timeout)
        response.raise_for_status()
        return response.json()['id']

    def poll_run_status_and_get_response(self, thread_id, run_id, poll_interval=1.0, max_wait=600.0):
        """Poll a run until it completes, then return the thread's messages.

        Args:
            thread_id: Thread the run belongs to.
            run_id: Run to wait for.
            poll_interval: Seconds to sleep between status checks.
            max_wait: Upper bound in seconds for the whole wait; ``None``
                disables the bound.

        Returns:
            The list returned by ``fetch_messages`` for the thread.

        Raises:
            RuntimeError: If the run ends in a failed state or asks for tool
                outputs, which this client cannot supply.
            TimeoutError: If the run is still unfinished after ``max_wait``.
            requests.HTTPError: If a status request is rejected.
        """
        url = f"https://api.openai.com/v1/threads/{thread_id}/runs/{run_id}"
        deadline = None if max_wait is None else time.monotonic() + max_wait
        while True:
            response = requests.get(url, headers=self.headers, timeout=self.request_timeout)
            response.raise_for_status()  # Ensure request was successful
            status = response.json()['status']
            print(f"Checking run status: {status}")  # Debugging output

            if status == 'completed':
                print("Run completed successfully. Fetching messages...")
                return self.fetch_messages(thread_id)
            if status in self._FAILED_STATUSES:
                raise RuntimeError(f"Run did not complete successfully: {status}")
            if status == 'requires_action':
                # The run is waiting for tool outputs; nothing here submits
                # them, so polling further would never terminate.
                raise RuntimeError(f"Run did not complete successfully: {status}")
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Run {run_id} still '{status}' after {max_wait} seconds"
                )
            time.sleep(poll_interval)

    def fetch_messages(self, thread_id):
        """Return the ``data`` list of the thread's messages endpoint."""
        url = f"https://api.openai.com/v1/threads/{thread_id}/messages"
        response = requests.get(url, headers=self.headers, timeout=self.request_timeout)
        response.raise_for_status()  # Ensure request was successful
        messages_data = response.json()
        print("Messages fetched successfully.")
        return messages_data['data']

    @staticmethod
    def _parse_json_reply(content):
        """Decode a JSON reply, tolerating an optional Markdown code fence around it."""
        text = content.strip()
        if text.startswith("```"):
            # Drop the opening fence line ("```" or "```json") ...
            first_newline = text.find("\n")
            text = text[first_newline + 1:] if first_newline != -1 else text[3:]
            # ... and the closing fence, if the model emitted one.
            text = text.rstrip()
            if text.endswith("```"):
                text = text[:-3]
        return json.loads(text)

    def process_message(self, thread_id, prompt_message):
        """Run one turn on the thread and return the assistant's reply parsed as JSON.

        Args:
            thread_id: Existing thread to post into.
            prompt_message: Text sent as the user message.

        Returns:
            The decoded JSON value of the first message's text, or ``None``
            when the first returned message is not from the assistant or
            carries no text part.

        Raises:
            json.JSONDecodeError: If the reply text is not valid JSON.
        """
        self.send_message(thread_id, prompt_message)
        run_id = self.start_run(thread_id)
        messages = self.poll_run_status_and_get_response(thread_id, run_id)
        if not messages or messages[0]['role'] != 'assistant':
            print("No assistant message found or the first message is not from the assistant.")
            return None

        # A message may mix text with other parts; use the first text part
        # instead of assuming it sits at index 0.
        reply_text = next(
            (part['text']['value'] for part in messages[0]['content'] if 'text' in part),
            None,
        )
        if reply_text is None:
            print("The assistant message contains no text content.")
            return None
        return self._parse_json_reply(reply_text)


In [8]:
import os
import base64
import requests

def retrieve_api_key():
    """Return the OpenAI API key stored in the ``OPENAI_API_KEY`` environment variable.

    Surrounding whitespace is removed, since a stray newline in the variable
    would corrupt the Authorization header built from the key.

    Raises:
        ValueError: If the variable is unset, empty or only whitespace.
    """
    api_key = (os.getenv('OPENAI_API_KEY') or '').strip()
    if not api_key:
        # Fail here rather than later with an opaque HTTP 401.
        raise ValueError("API key not found in environment variables")
    return api_key

class GPTManager:
    """Sends terminal screenshots to the chat-completions endpoint and parses the JSON reply."""

    def __init__(self, request_timeout=60):
        """Read the API key from the environment.

        Args:
            request_timeout: Seconds to wait for the HTTP call before giving up.
        """
        self.api_key = retrieve_api_key()
        self.request_timeout = request_timeout

    def encode_image(self, image_path):
        """Encode the image at the given path to Base64."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    @staticmethod
    def _image_data_url(image_path, base64_image):
        """Build a data URL whose MIME type matches the file extension."""
        import mimetypes

        mime_type, _ = mimetypes.guess_type(image_path)
        if not mime_type or not mime_type.startswith("image/"):
            # Unknown extension: keep the label this code has always sent.
            mime_type = "image/jpeg"
        return f"data:{mime_type};base64,{base64_image}"

    @staticmethod
    def _parse_json_content(content):
        """Decode a JSON reply, tolerating an optional Markdown code fence around it."""
        # Imported here so the class does not depend on another cell's imports.
        import json

        text = content.strip()
        if text.startswith("```"):
            # Drop the opening fence line ("```" or "```json") ...
            first_newline = text.find("\n")
            text = text[first_newline + 1:] if first_newline != -1 else text[3:]
            # ... and the closing fence, if present.
            text = text.rstrip()
            if text.endswith("```"):
                text = text[:-3]
        return json.loads(text)

    def send_request(self, image_path):
        """Send the screenshot to the OpenAI API and return the extracted JSON.

        Args:
            image_path: Path of the image file to upload.

        Returns:
            The decoded JSON value found in the model's reply, or ``None``
            when the HTTP request fails (the failure is printed).

        Raises:
            json.JSONDecodeError: If the reply text is not valid JSON.
        """
        from pprint import pprint

        base64_image = self.encode_image(image_path)
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}"
        }

        prompt = """
            As a sysadmin bot, your task is to analyze a macOS terminal screenshot and extract relevant text, organizing it into a structured JSON format. Focus on the following components:
            {
                "highlighted": "All highlighted text in the terminal",
                "history": [
                    {
                        "command": "The executed command",
                        "output": "Output from the command"
                    }
                ],
                "messagesToAssistant": {
                    "sbMessages": "Commands starting with 'sb' directed to the assistant"
                },
                "mostRecent": {
                    "item": "Most recent item, which could be a command, error message, or 'sb' message"
                }
            }
            Extraction Rules:
            - Exclude any user and PC names from the output.
            - Separate commands and their outputs in the history.
            - Include only text that starts with 'sb' in the 'messagesToAssistant'.
            - Identify and log only the single most recent item (command, error, or 'sb' message) under 'mostRecent'.
            Ensure strict adherence to the JSON structure for compatibility with further processing.
        """
        payload = {
            "model": "gpt-4o",  # Ensure to use a valid model identifier
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": prompt
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                # MIME type follows the file extension (the
                                # notebook feeds .png screenshots).
                                "url": self._image_data_url(image_path, base64_image)
                            }
                        }
                    ]
                }
            ],
            "max_tokens": 1000  # Adjusted max_tokens for possibly longer responses needed by the detailed prompt
        }

        try:
            response = requests.post(
                "https://api.openai.com/v1/chat/completions",
                headers=headers,
                json=payload,
                timeout=self.request_timeout,
            )
            response.raise_for_status()  # Raises an HTTPError for bad responses
            response_data = response.json()
            # Extracting only the message content from the response
            content = response_data['choices'][0]['message']['content']
            print('\nThis is the content of the response:')
            pprint(response_data)

            return self._parse_json_content(content)
        except requests.exceptions.RequestException as e:
            # Best effort: report the failure and let the caller skip this image.
            print(f"Request failed: {e}")
            return None


In [9]:
import os

def list_sorted_images(directory):
    """List the ``.png`` files in a directory, sorted numerically by filename.

    Files named like ``12.png`` are ordered by their integer prefix (the text
    before the first dot). Files whose prefix is not an integer are placed
    after the numbered ones, in alphabetical order, instead of aborting the
    whole listing.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A list of filenames (without the directory part).
    """
    def sort_key(filename):
        stem = filename.split('.')[0]
        try:
            return (0, int(stem), filename)
        except ValueError:
            # Non-numeric name: sort it behind every numbered image.
            return (1, 0, filename)

    png_files = [name for name in os.listdir(directory) if name.endswith('.png')]
    return sorted(png_files, key=sort_key)

# Usage example: choose the flow directory and build the ordered list of
# screenshot paths consumed by the processing cells below.
# Alternative input directory: 'terminal_content/flow_1/images/'
directory_path = 'terminal_content/flow_2/images/'
sorted_images = list_sorted_images(directory_path)
# Re-attach the directory, since list_sorted_images returns bare filenames.
img_paths = [os.path.join(directory_path, img) for img in sorted_images]

In [10]:
# Usage example: screenshot -> extracted JSON -> assistant reply, collected in `results`.
api_key = retrieve_api_key()
# The assistant id can be overridden through the environment; the default is
# the id this notebook was written against.
assistant_id = os.getenv("OPENAI_ASSISTANT_ID", "asst_IQyOH1i0Qjs0agZsBE23nQrS")
assistant = OpenAIAssistant(api_key, assistant_id)
thread_id = assistant.create_thread()  # Create thread outside the process message

gpt_manager = GPTManager()

# How many screenshots to process. 1 keeps the cost of the original
# single-image trial run; set to None to process the whole flow.
max_images = 1

results = []
for img_path in img_paths[:max_images]:
    dict_img_text_content = gpt_manager.send_request(img_path)
    if dict_img_text_content is None:
        # send_request signals a failed HTTP call by returning None; skip the image.
        continue
    dict_assistant_response = assistant.process_message(
        thread_id, prompt_message=json.dumps(dict_img_text_content)
    )
    # Later cells read these three keys (results[0][...], the Markdown report).
    results.append({
        'img_path': img_path,
        'dict_img_text_content': dict_img_text_content,
        'dict_model_response': dict_assistant_response,
    })


This is the content of the response:
{'choices': [{'finish_reason': 'stop',
              'index': 0,
              'logprobs': None,
              'message': {'content': '{\n'
                                     '    "highlighted": "",\n'
                                     '    "history": [],\n'
                                     '    "messagesToAssistant": {\n'
                                     '        "sbMessages": ""\n'
                                     '    },\n'
                                     '    "mostRecent": {\n'
                                     '        "item": ""\n'
                                     '    }\n'
                                     '}',
                          'role': 'assistant'}}],
 'created': 1717854317,
 'id': 'chatcmpl-9Xqfdn1UMrTKJ2lM6s4FLms0LU4xH',
 'model': 'gpt-4o-2024-05-13',
 'object': 'chat.completion',
 'system_fingerprint': 'fp_aa87380ac5',
 'usage': {'completion_tokens': 41,
           'prompt_tokens': 1002,
          

In [9]:
# Single-turn check: send one extracted screenshot JSON to the assistant on a
# fresh thread and print the parsed reply.
# Relies on `api_key` and `dict_img_text_content` already existing in the
# kernel (both are assigned in the pipeline cell), so that cell must run first.
assistant_id = "asst_IQyOH1i0Qjs0agZsBE23nQrS"
assistant = OpenAIAssistant(api_key, assistant_id)
thread_id = assistant.create_thread()  # Create thread outside the process message
dict_assistant_response = assistant.process_message(thread_id, prompt_message=json.dumps(dict_img_text_content))
print('Response from GPT:\n')
print(dict_assistant_response)

Checking run status: in_progress
Checking run status: completed
Run completed successfully. Fetching messages...
Messages fetched successfully.
Response from GPT:

{'intention': 'create and run a Python script with pandas', 'command': 'python3 script.py'}


In [7]:
# Show the assistant's parsed reply for the first processed image.
# Needs `results` to hold at least one entry with a 'dict_model_response' key,
# i.e. the cell that fills `results` must have been run beforehand.
results[0]['dict_model_response']

{'intention': 'run the created Python script', 'command': 'python script.py'}

In [37]:
def generate_markdown_report(results, flow_number, report_dir='./reports'):
    """Write a Markdown report with one section per processed image.

    Each section shows the image, the JSON extracted from it and the JSON
    reply of the assistant. The file is written to
    ``<report_dir>/flow_<flow_number>.md``; the directory is created if needed.

    Args:
        results: Iterable of dicts with the keys ``'img_path'``,
            ``'dict_img_text_content'`` and ``'dict_model_response'``.
        flow_number: Value used in the report's filename.
        report_dir: Directory that receives the report.

    Raises:
        KeyError: If an entry lacks one of the expected keys.
    """
    sections = ["# Image Processing Report\n\n"]
    for result in results:
        img_path = result['img_path']
        extracted_json = json.dumps(result['dict_img_text_content'], indent=4)
        response_json = json.dumps(result['dict_model_response'], indent=4)
        sections.append(
            f"## Image: {os.path.basename(img_path)}\n\n"
            # Image link is relative to the Markdown file's location.
            f"![Image](../{img_path})\n\n"
            # JSON fences give syntax highlighting in rendered Markdown.
            f"**Extracted Text:**\n\n```json\n{extracted_json}\n```\n\n"
            f"**AI Response:**\n\n```json\n{response_json}\n```\n\n"
        )

    os.makedirs(report_dir, exist_ok=True)
    report_path = os.path.join(report_dir, f"flow_{flow_number}.md")
    # Explicit UTF-8 so non-ASCII image paths are written identically on every platform.
    with open(report_path, 'w', encoding='utf-8') as file:
        file.write("".join(sections))

# Example usage: write the report for flow 1 (./reports/flow_1.md).
# `results` must already be populated by the processing cell; with an empty
# list the report contains only the title.
generate_markdown_report(results, 1)
