# Setup

## Verify we're in the Conda environment

In [None]:
import sys
# Print the path of the Python interpreter that is running this notebook, so
# it can be compared by eye against the expected Conda environment.
print(sys.executable)

## Import python packages

In [None]:
import os
import sys
import json
import openai
from PIL import Image
import base64
import io
from dotenv import load_dotenv
import requests
from openai import OpenAI
import pprint
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
import subprocess
import textwrap

## OpenAI API key

In [None]:
# Set up the OpenAI client.
# The API key is read from the OPENAI_API_KEY environment variable, i.e. the
# equivalent of: api_key = os.environ.get("OPENAI_API_KEY")

# Load the .env file first, so the environment lookup below can see values
# defined there (presumably including OPENAI_API_KEY - verify your .env).
load_dotenv()

client = OpenAI(
    # This is the default and can be omitted
    api_key=os.environ.get("OPENAI_API_KEY"),
)


# Helper functions

## Function to base64 encode an image

In [None]:
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a base64 string.

    The file is read in binary mode and the base64 bytes are decoded as UTF-8,
    giving a plain ``str`` that can be sent to OpenAI.
    """
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, 'utf-8')

## Function to load existing results from JSON

In [None]:
def load_existing_results(filename):
    """Return the parsed JSON content of ``filename``.

    When no file exists at ``filename`` an empty list is returned instead.
    """
    # Guard clause: nothing on disk yet means there are no results so far.
    if not os.path.exists(filename):
        return []

    with open(filename, 'r') as results_file:
        parsed = json.load(results_file)
    return parsed

## Function to review the generated stories for each image

In [None]:
# Display for review
def review(input_file):
    """Show every image that has fewer than 10 accepted opening sentences.

    Loads the judged results from ``input_file`` (a JSON list whose entries
    carry an ``"image"`` path and a ``"response"`` dict), then walks all
    non-hidden ``*.png`` files found recursively below the hard-coded image
    directory. For each image with fewer than 10 entries under
    ``response["accepted_opening_sentences_with_stories"]`` a thumbnail and
    the number of accepted sentences are displayed. A summary count is
    printed at the end.

    Args:
        input_file: Path of the JSON file holding the judged results.

    Raises:
        SystemExit: With status 1 when ``input_file`` does not exist, cannot
            be parsed as JSON, or when an image on disk has no entry in it.
    """
    if not os.path.exists(input_file):
        print(f"Error: Could not find input_file {input_file}.")
        sys.exit(1)

    with open(input_file, 'r') as f:
        try:
            inputs = json.load(f)
        except json.JSONDecodeError:
            print(f"Error: Could not parse existing data in {input_file}.")
            sys.exit(1)

    # Index the entries by image path once, so each image is an O(1) lookup
    # instead of a scan over all entries. setdefault keeps the FIRST entry
    # for a path, matching a first-match search over the list.
    entries_by_image = {}
    for entry in inputs:
        entries_by_image.setdefault(entry["image"], entry)

    # Base directory containing all the folders
    base_dir = Path("/Scandisk/onicai/charles/images")
    image_paths = [path for path in base_dir.glob("**/*.png") if not path.name.startswith(".")]
    thumbnail_size = (200, 200)
    required_sentences = 10

    count_not_enough_sentences = 0
    for icount, image_path in enumerate(image_paths, start=1):
        existing_entry = entries_by_image.get(str(image_path))

        if existing_entry is None:
            # Not yet processed, ERROR
            print(f"ERROR: Stories for this image were not yet judged: {image_path}")
            sys.exit(1)

        accepted_opening_sentences_with_stories = existing_entry["response"]["accepted_opening_sentences_with_stories"]
        if len(accepted_opening_sentences_with_stories) >= required_sentences:
            continue

        count_not_enough_sentences += 1
        print(f"------------------\n image {icount}: {image_path}")
        # Open and display the image
        image = Image.open(image_path)
        image.thumbnail(thumbnail_size)  # Resize the image to a thumbnail
        plt.figure(figsize=(4, 4))  # Adjust figure size
        plt.imshow(image)
        plt.axis('off')  # Hide axes for better view
        plt.show()
        print(f"# accepted opening sentences = {len(accepted_opening_sentences_with_stories)}")

    print(f"Number of images with fewer than {required_sentences} accepted opening sentences = {count_not_enough_sentences}")

## Function to call llama2.c

In [None]:
# Defaults for the local llama2.c setup; override them per call through the
# keyword arguments of generate_story() when running on another machine.
_LLAMA2C_EXECUTABLE = "/Users/arjaan/icppWorld/repos/llama2.c/run"
_LLAMA2C_MODEL = "/Users/arjaan/icppWorld/repos/charles/models/out-09/model.bin"
_LLAMA2C_TOKENIZER = "/Users/arjaan/icppWorld/repos/charles/models/out-09/tok4096.bin"


def generate_story(
    opening_sentence,
    *,
    executable=_LLAMA2C_EXECUTABLE,
    model_path=_LLAMA2C_MODEL,
    tokenizer_path=_LLAMA2C_TOKENIZER,
    temperature="0.1",
    top_p="0.9",
):
    """Run the llama2.c binary on ``opening_sentence`` and return its output.

    The command executed is
    ``executable model_path -z tokenizer_path -t temperature -p top_p -i opening_sentence``.
    It is passed as an argument list (no shell), so the sentence needs no
    quoting or escaping.

    Args:
        opening_sentence: Text passed to the binary with the ``-i`` flag.
        executable: Path of the program to run.
        model_path: First positional argument of the program.
        tokenizer_path: Value of the ``-z`` flag.
        temperature: Value of the ``-t`` flag; converted with ``str()``.
        top_p: Value of the ``-p`` flag; converted with ``str()``.

    Returns:
        The captured standard output, stripped of surrounding whitespace and
        with every line containing ``"achieved tok/s"`` removed.

    Note:
        Standard error is captured but not used, and the exit status is not
        checked; a failing run therefore yields whatever was written to
        standard output (possibly an empty string).
    """
    # Define the command as a list
    command = [
        executable,
        model_path,
        "-z", tokenizer_path,
        "-t", str(temperature),
        "-p", str(top_p),
        "-i", opening_sentence,
    ]

    # Run the command and capture the output
    result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

    # Process the output to exclude the "achieved tok/s" line
    output_lines = result.stdout.strip().split('\n')
    story = "\n".join(line for line in output_lines if "achieved tok/s" not in line)

    return story

## Function to reformat for bioniq

In [None]:
def _show_image(image):
    """Display ``image`` in a small matplotlib figure without axes."""
    plt.figure(figsize=(2, 2))  # Adjust figure size
    plt.imshow(image)
    plt.axis('off')  # Hide axes for better view
    plt.show()


def reformat(input_file, output_file, image_start, image_end):
    """Check that every entry of ``input_file`` is reviewed with 10 prompts.

    ``input_file`` must be a JSON list of entries with the keys
    ``reviewed``, ``image_index``, ``image_category``, ``image_filename``,
    ``num_prompts`` and ``prompts``. Entries are checked in order. The first
    entry that is not reviewed, or whose ``num_prompts`` differs from
    ``len(prompts)`` or from 10, is displayed for manual review: its image,
    its metadata and, for each prompt, a story generated with
    ``generate_story``. The function then exits.

    When every entry passes, the function returns without producing output;
    the actual reformatting and saving step is not implemented yet (see the
    TODO at the end of the body).

    Args:
        input_file: Path of the JSON file to check.
        output_file: Currently unused; intended target of the reformatted data.
        image_start: Currently unused.
        image_end: Currently unused.

    Raises:
        SystemExit: With status 1 when ``input_file`` is missing or is not
            valid JSON, or after displaying an entry that still needs review.
    """
    # Read the input file
    if not os.path.exists(input_file):
        print(f"Error: Could not find input_file {input_file}.")
        sys.exit(1)

    with open(input_file, 'r') as f:
        try:
            inputs = json.load(f)
        except json.JSONDecodeError:
            print(f"Error: Could not parse existing data in {input_file}.")
            sys.exit(1)

    base_dir = Path("/Users/arjaan/icppWorld/repos/charles/assets/images_charles_for_bioniq")
    thumbnail_size = (200, 200)
    expected_num_prompts = 10

    for entry in inputs:
        reviewed = entry["reviewed"]
        image_index = entry["image_index"]
        image_category = entry["image_category"]
        image_filename = entry["image_filename"]
        num_prompts = entry["num_prompts"]
        prompts = entry["prompts"]

        image_path = base_dir / image_filename

        # Entry is complete: reviewed and holding exactly the expected prompts.
        if reviewed and num_prompts == len(prompts) and num_prompts == expected_num_prompts:
            continue

        # Open and display the image
        print("--------------------------------------------------")
        image = Image.open(image_path)
        image.thumbnail(thumbnail_size)  # Resize the image to a thumbnail
        _show_image(image)

        if not reviewed:
            print("To be reviewed")
        else:
            print("Is reviewed, but num_prompts is wrong...???...")

        print(f"reviewed        : {reviewed}")
        print(f"image_index     : {image_index}")
        print(f"image_category  : {image_category}")
        print(f"image_filename  : {image_filename}")
        print(f"image_path      : {str(image_path)}")
        print(f"num_prompts     : {num_prompts}")
        print(f"len(prompts)    : {len(prompts)}")
        print("prompts:")
        for prompt_index, prompt in enumerate(prompts):
            print("=====================")
            # Repeat the image next to every prompt so that the prompt and
            # its story can be judged without scrolling back up.
            _show_image(image)
            print(f"image_filename  : {image_filename}")
            print("")
            print(f"{prompt_index}: {prompt}")

            story = generate_story(prompt)

            # Wrap the text to 80 characters per line
            print("")
            wrapped_text = textwrap.fill(story, width=80)
            print(wrapped_text)
            print("")

        print("STILL MORE REVIEW WORK TO DO...")
        sys.exit(1)

    # TODO: reformat into the double-pages structure and save it. Sketch of
    # the intended implementation (collect into an `outputs = []` list that
    # is created before the loop, appending once per entry inside the loop):
    #
    #     output = {
    #         "reviewed": False,
    #         "image_index": image_index,
    #         "image_category": image_category,
    #         "image_filename": image_filename,
    #         "image_path": str(image_path),
    #         "num_prompts": len(prompts),
    #         "prompts": prompts
    #     }
    #     outputs.append(output)
    #
    # # Save the reformatted data to the output file
    # with open(output_file, 'w') as f:
    #     json.dump(outputs, f, indent=4)
    #
    # print(f"Saved everything to : {output_file}")

# Run it

In [None]:
# JSON file with the entries to check (relative to the notebook's working directory).
input_file = './5-reformat.json'

# Image range passed through to reformat(); the function as defined in this
# notebook accepts these values but does not use them.
image_start = 0
image_end = 500

# Placeholder name: the part of reformat() that writes the output file is
# commented out, so nothing is written here.
output_file = './no-output-file'
reformat(input_file, output_file, image_start, image_end)