In [11]:
import os
import json
import cv2
import numpy as np
import openai
from sklearn.model_selection import train_test_split


def prepare_data_for_finetuning(image_folder, test_size=0.2, output_file="fine_tune_dataset.jsonl"):
    """
    Prepares a dataset for fine-tuning GPT-4 using grayscale images and corresponding masks.

    Args:
        image_folder (str): Path to the folder containing grayscale images and a "masks" subfolder.
        test_size (float): Proportion of the dataset to include in the test split.
        output_file (str): Output JSONL file for fine-tuning.
    
    Returns:
        tuple: Paths to the training and testing datasets in JSONL format.
    """
    def convert_to_16bit(image):
        """
        Converts an image to 16-bit format.
        """
        if image.dtype == np.uint8:  # Convert 8-bit to 16-bit
            image = cv2.normalize(image, None, 0, 65535, cv2.NORM_MINMAX, dtype=cv2.CV_16U)
        elif image.dtype == np.float32:  # Convert float32 to 16-bit
            image = (image * 65535).clip(0, 65535).astype(np.uint16)
        elif image.dtype != np.uint16:  # Convert other types to 16-bit
            image = cv2.normalize(image, None, 0, 65535, cv2.NORM_MINMAX, dtype=cv2.CV_16U)
        return image

    images_path = [f for f in os.listdir(image_folder) if os.path.isfile(os.path.join(image_folder, f))]
    masks_folder = os.path.join(image_folder, "masks")
    data = []

    for image_file in images_path:
        image_path = os.path.join(image_folder, image_file)
        mask_path = os.path.join(masks_folder, image_file)
        
        if not os.path.exists(mask_path):
            continue  # Skip if mask does not exist
        
        # Load image and mask
        image = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
        mask = cv2.imread(mask_path, cv2.IMREAD_UNCHANGED)

        # Skip if images fail to load
        if image is None or mask is None:
            print(f"Skipping {image_file}: Unable to load.")
            continue

        # Convert to 16-bit if necessary
        image = convert_to_16bit(image)
        mask = convert_to_16bit(mask)

        # Create a prompt-response pair
        prompt = f"Generate a mask for the grayscale image with dimensions {image.shape}."
        completion = f"The mask has been generated with specific contours and intensity ranges."
        
        # Append to the dataset
        data.append({"prompt": prompt, "completion": completion})

    # Split the data
    train_data, test_data = train_test_split(data, test_size=test_size, random_state=42)

    # Save the datasets
    train_file = f"train_{output_file}"
    test_file = f"test_{output_file}"
    
    with open(train_file, "w") as train_f:
        for entry in train_data:
            train_f.write(json.dumps(entry) + "\n")
    
    with open(test_file, "w") as test_f:
        for entry in test_data:
            test_f.write(json.dumps(entry) + "\n")

    return train_file, test_file


def fine_tune_gpt4(train_file, model="gpt-4", epochs=4, learning_rate=1e-4):
    """
    Fine-tunes GPT-4 using the prepared training dataset.

    Args:
        train_file (str): Path to the training dataset in JSONL format.
        model (str): Base GPT-4 model to fine-tune.
        epochs (int): Number of epochs for fine-tuning.
        learning_rate (float): Learning rate for fine-tuning.
    
    Returns:
        str: Fine-tuned model ID.
    """
    openai.api_key = os.getenv("OPENAI_API_KEY")

    # Upload the training file
    print("Uploading training file...")
    try:
        with open(train_file, "rb") as f:
            response = openai.File.create(file=f, purpose="fine-tune")
        training_file_id = response["id"]
    except Exception as e:
        print(f"Error uploading training file: {e}")
        return None

    # Fine-tune the model
    print("Starting fine-tuning...")
    try:
        fine_tune_response = openai.FineTune.create(
            model=model,
            training_file=training_file_id,
            n_epochs=epochs,
            learning_rate=learning_rate
        )
        fine_tune_id = fine_tune_response["id"]
        print(f"Fine-tuning initiated with ID: {fine_tune_id}")
        return fine_tune_id
    except Exception as e:
        print(f"Error during fine-tuning: {e}")
        return None


def save_fine_tuned_model(fine_tune_id, output_model_path):
    """
    Saves the fine-tuned model to disk.

    Args:
        fine_tune_id (str): The fine-tune ID returned after training.
        output_model_path (str): Path to save the fine-tuned model.
    """
    try:
        fine_tune_details = openai.FineTune.retrieve(id=fine_tune_id)
        fine_tuned_model = fine_tune_details["fine_tuned_model"]
        
        with open(output_model_path, "w") as f:
            f.write(fine_tuned_model)
        print(f"Fine-tuned model saved to: {output_model_path}")
    except Exception as e:
        print(f"Error saving fine-tuned model: {e}")


In [12]:
image_folder = '/nas_mnt/carruthers/Einar/dsred/train'
test_size=0.1

# Step 1: Prepare data
train_file, test_file = prepare_data_for_finetuning(image_folder)

# Step 2: Fine-tune GPT-4
fine_tune_id = fine_tune_gpt4(train_file, epochs=3, learning_rate=5e-5)

# Step 3: Save the fine-tuned model
save_fine_tuned_model(fine_tune_id, "./fine_tuned_gpt4_mask_generator.txt")


Uploading training file...
Error uploading training file: 

You tried to access openai.File, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742

Error saving fine-tuned model: 

You tried to access openai.FineTune, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussion