### Importing necessary libraries

In [1]:
from types import SimpleNamespace
import json
import re
import time
from pandas import json_normalize
from pprint import pprint
from IPython.core.display import HTML
from tqdm import tqdm
tqdm.pandas()

from dotenv import load_dotenv
load_dotenv()

from openai import OpenAI
client = OpenAI()

### Step 1: We need to ask users for their input on the scope of the guideline - their responses would be used as prompts into the model

In [2]:
# objective = input("What is the objective or purpose of the guideline?")

# author = input("Who is the author?")

# audience = input("Who is the intended audience?")

# format = input("What would be your preferred format of the guideline?")

# instructions = input("Please provide necessary and additional details that are needed to complete each step.")

In [3]:
objective = "To generate clear instructions for data labellers for image annotation"

author = "I am a Project Manager for the data science use case"

audience = "It is intended for data labellers"

format = "It should be in a table format, with an additional column for images as example illustration"

instructions = "For each image, please annotate cars and trees with their corresponding given labels. Do not annotate taxis as cars."

### Step 2: Let's start by setting up the variables we need to input into the model

For now, lets build the SOP generator with a specific use case in mind - in this case it would be the data labelling case study

what are the variables that I need to develop a data annotation guideline?

1. we need the model duh
2. the messages are key requirements as well
3. the context should be set as well, given to the system

In [4]:
model_input = SimpleNamespace()

model_input.model_id = "gpt-4-1106-preview"
model_input.messages = [objective,author,audience,format,instructions]
model_input.context = "You are an expert Standard Operating Procedure (SOP) generator in a JSON table format.\n \
    Make sure that the instructions given are clear and comprehensive, good enough to generate images as well."


In [5]:
model_input

namespace(model_id='gpt-4-1106-preview',
          messages=['To generate clear instructions for data labellers for image annotation',
                    'I am a Project Manager for the data science use case',
                    'It is intended for data labellers',
                    'It should be in a table format, with an additional column for images as example illustration',
                    'For each image, please annotate cars and trees with their corresponding given labels. Do not annotate taxis as cars.'],
          context='You are an expert Standard Operating Procedure (SOP) generator in a JSON table format.\n     Make sure that the instructions given are clear and comprehensive, good enough to generate images as well.')

### Step 3: Next to define a function that enables multi-step conversation

In [6]:
def GetMessageMemory(NewQuestion, PreviousResponse, systemContext):
    if PreviousResponse is not None:
        response = client.chat.completions.create(
            model=model_input.model_id,
            response_format={ "type": "json_object" },
            messages=[
                {"role": "system", "content": systemContext},
                {"role": "user", "content": NewQuestion},
                {"role": "assistant", "content": PreviousResponse},
            ],
            temperature=0,
            seed=42
            )
    else:
        response = client.chat.completions.create(
            model=model_input.model_id,
            response_format={ "type": "json_object" },
            messages=[
                {"role": "system", "content": systemContext},
                {"role": "user", "content": NewQuestion}
            ],
            temperature=0,
            seed=42
            )

    return response.choices[0].message.content
    

In [7]:
last_response = None

for question in tqdm(model_input.messages):
    chat_response = GetMessageMemory(question, last_response, model_input.context)
    # pprint(chat_response)
    last_response = chat_response

100%|██████████| 5/5 [04:51<00:00, 58.22s/it]


In [8]:
json_response = json.loads(last_response)

json_response

{'SOP_Table': {'Title': 'Standard Operating Procedure for Image Annotation',
  'Purpose': 'To provide clear guidelines for annotating images with cars and trees while excluding taxis from the car category.',
  'Scope': 'This SOP applies to all image annotators tasked with labeling images for machine learning and computer vision training datasets.',
  'Procedure': [{'Step_Number': '1',
    'Description': 'Open Annotation Tool',
    'Instructions': 'Launch the image annotation software and load the image to be annotated.',
    'Image_Example': 'open_annotation_tool.jpg'},
   {'Step_Number': '2',
    'Description': 'Identify Objects',
    'Instructions': 'Carefully examine the image to identify all instances of cars and trees. Remember not to annotate taxis as cars.',
    'Image_Example': 'identify_objects.jpg'},
   {'Step_Number': '3',
    'Description': 'Annotate Cars',
    'Instructions': "Use the 'Car' label to annotate all vehicles that are not taxis. Draw a bounding box around each 

### Step 4: Converting JSON response into a flattened pandas dataframe

In [9]:
try:
     df = json_normalize(json_response, record_path=['SOP_Table','rows'])

except:   
     df = json_normalize(json_response, record_path=['SOP_Table','Procedure'])

In [10]:
df

Unnamed: 0,Step_Number,Description,Instructions,Image_Example
0,1,Open Annotation Tool,Launch the image annotation software and load ...,open_annotation_tool.jpg
1,2,Identify Objects,Carefully examine the image to identify all in...,identify_objects.jpg
2,3,Annotate Cars,Use the 'Car' label to annotate all vehicles t...,annotate_cars.jpg
3,4,Annotate Trees,Use the 'Tree' label to annotate all visible t...,annotate_trees.jpg
4,5,Exclude Taxis,Do not annotate taxis. Taxis can typically be ...,exclude_taxis.jpg
5,6,Review Annotations,Review all annotations to ensure accuracy and ...,review_annotations.jpg
6,7,Save and Submit,Save the annotated image in the designated for...,save_and_submit.jpg


### Step 5: Now to generate images for each step - essentially this is suppose to be a visual aid to perform the task more accurately

In [11]:
# let's start by writing a simple function to generate the images for each step

def GetImages(image_description: str):
    response = client.images.generate(
        model="dall-e-3",
        prompt=image_description,
        size="1024x1024",
        quality="standard",
        n=1,
    )

    time.sleep(5)

    return response.data[0].url

In [12]:
# next let us now join the description strings from our df to create the prompts for image generation

df["image_prompt"] = df[df.columns.to_list()[1:]]\
    .apply(lambda x: "Generate image where " + x[0] + ". " + x[1] + ". Name the image as " + x[2], axis=1)

df

Unnamed: 0,Step_Number,Description,Instructions,Image_Example,image_prompt
0,1,Open Annotation Tool,Launch the image annotation software and load ...,open_annotation_tool.jpg,Generate image where Open Annotation Tool. Lau...
1,2,Identify Objects,Carefully examine the image to identify all in...,identify_objects.jpg,Generate image where Identify Objects. Careful...
2,3,Annotate Cars,Use the 'Car' label to annotate all vehicles t...,annotate_cars.jpg,Generate image where Annotate Cars. Use the 'C...
3,4,Annotate Trees,Use the 'Tree' label to annotate all visible t...,annotate_trees.jpg,Generate image where Annotate Trees. Use the '...
4,5,Exclude Taxis,Do not annotate taxis. Taxis can typically be ...,exclude_taxis.jpg,Generate image where Exclude Taxis. Do not ann...
5,6,Review Annotations,Review all annotations to ensure accuracy and ...,review_annotations.jpg,Generate image where Review Annotations. Revie...
6,7,Save and Submit,Save the annotated image in the designated for...,save_and_submit.jpg,Generate image where Save and Submit. Save the...


In [13]:
# now to generate the urls for the images

df['Example'] = df['image_prompt'].progress_apply(GetImages)

df

100%|██████████| 7/7 [02:21<00:00, 20.23s/it]


Unnamed: 0,Step_Number,Description,Instructions,Image_Example,image_prompt,Example
0,1,Open Annotation Tool,Launch the image annotation software and load ...,open_annotation_tool.jpg,Generate image where Open Annotation Tool. Lau...,https://oaidalleapiprodscus.blob.core.windows....
1,2,Identify Objects,Carefully examine the image to identify all in...,identify_objects.jpg,Generate image where Identify Objects. Careful...,https://oaidalleapiprodscus.blob.core.windows....
2,3,Annotate Cars,Use the 'Car' label to annotate all vehicles t...,annotate_cars.jpg,Generate image where Annotate Cars. Use the 'C...,https://oaidalleapiprodscus.blob.core.windows....
3,4,Annotate Trees,Use the 'Tree' label to annotate all visible t...,annotate_trees.jpg,Generate image where Annotate Trees. Use the '...,https://oaidalleapiprodscus.blob.core.windows....
4,5,Exclude Taxis,Do not annotate taxis. Taxis can typically be ...,exclude_taxis.jpg,Generate image where Exclude Taxis. Do not ann...,https://oaidalleapiprodscus.blob.core.windows....
5,6,Review Annotations,Review all annotations to ensure accuracy and ...,review_annotations.jpg,Generate image where Review Annotations. Revie...,https://oaidalleapiprodscus.blob.core.windows....
6,7,Save and Submit,Save the annotated image in the designated for...,save_and_submit.jpg,Generate image where Save and Submit. Save the...,https://oaidalleapiprodscus.blob.core.windows....


### Step 6: Now that we have the image URLs, we now need to render the images in the dataframe

In [14]:
# moving forward, we now want to convert the image URLs into actual images displated in the dataframe

# Converting links to html tags
def path_to_image_html(path):
    return '<img src="'+ path + '" width="120" >'

# Rendering the images in the dataframe using the HTML method.
HTML(df.to_html(escape=False,formatters=dict(Example=path_to_image_html)))

Unnamed: 0,Step_Number,Description,Instructions,Image_Example,image_prompt,Example
0,1,Open Annotation Tool,Launch the image annotation software and load the image to be annotated.,open_annotation_tool.jpg,Generate image where Open Annotation Tool. Launch the image annotation software and load the image to be annotated.. Name the image as open_annotation_tool.jpg,
1,2,Identify Objects,Carefully examine the image to identify all instances of cars and trees. Remember not to annotate taxis as cars.,identify_objects.jpg,Generate image where Identify Objects. Carefully examine the image to identify all instances of cars and trees. Remember not to annotate taxis as cars.. Name the image as identify_objects.jpg,
2,3,Annotate Cars,"Use the 'Car' label to annotate all vehicles that are not taxis. Draw a bounding box around each car, ensuring it fits the object as closely as possible without including background elements.",annotate_cars.jpg,"Generate image where Annotate Cars. Use the 'Car' label to annotate all vehicles that are not taxis. Draw a bounding box around each car, ensuring it fits the object as closely as possible without including background elements.. Name the image as annotate_cars.jpg",
3,4,Annotate Trees,"Use the 'Tree' label to annotate all visible trees in the image. Draw a bounding box or use polygonal segmentation to outline the full extent of the tree, including branches and foliage.",annotate_trees.jpg,"Generate image where Annotate Trees. Use the 'Tree' label to annotate all visible trees in the image. Draw a bounding box or use polygonal segmentation to outline the full extent of the tree, including branches and foliage.. Name the image as annotate_trees.jpg",
4,5,Exclude Taxis,"Do not annotate taxis. Taxis can typically be identified by their distinct color (often yellow) and markings. If unsure, refer to local taxi identification guidelines.",exclude_taxis.jpg,"Generate image where Exclude Taxis. Do not annotate taxis. Taxis can typically be identified by their distinct color (often yellow) and markings. If unsure, refer to local taxi identification guidelines.. Name the image as exclude_taxis.jpg",
5,6,Review Annotations,Review all annotations to ensure accuracy and completeness. Make sure all cars (excluding taxis) and trees are labeled correctly and that the bounding boxes or polygons are precise.,review_annotations.jpg,Generate image where Review Annotations. Review all annotations to ensure accuracy and completeness. Make sure all cars (excluding taxis) and trees are labeled correctly and that the bounding boxes or polygons are precise.. Name the image as review_annotations.jpg,
6,7,Save and Submit,Save the annotated image in the designated format and submit it through the appropriate channel for review or inclusion in the dataset.,save_and_submit.jpg,Generate image where Save and Submit. Save the annotated image in the designated format and submit it through the appropriate channel for review or inclusion in the dataset.. Name the image as save_and_submit.jpg,


In [15]:
# We no longer need to the image prompt column, we can drop it
df.drop(columns=['image_prompt'], inplace=True)

df_html = df.to_html(escape=False,
                     formatters=dict(Example=path_to_image_html),
                     index=False)

print(df_html)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>Step_Number</th>
      <th>Description</th>
      <th>Instructions</th>
      <th>Image_Example</th>
      <th>Example</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>1</td>
      <td>Open Annotation Tool</td>
      <td>Launch the image annotation software and load the image to be annotated.</td>
      <td>open_annotation_tool.jpg</td>
      <td><img src="https://oaidalleapiprodscus.blob.core.windows.net/private/org-Pzwaj2iF9vX70guDVIDpMNlx/user-przJTvyPtKSAMFVgep6sMi8C/img-E08Gzt8i31Eor5Aq7GZRuQmx.png?st=2023-12-19T06%3A23%3A01Z&se=2023-12-19T08%3A23%3A01Z&sp=r&sv=2021-08-06&sr=b&rscd=inline&rsct=image/png&skoid=6aaadede-4fb3-4698-a8f6-684d7786b067&sktid=a48cca56-e6da-484e-a814-9c849652bcb3&skt=2023-12-19T01%3A05%3A49Z&ske=2023-12-20T01%3A05%3A49Z&sks=b&skv=2021-08-06&sig=wdC3kcl8YjjpN3zmn9enOl0GHiEZqyIXHVs8wFGkYRE%3D" width="120" ></td>
    </tr>
    <tr>
      <td>2</td>
      <td>Ide

In [16]:
try:
     summary = json_normalize(json_response)
     summary.drop(columns='SOP_Table.Procedure_Steps', inplace=True)

except:
     summary = json_normalize(json_response)
     summary.drop(columns='SOP_Table.Procedure', inplace=True)

In [17]:
summary.columns = [re.sub("^(.*\.)", "", col) for col in summary.columns]

summary

Unnamed: 0,Title,Purpose,Scope,Image Annotator,Quality Assurance,Annotation Logs,Review Records,Frequency,Method
0,Standard Operating Procedure for Image Annotation,To provide clear guidelines for annotating ima...,This SOP applies to all image annotators taske...,Responsible for accurately identifying and ann...,Responsible for reviewing annotated images to ...,"A record of annotated images, including the an...","A log of quality assurance reviews, including ...",This SOP should be reviewed bi-annually or whe...,The review will be conducted by the annotation...


In [18]:
summary_html = summary.to_html(index=False)

print(summary_html)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>Title</th>
      <th>Purpose</th>
      <th>Scope</th>
      <th>Image Annotator</th>
      <th>Quality Assurance</th>
      <th>Annotation Logs</th>
      <th>Review Records</th>
      <th>Frequency</th>
      <th>Method</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>Standard Operating Procedure for Image Annotation</td>
      <td>To provide clear guidelines for annotating images with cars and trees while excluding taxis from the car category.</td>
      <td>This SOP applies to all image annotators tasked with labeling images for machine learning and computer vision training datasets.</td>
      <td>Responsible for accurately identifying and annotating cars and trees in images according to the guidelines provided in this SOP.</td>
      <td>Responsible for reviewing annotated images to ensure they meet the dataset criteria and SOP standards.</td>
      <td>A record of annotated images,

### Step 7: Now to merge both summary html and instruction html in one html page

In [19]:
# Merge HTML content with a gap between tables
merged_html_content = f"""
<!DOCTYPE html>
<html>
<head>
    <style>
        .table-container {{
            margin-bottom: 50px;  /* Adjust the gap between tables */
        }}

        table {{
            border-collapse: collapse;
            width: 50%;
            margin: 20px;
        }}
        th, td {{
            border: 1px solid black;
            padding: 20px;
            text-align: left;
        }}
        h2 {{
            color: blue;
        }}

        body {{ background-color: #e0e0e0; }}
    </style>
</head>
<body>
    <h2>Responsibilities and Governance</h2>
        {summary_html}
    <h2>Instructions and Guidelines</h2>
        {df_html}
    </div>
</body>
</html>
"""


# Save the merged HTML content into a new file
with open("Data_Annotation_SOP.html", "w") as merged_file:
    merged_file.write(merged_html_content)