In [1]:
import json
from pathlib import Path
import random
import re
from pprint import pprint

import openai
from tqdm import tqdm
from rouge_score import rouge_scorer

In [2]:
# Have the openai client read the API key from this file (path is relative to
# the notebook's working directory) so the secret itself never appears here.
openai.api_key_path = '../OPENAI_KEY'


In [3]:
# Load the GPT4Tools dataset; the `with` block closes the file handle as soon
# as parsing finishes instead of leaving it open until garbage collection.
with open('../data/gpt4tools_71k.json') as fin:
    gpt4toolsdata = json.load(fin)

In [3]:
# gen_tools_path = Path('../data/gpt3_generations_tools/gen_tools.jsonl')
gen_tools_path = Path('../data/gpt3_generations_tools_taskmatrix/gen_tools.jsonl')

# One JSON object per line. Read inside `with` so the handle is closed, and
# skip blank lines (e.g. a trailing newline) that json.loads would reject.
with gen_tools_path.open() as fin:
    gen_tools = [json.loads(line) for line in fin if line.strip()]
print(f"Loadeed {len(gen_tools)} generated tools")


# Examples generated by earlier runs (if any) are loaded so that new
# generations are accumulated on top of them.
gen_examples_path = Path('../data/gpt3_generations_tools_taskmatrix/gen_examples.jsonl')
gen_examples = []
if gen_examples_path.exists():
    with gen_examples_path.open() as fin:
        gen_examples.extend(json.loads(line) for line in fin if line.strip())
    print(f"loaded {len(gen_examples)} examples")

Loadeed 55 generated tools


In [4]:
def make_gpt3_request(prompt: str):
    """Send ``prompt`` to the ``gpt-3.5-turbo`` chat model and return the raw response.

    The conversation consists of a fixed system message followed by a single
    user message containing ``prompt``. No stop sequence is set. The response
    object is returned untouched; use ``extract`` to get the generated text.
    """
    system_turn = {
        "role": "system",
        "content": "You are a helpful assistant, useful for data generation.",
    }
    user_turn = {"role": "user", "content": prompt}
    return openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[system_turn, user_turn],
        stop=None,
    )

def extract(response: openai.openai_object.OpenAIObject):
    """Return the message text of the first choice in a chat completion response."""
    first_choice = response['choices'][0]
    message = first_choice['message']
    return message['content']

def sample_tools(tools: list, n=1):
    """Draw up to ``n`` tools at random, without replacement.

    The sample size is capped at ``len(tools)``, so requesting more tools than
    are available returns all of them (in random order) instead of raising.
    """
    how_many = min(len(tools), n)
    return random.sample(tools, how_many)

def encode_prompt(tools: list):
    """Build the instruction-generation prompt for a batch of tools.

    Each element of ``tools`` must provide ``'name'`` and ``'description'``
    keys. The prompt lists the tools (numbered from 1), states the expected
    answer format, gives one worked example and ends with a diversity hint.
    """
    header = (
        "Please generate instruction for each of the given tools.\n"
        "Each tool is defined as \"<Tool Name>: <usage scenario>\"\n"
    )
    tool_listing = "".join(
        f"{position}. {entry['name']}: {entry['description']}\n"
        for position, entry in enumerate(tools, start=1)
    )
    format_rule = "Each example should follow the format \"<instruction>, [<tool name>, <input arguments>]\".\n\n"
    # NOTE(review): the double quotes in this example are unbalanced (there is a
    # stray one after the closing bracket); the text is kept verbatim here.
    worked_example = 'Here is an example for the tool "Video Splitter" -- "Divide the video located at /path/to/video.mp4 into 10-second intervals.", [Video Splitter, "/path/to/video.mp4", "10 seconds"]"\n'
    closing = "Try not repeating the words from tool description, where possible. Provide diverse instructions."
    return "".join((header, tool_listing, format_rule, worked_example, closing))

In [52]:
# Draw a random sample of up to 8 tools from gen_tools.
tool_batch = sample_tools(gen_tools, 8)

In [53]:
# Build the instruction-generation prompt for the sampled tools and print it
# so the exact text sent to the model can be inspected.
prompt = encode_prompt(tool_batch)
print(prompt)

Please generate instruction for each of the given tools.
Each tool is defined as "<Tool Name>: <usage scenario>"
1. Speech Recognition: useful when you want to recognize speech from a microphone or audio file. The input to this tool should be an audio file path or a microphone input.
2. Object Tracking in Image: useful when you want to track the position of an object in an image across multiple frames. The input to this tool should be a string, representing the path of the image file sequence.
3. Audio Speed Changer: useful when you want to change the speed of an audio file. The input to this tool should be a string, representing the path of the audio file, and another string, representing the new speed.
4. Video Editing: useful when you want to edit a video by trimming, cropping, adding music, or enhancing the video quality. The input to this tool should be a string, representing the path of the video file.
5. Background Removal: useful when you want to remove the background from an i

In [54]:
# Send the prompt to the chat model; `r` holds the raw API response.
r = make_gpt3_request(prompt)

In [55]:
# Show the generated text (content of the first choice) from the response.
print(extract(r))

1. Convert speech from an audio file located at /path/to/audio.wav, [Speech Recognition, "/path/to/audio.wav"].
2. Track the position of a car in a sequence of images located at /path/to/image_sequence, [Object Tracking in Image, "/path/to/image_sequence"].
3. Speed up an audio file located at /path/to/audio.mp3 by 50%, [Audio Speed Changer, "/path/to/audio.mp3", "50% faster"].
4. Enhance the quality of the video located at /path/to/video.mov, [Video Editing, "/path/to/video.mov"].
5. Remove the background from an image located at /path/to/image.png and create a transparent background, [Background Removal, "/path/to/image.png"].
6. Generate an audio recording of the text "Hello, how are you?", [Speech Synthesis, "Hello, how are you?"].
7. Resize an image located at /path/to/image.jpg to a width of 800 pixels, [Image Resize, "/path/to/image.jpg", "800px width"].
8. Split a video located at /path/to/video.mp4 into three parts of equal length, [Video Splitter, "/path/to/video.mp4", "3 par

In [13]:
def _parse_example_line(line):
    """Parse one generated line into an example dict; return None if it does not fit.

    Expected shape (list numbering and the final period are optional):
        ``3. <instruction>, [<tool name>, <input arguments>].``
    """
    # Drop list numbering such as "3." or "3)". The separator is required, so an
    # instruction that itself starts with a number ("3 images ...") stays intact.
    line = re.sub(r'^\s*\d+[.)]\s*', '', line.strip())
    # The instruction ends at the first ", [" rather than at the first comma,
    # because instructions may contain commas (e.g. quoted text to synthesize).
    match = re.match(r'^(?P<query>.+?)\s*,\s*\[(?P<call>.*)\]\s*\.?$', line)
    if match is None:
        return None
    # Inside the brackets the first comma separates the tool name from its
    # arguments; a tool called without arguments yields an empty string.
    tool_name, _, input_args = match.group('call').partition(',')
    tool_name = tool_name.strip()
    if not tool_name:
        return None
    return {
        'name': tool_name,
        'query': match.group('query'),
        'input_args': input_args.strip(),
    }


def postprocess_response(response):
    """Turn a chat completion response into a list of example dicts.

    The response text is read with ``extract`` and parsed line by line. Each
    well-formed line produces ``{'name', 'query', 'input_args'}`` where
    ``input_args`` is the raw argument text between the tool name and the
    closing bracket.

    Blank lines are ignored. Lines that do not follow the expected format are
    reported with ``print`` and skipped, so one malformed line no longer
    discards the whole (paid-for) response.

    Returns:
        list[dict]: parsed examples in the order they appear in the response.
    """
    text = extract(response)
    new_examples = []
    for line in text.split('\n'):
        if not line.strip():
            continue
        example = _parse_example_line(line)
        if example is None:
            print(f"skipping unparseable line: {line!r}")
            continue
        new_examples.append(example)
    return new_examples


In [None]:

# Parse the one-off response `r` into example dicts and pretty-print them
# to check the parser's output by eye.
new_examples_batch = postprocess_response(r)
pprint(new_examples_batch)

In [15]:
# Settings for the generation loop:
#   num_epochs   - number of passes over the (reshuffled) tool list
#   batch_size   - number of tools put into one prompt / one API request
#   num_requests - running count of API requests issued so far
num_epochs = 1
batch_size = 8
num_requests = 0

In [16]:
def batch(iterable, n=1):
    """Yield consecutive slices of ``iterable`` containing at most ``n`` items each.

    ``iterable`` must support ``len()`` and slicing. The final slice is shorter
    when the length is not a multiple of ``n``.
    """
    size = len(iterable)
    for start in range(0, size, n):
        stop = start + n
        if stop > size:
            stop = size
        yield iterable[start:stop]

In [17]:

# Generation loop: every epoch reshuffles the tools, then requests one batch of
# examples per API call and persists the results as it goes.
for epoch in tqdm(range(num_epochs)):

    # Reshuffle (in place) so tools are grouped differently in every epoch.
    random.shuffle(gen_tools)

    # Ceiling division: a final partial batch still counts as one request.
    num_batches = (len(gen_tools) + batch_size - 1) // batch_size

    for tool_batch in tqdm(batch(gen_tools, batch_size), total=num_batches):

        prompt = encode_prompt(tool_batch)
        print(f"doing {num_requests} request")
        response = make_gpt3_request(prompt)
        num_requests += 1
        new_examples_batch = postprocess_response(response)
        gen_examples.extend(new_examples_batch)

        # The file is opened in append mode, so write only this batch's
        # examples. Writing all of `gen_examples` here would re-append every
        # previously loaded or generated example on each iteration.
        with open(gen_examples_path, 'a') as fout:
            for ex in new_examples_batch:
                fout.write(json.dumps(ex) + '\n')



  0%|          | 0/1 [00:00<?, ?it/s]

doing 0 request


  0%|          | 0/6 [00:26<?, ?it/s]
100%|██████████| 1/1 [00:26<00:00, 26.16s/it]


In [10]:
# Display every example collected so far (loaded from disk plus newly generated).
gen_examples

[{'name': 'Object Tracking in Image',
  'query': 'Track the movement of a car in the image sequence located at /path/to/car/images across different frames.',
  'input_args': '"/path/to/car/images"]'},
 {'name': 'Image Resize',
  'query': 'Resize the image located at /path/to/image.jpg to a size of 800x600.',
  'input_args': '"/path/to/image.jpg", "800x600"]'},
 {'name': 'Text Summarization',
  'query': 'Summarize the long report found at /path/to/report.docx into a brief summary.',
  'input_args': '"/path/to/report.docx"]'},
 {'name': 'Video Splitter',
  'query': 'Split the video located at /path/to/video.mp4 into different 1-minute parts.',
  'input_args': '"/path/to/video.mp4", "1 minute"]'},
 {'name': 'Style Transfer',
  'query': "Transfer the style of Van Gogh's painting to a picture of a sunflower located at /path/to/sunflower.jpg.",
  'input_args': '"/path/to/sunflower.jpg", "/path/to/van-gogh.jpg"]'},
 {'name': 'Face Detection and Recognition',
  'query': 'Detect and recognize f