###  Training GPT-3 on synthetic dataset

In [1]:
# Creating some synthetic data for fine-tuning 
# We are going to create serveral characters for video games in here: 

import os
import openai
import pandas as pd

# Set your OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")

# Define input values
ages = ['30', '40', '50']
genders = ['man', 'woman']
superpowers = ['read in the thoughts', 'turning lead into gold', 'immortality'] 

# Create an empty list to store results
results = []

# Loop through input values
record_number = 1
for age in ages:
    for gender in genders:
        for power in superpowers:
            # Define the prompt with placeholders
            prompt = f"Imagine a complete and detailed description of a {age}-year-old {gender} fictional character who has the superpower of {power}. Write out the entire description in a maximum of 100 words in great detail."

            # Generate the response using OpenAI's GPT-3 model
            response = openai.Completion.create(
                model="text-davinci-003",
                prompt=prompt,
                temperature=0.7,  # Adjust temperature for response creativity
                max_tokens=100,  # Limit response to 100 tokens (adjust as needed)
                n=1,  # Generate a single response
                stop=None,  # Let the model determine when to stop
            )

            # Extract and store the response text
            response_text = response.choices[0].text.strip()

            # Create a subprompt string
            subprompt = f"{age}, {gender}, {power}"

            # Store the results in a dictionary
            result = {
                'record number': record_number,
                'age': age,
                'gender': gender,
                'power': power,
                'prompt': prompt,
                'subprompt': subprompt,
                'response': response_text
            }

            results.append(result)

            # Increment the record number
            record_number += 1

# Create a DataFrame from the results
df = pd.DataFrame(results)

# Save the DataFrame to a CSV file
df.to_csv("character_descriptions.csv", index=False)


In [2]:
import pandas as pd
import openai
import subprocess

# read the previous file with the generated data 
df = pd.read_csv("character_descriptions.csv")

# now we are using the parameters of the prompt (subprompt) as final prompt for the training. 
prepared_data = df.loc[:,['subprompt','response']]
prepared_data.rename(columns={'subprompt':'prompt', 'response':'completion'}, inplace=True)
prepared_data.to_csv('prepared_data.csv',index=False)

## prepared_data.csv --> prepared_data_prepared.json
subprocess.run('openai tools fine_tunes.prepare_data --file prepared_data.csv --quiet'.split())

## Start fine-tuning
subprocess.run('openai api fine_tunes.create --training_file prepared_data_prepared.jsonl --model davinci --suffix "SuperHero"'.split())

CompletedProcess(args=['openai', 'api', 'fine_tunes.create', '--training_file', 'prepared_data_prepared.jsonl', '--model', 'davinci', '--suffix', '"SuperHero"'], returncode=0)

In [4]:
create_args = {
	"training_file": "file-2sCcE5tGNelmNGrMElshJlMw",
	"validation_file": "file-YdohajIDRBOTOZn0wRphiJ8z",
	"model": "davinci",
	"n_epochs": 15,
	"batch_size": 3,
	"learning_rate_multiplier": 0.3
}

response = openai.FineTune.create(**create_args)
job_id = response["id"]
status = response["status"]

print(f'Fine-tunning model with jobID: {job_id}.')
print(f"Training Response: {response}")
print(f"Training Status: {status}")

Fine-tunning model with jobID: ft-Xx7IYCjGUxBDecEQCTs8oV2n.
Training Response: {
  "object": "fine-tune",
  "id": "ft-Xx7IYCjGUxBDecEQCTs8oV2n",
  "hyperparams": {
    "n_epochs": 15,
    "batch_size": 3,
    "prompt_loss_weight": 0.01,
    "learning_rate_multiplier": 0.3
  },
  "organization_id": "org-6eT84cKNSkcX5WqEyFAc5rPD",
  "model": "davinci",
  "training_files": [
    {
      "object": "file",
      "id": "file-2sCcE5tGNelmNGrMElshJlMw",
      "purpose": "fine-tune",
      "filename": "file",
      "bytes": 1356,
      "created_at": 1696240295,
      "status": "processed",
      "status_details": null
    }
  ],
  "validation_files": [
    {
      "object": "file",
      "id": "file-YdohajIDRBOTOZn0wRphiJ8z",
      "purpose": "fine-tune",
      "filename": "file",
      "bytes": 1044,
      "created_at": 1696240296,
      "status": "processed",
      "status_details": null
    }
  ],
  "result_files": [],
  "created_at": 1696414011,
  "updated_at": 1696414011,
  "status": "pend

In [5]:
# This pending status does not provide any relevant information.
# However, we can have more insight into the training process by running the following code:

import signal
import datetime

def signal_handler(sig, frame):
    status = openai.FineTune.retrieve(job_id).status
    print(f"Stream interrupted. Job is still {status}.")
    return

print(f'Streaming events for the fine-tuning job: {job_id}')
signal.signal(signal.SIGINT, signal_handler)

events = openai.FineTune.stream_events(job_id)
try:
    for event in events:
        print(f'{datetime.datetime.fromtimestamp(event["created_at"])} {event["message"]}')

except Exception:
    print("Stream interrupted (client disconnected).")


Streaming events for the fine-tuning job: ft-Xx7IYCjGUxBDecEQCTs8oV2n
2023-10-04 12:06:51 Created fine-tune: ft-Xx7IYCjGUxBDecEQCTs8oV2n
Stream interrupted (client disconnected).


In [18]:
import os
import openai

openai.api_key = os.getenv("OPENAI_API_KEY")

response = openai.Completion.create(
  model="davinci:ft-hal149:superhero-2023-10-04-09-56-47",
  prompt="18, man, inmortality ->\n",
  temperature=0.7,
  max_tokens=500,
  top_p=1,
  frequency_penalty=0,
  presence_penalty=0,
  stop=["END"]
)

text = response["choices"][0]["text"]
print(text)


He has a lean and athletic build, his skin is a deep bronze and his eyes are an intense sapphire blue. His hair is dark and his face is covered in a short but distinct stubble. He has a confident and charismatic presence and can often be seen wearing a large and warm smile on his face. He wears mostly plain clothing - a simple t-shirt and jeans on most occasions, as well as a leather jacket on cooler days. He has a distinct style that is effortlessly cool and draws the attention of everyone around him. He moves with confidence and poise and his strength is limitless. He is the perfect example of immortality - he never gets sick and can heal his wounds at will. He can also control the elements and the weather to a certain extent. He has the powers of super strength and endurance, high levels of endurance and an accelerated healing factor. His senses are stronger than a normal human's, he can read people's emotions and 
