# **Fine-Tuning a ChatGPT Model**

In [2]:
pip install openai

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting openai
  Downloading openai-0.27.4-py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.3/70.3 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
Collecting aiohttp
  Downloading aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m35.4 MB/s[0m eta [36m0:00:00[0m
Collecting multidict<7.0,>=4.5
  Downloading multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (114 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.2/114.2 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting yarl<2.0,>=1.0
  Downloading yarl-1.8.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (264 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m264.6/264.6 kB[0m [31m26.3 MB/s[0m eta [3

In [25]:
#Import necessary libraries
import openai
import pandas as pd
import subprocess

In [15]:
#Adding the API key
openai.api_key = "XXXXX"

**1- Creation of a synthetic set of data for fine-tuning**

In [19]:
#Define all the parameters we need to create our synthetic dataset and define a pandas dataframe
l_age = ['18', '20', '30', '40', '50', '60', '90']
l_gender = ['man', 'woman']
l_power = ['invisibility', 'read in the thoughts', 'turning lead into gold', 'immortality', 'telepathy', 'teleport', 'flight'] 

f_prompt = "Imagine a complete and detailed description of a {age}-year-old {gender} fictional character who has the superpower of {power}. Write out the entire description in a maximum of 100 words in great detail:"
f_sub_prompt = "{age}, {gender}, {power}"

df = pd.DataFrame()

In [22]:
#For each element on the lists of parameters, we demand to create a description of the given superhero

#We iterate over different values of age, gender, and power.
for age in l_age:
 for gender in l_gender:
  for power in l_power:
   for i in range(3): ## 3 times each
    prompt = f_prompt.format(age=age, gender=gender, power=power)
    sub_prompt = f_sub_prompt.format(age=age, gender=gender, power=power)
    print(sub_prompt)

    #We use the openai.Completion.create function to ask GPT to generate a response to our prompt
    response = openai.Completion.create(
     model="text-ada-001",
     prompt=prompt,
     temperature=1,
     max_tokens=290,
     top_p=1,
     frequency_penalty=0,
     presence_penalty=0
    )
    
    finish_reason = response['choices'][0]['finish_reason']
    response_txt = response['choices'][0]['text']
    
    new_row = {
      'age':age, 
      'gender':gender, 
      'power':power, 
      'prompt':prompt, 
      'sub_prompt':sub_prompt, 
      'response_txt':response_txt, 
      'finish_reason':finish_reason}
    new_row = pd.DataFrame([new_row])
    df = pd.concat([df, new_row], axis=0, ignore_index=True)


18, man, invisibility
18, man, invisibility
18, man, invisibility
18, man, read in the thoughts
18, man, read in the thoughts
18, man, read in the thoughts
18, man, turning lead into gold
18, man, turning lead into gold
18, man, turning lead into gold
18, man, immortality
18, man, immortality
18, man, immortality
18, man, telepathy
18, man, telepathy
18, man, telepathy
18, man, teleport
18, man, teleport
18, man, teleport
18, man, flight
18, man, flight
18, man, flight
18, woman, invisibility
18, woman, invisibility
18, woman, invisibility
18, woman, read in the thoughts
18, woman, read in the thoughts
18, woman, read in the thoughts
18, woman, turning lead into gold
18, woman, turning lead into gold
18, woman, turning lead into gold
18, woman, immortality
18, woman, immortality
18, woman, immortality
18, woman, telepathy
18, woman, telepathy
18, woman, telepathy
18, woman, teleport
18, woman, teleport
18, woman, teleport
18, woman, flight
18, woman, flight
18, woman, flight
20, man, i

In [23]:
#Convert the pandas dataframe to csv file to store data
df.to_csv("out_openai_completion.csv")

**2- Fine-tuning the GPT model**

In [27]:
#Read the csv file previously created
df = pd.read_csv("out_openai_completion.csv")

#Extract the colons and then rename them to prompt and completion, respectively.
prepared_data = df.loc[:,['sub_prompt','response_txt']]
prepared_data.rename(columns={'sub_prompt':'prompt', 'response_txt':'completion'}, inplace=True)

#Store the resulting dataframe in a new file prepared_data.csv
prepared_data.to_csv('prepared_data.csv',index=False)


## prepared_data.csv --> prepared_data_prepared.json
subprocess.run('openai tools fine_tunes.prepare_data --file prepared_data.csv --quiet'.split())

## Start fine-tuning
subprocess.run('openai api fine_tunes.create --training_file prepared_data_prepared.jsonl --model ada --suffix "kplr"'.split())

CompletedProcess(args=['openai', 'api', 'fine_tunes.create', '--training_file', 'prepared_data_prepared.jsonl', '--model', 'ada', '--suffix', '"kplr"'], returncode=1)