# OpenAI Fine-Tuning

pip commands needed to run this notebook:

In [None]:
!pip3 install openai
!pip3 install python-dotenv

Importing Libraries

In [None]:
import json
import openai
import datetime
import sys
import os

## Open AI / ChatGPT

Make sure to get an OpenAI key from https://platform.openai.com/account/api-keys

Create a ".env" file and put your OpenAI key in that file
```bash
OPENAI_API_KEY='your_key_here'
```

In [None]:
# Read the OpenAI key from a local .env file (create that file first and put
# the key in it, as described above) and hand it to the openai module.

from dotenv import load_dotenv
load_dotenv()
openai.api_key = os.getenv('OPENAI_API_KEY')

# Report a missing key here, where the cause is obvious, rather than at the
# first API call further down the notebook.
if not openai.api_key:
    print("WARNING: OPENAI_API_KEY is not set; check your .env file")

## Step 1: Fine Tuning File
- Your fine-tuning file must be in JSON Lines (jsonl) format
- A sample file (data.jsonl) is provided and will be used as the training data
- We will validate the file before uploading to openai

In [None]:
def validate_file(filename):
    """Check that ``filename`` is a non-empty JSON Lines file.

    Every line is parsed with ``json.loads``. The parsed values are not kept;
    parsing is only used as a well-formedness check.

    Args:
        filename: Path of the file to check.

    Returns:
        True when the file has at least one line and every line parses as
        JSON, False otherwise. The reason for a failure is printed; no
        exception is propagated to the caller.
    """
    line_number = 0
    try:
        # Explicit UTF-8 so the result does not depend on the platform's
        # default text encoding.
        with open(filename, 'r', encoding='utf-8') as file:
            # Iterate lazily: the file never has to be held in memory at once.
            for line_number, line in enumerate(file, start=1):
                json.loads(line)
    except json.JSONDecodeError as e:
        # Point at the offending line so it can be found in a large file.
        print(f"Error reading file, invalid format:  line {line_number}: {e}")
        return False
    except Exception as e:
        # Kept broad on purpose: this function reports failure by returning
        # False (missing file, undecodable bytes, bad path type, ...).
        print("Error reading file, invalid format: ", e)
        return False

    if line_number == 0:
        # A file without a single record cannot serve as training data.
        print("Error reading file, invalid format:  file is empty")
        return False
    return True

In [None]:
# Validate the sample training file before anything is uploaded.
is_file_valid = validate_file("data.jsonl")
if not is_file_valid:
    print("File is not valid")
    # Exit with a non-zero status: a bare sys.exit() reports success
    # (status 0) when this code is run as a script.
    sys.exit(1)

print("\nFile is valid...\n")

## Step 2: Upload File to OpenAI
- This will upload the file data.jsonl to OpenAI
- This will take a few minutes to complete
- Output will be a json object with the id of the uploaded file

In [None]:
# Upload the training file. The context manager closes the local file handle
# once the upload call has returned; the original left it open.
with open("data.jsonl", "rb") as training_file:
    ft_file = openai.File.create(file=training_file, purpose='fine-tune')
print(ft_file)

## Step 3: File Validation at OpenAI
- You should check the status of your fine tuning file by running the following command
- You will not be able to proceed to the next step until the status is "processed"

In [None]:
def pretty_table_s3(f):
    """Print the entries of ``f['data']`` as a fixed-width text table.

    Each entry is read for its 'id', 'purpose', 'status' and 'created_at'
    keys. 'created_at' is passed to ``datetime.datetime.fromtimestamp`` and
    shown as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        f: Mapping whose 'data' key holds an iterable of such entries.
    """
    heading = f"\n{'ID':<33} {'Purpose':<20} {'Status':<12} {'Created At'}"
    print(heading)
    print('-' * 88)
    for entry in f['data']:
        moment = datetime.datetime.fromtimestamp(entry['created_at'])
        stamp = moment.strftime('%Y-%m-%d %H:%M:%S')
        row = f"{entry['id']:<33} {entry['purpose']:<20} {entry['status']:<12} {stamp}"
        print(row)

In [None]:
# Request the file listing from OpenAI (limit=25) and render it as a table.
file_list = openai.File.list(limit=25)
pretty_table_s3(file_list)

## Step 4: Create job for Fine Tuned Model at OpenAI
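- The cell below uses the file ID from step 2 to set the "TRAINING_FILE_ID" variable once the file status is "processed" (if you use the standalone script step4_create_finetuned_model.py instead, update "TRAINING_FILE_ID" there with the ID from step 2 or 3)
- Run the following cells to check the file status and then create your fine-tuned model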

In [None]:
# check status of file
file_status = openai.File.retrieve(ft_file["id"])
TRAINING_FILE_ID = None
if file_status["status"] == "processed":
    print(f"\nFile processed:")
    TRAINING_FILE_ID = ft_file["id"]
    print("Training file id: ", TRAINING_FILE_ID)
else:
    print(f"\nHOLD...Still waiting on STATUS: {file_status['status']}\n")

- We will now use the training file id to create a fine tuned model
- This could take anywhere from 5 to 20 minutes, depending on where you are in the queue


In [None]:
# Start the fine-tuning job only when the status check has set TRAINING_FILE_ID.
if TRAINING_FILE_ID:
    ft_job = openai.FineTuningJob.create(training_file=TRAINING_FILE_ID, model="gpt-3.5-turbo-0613")
    print(ft_job)
else:
    # Say why nothing happened instead of skipping silently, in the same way
    # the Step 6 cell reports a model that is not ready yet.
    print("training file is not ready... need to run previous step until file status is processed... Need that training file id ")

## Step 5: Model Validation at OpenAI
- You should check the status of your fine tuned model by running the following command
- You will not be able to proceed to the next step until the status is "succeeded" and Fine Tuned Model has a value.
- OpenAI will also email you when the model is ready; alternatively, run the following cells to check the status

In [None]:
def pretty_table_s5(f):
    """Print the fine-tuning jobs in ``f['data']`` as a fixed-width text table.

    Each job is read for its 'id', 'created_at', 'finished_at', 'status' and
    'fine_tuned_model' keys. Timestamps go through
    ``datetime.datetime.fromtimestamp`` and are shown as
    ``YYYY-MM-DD HH:MM:SS``; a falsy 'finished_at' is shown as an empty column.

    Args:
        f: Mapping whose 'data' key holds an iterable of such jobs.
    """
    def as_text(epoch):
        # Shared formatting for both timestamp columns.
        return datetime.datetime.fromtimestamp(epoch).strftime('%Y-%m-%d %H:%M:%S')

    print(f"\n{'ID':<33} {'Created At':<22} {'Finished At':<22} {'Status':<13} {'Fine Tuned Model'} ")
    print('-' * 140)
    for entry in f['data']:
        started = as_text(entry['created_at'])
        ended = as_text(entry['finished_at']) if entry['finished_at'] else ""
        print(f"{entry['id']:<33} {started:<22} {ended:<22} {entry['status']:<13} {entry['fine_tuned_model']} ")

In [None]:
# Request the fine-tuning job listing from OpenAI (limit=25) and render it as a table.
job_list = openai.FineTuningJob.list(limit=25)
pretty_table_s5(job_list)

In [None]:
# Look up the job created in Step 4. model_id stays None unless the job
# status is "succeeded", in which case it takes the job's fine_tuned_model.
fine_tuning_job = ft_job["id"]
print("fine_tuning_job = " + fine_tuning_job)
model_status = openai.FineTuningJob.retrieve(fine_tuning_job)
model_id = None
if model_status["status"] != "succeeded":
    print("\n***** DO NOT PROCEED YET *****\nStill waiting for fine tuned model... We need that ID")
else:
    model_id = model_status["fine_tuned_model"]
    print(f"\nFinally... Model created, Model ID: {model_id}")



## Step 6: Test New Model
- The cell below takes the fine-tuned model ID from step 5 and stores it in "FINE_TUNED_MODEL_ID" (if you use the standalone script step6_test_finetuned_model.py instead, update "FINE_TUNED_MODEL_ID" there with the ID from step 4 or 5)
- Run the following cell to test your new model
- FINE_TUNED_MODEL_ID must be the model ID you got from the previous step.

In [None]:
# Send one sample conversation to the fine-tuned model, but only when the
# previous step produced a model id.
if not model_id:
    print("fine tuned model is not ready... need to run previous step until model is ready... Need that model id ")
else:
    FINE_TUNED_MODEL_ID = model_id
    print("FINE_TUNED_MODEL_ID = " + FINE_TUNED_MODEL_ID)
    completion = openai.ChatCompletion.create(
        messages=[
            {"role": "system", "content": "You are a helpful and professional customer service representative"},
            {"role": "user", "content": "dude, i need to mail you a check, what is your address?"},
        ],
        model=FINE_TUNED_MODEL_ID,
        temperature=0.0,
    )

    print(completion.choices[0].message)