### Example: fine-tuning an Azure OpenAI model for SQL query generation
We use the same base dataset, `sql_examples.jsonl` (loaded from `../llama2/data/`).

Use Azure Region **Sweden Central**

#### Data preparation   

In [3]:
import json

import pandas as pd
from sklearn.model_selection import train_test_split

# System prompt stored with every training example.
SYSTEM_PROMPT = "You are querying the sales database, what is the SQL query for the following input question?"

# Load the raw records from the JSONL file; each record is read for its
# "input" (question) and "output" (SQL query) fields below.
sql_raw = pd.read_json("../llama2/data/sql_examples.jsonl", lines=True).to_dict(orient="records")

# Build one chat-format JSON document per record.
# json.dumps escapes quotes, backslashes and newlines inside the question and
# SQL text; pasting them into a str.format template would emit invalid JSON
# as soon as a query contained a double quote.
sql_data = [
    json.dumps(
        {
            "messages": [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": d["input"]},
                {"role": "assistant", "content": d["output"]},
            ]
        }
    )
    for d in sql_raw
]

# Hold out 20% of the examples; the fixed random_state keeps the split reproducible.
sql_data_train, sql_data_test = train_test_split(sql_data, test_size=0.2, random_state=42)

# Save each split as JSONL (one JSON document per line).
with open("../data/sql_examples_training.jsonl", "w") as f:
    for line in sql_data_train:
        f.write(line + "\n")
with open("../data/sql_examples_validation.jsonl", "w") as f:
    for line in sql_data_test:
        f.write(line + "\n")


In [2]:
import os  # required by the os.getenv calls below

from openai import AzureOpenAI
from dotenv import load_dotenv

# Load environment variables (e.g. from a local .env file) before reading them.
load_dotenv()

# Client used by the following cells for file upload and fine-tuning calls.
# Endpoint and key are read from the environment rather than hard-coded.
client = AzureOpenAI(
  azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"), 
  api_key=os.getenv("AZURE_OPENAI_KEY"),  
  api_version="2024-02-15-preview"
)



In [None]:
# Upload the training split for fine-tuning.
# The context manager closes the local file handle once the upload returns.
# Note: despite its name, `file_id` holds the object returned by the API;
# the identifier itself is read later as `file_id.id`.
with open("../data/sql_examples_training.jsonl", "rb") as training_file:
  file_id = client.files.create(
    file=training_file,
    purpose="fine-tune"
  )

In [7]:

# Start a fine-tuning job on the uploaded training file, with
# gpt-35-turbo-0125 as the base model. The returned job is shown as the
# cell output.
client.fine_tuning.jobs.create(
    model="gpt-35-turbo-0125",
    training_file=file_id.id,
)

FineTuningJob(id='ftjob-d3e17f61183e4418a1a087daed0ffb85', created_at=1709682399, error=None, fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=-1, batch_size=-1, learning_rate_multiplier=1), model='gpt-35-turbo-0125', object='fine_tuning.job', organization_id=None, result_files=None, status='pending', trained_tokens=None, training_file='file-3aa6886a393742228166759ba9379136', validation_file=None)

In [3]:
# List fine-tuning jobs, requesting at most 10 (limit=10).
client.fine_tuning.jobs.list(limit=10)

SyncCursorPage[FineTuningJob](data=[FineTuningJob(id='ftjob-d3e17f61183e4418a1a087daed0ffb85', created_at=1709682399, error=None, fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=-1, batch_size=-1, learning_rate_multiplier=1), model='gpt-35-turbo-0125', object='fine_tuning.job', organization_id=None, result_files=None, status='pending', trained_tokens=None, training_file='file-3aa6886a393742228166759ba9379136', validation_file=None), FineTuningJob(id='ftjob-6d9da57bc84a4815a00fce1abfad4c92', created_at=1709682305, error=None, fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=-1, batch_size=-1, learning_rate_multiplier=1), model='gpt-35-turbo-0613', object='fine_tuning.job', organization_id=None, result_files=None, status='running', trained_tokens=None, training_file='file-e40dc3569422454ba96d86ba5d9cff3f', validation_file='file-c849ad30f0b146ecabe378d615d448e8')], has_more=False, object='list')

In [4]:
# Fetch the current state of a single fine-tuning job.
# NOTE: the job ID is hard-coded; substitute the ID of the job you want to inspect.
client.fine_tuning.jobs.retrieve(
    fine_tuning_job_id="ftjob-6d9da57bc84a4815a00fce1abfad4c92",
)


FineTuningJob(id='ftjob-6d9da57bc84a4815a00fce1abfad4c92', created_at=1709682305, error=None, fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs=-1, batch_size=-1, learning_rate_multiplier=1), model='gpt-35-turbo-0613', object='fine_tuning.job', organization_id=None, result_files=None, status='running', trained_tokens=None, training_file='file-e40dc3569422454ba96d86ba5d9cff3f', validation_file='file-c849ad30f0b146ecabe378d615d448e8')

In [5]:
# Cancel a fine-tuning job.
# NOTE: the job ID is hard-coded; substitute the ID of the job you want to cancel.
client.fine_tuning.jobs.cancel(
    fine_tuning_job_id="ftjob-6d9da57bc84a4815a00fce1abfad4c92",
)

FineTuningJob(id='ftjob-6d9da57bc84a4815a00fce1abfad4c92', created_at=1709682305, error=None, fine_tuned_model=None, finished_at=1709683413, hyperparameters=Hyperparameters(n_epochs=-1, batch_size=-1, learning_rate_multiplier=1), model='gpt-35-turbo-0613', object='fine_tuning.job', organization_id=None, result_files=None, status='cancelled', trained_tokens=None, training_file='file-e40dc3569422454ba96d86ba5d9cff3f', validation_file='file-c849ad30f0b146ecabe378d615d448e8')

In [9]:
# Request events for a fine-tuning job (limit=10) and keep the result in
# `events` for the next cell.
# NOTE: the job ID is hard-coded; substitute the ID of your own job.
events = client.fine_tuning.jobs.list_events(
    limit=10,
    fine_tuning_job_id="ftjob-6d9da57bc84a4815a00fce1abfad4c92",
)

In [12]:
# Print the message of each event, one per line.
for event in events:
    print(event.message)

Training started.
Job started.
Preprocessing started.
Job enqueued. Waiting for jobs ahead to complete.


# Use a fine-tuned model

In [None]:
import os  # required by the os.getenv calls below

from openai import AzureOpenAI
from dotenv import load_dotenv

# Load environment variables (e.g. from a local .env file) before reading them.
load_dotenv()

# This cell builds its own client so the section can be run on its own.
client = AzureOpenAI(
  azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"), 
  api_key=os.getenv("AZURE_OPENAI_KEY"),  
  api_version="2024-02-15-preview"
)

# NOTE(review): the model value below is a placeholder; replace it with the
# name under which your fine-tuned model is deployed before running.
completion = client.chat.completions.create(
  model="ft:gpt-3.5-turbo:my-org:custom_suffix:id",
  messages=[
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"}
  ]
)
# Show the message of the first returned choice.
print(completion.choices[0].message)