In [7]:
import os
import pandas as pd
import json
from openai import OpenAI
import openai
from dotenv import load_dotenv

# Load environment variables from .env 
load_dotenv()

# Access the API key from environment variables
api_key = os.getenv("OPENAI_API_KEY")
# Set the OpenAI API key
os.environ['OPENAI_API_KEY'] = api_key
#API Key to OpenAI Library
openai.api_key = os.getenv("OPENAI_API_KEY")

In [8]:
train_df = pd.read_parquet("hf://datasets/kuroschin/airline-customer-support/data/train-00000-of-00001.parquet")

In [9]:
output_file = 'training.jsonl'
with open(output_file, 'w') as f:
    for _, row in train_df.iterrows():
        json_line = json.dumps({
        "messages": [
            {"role": "system", "content": "You are a helpful assistant as FAQ support."},
            {"role": "user", "content": row["input"]},
            {"role": "assistant", "content": row["output"]}
            ]
        })
        f.write(json_line + '\n')

print(f"Dataset converted and saved to {output_file}")

Dataset converted and saved to training.jsonl


In [22]:
client = OpenAI()

In [23]:
uploaded_file = client.files.create(
    file=open(output_file, "rb"),
    purpose="fine-tune"
)
print(f"File uploaded successfully. File ID: {uploaded_file.id}")

File uploaded successfully. File ID: file-Vk9kQcqp2CVEG2zMVQxsJw


In [24]:
fine_tune_job = client.fine_tuning.jobs.create(
    training_file=uploaded_file.id,
    suffix="custom-fine-tuned-model",
    model="gpt-4o-mini-2024-07-18" 
)
print(f"Fine-tuning job started. Job ID: {fine_tune_job.id}")

Fine-tuning job started. Job ID: ftjob-DhFZltTShabcnh4BaIeWMXmC


## List of fine-tuning jobs, get job details

In [25]:
# List fine-tuning jobs
jobs = client.fine_tuning.jobs.list(limit=10)
print("Recent fine-tuning jobs:", jobs)

Recent fine-tuning jobs: SyncCursorPage[FineTuningJob](data=[FineTuningJob(id='ftjob-DhFZltTShabcnh4BaIeWMXmC', created_at=1735446440, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs='auto'), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-irqPgDHDyaL9rgQQ62IoNn4R', result_files=[], seed=1744134386, status='validating_files', trained_tokens=None, training_file='file-Vk9kQcqp2CVEG2zMVQxsJw', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs='auto')), type='supervised'), user_provided_suffix='custom-fine-tuned-model'), FineTuningJob(id='ftjob-3efBSbA5ik0ApGCSXaXxwEON', created_at=1735446078, error=Error(code=None, message=None, param=None), fine_tuned_mo

## Get job details 



In [26]:
# Retrieve job details
job_details = client.fine_tuning.jobs.retrieve(fine_tune_job.id)
print("Fine-tuning job details:", job_details)

Fine-tuning job details: FineTuningJob(id='ftjob-DhFZltTShabcnh4BaIeWMXmC', created_at=1735446440, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs='auto'), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-irqPgDHDyaL9rgQQ62IoNn4R', result_files=[], seed=1744134386, status='validating_files', trained_tokens=None, training_file='file-Vk9kQcqp2CVEG2zMVQxsJw', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size='auto', learning_rate_multiplier='auto', n_epochs='auto')), type='supervised'), user_provided_suffix='custom-fine-tuned-model')


In [28]:
# Retrieve after model is trained to get the model name
job_details = client.fine_tuning.jobs.retrieve(fine_tune_job.id)
print("Fine-tuning job details:", job_details)

Fine-tuning job details: FineTuningJob(id='ftjob-DhFZltTShabcnh4BaIeWMXmC', created_at=1735446440, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size=1, learning_rate_multiplier=1.8, n_epochs=3), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-irqPgDHDyaL9rgQQ62IoNn4R', result_files=[], seed=1744134386, status='running', trained_tokens=None, training_file='file-Vk9kQcqp2CVEG2zMVQxsJw', validation_file=None, estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size=1, learning_rate_multiplier=1.8, n_epochs=3)), type='supervised'), user_provided_suffix='custom-fine-tuned-model')


In [35]:
completion = client.chat.completions.create(
    model="ft:gpt-4o-mini-2024-07-18:llmprojects:custom-fine-tuned-model:AjfIKSw3",
    # Replace with the actual model name retrieved in above cell fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal:custom-fine-tuned-model:AbOWr1n9'
    messages=[
        {"role": "system", "content": "You are a helpful assistant which acts as FAQ Support Assistant."},
        {"role": "user", "content": "How do I check in online?"}
    ]
)
print("Fine-tuned model response:", completion.choices[0].message.content)

Fine-tuned model response: To check in online, visit our website or mobile app and enter your booking reference and last name. Follow the prompts to complete the check-in process.
