In [1]:
import json
import tiktoken
import numpy as np
from collections import defaultdict

# Tokenizer shared by the token-counting helpers in this cell.
# o200k_base is the default encoding for gpt-4o models; this requires the
# latest version of tiktoken to be installed.
encoding = tiktoken.get_encoding("o200k_base")

def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):
    """Estimate the total token count of one chat conversation.

    Args:
        messages: Iterable of message dicts; every value in each dict is
            tokenized with the module-level ``encoding``.
        tokens_per_message: Fixed overhead added once per message.
        tokens_per_name: Extra overhead added for a message that has a
            ``"name"`` key.

    Returns:
        The encoded length of all message values, plus the per-message and
        per-name overheads, plus a constant 3 added once per conversation.
    """
    total = 0
    for msg in messages:
        total += tokens_per_message
        total += sum(len(encoding.encode(field_value)) for field_value in msg.values())
        # Dict keys are unique, so a "name" field contributes its overhead at most once.
        if "name" in msg:
            total += tokens_per_name
    total += 3
    return total

def num_assistant_tokens_from_messages(messages):
    """Count the tokens in the ``content`` of every assistant message.

    Args:
        messages: Iterable of message dicts with ``"role"`` and ``"content"`` keys.

    Returns:
        Sum of the encoded ``content`` lengths over messages whose role is
        ``"assistant"``; 0 when there are none.
    """
    return sum(
        len(encoding.encode(msg["content"]))
        for msg in messages
        if msg["role"] == "assistant"
    )

def print_distribution(values, name):
    """Print min/max, mean/median and p10/p90 for a sample of numbers.

    Args:
        values: Iterable of numbers to summarise.
        name: Label used in the printed heading.

    An empty sample is reported with a short message instead of raising:
    ``min``/``max`` fail on an empty sequence, which happens when a data file
    yields no valid examples.
    """
    # Materialise once so emptiness can be tested for any iterable input.
    values = list(values)
    print(f"\n#### Distribution of {name}:")
    if not values:
        print("no values to summarise")
        return
    print(f"min / max: {min(values)}, {max(values)}")
    print(f"mean / median: {np.mean(values)}, {np.median(values)}")
    print(f"p10 / p90: {np.quantile(values, 0.1)}, {np.quantile(values, 0.9)}")

files = ['data/training_set.jsonl', 'data/validation_set.jsonl']

# For each dataset file, count total and assistant-only tokens per example and
# print their distributions.
for file in files:
    print(f"Processing file: {file}")
    total_tokens = []
    assistant_tokens = []

    # Stream the file line by line. Each line is handled independently, so no
    # buffering into chunks is needed and the parsing code lives in one place.
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines (e.g. a trailing newline) are not examples.
            if not line.strip():
                continue
            try:
                ex = json.loads(line)
            except json.JSONDecodeError as e:
                print(f"Skipping line due to JSON decode error: {e}")
                continue
            # An example without a "messages" key is counted as an empty conversation.
            messages = ex.get("messages", [])
            total_tokens.append(num_tokens_from_messages(messages))
            assistant_tokens.append(num_assistant_tokens_from_messages(messages))

    if total_tokens:
        print_distribution(total_tokens, "total tokens")
        print_distribution(assistant_tokens, "assistant tokens")
    else:
        # Statistics such as min/max are undefined for an empty sample.
        print("No valid examples found; skipping token statistics.")
    print('*' * 50)

Processing file: data/training_set.jsonl

#### Distribution of total tokens:
min / max: 27, 460
mean / median: 71.13599337524505, 66.0
p10 / p90: 47.0, 100.0

#### Distribution of assistant tokens:
min / max: 0, 416
mean / median: 35.55436693030487, 31.0
p10 / p90: 14.0, 63.0
**************************************************
Processing file: data/validation_set.jsonl

#### Distribution of total tokens:
min / max: 32, 325
mean / median: 70.9818169528187, 66.0
p10 / p90: 47.0, 100.0

#### Distribution of assistant tokens:
min / max: 0, 276
mean / median: 35.40178450723266, 31.0
p10 / p90: 14.0, 62.0
**************************************************


In [2]:
import json

# Function to load a dataset with error handling for invalid JSON
def load_dataset(file_path):
    """Load a JSON Lines file into a list of parsed records.

    Args:
        file_path: Path to a UTF-8 encoded ``.jsonl`` file.

    Returns:
        A list with one parsed JSON value per valid line, in file order.

    Blank or whitespace-only lines are skipped silently, since they are not
    records. Lines that contain invalid JSON are reported on stdout and skipped.
    """
    dataset = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            # A trailing newline or spacer line should not be reported as a JSON error.
            if not line.strip():
                continue
            try:
                dataset.append(json.loads(line))
            except json.JSONDecodeError as e:
                print(f"Skipping line due to JSON decode error: {e}")
    return dataset

# Training split: load it, then report its size and its first conversation.
training_dataset = load_dataset('data/training_set.jsonl')
print(
    f"Number of examples in training set: {len(training_dataset)}\n"
    "First example in training set:"
)
for message in training_dataset[0]["messages"]:
    print(message)

# Validation split: same report, separated from the previous one by a blank line.
validation_dataset = load_dataset('data/validation_set.jsonl')
print(
    f"\nNumber of examples in validation set: {len(validation_dataset)}\n"
    "First example in validation set:"
)
for message in validation_dataset[0]["messages"]:
    print(message)

Number of examples in training set: 59172
First example in training set:
{'role': 'system', 'content': 'You are an expert in CFA finance.'}
{'role': 'user', 'content': 'Explain the reclassification of investment property under U.S. GAAP.'}
{'role': 'assistant', 'content': 'Under U.S. GAAP, investment property can be reclassified as owner-occupied property.'}

Number of examples in validation set: 14794
First example in validation set:
{'role': 'system', 'content': 'You are an expert in CFA finance.'}
{'role': 'user', 'content': 'What were the interest coverage ratios for Nokia and Ericsson in 2008?'}
{'role': 'assistant', 'content': 'The interest coverage ratios for 2008 were: - 32.0 for Nokia - 9.6 for Ericsson.'}


In [3]:
# Upload fine-tuning files

import os
from openai import AzureOpenAI

# Client for the Azure OpenAI resource; endpoint and key are read from the environment.
client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2024-08-01-preview",  # This API version or later is required to access seed/events/checkpoint features
)

print("Client created")

training_file_name = 'data/training_set.jsonl'
validation_file_name = 'data/validation_set.jsonl'

# Upload the training and validation dataset files to Azure OpenAI with the SDK.
# Each file is opened in a `with` block so its handle is closed as soon as the
# upload call returns, instead of being left open for the life of the kernel.
with open(training_file_name, "rb") as training_file:
    training_response = client.files.create(file=training_file, purpose="fine-tune")
training_file_id = training_response.id

with open(validation_file_name, "rb") as validation_file:
    validation_response = client.files.create(file=validation_file, purpose="fine-tune")
validation_file_id = validation_response.id

print("Training file ID:", training_file_id)
print("Validation file ID:", validation_file_id)

Client created
Training file ID: file-f74cc3b5bbee4a0ab35f8839488bc8be
Validation file ID: file-21f6a6ae0d3c47728eb9e0b66338335a


In [None]:
# client.files.delete(training_file_id)
# client.files.delete(validation_file_id)

''

In [4]:
# Submit fine-tuning training job

response = client.fine_tuning.jobs.create(
    training_file=training_file_id,
    validation_file=validation_file_id,
    # Enter base model name. Note that in Azure OpenAI the model name contains
    # dashes and cannot contain dot/period characters.
    model="gpt-4o-2024-08-06",
    # seed parameter controls reproducibility of the fine-tuning job. If no seed
    # is specified one will be generated automatically.
    seed=105,
)

# Keep the job ID: it is what later cells use to monitor the fine-tuning job,
# which will take some time to start and complete.
job_id = response.id

print(f"Job ID: {job_id!s}")
print(f"Status: {response.status!s}")
print(response.model_dump_json(indent=2))

Job ID: ftjob-b84598f8029240b487e6941abdb166c5
Status: pending
{
  "id": "ftjob-b84598f8029240b487e6941abdb166c5",
  "created_at": 1734585249,
  "error": null,
  "fine_tuned_model": null,
  "finished_at": null,
  "hyperparameters": {
    "n_epochs": -1,
    "batch_size": -1,
    "learning_rate_multiplier": 1
  },
  "model": "gpt-4o-2024-08-06",
  "object": "fine_tuning.job",
  "organization_id": null,
  "result_files": null,
  "seed": 105,
  "status": "pending",
  "trained_tokens": null,
  "training_file": "file-f74cc3b5bbee4a0ab35f8839488bc8be",
  "validation_file": "file-21f6a6ae0d3c47728eb9e0b66338335a",
  "estimated_finish": 1734588823,
  "integrations": null
}


In [5]:
# Track training status

from IPython.display import clear_output
import time

start_time = time.time()

# Statuses after which the job will not change any more. "cancelled" has to be
# listed too: without it, a cancelled job keeps this loop polling forever.
terminal_statuses = ("succeeded", "failed", "cancelled")

# Get the status of our fine-tuning job.
response = client.fine_tuning.jobs.retrieve(job_id)
status = response.status

# If the job isn't done yet, poll it every 10 seconds.
while status not in terminal_statuses:
    time.sleep(10)

    response = client.fine_tuning.jobs.retrieve(job_id)
    status = response.status
    # Take a single clock reading so minutes and seconds come from the same instant.
    elapsed = int(time.time() - start_time)

    print(response.model_dump_json(indent=2))
    print("Elapsed time: {} minutes {} seconds".format(elapsed // 60, elapsed % 60))
    print(f'Status: {status}')
    # wait=True delays the clear until new output arrives, so the latest
    # snapshot stays visible while the loop sleeps.
    clear_output(wait=True)

print(f'Fine-tuning job {job_id} finished with status: {status}')

# List all fine-tuning jobs for this resource.
print('Checking other fine-tune jobs for this resource.')
response = client.fine_tuning.jobs.list()
print(f'Found {len(response.data)} fine-tune jobs.')

Fine-tuning job ftjob-b84598f8029240b487e6941abdb166c5 finished with status: succeeded
Checking other fine-tune jobs for this resource.
Found 5 fine-tune jobs.


In [6]:
# Fetch up to 10 events recorded for the fine-tuning job and show them as JSON.
response = client.fine_tuning.jobs.list_events(
    fine_tuning_job_id=job_id,
    limit=10,
)
events_json = response.model_dump_json(indent=2)
print(events_json)

{
  "data": [
    {
      "id": "ftevent-132a09435b2a4372b8bdcbd45c584857",
      "created_at": 1734593846,
      "level": "info",
      "message": "Training tokens billed: 4091000",
      "object": "fine_tuning.job.event",
      "type": "message"
    },
    {
      "id": "ftevent-5a3fb768724c46db90e782361fd2169d",
      "created_at": 1734593846,
      "level": "info",
      "message": "Model Evaluation Passed.",
      "object": "fine_tuning.job.event",
      "type": "message"
    },
    {
      "id": "ftevent-05fdca4a14b44d87891927e09045054a",
      "created_at": 1734593846,
      "level": "info",
      "message": "Completed results file: file-b28d43822a0e4ba5b615d8e35a7f5d8b",
      "object": "fine_tuning.job.event",
      "type": "message"
    },
    {
      "id": "ftevent-24503b4e1abc412c809f15be29d4ddb4",
      "created_at": 1734593840,
      "level": "info",
      "message": "Postprocessing started.",
      "object": "fine_tuning.job.event",
      "type": "message"
    },
    {
 

In [7]:
# List the checkpoints saved for the fine-tuning job and show them as JSON.
response = client.fine_tuning.jobs.checkpoints.list(job_id)
checkpoints_json = response.model_dump_json(indent=2)
print(checkpoints_json)

{
  "data": [
    {
      "id": "ftchkpt-fad71355c583415c86f0d6a22b17dd70",
      "created_at": 1734593403,
      "fine_tuned_model_checkpoint": "gpt-4o-2024-08-06.ft-b84598f8029240b487e6941abdb166c5",
      "fine_tuning_job_id": "ftjob-b84598f8029240b487e6941abdb166c5",
      "metrics": {
        "full_valid_loss": null,
        "full_valid_mean_token_accuracy": null,
        "step": 1518.0,
        "train_loss": 0.6745920181274414,
        "train_mean_token_accuracy": 0.8134920597076416,
        "valid_loss": 0.8711808813680516,
        "valid_mean_token_accuracy": 0.7512376237623762
      },
      "object": "fine_tuning.job.checkpoint",
      "step_number": 1518
    },
    {
      "id": "ftchkpt-8545e078ee1d441bb7cc67941772f96e",
      "created_at": 1734593274,
      "fine_tuned_model_checkpoint": "gpt-4o-2024-08-06.ft-b84598f8029240b487e6941abdb166c5:ckpt-step-1517",
      "fine_tuning_job_id": "ftjob-b84598f8029240b487e6941abdb166c5",
      "metrics": {
        "full_valid_loss": 

In [None]:
# Look the job up again and remember the name of the model it produced.

response = client.fine_tuning.jobs.retrieve(job_id)
fine_tuned_model = response.fine_tuned_model

# Show the full job record as JSON.
print(response.model_dump_json(indent=2))

{
  "id": "ftjob-b84598f8029240b487e6941abdb166c5",
  "created_at": 1734585249,
  "error": null,
  "fine_tuned_model": "gpt-4o-2024-08-06.ft-b84598f8029240b487e6941abdb166c5",
  "finished_at": 1734593846,
  "hyperparameters": {
    "n_epochs": 1,
    "batch_size": 39,
    "learning_rate_multiplier": 1
  },
  "model": "gpt-4o-2024-08-06",
  "object": "fine_tuning.job",
  "organization_id": null,
  "result_files": [
    "file-b28d43822a0e4ba5b615d8e35a7f5d8b"
  ],
  "seed": 105,
  "status": "succeeded",
  "trained_tokens": 5943754,
  "training_file": "file-f74cc3b5bbee4a0ab35f8839488bc8be",
  "validation_file": "file-21f6a6ae0d3c47728eb9e0b66338335a",
  "estimated_finish": 1734588823,
  "integrations": null
}


In [9]:
# Deploy fine-tuned model

import json
import requests

# Bearer token used to authorise the management-plane deployment request.
# It must come from the TEMP_AUTH_TOKEN environment variable. A token pasted
# into the notebook is a leaked credential that also exposes the owner's
# identity claims, so no literal fallback is kept here.
token = os.getenv("TEMP_AUTH_TOKEN")
if not token:
    raise RuntimeError(
        "TEMP_AUTH_TOKEN is not set. Export a management access token "
        "(for example from `az account get-access-token`) before running this cell."
    )

subscription = "c26b1de8-8139-4208-9a48-3cbe4fb007c4"
resource_group = "finetune-model-test"
resource_name = "finetune-model-test"
# Custom deployment name you chose for your fine-tuning model.
# NOTE(review): the name says "gpt-4o-mini" although the deployed model is a
# gpt-4o fine-tune; it is kept because the chat-completion cell calls this name.
model_deployment_name = "gpt-4o-mini-2024-07-18-ft-test"

deploy_params = {'api-version': "2024-10-01"}
deploy_headers = {'Authorization': 'Bearer {}'.format(token), 'Content-Type': 'application/json'}

deploy_body = {
    "sku": {"name": "standard", "capacity": 1},
    "properties": {
        "model": {
            "format": "OpenAI",
            # Use the name retrieved from the finished job rather than a pasted
            # literal, so a re-run deploys the model that was just trained. It
            # looks like gpt-4o-2024-08-06.ft-b84598f8029240b487e6941abdb166c5.
            "name": fine_tuned_model,
            "version": "1"
        }
    }
}
deploy_data = json.dumps(deploy_body)

request_url = f'https://management.azure.com/subscriptions/{subscription}/resourceGroups/{resource_group}/providers/Microsoft.CognitiveServices/accounts/{resource_name}/deployments/{model_deployment_name}'

print('Creating a new deployment...')

# A timeout keeps the cell from hanging indefinitely if the endpoint does not respond.
r = requests.put(request_url, params=deploy_params, headers=deploy_headers, data=deploy_data, timeout=60)

print(r)
print(r.reason)
print(r.json())

Creating a new deployment...
<Response [201]>
Created
{'id': '/subscriptions/c26b1de8-8139-4208-9a48-3cbe4fb007c4/resourceGroups/finetune-model-test/providers/Microsoft.CognitiveServices/accounts/finetune-model-test/deployments/gpt-4o-mini-2024-07-18-ft-test', 'type': 'Microsoft.CognitiveServices/accounts/deployments', 'name': 'gpt-4o-mini-2024-07-18-ft-test', 'sku': {'name': 'standard', 'capacity': 1}, 'properties': {'model': {'format': 'OpenAI', 'name': 'gpt-4o-2024-08-06.ft-b84598f8029240b487e6941abdb166c5', 'version': '1'}, 'versionUpgradeOption': 'NoAutoUpgrade', 'currentCapacity': 1, 'capabilities': {'area': 'US', 'chatCompletion': 'true', 'jsonSchemaResponse': 'true', 'maxContextToken': '128000', 'maxOutputToken': '16384'}, 'provisioningState': 'Creating'}, 'systemData': {'createdBy': 'kirkjaa@gmail.com', 'createdByType': 'User', 'createdAt': '2024-12-23T03:35:49.8456223Z', 'lastModifiedBy': 'kirkjaa@gmail.com', 'lastModifiedByType': 'User', 'lastModifiedAt': '2024-12-23T03:35:4

In [16]:
# Use the deployed customized model

import os
from openai import AzureOpenAI

# Re-create the client for inference; endpoint and key are read from the
# environment and this cell uses API version 2024-06-01.
client = AzureOpenAI(
  azure_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT"),
  api_key = os.getenv("AZURE_OPENAI_API_KEY"),
  api_version = "2024-06-01"
)

# Send one system + user exchange to the deployment. The user prompt is a
# triple-quoted literal, so its line breaks and indentation are sent as written.
response = client.chat.completions.create(
    model = "gpt-4o-mini-2024-07-18-ft-test", # model = "Custom deployment name you chose for your fine-tuning model"
    messages = [
        {"role": "system", "content": "You are an expert in CFA finance."},
        {"role": "user", 
         "content": """
          The following information is available for a manufacturing company:
          • Cost of ending inventory computed using FIFO $4.3m
          • Net realizable value $4.1m
          • Current replacement cost $3.8m

          Article continues below
          If the company is using International Financial Reporting Standards (IFRS) instead of US GAAP, its cost of goods sold (in millions) is most likely:

          A. $0.3 higher.

          B. $0.3 lower.

          C. the same.
        """},
    ]
)

# Print the text of the first returned choice.
print(response.choices[0].message.content)

Under IFRS, inventory is measured at the lower of cost and net realizable value (NRV). Therefore, the inventory value is $4.1 million (NRV), which is lower than the FIFO cost of $4.3 million. The company must write down the inventory by $0.2 million ($4.3 m - $4.1 m). This write-down would increase the cost of goods sold (COGS), making the COGS $0.2 million higher than compared under US GAAP, where the write-down would be to the current replacement cost of $3.8 million. However, since this option is not given in the answers, none of the provided answers (A, B, C) correctly reflect the situation. Hence, a correct answer should state that COGS is $0.2 million higher when using FIFO under IFRS compared to GAAP if both choices were available.
