In [93]:
# High-level description of the model to train. It is interpolated into the
# data-generation system message and sent as the user message when the
# inference system prompt is generated.
prompt = (
    "A model that takes in complex questions about advanced mathematics "
    "and provides thorough, step-by-step explanations "
    "suitable for a student learning the topic."
)


In [94]:
# Sampling temperature intended for the OpenAI generation requests below.
temperature = 0.5
# How many prompt/response pairs to generate. The fine-tuning API rejected a
# 5-example training file ("must have at least 10 examples"), so keep this >= 10.
number_of_examples = 20

In [6]:
from dotenv import load_dotenv
import os
from openai import OpenAI
# Read variables from a local .env file (if any) into the process environment.
load_dotenv()

# The key is taken from the environment, never hard-coded in the notebook.
# `client` is the shared OpenAI handle used by every later cell.
api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

In [95]:
import os
import openai
import random
from tenacity import retry, stop_after_attempt, wait_exponential


In [96]:
import os
import random
from tenacity import retry, stop_after_attempt, wait_exponential
from openai import OpenAI

# Reuses the OpenAI `client` created in the earlier setup cell.


N_RETRIES = 3  # Number of retry attempts

@retry(stop=stop_after_attempt(N_RETRIES), wait=wait_exponential(multiplier=1, min=4, max=70))
def generate_example(prompt, prev_examples, temperature=0.5):
    """Generate one synthetic prompt/response training sample with GPT-4.

    Args:
        prompt: High-level description of the model being trained; it is
            embedded in the system message.
        prev_examples: Previously generated samples (strings). At most 8 of
            them (a random subset when there are more) are replayed as
            assistant turns so the model can avoid repeating itself. May be
            empty or None.
        temperature: Sampling temperature forwarded to the chat completion.

    Returns:
        The concatenated text of the streamed completion ("" when the stream
        carried no content).

    Raises:
        tenacity.RetryError: when every one of the N_RETRIES attempts raised
            (tenacity's default behaviour once the stop condition is hit).
    """
    messages = [
        {
            "role": "system",
            "content": (
                "You are generating data which will be used to train a machine learning model.\n\n"
                "You will be given a high-level description of the model we want to train, and from that, you will generate data samples, "
                "each with a prompt/response pair.\n\n"
                "You will do so in this format:\n"
                "```\nprompt\n-----------\nresponse_goes_here\n-----------\n```\n\n"
                "Only one prompt/response pair should be generated per turn.\n\n"
                "For each turn, make the example slightly more complex than the last, while ensuring diversity.\n\n"
                "Make sure your samples are unique and diverse, yet high-quality and complex enough to train a well-performing model.\n\n"
                f"Here is the type of model we want to train:\n`{prompt}`"
            ),
        }
    ]

    # Replay earlier samples as context. Sampling into a local name keeps the
    # caller's list untouched; 8 is the cap on replayed samples.
    context_examples = prev_examples or []
    if len(context_examples) > 8:
        context_examples = random.sample(context_examples, 8)  # Limit to 8 for diversity
    messages.extend(
        {"role": "assistant", "content": example} for example in context_examples
    )

    # Generate completion with streaming enabled
    stream = client.chat.completions.create(
        model="gpt-4",
        messages=messages,
        temperature=temperature,
        max_tokens=1000,
        stream=True,
    )

    # Collect the streamed fragments and join once at the end.
    fragments = []
    for chunk in stream:
        # A chunk can arrive with an empty `choices` list; skip it rather than
        # failing with IndexError and burning a retry attempt.
        if not chunk.choices:
            continue
        delta_content = chunk.choices[0].delta.content
        if delta_content:
            fragments.append(delta_content)

    return "".join(fragments)

# Build the dataset one sample at a time. Each call receives the samples
# generated so far, so later samples can differ from earlier ones.
prev_examples = []
for i in range(number_of_examples):
    print(f'Generating example {i + 1}')
    # Forward the configured temperature; otherwise the function silently falls
    # back to its own default and edits to the config cell have no effect.
    example = generate_example(prompt, prev_examples, temperature=temperature)
    prev_examples.append(example)

print("\nGenerated Examples:")
for example in prev_examples:
    print(example)


Generating example 1
Generating example 2
Generating example 3
Generating example 4
Generating example 5
Generating example 6
Generating example 7
Generating example 8
Generating example 9
Generating example 10
Generating example 11
Generating example 12
Generating example 13
Generating example 14
Generating example 15
Generating example 16
Generating example 17
Generating example 18
Generating example 19
Generating example 20

Generated Examples:
prompt
-----------
How do you solve the equation 2x + 3 = 7 using basic algebra?
-----------
response_goes_here
-----------
Here are the steps to solve the equation 2x + 3 = 7:

1. Subtraction: Subtract 3 from both sides of the equation to isolate the term with x on one side. This gives us 2x = 7 - 3, which simplifies to 2x = 4.

2. Division: Divide both sides of the equation by 2 to solve for x. This gives us x = 4 / 2, which simplifies to x = 2. 

So, the solution to the equation 2x + 3 = 7 is x = 2.
-----------
prompt
-----------
Can you e

In [97]:
import os
from openai import OpenAI


def generate_system_message(prompt, temperature=0.5):
    """Ask GPT-4 for a concise inference-time system prompt.

    Args:
        prompt: High-level description of the model being trained; it is
            stripped of surrounding whitespace and sent as the user message.
        temperature: Sampling temperature forwarded to the chat completion.

    Returns:
        The content of the first choice's message, as returned by the API.
    """
    instructions = (
        "You will be given a high-level description of the model we are training, "
        "and from that, you will generate a simple system prompt for that model to use. "
        "Remember, you are not generating the system message for data generation -- "
        "you are generating the system message to use for inference. A good format to follow is "
        "`Given WHAT_THE_MODEL_SHOULD_DO.`\n\n"
        "Make it as concise as possible. Include nothing but the system prompt in your response.\n\n"
        "For example, never write: `\"SYSTEM_PROMPT_HERE`."
    )
    conversation = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": prompt.strip()},
    ]

    # Single, non-streaming request: the reply is short (capped at 500 tokens).
    completion = client.chat.completions.create(
        model="gpt-4",
        messages=conversation,
        temperature=temperature,
        max_tokens=500,
    )

    return completion.choices[0].message.content


# Forward the configured temperature so the config cell also governs this call
# instead of the function's built-in default.
system_message = generate_system_message(prompt, temperature=temperature)

print(f'The system message is: `{system_message}`. Feel free to re-run this cell if you want a better result.')


The system message is: `Given a complex advanced mathematics question, provide a thorough, step-by-step explanation suitable for a student learning the topic.`. Feel free to re-run this cell if you want a better result.


In [98]:

import json
import pandas as pd

# Initialize lists to store prompts and responses
prompts = []
responses = []
skipped_examples = 0

# Parse out prompts and responses from examples.
# After splitting on the separator, the prompt is at index 1 and the response
# at index 3. Both are checked BEFORE anything is appended: appending the prompt
# and then failing on the response would leave the two lists with different
# lengths and make the DataFrame construction below raise.
for example in prev_examples:
    split_example = example.split('-----------')
    if len(split_example) < 4:
        skipped_examples += 1
        continue
    prompts.append(split_example[1].strip())
    responses.append(split_example[3].strip())

if skipped_examples:
    print(f'Skipped {skipped_examples} example(s) that did not match the expected format.')

# Create a DataFrame
df = pd.DataFrame({
    'prompt': prompts,
    'response': responses
})

# Remove duplicates
df = df.drop_duplicates()

print('There are ' + str(len(df)) + ' successfully-generated examples.')

# Create training examples in the chat format used for fine-tuning:
# one system / user / assistant conversation per record.
training_examples = [
    {
        "messages": [
            {"role": "system", "content": system_message.strip()},
            {"role": "user", "content": row.prompt},
            {"role": "assistant", "content": row.response}
        ]
    }
    for row in df.itertuples(index=False)
]

# Save training examples to a .jsonl file (one JSON object per line)
with open('training_examples.jsonl', 'w', encoding='utf-8') as f:
    for training_example in training_examples:
        f.write(json.dumps(training_example) + '\n')

There are 20 successfully-generated examples.


In [99]:
from pathlib import Path
from openai import OpenAI


# Upload the JSONL training set. The context manager guarantees the file handle
# is closed once the upload call returns (or raises).
with open("training_examples.jsonl", "rb") as training_data:
    training_file = client.files.create(
        file=training_data,
        purpose="fine-tune",
    )


# Extract the file ID
training_file_id = training_file.id

In [100]:

# Start a fine-tuning job on the uploaded training file. The call is the last
# expression of the cell, so the returned job object is displayed.
job_request = {
    "training_file": training_file_id,
    "model": "gpt-4o-mini-2024-07-18",
}
client.fine_tuning.jobs.create(**job_request)

FineTuningJob(id='ftjob-XLs759TDBkPkyCyywyMQzU3a', created_at=1730281986, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-fstdT2M6lEmwLIKxwMKeW2i1', result_files=[], seed=138091890, status='validating_files', trained_tokens=None, training_file='file-lri19muNjPgADVWb1QjXbXtp', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None)

In [92]:
# List up to `jobs_page_size` fine-tuning jobs to check their status and errors.
jobs_page_size = 10
client.fine_tuning.jobs.list(limit=jobs_page_size)

SyncCursorPage[FineTuningJob](data=[FineTuningJob(id='ftjob-nIHsSuxwCU2Ob71rzryFc3Ci', created_at=1730281512, error=Error(code='invalid_n_examples', message='Training file has 5 example(s), but must have at least 10 examples', param='training_file'), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_epochs='auto', batch_size='auto', learning_rate_multiplier='auto'), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-fstdT2M6lEmwLIKxwMKeW2i1', result_files=[], seed=813466721, status='failed', trained_tokens=None, training_file='file-yrZr4GjAwD1YLeuPMS1WMrwH', validation_file=None, estimated_finish=None, integrations=[], user_provided_suffix=None), FineTuningJob(id='ftjob-e7lxvnoe9RlPxwyPQQrGKXjk', created_at=1730281394, error=Error(code='invalid_n_examples', message='Training file has 5 example(s), but must have at least 10 examples', param='training_file'), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(n_ep

In [105]:
from openai import OpenAI

# Hard-coded ID of a finished fine-tuned model; replace it with the
# `fine_tuned_model` value reported by your own job.
fine_tuned_model_id = "ft:gpt-4o-mini-2024-07-18:van-heurck::ANzl11Kn"
test_question = "Can you explain how to find the eigenvalues and eigenvectors of a matrix?"

# Use the same system message the training examples were built with.
inference_messages = [
    {"role": "system", "content": system_message},
    {"role": "user", "content": test_question},
]

completion = client.chat.completions.create(
    model=fine_tuned_model_id,
    messages=inference_messages,
)
print(completion.choices[0].message.content)

Sure, here's how to find the eigenvalues and eigenvectors of a matrix:

1. Identify the matrix: Determine the square matrix for which you want to find the eigenvalues and eigenvectors.

2. Write the characteristic equation: The characteristic equation of a matrix A is given by det(A - λI) = 0, where λ is a scalar (the eigenvalue), I is the identity matrix of the same size as A, and det denotes the determinant.

3. Solve for λ: The solutions to the characteristic equation det(A - λI) = 0 are the eigenvalues of the matrix A.

4. Find the eigenvectors: For each eigenvalue λ, the corresponding eigenvectors are the non-zero solutions to the equation (A - λI)v = 0, where v is the eigenvector.

5. Normalize the eigenvectors (optional): If you want the eigenvectors to be unit vectors, you can normalize them by dividing each eigenvector by its magnitude.

That's it! You have found the eigenvalues and eigenvectors of the matrix.
