In [None]:
pip install openai

In [3]:
import numpy as np
import pandas as pd
import os
import sys
from sentence_transformers import SentenceTransformer, util
import torch
import time
import json


In [6]:
import random
from google.colab import files

input_file = 'hugging_face_train.csv'
train_output_file = '3.5_train.csv'
test_output_file = '3.5_test.csv'

with open(input_file, 'r') as infile:
    lines = infile.readlines()

train_lines = random.sample(lines, 50000)

remaining_lines = list(set(lines) - set(train_lines))

test_lines = random.sample(remaining_lines, 2500)

with open(train_output_file, 'w') as train_file:
    train_file.writelines(train_lines)

with open(test_output_file, 'w') as test_file:
    test_file.writelines(test_lines)

print(f"Random 50,000 lines written to {train_output_file}")
print(f"Random 2,500 lines written to {test_output_file}")

# download
files.download(train_output_file)
files.download(test_output_file)



Random 50,000 lines written to 3.5_train.csv
Random 2,500 lines written to 3.5_test.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [1]:
!pip install openai

import openai



In [4]:
!pip install --upgrade openai



In [6]:
openai.api_key = '' # can't leak

In [None]:
# Begin the training

def upload_file(file_path):
    with open(file_path, 'rb') as f:
        response = openai.File.create(
            file=f,
            purpose='fine-tune'
        )
    return response['id']

dataset_path = 'gpt_3.5_train.txt'
file_id = upload_file(dataset_path)
print(f"Uploaded file ID: {file_id}")

def create_fine_tune_job(file_id, model='gpt-3.5-turbo'):
    response = openai.FineTune.create(
        training_file=file_id,
        model=model
    )
    return response

fine_tune_response = create_fine_tune_job(file_id)
print(f"Fine-tune job created: {fine_tune_response['id']}")

import time

def check_fine_tune_status(job_id):
    response = openai.FineTune.retrieve(id=job_id)
    status = response['status']
    return status

job_id = fine_tune_response['id']
print("Monitoring fine-tuning job...")
while True:
    status = check_fine_tune_status(job_id)
    print(f"Job status: {status}")
    if status == 'succeeded':
        print("Fine-tuning job completed successfully!")
        break
    elif status == 'failed':
        print("Fine-tuning job failed.")
        break
    time.sleep(60)

fine_tuned_model = openai.FineTune.retrieve(id=job_id)['fine_tuned_model']
print(f"Fine-tuned model name: {fine_tuned_model}")

with open('fine_tuned_model_name.txt', 'w') as f:
    f.write(fine_tuned_model)

print("done")





In [13]:
!pip install --upgrade openai


Collecting openai
  Using cached openai-1.30.4-py3-none-any.whl (320 kB)
Installing collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 0.28.0
    Uninstalling openai-0.28.0:
      Successfully uninstalled openai-0.28.0
Successfully installed openai-1.30.4


In [2]:
!pip install openai==0.28
import openai



In [10]:
import time

In [3]:
# had some formatting issues so trying jsonl

import json

input_file = 'gpt_3.5_train.txt'
output_file = 'formatted_dataset.jsonl'

with open(input_file, 'r') as infile:
    lines = infile.readlines()

json_lines = []
for line in lines:

    parts = line.strip().split(', ')
    if len(parts) == 3:
        clue, length, answer = parts
        # Create a JSON object
        json_obj = {
            "prompt": f"{clue}, {length}",
            "completion": answer.strip()
        }
        json_lines.append(json_obj)

with open(output_file, 'w') as outfile:
    for json_obj in json_lines:
        outfile.write(json.dumps(json_obj) + '\n')

print(f"Converted {len(lines)} lines to JSONL format and saved to {output_file}")

from google.colab import files
files.download(output_file)


Converted 50000 lines to JSONL format and saved to formatted_dataset.jsonl


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [7]:
# check?
import json

output_file = 'formatted_dataset.jsonl'
with open(output_file, 'r') as infile:
    for line in infile:
        try:
            json_obj = json.loads(line)
            if 'prompt' not in json_obj or 'completion' not in json_obj:
                print(f"Invalid format: {line}")
        except json.JSONDecodeError as e:
            print(f"JSON decode error: {line}")
print("All good")

All good


In [1]:
!pip install openai
import openai
import time
import pandas as pd
import numpy as np




In [2]:
pip install openai==0.28



In [10]:
import json

input_file = 'gpt_3.5_train.txt'
output_file = 'chat_formatted_dataset.jsonl'

with open(input_file, 'r') as infile:
    lines = infile.readlines()

json_lines = []
for line in lines:
    parts = line.strip().split(', ')
    if len(parts) == 3:
        clue, length, answer = parts
        json_obj = {
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"{clue}, {length}"},
                {"role": "assistant", "content": answer.strip()}
            ]
        }
        json_lines.append(json_obj)

with open(output_file, 'w') as outfile:
    for json_obj in json_lines:
        outfile.write(json.dumps(json_obj) + '\n')

print(f"Converted {len(lines)} lines to chat format and saved to {output_file}")

from google.colab import files
files.download(output_file)


Converted 50000 lines to chat format and saved to chat_formatted_dataset.jsonl


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [3]:
openai.api_key = ''

In [6]:
# get lines
import json
import random

input_file = 'chat_formatted_dataset.jsonl'
output_file = 'chat_formatted_dataset_2000.jsonl'

with open(input_file, 'r') as infile:
    lines = infile.readlines()

if len(lines) < 2000:
    raise ValueError("The input file contains fewer than 2000 lines.")

selected_lines = random.sample(lines, 2000)

with open(output_file, 'w') as outfile:
    for line in selected_lines:
        outfile.write(line)

print(f"Selected 2000 lines and saved to {output_file}")

from google.colab import files
files.download(output_file)


Selected 2000 lines and saved to chat_formatted_dataset_2000.jsonl


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [4]:
train_output_file = 'chat_formatted_dataset_2000.jsonl'

def upload_file(file_path):
    response = openai.File.create(
        file=open(file_path, "rb"),
        purpose='fine-tune'
    )
    return response['id']

train_file_id = upload_file(train_output_file)
print(f"Uploaded train file ID: {train_file_id}")

def create_fine_tune_job(training_file_id, model='gpt-3.5-turbo'):
    response = openai.FineTune.create(
        training_file=training_file_id,
        model=model
    )
    return response

# Use the correct endpoint for creating fine-tune
fine_tune_response = openai.FineTuningJob.create(training_file=train_file_id, model='gpt-3.5-turbo')
print(f"Fine-tune job created: {fine_tune_response['id']}")

def get_fine_tune_job_details(job_id):
    response = openai.FineTuningJob.retrieve(id=job_id)
    return response

def check_fine_tune_status(job_id):
    response = openai.FineTuningJob.retrieve(id=job_id)
    status = response['status']
    return status

job_id = fine_tune_response['id']
print(job_id)

print("Monitoring fine-tuning job...")
while True:
    status = check_fine_tune_status(job_id)
    print(f"Job status: {status}")
    if status == 'succeeded':
        print("Fine-tuning job completed successfully!")
        break
    elif status == 'failed':
        print("Fine-tuning job failed.")
        break
    time.sleep(45)

fine_tuned_model = openai.FineTuningJob.retrieve(id=job_id)['fine_tuned_model']
print(f"Fine-tuned model name: {fine_tuned_model}")

with open('fine_tuned_model_name.txt', 'w') as f:
    f.write(fine_tuned_model)

Uploaded train file ID: file-crnSMWCLbtyChDJJW1Mha3uq
Fine-tune job created: ftjob-z57JP0WeZUuwIkTShHCR5qWF
ftjob-z57JP0WeZUuwIkTShHCR5qWF
Monitoring fine-tuning job...
Job status: validating_files
Job status: validating_files
Job status: validating_files
Job status: validating_files
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: running
Job status: run

In [1]:
!pip install openai
import openai

openai.api_key = ''



In [2]:
pip install openai==0.28



In [None]:
import openai

with open('fine_tuned_model_name.txt', 'r') as f:
    fine_tuned_model = f.read().strip()

In [25]:
def generate_completion(prompt, model):
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=50
    )
    return response.choices[0].message['content'].strip()


def generate_multiple_completions(prompt, model, num_completions=5, max_tokens=50):
    responses = openai.ChatCompletion.create(
        model=model,
        prompt=prompt,
        max_tokens=max_tokens,
        n=num_completions,
        stop=None,
        temperature=0.7
    )
    completions = [response['text'].strip() for response in responses['choices']]
    return completions

prompt = "Fennel or sweet cicely"
completions = generate_multiple_completions(prompt, fine_tuned_model, num_completions=5)
for i, completion in enumerate(completions, 1):
    print(f"Completion {i}: {completion}")

print(f"Using fine-tuned model: {fine_tuned_model}")

prompt = "Some crumbly blocks, 4"
completion = generate_completion(prompt, fine_tuned_model)
print(f"Prompt: {prompt}")
print(f"Completion: {completion}")


InvalidRequestError: Missing required parameter: 'messages'.

In [24]:
# use this function because it works

import openai

def generate_unique_completions(prompt, model, num_completions=5, max_tokens=50):
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens,
        n=num_completions * 2, 
        stop=None,
        temperature=0.9,  
        top_p=0.9 
    )
    completions = set()
    for choice in response['choices']:
        completions.add(choice['message']['content'].strip())
        if len(completions) >= num_completions:
            break

    return list(completions)[:num_completions]  

prompt = "Tries for a role, 5, R___S"
completions = generate_unique_completions(prompt, fine_tuned_model, num_completions=5)
for i, completion in enumerate(completions, 1):
    print(f"Completion {i}: {completion}")


Completion 1: READS:^{
Completion 2: READS|--------------------------------------------------------------------------
Completion 3: READS
Completion 4: AUDITS
Completion 5: READSF


In [None]:
!pip install openai==0.28
import openai

In [6]:

openai.api_key = ''


def upload_file(file_path):
    with open(file_path, 'rb') as f:
        response = openai.File.create(
            file=f,
            purpose='fine-tune'
        )
    return response['id']

dataset_path = 'gpt_3.5_train.txt'
file_id = upload_file(dataset_path)
print(f"Uploaded file ID: {file_id}")

def create_fine_tune_job(file_id, model='gpt-3.5-turbo'):
    response = openai.FineTune.create(
        training_file=file_id,
        model=model
    )
    return response

fine_tune_response = create_fine_tune_job(train_file_id)
print(f"Fine-tune job created: {fine_tune_response['id']}")

def check_fine_tune_status(job_id):
    response = openai.FineTune.retrieve(id=job_id)
    status = response['status']
    return status

job_id = fine_tune_response['id']

print("Monitoring fine-tuning job...")
while True:
    status = check_fine_tune_status(job_id)
    print(f"Job status: {status}")
    if status == 'succeeded':
        print("Fine-tuning job completed successfully!")
        break
    elif status == 'failed':
        print("Fine-tuning job failed.")
        break
    time.sleep(60) 

fine_tuned_model = openai.FineTune.retrieve(id=job_id)['fine_tuned_model']
print(f"Fine-tuned model name: {fine_tuned_model}")

with open('fine_tuned_model_name.txt', 'w') as f:
    f.write(fine_tuned_model)

Uploaded file ID: file-tXI1N4xwQgDm6wDJZAE6TQie


NameError: name 'train_file_id' is not defined

In [None]:
# Potential Load???? Figure this out?

#edit: this works
!pip install openai

import openai

openai.api_key = ''

with open('fine_tuned_model_name.txt', 'r') as f:
    fine_tuned_model = f.read().strip()

def generate_completion(prompt, model):
    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=50
    )
    return response.choices[0].text.strip()

print(f"Using fine-tuned model: {fine_tuned_model}")

prompt = "Fennel or sweet cicely"
completion = generate_completion(prompt, fine_tuned_model)
print(f"Prompt: {prompt}")
print(f"Completion: {completion}")


In [None]:
def generate_multiple_completions(prompt, model, num_completions=5, max_tokens=50):
    responses = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=max_tokens,
        n=num_completions,
        stop=None,
        temperature=0.7
    )
    completions = [response['text'].strip() for response in responses['choices']]
    return completions

prompt = "Fennel or sweet cicely"
completions = generate_multiple_completions(prompt, fine_tuned_model, num_completions=5)
for i, completion in enumerate(completions, 1):
    print(f"Completion {i}: {completion}")



In [7]:
import random
from google.colab import files

input_file = 'reformatted.txt'
train_output_file = 'gpt_3.5_train.txt'
test_output_file = 'gpt_3.5_test.txt'

with open(input_file, 'r') as infile:
    lines = infile.readlines()

train_lines = random.sample(lines, 50000)

remaining_lines = list(set(lines) - set(train_lines))

test_lines = random.sample(remaining_lines, 2500)

with open(train_output_file, 'w') as train_file:
    train_file.writelines(train_lines)

with open(test_output_file, 'w') as test_file:
    test_file.writelines(test_lines)

print(f"Random 50,000 lines written to {train_output_file}")
print(f"Random 2,500 lines written to {test_output_file}")

files.download(train_output_file)
files.download(test_output_file)

Random 50,000 lines written to gpt_3.5_train.txt
Random 2,500 lines written to gpt_3.5_test.txt


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>