<div>

</div>

# Finetuning OpenAI LLM using Website Data from Webᵀ Crawl

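Fine-tune OpenAI's GPT-3.5 Turbo in Python on question-and-answer data exported from Webᵀ Crawl, then compare the fine-tuned model's answers with those of the original model.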

In [None]:
!pip install openai jsonlines

In [None]:
import os
from time import sleep
import zipfile

import jsonlines
import openai

# Use the OPENAI_API_KEY environment variable when it is set and non-empty;
# otherwise fall back to the literal placeholder string, which has to be
# replaced with a real key before any API call can succeed.
openai.api_key = os.environ.get('OPENAI_API_KEY') or "OPENAI_API_KEY"

# Show what is in the current working directory.
os.listdir('.')

In [None]:
# Identifier of the crawl download. The archive is read from
# "<download_id>.zip" in the working directory and unpacked into a directory
# named after the identifier.
download_id = "48b6bb1b-cf85-4611-9dkc-d9e9ce3eb19f"
archive_path = f"{download_id}.zip"
with zipfile.ZipFile(archive_path) as archive:
    archive.extractall(path=download_id)

In [None]:
# List the entries of the directory the archive was extracted into.
os.listdir(download_id)

In [None]:
# Merge every .jsonl file found in the extracted download into one training
# file, optionally prepending a system message to each record's messages.
train_jsonl_file = os.path.join(download_id, "train.jsonl")

# Leave train.jsonl out of the inputs: it is written into the same directory,
# so on a re-run of this cell it would be truncated for writing and then read
# back as if it were source data. Sorting gives a deterministic merge order,
# because os.listdir() returns entries in arbitrary order.
jsonl_files = sorted(
    f for f in os.listdir(download_id)
    if f.endswith('.jsonl') and f != "train.jsonl"
)

# System Message, if you have one
system_message = {
    "role": "system",
    "content": "You are the Pokedex. You only answer questions related to Pokemon."
}

# System Message, if you don't have one
# system_message = None

# write everything to train.jsonl
with jsonlines.open(train_jsonl_file, mode='w') as writer:
    for jsonl_file in jsonl_files:
        file_path = os.path.join(download_id, jsonl_file)
        with jsonlines.open(file_path) as reader:
            for line in reader:
                if system_message:
                    # Each record is expected to hold a "messages" list; the
                    # system message is placed at the front of it.
                    line['messages'].insert(0, system_message)
                writer.write(line)

In [None]:
# Preview the first four lines of the merged training file.
# The file is opened in a `with` block so the handle is closed, and decoded
# explicitly as UTF-8 (the records contain non-ASCII text such as "Pokémon")
# instead of relying on the platform's default encoding.
with open(train_jsonl_file, 'r', encoding='utf-8') as train_file:
    preview_lines = train_file.read().split('\n')[:4]
preview_lines

['{"messages": [{"role": "user", "content": "What is Scovillain?"}, {"role": "assistant", "content": "Scovillain is a Grass/Fire type Pokémon introduced in Generation 9. It is known as the Spicy Pepper Pokémon."}]}',
 '{"messages": [{"role": "user", "content": "What are the abilities of Scovillain?"}, {"role": "assistant", "content": "Scovillain has the abilities Chlorophyll, Insomnia, and Moody (hidden ability)."}]}',
 '{"messages": [{"role": "user", "content": "What is the height and weight of Scovillain?"}, {"role": "assistant", "content": "Scovillain has a height of 0.9 m (2′11″) and a weight of 15.0 kg (33.1 lbs)."}]}',
 '{"messages": [{"role": "user", "content": "What are the base stats of Scovillain?"}, {"role": "assistant", "content": "Scovillain has a base HP of 65, Attack of 108, Defense of 65, Special Attack of 108, Special Defense of 65, and Speed of 75. Its total base stats are 486."}]}']

In [None]:
# Upload the merged training file to OpenAI with purpose "fine-tune".
# The file is opened in binary mode, so the bytes are sent as written and no
# locale-dependent decoding happens, and inside a `with` block so the handle is
# closed once the upload call returns.
with open(train_jsonl_file, "rb") as training_file:
    res = openai.File.create(
        file=training_file,
        purpose='fine-tune'
    )

# The id of the uploaded file is what the fine-tuning job refers to.
file_id = res["id"]
file_id

'file-T81NMys3pfjVElwrzSG0M2GG'

In [None]:
# Start a fine-tuning job on top of gpt-3.5-turbo, trained on the uploaded file.
# Note that `res` now holds the job object, no longer the file-upload response.
res = openai.FineTuningJob.create(model="gpt-3.5-turbo", training_file=file_id)

In [None]:
# Keep the id of the fine-tuning job for polling.
# NOTE(review): `res` is reused by several cells; this is only the job id if the
# cell that created the fine-tuning job was the most recent one to assign `res`.
job_id = res["id"]
job_id

In [None]:
# Poll the fine-tuning job until it reaches a terminal state.
# Checking `status` as well as `finished_at` keeps the loop from spinning
# forever if a failed or cancelled job never receives a finish timestamp.
terminal_statuses = ("succeeded", "failed", "cancelled")
while True:
    res = openai.FineTuningJob.retrieve(job_id)
    if res["finished_at"] is not None or res["status"] in terminal_statuses:
        break
    print(".", end="")
    sleep(100)  # seconds between polls

# Fail loudly here rather than carrying on with a job that produced no model.
if res["status"] != "succeeded":
    raise RuntimeError(
        f"Fine-tuning job {job_id} ended with status {res['status']!r}"
    )

In [None]:
# Name of the fine-tuned model, read from the most recently retrieved job
# object in `res`; it is passed as `model` in the test queries below.
ft_model = res["fine_tuned_model"]
ft_model

'ft:gpt-3.5-turbo-0613:personal::7vvqx6cC'

## Let's Test Our Model

In [None]:
# Ask the fine-tuned model a question that also appears in the training data.
query = """
What is Scovillain?
"""
chat_messages = [{"role": "user", "content": query}]
response = openai.ChatCompletion.create(
    messages=chat_messages,
    temperature=0,
    model=ft_model,
)
# Show the text of the first (and only requested) completion.
first_choice = response['choices'][0]
first_choice['message']['content']

'Scovillain is a Grass/Poison type Pokémon introduced in Generation 9. It is known as the Scovillain Pokémon.'

In [None]:
# Ask the fine-tuned model a second question, about evolution.
query = """
What does Scovillian evolve into?
"""
chat_messages = [{"role": "user", "content": query}]
response = openai.ChatCompletion.create(
    messages=chat_messages,
    temperature=0,
    model=ft_model,
)
# Show the text of the first completion.
first_choice = response['choices'][0]
first_choice['message']['content']

'Scovillian evolves into Scorchill.'

### Now, let's compare it with the original model.

In [None]:
# Send the same question to the base model (no fine-tuning) as a baseline.
x = """
What is Scovillain?
"""
baseline_messages = [{"role": "user", "content": x}]
response = openai.ChatCompletion.create(
    messages=baseline_messages,
    temperature=0,
    model='gpt-3.5-turbo-0613',
)
# Show the text of the first completion.
first_choice = response['choices'][0]
first_choice['message']['content']

'Scovillain is a term coined to describe someone who enjoys and seeks out extremely spicy or hot foods. It is a combination of the words "Scoville," which is a measurement of the heat or spiciness of chili peppers, and "villain," which refers to someone who enjoys or takes pleasure in something that may be considered extreme or challenging. A Scovillain is someone who actively seeks out and enjoys the intense heat and spiciness of foods, often pushing their tolerance levels to the limit.'