In [None]:
%pip install openai python-dotenv

In [2]:
import os, openai

from dotenv import load_dotenv

# Load environment variables from the .env file (if present) into the process environment
load_dotenv()

# Fail fast with an actionable message instead of silently setting the key to None,
# which would only surface later when a cell tries to call the API.
_api_key = os.getenv("OPENAI_API_KEY")
if not _api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set: add it to your .env file or export it in the environment."
    )
openai.api_key = _api_key

-- DATASET PREPARATION
For the gpt-3.5-turbo model, OpenAI requires the training file to be a JSONL file in a specific format, the conversational chat format (CCF). In each training example, the system message (`systemRole`) defines the context in which the model should behave.

In [None]:
# STEP-0: converting a csv database to jsonl on the CCF format
import csv, jsonlines

# Converts the csv file named "filename" (without .csv) to the CCF jsonl format
# and returns the path of the jsonl file that was written.
def csv2jsonl_ccf(filename, systemRole):
    """Convert a two-column question/answer CSV into a CCF-formatted JSONL file.

    Reads ``<filename>.csv`` (UTF-8; the first row is treated as a header and
    skipped) and writes ``<filename>.jsonl`` with one chat example per data
    row: the shared system message, the first column as the user message and
    the second column as the assistant message.

    Args:
        filename: Path of the CSV file without its ``.csv`` extension.
        systemRole: Content of the system message added to every example.

    Returns:
        The path of the JSONL file that was written (``filename + ".jsonl"``).

    Raises:
        ValueError: If a non-blank data row does not have exactly two columns.
    """
    csv_path = filename + ".csv"
    jsonl_path = filename + ".jsonl"
    system = {"role": "system", "content": systemRole}

    # newline="" leaves newline handling to the csv module, as its documentation
    # recommends, so quoted fields containing line breaks are read correctly.
    with open(csv_path, "r", encoding="utf-8", newline="") as file:
        f_in = csv.reader(file)
        next(f_in, None)  # skip the header row; the default avoids StopIteration on an empty file

        with jsonlines.open(jsonl_path, "w") as f_out:
            for row in f_in:
                # Blank lines (e.g. a trailing newline) and rows whose cells are all
                # empty carry no training example: skip them instead of crashing.
                if not any(cell.strip() for cell in row):
                    continue
                if len(row) != 2:
                    raise ValueError(
                        f"{csv_path}, line {f_in.line_num}: expected 2 columns "
                        f"(question, answer), got {len(row)}"
                    )
                qst, rep = row
                f_out.write({
                    "messages": [
                        system,
                        {"role": "user", "content": qst},
                        {"role": "assistant", "content": rep},
                    ]
                })

    return jsonl_path

# Try the converter on the DevFest Q/A dataset (reads ../data/devfest_qa_dataset.csv)
csv2jsonl_ccf(
    filename="../data/devfest_qa_dataset",
    systemRole="You are a kind helpful assitant in DevFest GDG event",
)

In [None]:
# STEP-1: uploading the training file
# The context manager closes the local file handle once the upload call has returned
# (the original left the handle open for the lifetime of the kernel).
with open("../data/devfest_qa_dataset.jsonl", "rb") as training_file:
    data = openai.File.create(
        file=training_file,
        purpose='fine-tune'
    )
print(data.id) # ex: file-yp2g7yTbWUwa9YVCJbAHtTEX

-- MODEL CREATION
Based on the JSONL dataset created above and uploaded to OpenAI's cloud storage, we will create a fine-tuned model trained specifically on our dataset. The process can take some time, and a success notification will be sent to the owner of the API key's account; in the meantime, we can still check the progress of the job (the `status` variable).

In [3]:
# STEP-2.1: launching the fine-tuning job
# Needs `data` (the uploaded training file) from the STEP-1 cell: run that cell first.
resp = openai.FineTuningJob.create(
    model="gpt-3.5-turbo",
    training_file=data.id,
)
print(resp)
# example of a fine-tune job id: ftjob-YmiMDwb5JJ7LUbdQRUHKoe32

NameError: name 'data' is not defined

In [4]:
# STEP-2.2: Checking The Process Evolution

# List up to 10 fine-tuning jobs
jobs = openai.FineTuningJob.list(limit=10)
# print(jobs) #--> inspect the full listing, including each job id

if not jobs["data"]:
    # Without this guard the indexing below fails with a bare IndexError
    raise RuntimeError("No fine-tuning job found for this API key; run STEP-2.1 first.")

# Use the first job of the listing
# NOTE(review): presumably the most recently created job -- confirm the API's ordering
latest_job = jobs["data"][0]

# Retrieve the state of that fine-tune job
status = latest_job["status"]
model_name = latest_job["fine_tuned_model"]

if model_name is None:
    # No fine-tuned model name is attached to the job (yet); report the job state
    # instead of printing "None", which the chat cell would then use as its model.
    print(f"Fine-tuning job {latest_job['id']} has status '{status}': no fine-tuned model is available yet.")
else:
    print(model_name)

ft:gpt-3.5-turbo-0613:personal::85iF2qDK


-- USING THE FINE-TUNED MODEL
Let's now create a Q/A assistant with the resulting model.

In [None]:
# gives the response of a given prompt while taking into account the context of the conversation
def chat_with_mygpt(prompt, context):
    """Ask the fine-tuned model to answer ``prompt``.

    The request is made of the system message, then (when ``context`` is
    non-empty) the previous assistant reply, then the new user prompt, so the
    conversation is sent in chronological order and ends on the user turn the
    model has to answer.

    Args:
        prompt: The user's new message.
        context: The previous assistant reply, or an empty string on the first turn.

    Returns:
        The assistant's reply text. If the call raises, the exception message is
        returned as a string instead (nothing is re-raised).
    """
    try:
        systemRole = "You are a kind helpful assitant in DevFest GDG event"

        messages = [{"role": "system", "content": systemRole}]
        if context:
            # Earlier assistant turn goes BEFORE the new user message; it is left out
            # on the first turn rather than sent as an empty assistant message.
            messages.append({"role": "assistant", "content": context})
        messages.append({"role": "user", "content": prompt})

        # Call the OpenAI API using the /v1/chat/completions endpoint
        completion = openai.ChatCompletion.create(
            model=model_name,  # to be changed when needed
            messages=messages
        )
        # Extract the assistant's reply from the response
        return completion.choices[0].message.content
    except Exception as e:
        return str(e)
    
# Main usage: a minimal interactive loop around chat_with_mygpt
print("Welcome to our DevFestGPT!")
context = ""  # previous assistant reply, passed back on the next turn

while True:
    try:
        # Trim surrounding whitespace so that inputs such as "exit " are still recognised
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input or an interrupt ends the chat cleanly instead of showing a traceback
        print("\nGoodbye!")
        break

    if user_input.lower() in ('exit', 'quit'):
        print("Goodbye!")
        break

    if not user_input:
        # Nothing was typed: ask again rather than sending an empty prompt to the API
        continue

    # Call the chat_with_mygpt function to get the assistant's reply
    assistant_reply = chat_with_mygpt(user_input, context)

    print("DevFestGPT:", assistant_reply)

    context = assistant_reply