In [None]:
from datasets import load_dataset, DatasetDict
import json

# Load the customer-support dataset (rows carry 'query' and 'response' fields)
ds = load_dataset("Kaludi/Customer-Support-Responses")

# Hold out 24 rows for evaluation. shuffle=False skips the random permutation,
# so the same rows land in each split on every run.
split_dataset = ds['train'].train_test_split(test_size=24, shuffle=False)

# Rename the held-out split from 'test' to 'eval' in a single step
split_dataset['eval'] = split_dataset.pop('test')

# DatasetDict with the splits
new_dataset = DatasetDict({
    'train': split_dataset['train'],
    'eval': split_dataset['eval']
})

In [None]:
# Convert to chat completion. This is needed to finetune models like GPT3.5+
def convert_to_chat_completion(row, system_prompt="You're a Customer Support Bot"):
    """Turn one dataset row into a chat-format fine-tuning example.

    Args:
        row: Mapping with a 'query' entry (used as the user message) and a
            'response' entry (used as the assistant message).
        system_prompt: Content of the system message that opens the
            conversation. Defaults to the persona used throughout this
            notebook, so existing single-argument calls are unaffected.

    Returns:
        dict: ``{"messages": [...]}`` holding the system, user and assistant
        messages, in that order.

    Raises:
        KeyError: If ``row`` has no 'query' or no 'response' entry.
    """
    return {
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": row['query']},
            {"role": "assistant", "content": row['response']}
        ]
    }

# Save the converted text to jsonl file
def save_as_jsonl(dataset, file_path):
    """Write every row of ``dataset`` to ``file_path``, one JSON object per line.

    Each row is first passed through ``convert_to_chat_completion``. Any
    existing file at ``file_path`` is overwritten; an empty ``dataset``
    produces an empty file.

    Args:
        dataset: Iterable of rows accepted by ``convert_to_chat_completion``.
        file_path: Destination path of the .jsonl file.
    """
    # Explicit encoding so the output does not depend on the platform's
    # locale default.
    with open(file_path, 'w', encoding='utf-8') as f:
        for row in dataset:
            formatted_row = convert_to_chat_completion(row)
            f.write(json.dumps(formatted_row) + '\n')

# Write each split out to its own chat-format jsonl file
for split_name, out_path in (('train', 'train.jsonl'), ('eval', 'eval.jsonl')):
    save_as_jsonl(new_dataset[split_name], out_path)

In [None]:
from openai import OpenAI
import os
from dotenv import load_dotenv

# Read variables from a local .env file (if any) into the process environment,
# then pick up the OpenAI key from there
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Build the OpenAI API client used by the upload and fine-tuning cells
client = OpenAI(api_key=openai_api_key)

In [None]:
# upload train file
# The context manager closes the local file handle once the upload call returns.
with open("train.jsonl", "rb") as train_fh:
    train_file = client.files.create(file=train_fh, purpose="fine-tune")

# Show the returned file object as the cell output; its `id` is what a
# fine-tuning job takes as `training_file`.
train_file

In [None]:
# upload eval (validation) file
# The context manager closes the local file handle once the upload call returns.
with open("eval.jsonl", "rb") as eval_fh:
    eval_file = client.files.create(file=eval_fh, purpose="fine-tune")

# Show the returned file object as the cell output; its `id` is what a
# fine-tuning job takes as `validation_file`.
eval_file

In [None]:
# Set up the fine-tuning job and its hyperparameters.
# NOTE(review): both file ids are pasted in by hand, presumably from the output
# of earlier upload calls -- confirm they still point at the current
# train/eval files before re-running.
train_file_id, eval_file_id = "file-RN2COkigFkNUZIRmnPVsbJHd", "file-rEzAq1vatTRAA2XnMmGKyQul"
client.fine_tuning.jobs.create(
    model="gpt-3.5-turbo",
    training_file=train_file_id,
    validation_file=eval_file_id,
    hyperparameters=dict(batch_size=4, learning_rate_multiplier=0.1, n_epochs=50),
)

In [None]:
# store finetuned model
ft_job = client.fine_tuning.jobs.retrieve("ftjob-UuEgkubgXVNJDjmL494oIXDr")
ft_model = ft_job.fine_tuned_model

# The job only carries a fine-tuned model name once it has finished
# successfully. Stop here with the job status instead of handing None to the
# cells that build a chat model from `ft_model`.
if ft_model is None:
    raise RuntimeError(
        f"Fine-tuning job {ft_job.id} has no fine-tuned model yet "
        f"(status: {ft_job.status}); re-run this cell once the job has succeeded."
    )

# Evaluate

In [None]:
# import eval library ragas to test model performance
from ragas import evaluate
from ragas.metrics import faithfulness,answer_relevancy,answer_correctness,answer_similarity
import pandas as pd
from datasets import Dataset 
import os
from ragas import evaluate
from ragas.metrics import faithfulness, answer_correctness
from langchain_openai.chat_models import ChatOpenAI

# LangChain chat wrapper around the fine-tuned model
ft_model_ = ChatOpenAI(model_name=ft_model)


def _generate_answer(query):
    """Return the fine-tuned model's reply to one customer query.

    The system message matches the one used in the fine-tuning examples so the
    model is queried the same way it was trained.
    """
    reply = ft_model_.invoke([
        ("system", "You're a Customer Support Bot"),
        ("human", query),
    ])
    return reply.content


# Transform dataset to include required columns.
# 'answer' is what the fine-tuned model actually produces for the query;
# 'ground_truth' is the reference response stored in the dataset. Scoring is
# only meaningful when these come from different sources.
eval_dataset = [
    {
        'contexts': '',
        'question': item['query'],
        'answer': _generate_answer(item['query']),
        'ground_truth': item['response'],
    }
    for item in new_dataset['eval']
]
eval_df = pd.DataFrame(eval_dataset)

# Reshape the frame into the column -> list-of-values dict handed to
# Dataset.from_dict for the ragas eval function
eval_dataset_2 = {
    column: eval_df[column].tolist()
    for column in ('question', 'answer', 'contexts', 'ground_truth')
}
# Each 'contexts' entry has to be a list, so wrap bare strings in one
eval_dataset_2['contexts'] = [
    [ctx] if isinstance(ctx, str) else ctx
    for ctx in eval_dataset_2['contexts']
]
eval_dataset_3 = Dataset.from_dict(eval_dataset_2)

In [None]:
# evaluate using ragas for faithfulness, answer_relevancy and answer_correctness
import nest_asyncio
import asyncio
# Patch asyncio so ragas can run its async calls inside the notebook's
# already-running event loop
nest_asyncio.apply()

# NOTE(review): ragas uses `llm` as the judge that scores the metrics, so the
# fine-tuned model is grading here -- confirm that is intended. Every row's
# contexts entry is [''], so the faithfulness score has no context to check
# against -- confirm it should stay in the metric list.
result = evaluate(
    dataset=eval_dataset_3,
    llm=ft_model_,
    metrics=[faithfulness, answer_relevancy, answer_correctness],
)

print(result)