## Install dependencies

In [None]:
!pip install -Uq openai

## Imports and initial setup

In [None]:
from google.colab import files
import pandas as pd
import openai
import io
import os
import re

## Environment variables

In [None]:
# Publish the API key through the process environment, then hand the same
# value to the OpenAI client. Replace "..." with a real key before running.
os.environ["OPENAI_API_KEY"] = "..."
openai.api_key = os.environ["OPENAI_API_KEY"]

## Transform uploaded file into a pandas dataframe

In [None]:
# Formatting markers shared by add_prompt_suffix and parse_completion below.
# Appended to every prompt.
prompt_suffix = "\n\n###\n\n"
# Prepended to every completion.
completion_prefix = " "
# Placed between the labels of a multi-label completion.
completion_separator = "\n"
# Appended to every completion; the same text is passed as the `stop`
# sequence when the fine-tuned model is queried later in this notebook.
completion_suffix = " END"

# Load the labeled CSV, rename the two source columns to 'prompt' and
# 'completion', and keep only those two columns.
df = pd.read_csv('labeled_asks.csv')
df = df.rename(columns={'MSG_TEXT': 'prompt', 'Answer Type Requested': 'completion'})
df = df.filter(items=['prompt', 'completion'], axis=1)

# add common suffix pattern to prompt
def add_prompt_suffix(ask):
  """Return `ask` with the module-level `prompt_suffix` appended."""
  suffixed_ask = ask + prompt_suffix
  return suffixed_ask

# add " " prefix, sort labels alphabetically and separate labels by "\n"
def parse_completion(completion):
  """Format a comma-separated label string as a fine-tuning completion.

  The labels are split on ",", stripped of surrounding whitespace, sorted
  alphabetically and joined with `completion_separator`; the result is wrapped
  in `completion_prefix` and `completion_suffix`.

  Args:
    completion: one label, or several labels separated by commas.

  Returns:
    The formatted completion string.
  """
  # Strip each label before sorting: with input such as "b, a" the leading
  # space would otherwise decide the sort order and leak into the training
  # target. Empty fragments (trailing or doubled commas) are dropped so they
  # do not turn into blank labels.
  stripped = (label.strip() for label in completion.split(","))
  labels = sorted(label for label in stripped if label)
  return completion_prefix + completion_separator.join(labels) + completion_suffix

# Rewrite both columns in place with the formatting helpers defined above.
df['prompt'] = df['prompt'].apply(add_prompt_suffix)
df['completion'] = df['completion'].apply(parse_completion)

# Preview the first 10 formatted rows.
df.head(10)

## Save dataset as a jsonl file

In [None]:
# Write the dataset as JSON Lines: one JSON object per row, one row per line.
df.to_json("answer_type_requested.jsonl", orient="records", lines=True)

## Prepare data with OpenAI data preparation tool

In [None]:
# Run the OpenAI CLI data-preparation tool on the JSONL file written above.
# Later cells read answer_type_requested_prepared_train.jsonl and
# answer_type_requested_prepared_valid.jsonl, which are presumably produced by
# this step — confirm the file names in the tool's output.
!openai tools fine_tunes.prepare_data -f answer_type_requested.jsonl -q

## Train the model

In [None]:
# Start a fine-tuning job on the prepared training file, with `davinci`
# passed as the model.
!openai api fine_tunes.create -t "answer_type_requested_prepared_train.jsonl" -m davinci # not calculating classification metrics due to multilabel not being supported

## Compare fine-tuned model with one-shot classification

In [None]:
# Identifier of the fine-tuned model to evaluate.
# NOTE(review): this value looks like a placeholder — replace it with the
# model name reported by the fine-tuning job.
fine_tuned_model = 'davinci:ft-abcdef123456'

# Load the validation split (JSON Lines, one example per line).
validation_df = pd.read_json('answer_type_requested_prepared_valid.jsonl', lines=True)

# Pre-create the result columns that the evaluation loop fills in per row.
validation_df["response_labels"] = ""
validation_df["completion_labels"] = ""
validation_df["accuracy"] = ""

def extract_labels(completion):
  """Split a completion (or model response) into its list of labels.

  Removes at most one leading whitespace character and a trailing " END"
  marker (plus any whitespace after it), then splits the rest on newlines.

  Args:
    completion: text with newline-separated labels, optionally ending in
      " END".

  Returns:
    A list of label strings; an empty input yields [""].
  """
  # The END marker is anchored to the end of the text. An unanchored
  # r"\sEND?" also deletes " EN"/" END" inside a label, e.g. it turns
  # "Open ENDED" into "OpenED".
  completion = re.sub(r"^\s?|\sEND\s*$", "", completion)
  return completion.split("\n")

def calculate_accuracy(completion_labels, response_labels):
  """Return the fraction of expected labels that appear in the response.

  Extra labels in `response_labels` are not penalised; only the labels in
  `completion_labels` are checked.

  Args:
    completion_labels: the expected labels.
    response_labels: the labels returned by the model.

  Returns:
    A value between 0 and 1, or 0 when `completion_labels` is empty.
  """
  total = len(completion_labels)
  if total == 0:
    return 0
  # Count matches and divide once. Adding 1/total per match accumulates float
  # error (ten matches out of ten would give 0.9999999999999999).
  hits = sum(1 for label in completion_labels if label in response_labels)
  return hits / total

# Query the fine-tuned model once per validation row and score the labels it
# returns against the expected labels of that row.
for row in validation_df.itertuples():
  # NOTE(review): no max_tokens is passed, so the API default applies —
  # confirm that long multi-label responses are not cut short. logprobs=5 is
  # requested, but the log probabilities are not read anywhere in this loop.
  res_fine_tuned = openai.Completion.create(model=fine_tuned_model, prompt=row.prompt, stop=" END", temperature=0.3, logprobs=5)
  response_text = res_fine_tuned.choices[0].text
  completion_labels = extract_labels(row.completion)
  response_labels = extract_labels(response_text)
  accuracy = calculate_accuracy(completion_labels, response_labels)
  # Store the comma-joined labels and the score on this row.
  validation_df.loc[row.Index, 'completion_labels'] = ",".join(completion_labels)
  validation_df.loc[row.Index, 'response_labels'] = ",".join(response_labels)
  validation_df.loc[row.Index, 'accuracy'] = accuracy

  # Progress indicator: print the index of the row just processed.
  print(row.Index)

# Average the per-row scores and print the result as a percentage.
accuracy_score = validation_df['accuracy'].mean()
print(f"accuracy_score: {accuracy_score * 100!s}%")

# Save the per-row results to CSV and download the file through the Colab
# `files` helper.
validation_df.to_csv('comparison.csv')
files.download('comparison.csv')