## Install dependencies

In [None]:
!pip install -Uq openai

## Imports

In [None]:
from google.colab import files
import numpy as np
import pandas as pd
import openai
import io
import os

## Environment variables

In [None]:
# Expose the OpenAI API key both as an environment variable and on the client
# module. The "..." literal is presumably a stand-in for the real key; avoid
# saving an actual key inside the notebook.
api_key_value = "..."
os.environ["OPENAI_API_KEY"] = api_key_value
openai.api_key = os.environ.get("OPENAI_API_KEY")


## Transform uploaded file into a pandas dataframe

In [None]:
# Separator appended to every prompt so the end of the prompt is explicit.
prompt_suffix = "\n\n###\n\n"

def parse_prompt(prompt):
  """Return `prompt` with the module-level `prompt_suffix` appended."""
  return "".join((prompt, prompt_suffix))

# Load the labelled CSV, rename the two columns of interest to the
# prompt/completion names, and drop every other column.
labeled_asks = pd.read_csv('labeled_asks.csv')
column_names = {'MSG_TEXT': 'prompt', 'Answer Type Requested': 'completion' }
df_all = labeled_asks.rename(columns=column_names).filter(['prompt', 'completion'], axis=1)

# Append the prompt separator to every prompt, then export one JSON record per line.
suffixed_prompts = df_all['prompt'].apply(parse_prompt)
df_all['prompt'] = suffixed_prompts
df_all.to_json("answer_type_requested.jsonl", orient="records", lines=True)

## Prepare data with OpenAI data preparation tool

In [None]:
# Runs the OpenAI data preparation CLI on the JSONL file passed with -f.
# Later cells read answer_type_requested_prepared_train.jsonl and
# answer_type_requested_prepared_valid.jsonl, which this step is expected to produce.
!openai tools fine_tunes.prepare_data -f answer_type_requested.jsonl # n, Y, Y - split into training/validation samples, don't add whitespace in front of completions, we'll add it later

## Split multilabel training samples into separate rows, map labels to ids, and add completion prefix

In [None]:
df_train = pd.read_json('answer_type_requested_prepared_train.jsonl', lines=True)

print("df_train length: " + str(len(df_train)))

# A multilabel sample has a comma-separated completion; emit one row per
# (prompt, label) pair. The rows are collected in a list and the DataFrame is
# built once, which avoids enlarging a DataFrame one row at a time (that gets
# progressively slower as the frame grows).
split_rows = [
  {'prompt': row.prompt, 'completion': label}
  for row in df_train.itertuples()
  for label in row.completion.split(",")
]
# Passing `columns` keeps both columns present even when there are no rows.
df_train_processed = pd.DataFrame(split_rows, columns=["prompt", "completion"])

print("df_train_processed length: " + str(len(df_train_processed)))

# Create label mappings to ensure all labels are single tokens:
# every distinct label gets a numeric string id, in order of first appearance.

unique_labels = df_train_processed['completion'].unique()

# id -> label, keyed by the stringified position of the label.
id2label = {str(position): name for position, name in enumerate(unique_labels)}
# label -> id, the inverse of the mapping above.
label2id = {name: label_id for label_id, name in id2label.items()}

print(id2label)
print(label2id)

# Parse completions in training samples

# Whitespace placed in front of every completion id.
completion_prefix = " "

def parse_completion_train(completion):
  """Return the prefixed id for a label; raises KeyError for a label missing from `label2id`."""
  label_id = label2id[completion]
  return completion_prefix + label_id

# Swap each label for its prefixed id and export one JSON record per line.
encoded_completions = df_train_processed['completion'].apply(parse_completion_train)
df_train_processed['completion'] = encoded_completions
df_train_processed.to_json("answer_type_requested_prepared_train_processed.jsonl", orient="records", lines=True)

## Train the model

In [None]:
# Start a fine-tuning job on the `davinci` model using the processed training file.
!openai api fine_tunes.create -t "answer_type_requested_prepared_train_processed.jsonl" -m davinci # not calculating classification metrics due to multilabel not being supported

## Check the status of the fine-tuning job

In [None]:
# Follow the fine-tuning job given with -i. NOTE(review): `ft-abcdef123456` looks
# like a placeholder - presumably it must be replaced with the real job id.
!openai api fine_tunes.follow -i ft-abcdef123456

## Run the validation samples against fine-tuned model

In [None]:
# Arbitrary cut-off: a label is kept in the multilabel prediction when its
# probability (converted from the returned logprob) is above this value.
multilabel_threshold = 0.2

# Name of the fine-tuned model that the validation prompts are sent to.
fine_tuned_model = 'davinci:ft-abcdef123456'

# Validation samples, one JSON record per line.
df_valid = pd.read_json('answer_type_requested_prepared_valid.jsonl', lines=True)

def logprob_to_prob(logprob):
  """Convert a natural-log probability into a probability by exponentiating it."""
  probability = np.exp(logprob)
  return probability

def calculate_accuracy(completion, labels_from_prob):
  """Return the fraction of expected labels that were also predicted.

  Args:
    completion: comma-separated expected labels.
    labels_from_prob: comma-separated predicted labels.

  Returns:
    A float between 0.0 and 1.0. Predicted labels that are not expected do
    not lower the score; an expected label listed twice is counted twice.
  """
  completion_labels = completion.split(",")
  prob_labels = set(labels_from_prob.split(","))
  matched = sum(label in prob_labels for label in completion_labels)
  # Divide once instead of adding 1/len per match: repeated addition drifts
  # (six matches of 1/6 add up to 0.9999999999999999 rather than 1.0).
  # str.split always returns at least one element, so the divisor is never 0.
  return matched / len(completion_labels)

# Per-row result columns, filled in while querying the model. Text columns
# start empty; numeric columns start as floats so that writing probabilities
# and accuracies into them never changes the column dtype (assigning a float
# into an int column triggers pandas' incompatible-dtype warning).
df_valid["response_id"] = ""
df_valid["response_label"] = ""
df_valid["labels_from_prob"] = ""
df_valid["accuracy_from_prob"] = 0.0

# One probability column per known label.
for label in unique_labels:
  df_valid[label] = 0.0

for row in df_valid.itertuples():
  # Request a single token plus the top-5 alternatives with their logprobs.
  res_fine_tuned = openai.Completion.create(model=fine_tuned_model, prompt=row.prompt, max_tokens=1, temperature=0, logprobs=5)
  choice = res_fine_tuned.choices[0]

  # Drop the spaces from the returned text to get the label id.
  # Raises KeyError if the model returned something that is not a known id.
  response_id = choice.text.replace(" ", "")
  response_label = id2label[response_id]
  df_valid.loc[row.Index, 'response_id'] = response_id
  df_valid.loc[row.Index, 'response_label'] = response_label

  labels = []

  # Alternatives for the first (and only) generated token.
  top_logprob = choice.logprobs.top_logprobs[0]
  for key, value in top_logprob.items():
    stripped_key = key.replace(" ", "")
    # Ignore alternatives that are not label ids.
    if stripped_key not in id2label:
      continue
    label = id2label[stripped_key]
    prob = logprob_to_prob(value)
    df_valid.loc[row.Index, label] = prob
    if prob > multilabel_threshold:
      labels.append(label)

  # if all probabilities are below threshold, pick the completion response
  if not labels:
    labels.append(response_label)

  labels_from_prob = ",".join(labels)
  df_valid.loc[row.Index, 'labels_from_prob'] = labels_from_prob

  accuracy_from_prob = calculate_accuracy(row.completion, labels_from_prob)
  df_valid.loc[row.Index, 'accuracy_from_prob'] = accuracy_from_prob

  # Progress indicator: index of the row just processed.
  print(row.Index)

accuracy_score = df_valid['accuracy_from_prob'].mean()
print("accuracy_score: " + str(accuracy_score*100) + "%")

# Save the per-row comparison and trigger a download through google.colab's `files` helper.
df_valid.to_csv('comparison.csv')
files.download('comparison.csv')