## Install dependencies

In [26]:
!pip install -Uq openai

## Imports

In [23]:
from google.colab import files
import pandas as pd
import openai
import io
import os
import time

## Environment variables

In [None]:
# Keep the secret out of the notebook source: reuse a key that is already present in the
# environment, otherwise ask for it interactively (input is not echoed or stored in the cell).
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# The `openai` CLI commands used in later cells read the environment variable;
# the Python client used in this notebook is configured explicitly here.
openai.api_key = os.environ["OPENAI_API_KEY"]

## Load the uploaded CSV file into a pandas DataFrame

In [None]:
# Read the raw CSV export from the working directory.
df = pd.read_csv('answer_type_requested.csv')

# Keep only the prompt/completion pair; `filter` silently ignores a column that is absent.
df2 = df.filter(items=['prompt', 'completion'], axis='columns')

# Preview the first five rows (the default for `head`).
df2.head()

## Save dataset as a jsonl file

In [None]:
# Write one JSON object per row, one row per line (JSON Lines).
df2.to_json(
    path_or_buf="answer_type_requested.jsonl",
    orient="records",
    lines=True,
)

## Prepare data with OpenAI data preparation tool

In [None]:
# Run the OpenAI CLI data-preparation tool on the JSONL file written by the previous cell.
# NOTE(review): the training cell expects `answer_type_requested_prepared_train.jsonl` and
# `answer_type_requested_prepared_valid.jsonl`; presumably this tool produces them when its
# train/validation split suggestion is accepted — confirm against the tool's output.
!openai tools fine_tunes.prepare_data -f answer_type_requested.jsonl

## Train the model

In [None]:
# Start a fine-tune of the `davinci` base model through the OpenAI CLI, passing the prepared
# training file (-t) and validation file (-v) and requesting classification metrics for
# 3 classes (the trailing note on the command explains why 3 rather than 4).
!openai api fine_tunes.create -t "answer_type_requested_prepared_train.jsonl" -v "answer_type_requested_prepared_valid.jsonl" --compute_classification_metrics --classification_n_classes 3 -m davinci # 3 classes for computing metrics as there were no examples for the 4th class "unknown / other"

## Review training results

In [None]:
# Fetch the results of fine-tune `ft-abcdef123456` and redirect the CLI output into result.csv,
# which the next cell loads with pandas.
# NOTE(review): the id looks like a placeholder — replace it with the id reported by the training step.
!openai api fine_tunes.results -i ft-abcdef123456 > result.csv

In [None]:
results = pd.read_csv('result.csv')

# Keep only the rows that actually carry a classification accuracy value.
accuracy_rows = results[results['classification/accuracy'].notnull()]

# A notebook cell only renders its final expression, so the latest metrics row has to be
# printed explicitly; as a bare expression in the middle of the cell it was never shown.
print(accuracy_rows.tail(1).to_string())

# Accuracy over the course of training (rendered as the cell's final expression).
accuracy_rows['classification/accuracy'].plot()

## Compare the fine-tuned model with one-shot classification (one example per category)

In [None]:
# Model identifiers used for the comparison.
fine_tuned_model_gpt3_davinci = 'davinci:ft-abcdef123456'
one_shot_model_gpt3 = 'text-davinci-003'

# Task description placed at the top of the prompt.
_prompt_instructions = (
    'You are a data expert working for a company that supports startup founders.\n'
    'You are analysing all asks posted on their online forum and classifying them '
    'into one of three categories that define what type of answer was requested.\n'
    'The three categories are: connection, favour, information.\n'
    '\n'
    'Here are some examples:\n'
    '\n'
)

# One labelled example per category, as (ask, category) pairs.
_labelled_examples = (
    (
        "I'm looking to speak with someone that could advise us on the way to "
        "structure our next fundraising round. Any introductions would be appreciated!",
        'connection',
    ),
    (
        'We just launched on Product Hunt - would appreciate if you could upvote us!',
        'favour',
    ),
    (
        'What kind of slides would you include in a pre-seed pitch deck? '
        'Any great examples you could share?',
        'information',
    ),
)

# Final template: instructions, the labelled examples, then an open slot whose
# ASK_PROMPT placeholder is substituted with the ask to classify.
one_shot_prompt = (
    _prompt_instructions
    + ''.join(
        f'ask: {ask}\ncategory: {category}\n\n'
        for ask, category in _labelled_examples
    )
    + 'ask: ASK_PROMPT\ncategory:'
)

validation_df = pd.read_json('answer_type_requested_prepared_valid.jsonl', lines=True)

# Marker appended to every fine-tuning prompt; it must not leak into the one-shot prompt.
# The first form (literal backslash + "n") is the one this notebook originally stripped.
# NOTE(review): the second form (a real newline) is handled as well because the stored
# separator may be either — confirm against the data.
_PROMPT_SUFFIXES = ('\\nAnswer Type Requested:', '\nAnswer Type Requested:')

# Transient API failures that are worth retrying instead of aborting the whole run.
_RETRYABLE_ERRORS = (
    openai.error.RateLimitError,
    openai.error.ServiceUnavailableError,
    openai.error.APIConnectionError,
)


def _complete_with_retry(max_attempts=5, initial_delay=1.0, **request):
    """Call `openai.Completion.create(**request)`, retrying transient failures.

    Waits `initial_delay` seconds after the first failure and doubles the wait
    after each further one. The last error is re-raised once `max_attempts`
    calls have failed; any non-transient error propagates immediately.
    """
    delay = initial_delay
    for attempt in range(1, max_attempts + 1):
        try:
            return openai.Completion.create(**request)
        except _RETRYABLE_ERRORS:
            if attempt == max_attempts:
                raise
            time.sleep(delay)
            delay *= 2


def _build_one_shot_prompt(template, fine_tuning_prompt):
    """Insert the ask text (fine-tuning prompt minus its suffix marker) into `template`."""
    ask = fine_tuning_prompt
    for suffix in _PROMPT_SUFFIXES:
        ask = ask.replace(suffix, '')
    return template.replace('ASK_PROMPT', ask)


fine_tuned_gpt3_davinci_results = []
one_shot_gpt3_results = []

for row in validation_df.itertuples():
    # Fine-tuned model: the prompt is sent exactly as stored in the validation file.
    # Only the first generated token is requested; logprobs are returned but not used below.
    res_fine_tuned_gpt3_davinci = _complete_with_retry(
        model=fine_tuned_model_gpt3_davinci,
        prompt=row.prompt,
        max_tokens=1,
        temperature=0,
        logprobs=5,
    )
    fine_tuned_gpt3_davinci_results.append(res_fine_tuned_gpt3_davinci.choices[0].text)

    # Baseline model: same ask, wrapped in the instruction + examples template.
    modified_one_shot_prompt = _build_one_shot_prompt(one_shot_prompt, row.prompt)
    res_one_shot_gpt3 = _complete_with_retry(
        model=one_shot_model_gpt3,
        prompt=modified_one_shot_prompt,
        max_tokens=1,
        temperature=0,
    )
    one_shot_gpt3_results.append(res_one_shot_gpt3.choices[0].text)

    # Progress indicator: index of the row just processed.
    print(row.Index)

# Store both models' raw first-token outputs next to the validation rows.
validation_df['fine_tuned_gpt3_davinci'] = fine_tuned_gpt3_davinci_results
validation_df['one_shot_gpt3'] = one_shot_gpt3_results

# Persist the side-by-side predictions, then hand the file to the browser via Colab.
comparison_csv = 'comparison.csv'
validation_df.to_csv(comparison_csv)
files.download(comparison_csv)