## Setup

In [4]:
from google.colab import drive

# ID of the fine-tune job whose resulting model is evaluated in this notebook.
FINE_TUNE_ID = "ft-IMjooJPCPFHKUSONoaLTLZvh"

# Where Google Drive gets mounted. The two paths below are appended to this
# prefix by plain string concatenation, so the trailing slash matters.
DRIVE_MOUNT_PATH = "/drive/"
OPENAI_API_KEY_FILE = "MyDrive/colab/XAI/OPENAI_API_KEY_pa.txt"
PROCESSED_DATA_FOLDER = "MyDrive/public/TUM XAI/Classifiers/GPT3_classifier/data/"

drive.mount(DRIVE_MOUNT_PATH)

Drive already mounted at /drive/; to attempt to forcibly remount, call drive.mount("/drive/", force_remount=True).


In [5]:
!pip install --upgrade openai transformers shap lime

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting openai
  Using cached openai-0.26.4.tar.gz (55 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting transformers
  Downloading transformers-4.26.0-py3-none-any.whl (6.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m78.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting shap
  Downloading shap-0.41.0-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (575 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m575.9/575.9 KB[0m [31m61.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 KB[0m [31m32.6 MB/s[0m eta [36m0

In [6]:
import os
import random
import sqlite3
import requests
import time
from shutil import copytree, copyfile
from pathlib import Path
from typing import Optional
from datetime import datetime
from io import BytesIO

import numpy as np
import pandas as pd
import tensorflow as tf
import openai
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import GPT2TokenizerFast

# Seed Python's, NumPy's and TensorFlow's random number generators with one
# shared value.
SEED: int = 42
# Note: PYTHONHASHSEED is read at interpreter start-up, so setting it here only
# reaches processes launched afterwards, not the running kernel.
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.compat.v1.set_random_seed(SEED)

# Pretrained GPT-2 tokenizer; used further down to count tokens per prompt.
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

In [7]:
# Stage the API key file from Drive in the working directory (replacing any
# stale copy) and point the openai client at that local file.
if os.path.exists('OPENAI_API_KEY.txt'): os.remove('OPENAI_API_KEY.txt')
# os.path.join instead of `+` so the result does not depend on
# DRIVE_MOUNT_PATH ending with a slash.
copyfile(os.path.join(DRIVE_MOUNT_PATH, OPENAI_API_KEY_FILE), 'OPENAI_API_KEY.txt')
openai.api_key_path = 'OPENAI_API_KEY.txt'

# Keep the key in a variable as well. Surrounding whitespace is stripped: a
# key file that ends with a newline would otherwise leave "\n" inside the key
# and corrupt any header or comparison built from it.
with open('OPENAI_API_KEY.txt', 'r') as f:
  openai_api_key = f.read().strip()

In [8]:
# Create the local data folder; exist_ok makes this a no-op on re-runs, with
# no separate exists() check needed.
os.makedirs('data', exist_ok=True)

# Copy the test split from Drive and load it. os.path.join keeps the path
# valid whether or not the configured folder strings end with a slash.
copyfile(os.path.join(DRIVE_MOUNT_PATH, PROCESSED_DATA_FOLDER, "test.csv"), 'data/test.csv')
df_test = pd.read_csv('data/test.csv')

# Number of rows per value of the "source" column.
df_test["source"].value_counts()

real         10763
chatgpt       3410
scigen        3366
gpt2          3100
galactica     2083
Name: source, dtype: int64

In [9]:
def prepare_data(df: pd.DataFrame) -> pd.DataFrame:
  """Build the prompt/completion pairs used to query the classifier.

  Args:
    df: Frame that either already has "prompt" / "completion" columns or has
      the columns they are derived from: "source" (for the completion) and
      "title", "abstract", "introduction", "conclusion" (for the prompt).

  Returns:
    A new frame with exactly the columns ["prompt", "completion"], on the
    same index as `df`. The input frame itself is not modified.
  """
  # Work on a copy so the caller's frame does not silently gain columns.
  frame = df.copy()

  # Completion label: " 0" where source is "real", " 1" for every other source.
  if "completion" not in frame.columns:
    frame["completion"] = " 1"
    frame.loc[frame["source"] == "real", "completion"] = " 0"

  # Prompt: the four text fields under fixed headings, closed by the
  # "###" separator line.
  if "prompt" not in frame.columns:
    frame["prompt"] = (
      "Title:\n" + frame["title"]
      + "\n\nAbstract:\n" + frame["abstract"]
      + "\n\nIntroduction:\n" + frame["introduction"]
      + "\n\nConclusion:\n" + frame["conclusion"]
      + "\n\n###\n\n"
    )

  # Explicit copy so callers get an independent frame they can add columns to.
  return frame[["prompt", "completion"]].copy()

# Reduce the loaded test split to prompt/completion pairs, then show how many
# rows carry each completion label.
df_test_prepared = prepare_data(df_test)
df_test_prepared["completion"].value_counts()

 1    11959
 0    10763
Name: completion, dtype: int64

In [10]:
# Fetch the fine-tune job identified by FINE_TUNE_ID and take the name of the
# model it produced from the "fine_tuned_model" field of the response.
response = openai.FineTune.retrieve(id=FINE_TUNE_ID)
model_id = response["fine_tuned_model"]
model_id

'ada:ft-personal-2023-01-25-14-14-44'

## Testing

In [None]:
# Number of test rows sent to the model.
TEST_SIZE = 1000

# Draw TEST_SIZE distinct rows:
# - replace=False, because np.random.choice samples with replacement by
#   default and would otherwise put the same row into the test set twice;
# - .loc, because the drawn values are index labels, not positions;
# - .copy(), so later cells can add columns to their own frame.
random_indicies = np.random.choice(df_test_prepared.index.values, TEST_SIZE, replace=False)
df_test_selected = df_test_prepared.loc[random_indicies].copy()
df_test_selected

In [None]:
# Add a "tokens" column holding the number of tokenizer input ids per prompt.
# Series.map visits just the prompt column (no per-row Series as with
# apply(axis=1)), and assign() builds a new frame instead of writing into a
# frame derived from another one, so the cell is warning-free and re-runnable.
df_test_selected = df_test_selected.assign(
  tokens=df_test_selected['prompt'].map(lambda text: len(tokenizer(text)['input_ids']))
)
print("The following are too long:")
# 2048 is presumably the model's prompt token limit - confirm.
df_test_selected.loc[df_test_selected['tokens'] > 2048]

In [None]:
# Discard every prompt whose token count exceeds 2048 and keep the rest.
df_test_selected = df_test_selected.loc[~(df_test_selected['tokens'] > 2048)].copy()
df_test_selected

In [None]:
# One request carrying every selected prompt; a single token is requested per
# prompt and no log-probabilities are asked for.
response = openai.Completion.create(
  model=model_id,
  prompt=df_test_selected['prompt'].tolist(),
  max_tokens=1,
  temperature=0,
  logprobs=None,
)

In [None]:
# One row per returned choice.
results = pd.DataFrame(response["choices"])
assert len(results) == len(df_test_selected)

# Each choice carries an "index" field. Sort by it before attaching the labels:
# the two columns added below are matched by position (.values), so the rows
# must be in the same order as the prompts that were sent. Without the sort, a
# response whose choices arrive out of order would pair predictions with the
# wrong label and prompt.
results = (
  results
  .set_index('index')
  .sort_index()
  .drop(labels=['logprobs', 'finish_reason'], axis=1)
  .rename({'text': 'predicted'}, axis=1)
)
results['correct'] = df_test_selected["completion"].values
results['prompt'] = df_test_selected["prompt"].values
results

In [None]:
# Rows where the predicted completion differs from the expected one.
results[results['predicted'].ne(results['correct'])]

## Explainability