# GPT Knowledge Tracing.ipynb

This notebook contains the code for running inference with the GPT-3.5 API in this project.

In [18]:
# Import libraries and helper code

import os
import json
import random
from time import sleep

import pandas
from sklearn.metrics import classification_report, balanced_accuracy_score

from openai import AzureOpenAI, OpenAI
import tiktoken

from gpt_helpers import *

# Switches for the KC (= topic) information used in this run.
# `should_include_KCs` is forwarded to generate_prompts as `incl_kc` in the
# prompt-generation cell below.
should_include_KCs = True
# NOTE(review): `should_include_KC_texts` is not referenced in any visible cell
# of this notebook — confirm whether it is still needed.
should_include_KC_texts = True

In [2]:
# Prepare the data and topics (= KC) for our inference.
# The three paths are handed to generate_new_file in this order: the KC list,
# the question-to-KC relationships, and the generated KC/question file
# (presumably the output written by the helper — confirm in gpt_helpers).

kcs_csv = 'dataverse_files/2_DBE_KT22_datafiles_100102_csv/KCs.csv'
question_kc_relationships_csv = 'dataverse_files/2_DBE_KT22_datafiles_100102_csv/Question_KC_Relationships.csv'
generated_kc_questions_csv = 'dataverse_files/2_DBE_KT22_datafiles_100102_csv/Generated_KC_Questions.csv'

generate_new_file(kcs_csv, question_kc_relationships_csv, generated_kc_questions_csv)

In [5]:
# Continue preparing the data: load the test interactions together with the
# question and generated KC/question files.

# Replace with the path of the test data file you want to use for inference
# (e.g. the downsampled data).
test_data_csv = ''
questions_csv = 'dataverse_files/2_DBE_KT22_datafiles_100102_csv/Questions.csv'
generated_kc_questions_csv = 'dataverse_files/2_DBE_KT22_datafiles_100102_csv/Generated_KC_Questions.csv'

# NOTE(review): N = -1 presumably means "no limit on what is read" — confirm in gpt_helpers.
data = read_data(test_data_csv, questions_csv, generated_kc_questions_csv, N=-1)

# Kept for compatibility with the original code, which added padding in case of
# unequal-length subsequences. (The current code keeps all subsequences of the
# same length.)
data = remove_padding(data)

In [6]:
# API information for the OpenAI API.
# The values are read from environment variables so that secrets never have to
# be pasted into (and accidentally committed with) the notebook. Any variable
# that is not set falls back to an empty string, exactly like the previous
# hard-coded placeholders, so the values can still be filled in by hand here.
# AZURE_OPENAI_API_KEY, OPENAI_API_VERSION and AZURE_OPENAI_ENDPOINT are the
# names the `openai` package itself reads for Azure; AZURE_OPENAI_MODEL is a
# name chosen for this notebook.
# Relies on the `os` import at the top of the notebook.

api_info = {
    'api_key': os.environ.get('AZURE_OPENAI_API_KEY', ''),
    'api_version': os.environ.get('OPENAI_API_VERSION', ''),
    'azure_endpoint': os.environ.get('AZURE_OPENAI_ENDPOINT', ''),
    'model': os.environ.get('AZURE_OPENAI_MODEL', ''),
}

In [None]:
# Build the prompts for GPT-3.5 inference together with their `gts`
# (presumably the ground-truth labels — confirm in gpt_helpers).
prompts, gts = generate_prompts(
    data,
    incl_id=False,
    incl_q=False,
    incl_kc=should_include_KCs,
    incl_diff=True,
)

# In the original code we randomly "sampled" prompts here. For the final run N
# equals the number of prompts, so this call effectively only shuffles them and
# keeps those that fit within the maximum context length of our model.
prompt_count = len(prompts)
prompts_sample, gts_sample = randomly_sample_prompts(
    prompts,
    gts,
    N=prompt_count,
    seed=0,
    max_token_len=4096,
)

# Generate predictions from the GPT-3.5 model
preds = predict(prompts_sample, gts_sample, api_info)

In [None]:
# Evaluate the predictions of the GPT-3.5 model against `gts_sample`, the
# entries that survived the shuffling/length filtering step above (so they line
# up with `preds`). The metrics themselves are defined by `evaluate` in
# gpt_helpers.
metrics = evaluate(preds, gts_sample)

# Display the resulting metrics in the cell output
print(metrics)