In [None]:
# import libraries
import os
import random

import numpy as np
import openai
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
import xgboost
from sklearn import tree, metrics
from sklearn.metrics import classification_report, roc_curve, roc_auc_score, confusion_matrix, accuracy_score, f1_score, cohen_kappa_score
from sklearn.model_selection import GroupKFold, train_test_split, cross_val_score, StratifiedKFold
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam, Ftrl
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tqdm import tqdm


# Seed every random number generator this notebook relies on. Seeding only
# Python's `random` module leaves NumPy- and TensorFlow-driven randomness
# unseeded, so repeated runs would not be comparable.
# NOTE(review): some GPU kernels may still be non-deterministic.
random.seed(20)
np.random.seed(20)
tf.random.set_seed(20)

### Import data and set up 5-fold CV

In [2]:
# Read the attempt-level dataset (per the original note, this CSV contains all
# valid input, one row per attempt).
df = pd.read_csv(filepath_or_buffer="think_aloud_all_platforms_2023Nov27.csv")

In [3]:
# Build the group labels for grouped cross-validation.
#
# Every row is labelled with the index of the first row in which its
# `anon_student_id` appears, so all rows of one student share a label and the
# splitter keeps them in the same fold.
#
# `group_dict` maps student id -> index label of that student's first row.
# `groups` holds one label per row of `df`, in row order.
group_dict = dict()
group_labels = []

# Iterate over the id column only (not whole rows) and collect labels in a
# list; growing a NumPy array with np.append recopies it on every row.
for index, s_id in df['anon_student_id'].items():
    # setdefault records the index the first time an id is seen and returns
    # the stored index on every later occurrence.
    group_labels.append(group_dict.setdefault(s_id, index))

# float dtype keeps the array type produced by the original np.append loop.
groups = np.array(group_labels, dtype=float)

# Set up the splitter with 5 splits
gkf = GroupKFold(n_splits = 5)

### Universal Sentence Encoder (USE)

In [None]:
# Fetch the Universal Sentence Encoder (large, v5) module from TF Hub; the
# returned object is called on text later to produce embeddings.
embed = hub.load(handle="https://tfhub.dev/google/universal-sentence-encoder-large/5")

In [None]:
## Converting text into embedding - USE
# Shape the utterances as an (n, 1) object array so that iterating over it
# yields one single-element batch per utterance.
text = np.array(df['utterance_combined'], dtype=object).reshape(-1, 1)

# Embed each utterance separately and flatten the encoder output to a 1-D
# vector; the stacked vectors form the feature matrix X.
X = np.array(
    [tf.reshape(embed(utterance), [-1]).numpy() for utterance in tqdm(text)]
)

### OpenAI

In [4]:
# Load the OpenAI API key from the OPENAI_API_KEY environment variable so the
# secret never has to be pasted into (or scrubbed out of) the notebook before
# it is shared. Falls back to the original placeholder when the variable is
# not set.
openai.api_key = os.environ.get('OPENAI_API_KEY', 'ADD YOUR KEY HERE') #api key

def get_embedding(text, model="text-embedding-3-small"):
    """Return the OpenAI embedding for a single piece of text.

    Newline characters in ``text`` are replaced with spaces before the text
    is sent, as a one-element list, to ``openai.Embedding.create``.

    Parameters
    ----------
    text : str
        The text to embed.
    model : str
        Name of the embedding model passed through to the API call.

    Returns
    -------
    The ``'embedding'`` entry of the first item in the response's ``'data'``.
    """
    single_line = text.replace("\n", " ")
    response = openai.Embedding.create(input=[single_line], model=model)
    first_result = response['data'][0]
    return first_result['embedding']

In [None]:
## Converting text into embedding - OpenAI
text = df['utterance_combined']

# Request one embedding per utterance and flatten each result to a 1-D
# vector; the stacked vectors form the feature matrix X.
X = np.array(
    [tf.reshape(get_embedding(utterance), [-1]).numpy() for utterance in tqdm(text)]
)

### prediction model

In [None]:
# Target variable: the `wrong` column of the dataset.
y = df['wrong']

In [None]:
# Grouped 5-fold cross-validation.
#
# For every fold a fresh feed-forward network is trained on the training
# groups and evaluated on the held-out groups. Out-of-fold predictions are
# gathered in `pred`, and the per-fold ROC AUC values in `roc_auc_scores_all`.

# Per-fold results are collected in plain lists and combined once after the
# loop. DataFrame.append was removed in pandas 2.0, and growing a frame or an
# array inside the loop recopies it on every iteration.
fold_predictions = []
fold_auc_scores = []

# Take the input width from the feature matrix instead of hard-coding 1536,
# so the cell works with whichever embedding cell produced X.
n_features = X.shape[1]

num_epochs = 30
batch_size = 10

for train_index, test_index in gkf.split(X, y, groups=groups):

    # Get the training and test data from the dataset for this group
    X_train = X[train_index]
    X_test = X[test_index]
    y_train = y.iloc[train_index]
    y_test = y.iloc[test_index]

    # Two hidden ReLU layers and a sigmoid output for the binary label.
    model = Sequential()
    model.add(Dense(28, input_shape=(n_features,), activation='relu'))
    model.add(Dense(28, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    # that newer Keras releases reject.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
    model.compile(loss = 'binary_crossentropy', optimizer= optimizer,metrics = ['acc'])

    model.fit(
        X_train,
        y_train,
        epochs=num_epochs,
        validation_split=0.1,
        shuffle=True,
        batch_size=batch_size)

    # test classifier on this round of testing group
    predictions = model.predict(X_test)

    # One row per test sample: first column of df (the row number, per the
    # original comment), the true label, and the predicted probability.
    pred_new = pd.concat([
        pd.DataFrame(df.iloc[test_index,[0]]).reset_index(drop=True),
        pd.DataFrame(y.iloc[test_index]).reset_index(drop=True),
        pd.DataFrame(predictions).reset_index(drop=True)],ignore_index=True, axis = 1)
    fold_predictions.append(pred_new)

    # compute some metrics and store them for averaging later on

    # AUC
    roc_auc_scores = roc_auc_score(y_test, predictions)
    fold_auc_scores.append(roc_auc_scores)

# Combine the per-fold results into the objects later cells expect.
pred = pd.concat(fold_predictions, ignore_index=True)
roc_auc_scores_all = np.array(fold_auc_scores)

# print mean scores for the 5-fold CV
print("average roc_auc: ", np.round(roc_auc_scores_all.mean(), 3))
print("stdv roc_auc: ", np.round(roc_auc_scores_all.std(), 3))

In [None]:
# Place the four USE prediction tables side by side and write them to disk.
# NOTE(review): the pred_use_* inputs are not defined in the visible part of
# this notebook - presumably saved copies of `pred` from separate runs; confirm.
use_prediction_tables = (
    pred_use_process,
    pred_use_plan,
    pred_use_act,
    pred_use_wrong,
)
pred_use = pd.concat([pd.DataFrame(table) for table in use_prediction_tables], axis=1)

pred_use.to_csv("pred_use.csv")

In [None]:
# Place the four OpenAI prediction tables side by side and write them to disk.
# NOTE(review): the pred_openai_* inputs are not defined in the visible part of
# this notebook - presumably saved copies of `pred` from separate runs; confirm.
openai_prediction_tables = (
    pred_openai_process,
    pred_openai_plan,
    pred_openai_act,
    pred_openai_wrong,
)
pred_openai_v3 = pd.concat([pd.DataFrame(table) for table in openai_prediction_tables], axis=1)

pred_openai_v3.to_csv("pred_openai_v3.csv")

## Transferability

In [34]:
# Split rows by platform: train on ORCCA and Logic Tutor, test on Stoich.
is_train_platform = df['platform'].isin(['ORCCA','Logic Tutor'])
is_test_platform = df['platform'] == 'Stoich'

# Index labels of the selected rows. Later cells use these positionally,
# which relies on df having a default 0..n-1 index.
train_indices = df.loc[is_train_platform].index
test_indices = df.loc[is_test_platform].index

In [None]:
# Transferability experiment: train on the rows selected by `train_indices`
# and report ROC AUC on the rows selected by `test_indices`.
y = df.wrong

X_train = X[train_indices]
X_test = X[test_indices]
y_train = y.iloc[train_indices]
y_test = y.iloc[test_indices]

# Take the input width from the feature matrix instead of hard-coding 1536,
# so the cell works with whichever embedding cell produced X.
n_features = X.shape[1]

# Two hidden ReLU layers and a sigmoid output for the binary label.
model = Sequential()
model.add(Dense(28, input_shape=(n_features,), activation='relu'))
model.add(Dense(28, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases reject.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss = 'binary_crossentropy', optimizer= optimizer,metrics = ['acc'])


num_epochs = 30
batch_size = 10
model.fit(
    X_train,
    y_train,
    epochs=num_epochs,
    validation_split=0.1,
    shuffle=True,
    batch_size=batch_size)

# Score the held-out rows
predictions = model.predict(X_test)

print(roc_auc_score(y_test, predictions))