In [2]:
import sklearn
import numpy as np
import pickle
import pandas as pd

In [2]:
# Load the pickled pool split. NOTE: pickle can execute arbitrary code on load,
# so only read files that come from a trusted source.
with open("./df_pickle/df_pool.pkl", "rb") as pool_file:
    df_pool = pickle.load(pool_file)
# Encode the string labels as integers ('y' -> 1, 'n' -> 0); any value that is
# not a key of the mapping becomes NaN.
df_pool['label'] = df_pool['label'].map(dict(y=1, n=0))

In [24]:
# Load the pickled training split. NOTE: pickle can execute arbitrary code on
# load, so only read files that come from a trusted source.
with open("./df_pickle/df_train.pkl", "rb") as train_file:
    df_train = pickle.load(train_file)

# Encode the string labels as integers ('y' -> 1, 'n' -> 0); any value that is
# not a key of the mapping becomes NaN.
df_train['label'] = df_train['label'].map(dict(y=1, n=0))

In [19]:
# Load the pickled test split. NOTE: pickle can execute arbitrary code on load,
# so only read files that come from a trusted source.
with open("./df_pickle/df_test.pkl", "rb") as test_file:
    df_test = pickle.load(test_file)

# Encode the string labels as integers ('y' -> 1, 'n' -> 0); any value that is
# not a key of the mapping becomes NaN.
df_test['label'] = df_test['label'].map(dict(y=1, n=0))

In [5]:
# Load the pickled "gen" split. NOTE: pickle can execute arbitrary code on load,
# so only read files that come from a trusted source.
with open("./df_pickle/df_gen.pkl", "rb") as gen_file:
    df_gen = pickle.load(gen_file)

# Encode the string labels as integers ('y' -> 1, 'n' -> 0); any value that is
# not a key of the mapping becomes NaN.
df_gen['label'] = df_gen['label'].map(dict(y=1, n=0))

In [6]:
from keras.models import Sequential
from keras.layers import Dense
import openai
import time
import os
import re

from dotenv import load_dotenv
# Pull variables from a local .env file into the process environment, then hand
# the OpenAI key to the client. The key is None when OPENAI_API_KEY is not set.
load_dotenv()
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [7]:
# Binary classifier over the concatenated embedding vector: two hidden ReLU
# layers of 256 units followed by a single sigmoid output unit.
# NOTE(review): input_dim=768*2 presumably matches a 768-dim question embedding
# concatenated with a 768-dim context embedding -- confirm against the data.
neural_net = Sequential([
    Dense(256, input_dim=768*2, activation='relu'),
    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Accuracy is tracked alongside the loss, so evaluate() reports both.
neural_net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [8]:
# Split off the pool rows that already carry a label: their embeddings become
# the initial training inputs and their labels the targets.
has_label = df_pool['label'].notna()
labelled_rows = df_pool[has_label]
trainset = labelled_rows['combined_embedding'].values
#pool = df_pool[df_pool['label'].isna()]['combined_embedding']
labels = labelled_rows['label'].values
assert len(trainset) == len(labels)

In [9]:
# Build one feature vector per test row by appending the context embedding to
# the question embedding, then expose features and targets as arrays.
df_test['combined_embedding'] = df_test.apply(
    lambda row: np.concatenate((row['question_embedding'], row['context_embedding'])),
    axis=1,
)
Xtest, ytest = df_test['combined_embedding'].values, df_test['label'].values

In [10]:
def oracle(idxs, df_pool):
    """Ask the ChatGPT oracle whether each selected question is answerable from its context.

    Parameters
    ----------
    idxs : iterable
        Index labels of ``df_pool`` (rows are looked up with ``df_pool.loc``)
        identifying the rows to be labelled.
    df_pool : pandas.DataFrame
        Frame providing the ``'context'`` and ``'question'`` columns.

    Returns
    -------
    list
        Exactly one entry per element of ``idxs``, in the same order: ``1`` for
        a ``'y'`` reply, ``0`` for an ``'n'`` reply, and ``None`` when the
        request raised or the reply did not contain exactly one label. Keeping
        the list aligned with ``idxs`` lets callers assign it back row by row.
    """
    new_labels = []
    req_per_min = 0
    # Start of the current rate-limit window. It must exist before the first
    # throttling check, otherwise the check itself raises UnboundLocalError.
    last_time_stamp = time.time()
    for idx in idxs:
        ctx = df_pool.loc[idx, 'context']
        q = df_pool.loc[idx, 'question']

        #Check rate limit: once the counter reaches 19, wait until more than
        #60 seconds have passed since the window started, then open a new window
        req_per_min += 1
        while req_per_min >= 19:
            time_stamp = time.time()
            if int(time_stamp - last_time_stamp) > 60:
                last_time_stamp = time_stamp
                req_per_min = 0
            else:
                time.sleep(10)

        #Ask the oracle for a label for the context/question pair
        try:
            completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                temperature=0,
                messages=[
                    {"role":"system", "content":"You are a system designed to label if a list of provided questions can be answered using ANY part of a provided context. You will always only reply in the following format for each question: `label: LABEL. LABEL should be 'y' if a question can be answered using the context and else 'n'"},
                    {"role":"user", "content": "CONTEXT: ```The man in the house has a boy named Bob and a red car. He loves ice cream``` QUESTION: ```Is the boy named Jim?```"},
                    {"role":"assistant", "content":"label: y"},
                    {"role": "user", "content": f'CONTEXT: ```{ctx}``` QUESTION: ```{q}```'}
                ]
            )
        except Exception as e:
            print("OPENAI_ERROR:",str(e))
            # Pad with None so the output stays aligned with idxs even when a
            # request fails.
            new_labels.append(None)
            continue

        #Parse the response to get labels
        res = completion.choices[0].message.content
        labels = re.findall(r'label: ([yn])', res)

        #If exactly one label is not found, assume something went wrong and pad output
        if len(labels) != 1:
            new_labels.append(None)
        elif labels[0] == 'n':
            new_labels.append(0)
        elif labels[0] == 'y':
            new_labels.append(1)

    return new_labels

In [14]:
#reset training set and pool
from multiprocessing import Pool
import worker
if __name__ ==  '__main__':
    # Query-by-committee active learning: every round trains a committee on the
    # labelled rows, picks the pool rows the committee is least sure about, asks
    # the oracle to label them and writes the answers back into df_pool.
    testacc_qbc=[] #this should hold the final accuracies: one (n_labelled, evaluate() result) tuple per round

    n_samples = 50 # number of query rounds the loop runs
    n_add = 10 # pool rows sent to the oracle per round
    n_committee = 10 # committee members trained per round

    for i in range(n_samples):
        has_label = df_pool['label'].notna()
        Xtrain = df_pool[has_label]['combined_embedding'].values
        ytrain = df_pool[has_label]['label'].values
        Xpool = df_pool[~has_label]['combined_embedding']
        if Xpool.empty:
            # Nothing left to query.
            break

        # One task per committee member, so size the process pool by the
        # committee rather than by the number of rows queried per round.
        # NOTE(review): presumably worker.task returns that member's predictions
        # for every row of Xpool -- confirm in worker.py.
        with Pool(processes=n_committee) as committee:
            ypool_lab = committee.map(worker.task, [(member_id, Xtrain, ytrain, Xpool) for member_id in range(n_committee)])

        #get probability of label for each class based on voting in the committee
        ypool_p = np.mean(np.array(ypool_lab), 0).flatten()
        #select sample with maximum disagreement (least confident):
        #positions within Xpool whose mean vote is closest to 0.5
        ypool_p_sort_idx = np.argsort(abs(0.5-ypool_p))[:n_add] #least confident
        #Refit model in all training data (the same model instance keeps training across rounds)
        neural_net.fit(np.asarray(list(Xtrain)).astype('float32'), np.array([np.array(xi) for xi in ytrain]), epochs=25, batch_size=64)
        testacc_qbc.append((len(Xtrain),neural_net.evaluate(np.asarray(list(Xtest)).astype('float32'), np.array([np.array(xi) for xi in ytest]))))
        # make labels for new points
        # ypool_p_sort_idx holds positions inside Xpool, while oracle() looks rows
        # up by df_pool index label, so translate positions to labels first.
        query_index = Xpool.iloc[ypool_p_sort_idx].index
        new_labels = oracle(query_index, df_pool)
        df_pool.loc[query_index, 'label'] = new_labels
        # NOTE(review): the message says "LR" although a neural net is fitted, and
        # the count is derived from n_samples rather than len(Xtrain) -- confirm intent.
        print('Model: LR, %i samples (QBC)'%(n_samples+i*n_add))

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Model: LR, 50 samples (QBC)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Model: LR, 60 samples (QBC)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
Model: LR, 70 samples (QBC)
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25

In [21]:
# Persist the per-round results collected by the active-learning loop.
with open("./df_pickle/test_acc.pkl", "wb") as acc_file:
    acc_file.write(pickle.dumps(testacc_qbc))

## Plot

In [None]:
# Read back the stored results of the active-learning run and of the
# random-sampling baseline. NOTE: only unpickle files from a trusted source.
with open("./df_pickle/test_acc_new.pkl", "rb") as active_file:
    test_acc = pickle.loads(active_file.read())
with open("./df_pickle/rng_acc.pkl", "rb") as random_file:
    test_acc_rng = pickle.loads(random_file.read())

In [None]:
# Each stored entry is a pair whose second item is a metrics sequence; keep the
# element at position 1 of those metrics and drop the first item of the pair.
acc = [metrics[1] for _n_labelled, metrics in test_acc]
acc_rng = [metrics[1] for _n_labelled, metrics in test_acc_rng]

In [None]:
import matplotlib.pyplot as plt
# Draw both accuracy curves on the same axes, one marker per iteration.
for curve, curve_label in ((acc, "Active learning"), (acc_rng, "Random")):
    plt.plot(curve, marker='o', label=curve_label)
plt.xlabel('Iteration')
plt.ylabel('Accuracy')
plt.title('Test accuracy per iteration (n_add=10)')
plt.legend()