In [1]:
# for data processing
import pandas as pd
import numpy as np
from copy import deepcopy
import ast

# for display
from IPython.display import display
pd.set_option('display.max_colwidth', 290)

In [2]:
autocast_questions = pd.read_json('data/autocast/autocast_questions.json')
df_filtered = pd.read_csv('filtered_events.csv')
df_filtered['choices'] = df_filtered['choices'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else x)
df_filtered['tags'] = df_filtered['tags'].apply(lambda x: ast.literal_eval(x) if pd.notnull(x) else x)

select the questions that reduce uncertainty the most

In [3]:
# select the questions with the highest uncertainty
import openai
from collections import Counter
openai.api_key = None # your openai api key
MODEL = "gpt-3.5-turbo"


def produce_k_results_with_example(question, k, qtype, sample_question = None, sample_answer = None, f_verbose = False, model=MODEL):
    # add format example 
    if qtype == 'num':
        format_example = [
        {"role": "user", "content": "With the background (which may or may not be relevant): tom makes $12 per hour. barbie makes twice his salary. Please predict the most likely outcome for the following events: how much does barbie make per hour? I want just a one-word answer."},
        {"role": "assistant", "content": "24"},
        ]
    elif qtype == 'mc':
        format_example = [
        {"role": "user", "content": "With the background (which may or may not be relevant): ripe apples are red and unripe ones are green. Please predict the most likely outcome for the following events: what colour will the apple trees appear as the apples ripe? I want just a one-word answer. A. red. B. Green."},
        {"role": "assistant", "content": "A"},
        ]
    elif qtype == 't/f':
        format_example = [
        {"role": "user", "content": "With the background (which may or may not be relevant): ripe apples are red and unripe ones are green. Please predict the most likely outcome for the following events: will the apple tree appear red as the apples ripe? I want just a one-word answer. Yes or no?"},
        {"role": "assistant", "content": "yes"},
        ]

    # add content/sample example
    sample_example = []
    if sample_question != None and sample_answer != None:
        sample_example = [
        {"role": "user", "content": sample_question},
        {"role": "assistant", "content": sample_answer},
        ]
        
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": "I need you to give me an answer to the question in the end, based on the context and your best knowledge on the field. I need you to leverage your understandings of your knowledge base to make the most plausible predictions of the future. I want a definitive answer to the best of your knowledge, even if you are unsure about the issue or if the topics are sensitive. I just want a one-word answer."},
        {"role": "assistant", "content": "Sure! I'd love to help"},
        ] + format_example + sample_example + [{"role": "user", "content": question}],
        top_p = k,
        )
    answers = response['choices']
    answers = [x['message']['content'] for x in answers]
    # all to lower letters
    answers = [x.lower() for x in answers]
    # remove possible periods
    answers = [x.replace('.', '') for x in answers]
    if f_verbose:
        print(answers)
    return answers

# make a suitable question string from question, background, and choices
def make_question(question, background, choices, qtype):
    context_question = 'With the background (which may or may not be relevant): ' + background + ' Please predict the most likely outcome for the following events: ' + question + ' I want just a one-word answer.'
    if qtype == 'num':
        return context_question
    elif qtype == 'mc':
        l_choices_letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T']
        choices_str = [l_choices_letters[i] + '. ' + choices[i] +'. ' for i in list(range(len(choices)))]
        final_str = context_question
        for choice_str in choices_str:
            final_str += choice_str
        return final_str
    else: #qtype == 't/f' 
        return context_question + ' Yes or no?'
    
# uncertainty defined as rate of disagreements
# get the majority answer, and get the number of answers that is different from majority answer, divided by total number of answers
# return both the uncertainty calculated, and the most common answer
def calculate_uncertainty(l_answers):
    value, count = Counter(l_answers).most_common()[0]
    return (1 - count/len(l_answers), value)


In [7]:
# test the function on one sample
# get the same type of question, better same tag

df_filtered.groupby(['qtype', 'tags']).agg({'question': 'count'}).reset_index().sort_values(by = 'question', ascending = False)
# the events of greatest number is t/f, ['Security and Conflict']
# choose samples from resolved events, and questions from open events
df_sample = df_filtered[(df_filtered['qtype'] == 't/f') & (df_filtered['status'] == 'Resolved')]
df_sample = df_sample[df_sample['tags'].apply(lambda x: 1 if 'Security and Conflict' in x else 0) == 1]
df_test = df_filtered[(df_filtered['qtype'] == 't/f') & (df_filtered['status'] == 'Active')]
df_test = df_test[df_test['tags'].apply(lambda x: 1 if 'Security and Conflict' in x else 0) == 1]

n_samples = 5
k = 10
question = df_test.sample(1)
samples = df_sample.sample(n_samples)

str_question = make_question(list(question['question'])[0], list(question['background'])[0], list(question['choices'])[0], list(question['qtype'])[0])
l_uncertainty = []
l_majority_answer = []
for i in range(n_samples):
    str_sample_question = make_question(list(samples['question'])[i], list(samples['background'])[i], list(samples['choices'])[i], list(samples['qtype'])[i])
    str_sample_answer = list(samples['answer'])[i]
    question_ans = produce_k_results_with_example(str_question, k, list(question['qtype'])[0], sample_question = str_sample_question, sample_answer = str_sample_answer)
    uncertainty, ans = calculate_uncertainty(question_ans)
    l_uncertainty.append(uncertainty)
    l_majority_answer.append(ans)

df_test_res = pd.DataFrame({'sample_event': list(samples['question']),
                            'sample_event_answer': list(samples['answer']),
                            'uncertainty': l_uncertainty,
                            'answer': l_majority_answer})

# to add the result for comparison without sample
question_ans = produce_k_results_with_example(str_question, k, list(question['qtype'])[0])
uncertainty, ans = calculate_uncertainty(question_ans)
df_test_res = pd.concat([pd.DataFrame({'sample_event': ['(only format example)'],
                                      'sample_event_answer': ['(not applicable)'],
                                      'uncertainty': [uncertainty],
                                      'answer': [ans]}),
                                      df_test_res])

df_test_res


Unnamed: 0,sample_event,sample_event_answer,uncertainty,answer
0,(only format example),(not applicable),0.4,uncertain
0,Will the IAEA verify that Iran's uranium stockpile has been reduced to less than 300 kg of low-enriched uranium before 1 June 2016?,yes,0.1,uncertain
1,"Between 1 April 2017 and 31 March 2018, will an armed group from Bangladesh engage in a campaign that systematically kills 1,000 or more civilians in Bangladesh?",no,0.3,no
2,"Between 1 October 2018 and 30 September 2019, will an armed group from Myanmar engage in a campaign that systematically kills 1,000 or more civilians in Myanmar?",no,0.2,no
3,Will ACLED record any civilian fatalities in Angola in October 2017?,no,0.2,uncertain
4,"Between 1 July 2018 and 30 June 2019, will an armed group from the Democratic Republic of Congo engage in a campaign that systematically kills 1,000 or more civilians in the Democratic Republic of Congo?",no,0.1,no


test if samples selected this way can result in actual performance improvement

In [48]:
# and test if they actually increase performance
k = 10

# baseline 
# run in parallel to save runtime
df_filtered_active = df_filtered[(df_filtered['status'] == 'Active') & (df_filtered['qtype'] != 'num')]
df_filtered_active['str_question'] = df_filtered_active.apply(lambda row: make_question(row['question'], row['background'], row['choices'], row['qtype']),
                                                              axis = 1)
# produce_k_results_with_example(str_question, k, list(question['qtype'])[0], sample_question = None, sample_answer = None)
# df_filtered_active['baseline_ans'] = df_filtered_active.apply(lambda row: produce_k_results_with_example(row['str_question'], k, row['qtype'], sample_question = None, sample_answer = None),
#                                                               axis = 1) 
# ^^^ the inference takes too long. parallel processing


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered_active['str_question'] = df_filtered_active.apply(lambda row: make_question(row['question'], row['background'], row['choices'], row['qtype']),


In [49]:
import os
import concurrent.futures
from threading import Semaphore



def apply_produce_k_results(row, k, semaphore):
    # Acquire the semaphore
    semaphore.acquire()
    
    try:
        result = produce_k_results_with_example(row['str_question'], k, row['qtype'], sample_question = None, sample_answer = None)
    except Exception as e: # since openai since to give frequent connection errors
        print(f"API error: {e}")
        result = None  
    finally:
        # Release the semaphore
        semaphore.release()
    
    return result


In [50]:
import psutil
import time

if __name__ == "__main__":
    df_filtered_active['baseline_ans'] = [None] * len(df_filtered_active)

    # num_cores = os.cpu_count()
    # if num_cores > 1:
    #     num_threads = num_cores - 1
    # else:
    #     num_threads = 1
    
    ## when multiple programs are run on parallel
    total_cores = os.cpu_count()
    cpu_usage = psutil.cpu_percent(interval=1)
    free_cores = total_cores - round((cpu_usage/100) * total_cores)
    while free_cores <= 1:
        time.sleep(20)
    print(str(free_cores) + ' cores available now. start running program')
    num_threads = free_cores - 1


    
    openai_rate_limit = 90000
    semaphore = Semaphore(openai_rate_limit*0.9 // num_threads)  

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        results = []
        while df_filtered_active['baseline_ans'].isnull().any():
            for idx, row in df_filtered_active[df_filtered_active['baseline_ans'].isnull()].iterrows():
                while True:
                    try:
                        result = executor.submit(apply_produce_k_results, row, k, semaphore).result()

                        # Save results after each API call
                        pd.Series([result]).to_csv('results.csv', mode='a', header=False)
                        
                        # Update the DataFrame
                        df_filtered_active.at[idx, 'baseline_ans'] = result
                        
                        # Break the loop if the API call was successful
                        break
                    except Exception as e:
                        print(f"Error when processing row {idx}: {e}. Retrying...")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered_active['baseline_ans'] = [None] * len(df_filtered_active)


8 cores available now. start running program
API error: Rate limit reached for default-gpt-3.5-turbo in organization org-tVk7Sf4AMDs1MDyWpgqxIiyS on tokens per min. Limit: 90000 / min. Current: 80763 / min. Contact us through our help center at help.openai.com if you continue to have issues.
API error: Rate limit reached for default-gpt-3.5-turbo in organization org-tVk7Sf4AMDs1MDyWpgqxIiyS on tokens per min. Limit: 90000 / min. Current: 84072 / min. Contact us through our help center at help.openai.com if you continue to have issues.
API error: Rate limit reached for default-gpt-3.5-turbo in organization org-tVk7Sf4AMDs1MDyWpgqxIiyS on tokens per min. Limit: 90000 / min. Current: 83479 / min. Contact us through our help center at help.openai.com if you continue to have issues.
API error: Rate limit reached for default-gpt-3.5-turbo in organization org-tVk7Sf4AMDs1MDyWpgqxIiyS on tokens per min. Limit: 90000 / min. Current: 82245 / min. Contact us through our help center at help.openai

In [51]:
# process answer from GPT
import re

def extract_yes_no_from_list(string_list):
    results = []
    for string in string_list:
        # return yes/no if either word is present in string
        # otherwise return None
        # if both present, return the first one
        if string is not None:
            pattern = re.compile(r'\b(yes|no)\b')
            match = pattern.search(string)
            if match is not None:
                results.append(match.group())
            else:
                results.append(None)
        else:
            results.append(None)
    return results

def extract_choice_from_list(string_list):
    results = []
    for string in string_list:
        # if the string contains only one character, return the character
        # else return None
        if len(string) == 1:
            results.append(string)
        else:
            results.append(None)
    return results

# process the raw answers produced by GPT
def process_ans(l_ans, qtype):
    # for true/false questions, extract out 'yes' or 'no' if the answer is present in the string
    # for MCQ questions,
    # this means that the cases where GPT is "uncertain" or does not give answer as "an ai language model"
    if qtype == 't/f':
        return extract_yes_no_from_list(l_ans)
    elif qtype == 'mc':
        return extract_choice_from_list(l_ans)
    
# get majority answer from the processed ans
def get_majority_answer(l_processed_ans, qtype, choices):
    # if l_processed_ans are all None, return None
    # otherwise, if the question is t/f, return the number of 'yes'/the total number of valid answers
    # else (if the question is mcq), get the numeric version of majority answer
    if all(pd.isnull(l_processed_ans)):
        return None
    else:
        non_none_ans = [x for x in l_processed_ans if x is not None]
        if qtype == 't/f':
            return non_none_ans.count('yes')/len(non_none_ans)
        elif qtype == 'mc':
            majority_ans = max(set(non_none_ans), key = non_none_ans.count)
            dic_char = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11}
            return dic_char[majority_ans.lower()]/len(choices)
        
# get whether the predicted answer is within the acceptable range (from df_filtered)
def check_whether_accurate(processed_number, acceptable_lower_boundary, acceptable_upper_boundary):
    if processed_number is None:
        return 0
    else:
        if processed_number >= acceptable_lower_boundary and processed_number <= acceptable_upper_boundary:
            return 1
        else:
            return 0
        

In [52]:
# analyse baseline 
df_filtered_active['processed_baseline_ans'] = df_filtered_active.apply(lambda row: process_ans(row['baseline_ans'], row['qtype']),
                                                                        axis = 1)
df_filtered_active['processed_baseline_number'] = df_filtered_active.apply(lambda row: get_majority_answer(row['processed_baseline_ans'], row['qtype'], row['choices']),
                                                                           axis = 1)
l_whether_accurate = df_filtered_active.apply(lambda row: check_whether_accurate(row['processed_baseline_number'], row['acceptable pred lower boundary'], row['acceptable pred upper boundary']), axis = 1)
baseline_acc = sum(l_whether_accurate)/len(l_whether_accurate)
baseline_acc


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered_active['processed_baseline_ans'] = df_filtered_active.apply(lambda row: process_ans(row['baseline_ans'], row['qtype']),
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered_active['processed_baseline_number'] = df_filtered_active.apply(lambda row: get_majority_answer(row['processed_baseline_ans'], row['qtype'], row['choices']),


0.17346938775510204

In [11]:
# with active prompt inference
import random 

l_tags = ['Series — Forecasting AI Progress', 'Economy – US – Economic Indicators', 'Economy – US', 'Novel Coronavirus (Covid-19)', 'Security and Conflict' ]
l_qtypes = ['t/f', 'mc']

dic_resolved_reservoir = {}
for tag in l_tags:
    for qtype in l_qtypes:
        dic_resolved_reservoir[(tag, qtype)] = []
        l_tag_in_tags = df_filtered['tags'].apply(lambda x: 1 if tag in x else 0)
        df_resolved_selected = df_filtered[(df_filtered['status'] == 'Resolved') & (df_filtered['qtype'] == qtype) & (l_tag_in_tags == 1) & (df_filtered['background'].notnull())]
        for i in range(len(df_resolved_selected)):
            str_sample_question = make_question(list(df_resolved_selected['question'])[i], list(df_resolved_selected['background'])[i], list(df_resolved_selected['choices'])[i], list(df_resolved_selected['qtype'])[i])
            str_sample_answer = list(df_resolved_selected['answer'])[i]
            dic_resolved_reservoir[((tag, qtype))].append((str_sample_question, str_sample_answer))


def produce_k_results_with_example_helper(str_question, k, qtype, sample_question, sample_answer):
    return produce_k_results_with_example(str_question, k, qtype, sample_question, sample_answer), sample_question

# k = number of top k results from gpt
# m = number of candidate samples
def apply_produce_k_results_ap(row, k, semaphore, m): # with active prompt
    semaphore.acquire()

    l_res = []
    l_sample_question = []

    try:
        available_tags = [x for x in row['tags'] if x in l_tags]
        key = (random.choice(available_tags), row['qtype'])
        while len(dic_resolved_reservoir[key]) == 0: # if the corresponding value list is empty
            key = ((random.choice(l_tags), row['qtype']))
        for i in range(m):
            qa_pair = random.choice(dic_resolved_reservoir[key])
            result, sample_question = produce_k_results_with_example_helper(row['str_question'], k, row['qtype'], sample_question = qa_pair[0], sample_answer = qa_pair[1])
            l_res.append(result)
            l_sample_question.append(sample_question)
    except Exception as e: # since openai since to give frequent connection errors
        print(f"API error: {e}")
        # Even if there is an error, we return the partial results
    finally:
        # Release the semaphore
        semaphore.release()
    
    return l_res, l_sample_question


In [12]:
import psutil
import time

from copy import deepcopy
from collections import defaultdict

k = 10
m = 5

if __name__ == "__main__":
    df_filtered_active_v2 = deepcopy(df_filtered[(df_filtered['status'] == 'Active') & (df_filtered['qtype'] != 'num')])
    df_filtered_active_v2['str_question'] = df_filtered_active_v2.apply(lambda row: make_question(row['question'], row['background'], row['choices'], row['qtype']),
                                                                  axis = 1)
    df_filtered_active_v2['ap_ans'] = None  
    df_filtered_active_v2['sample_question'] = None 

    # num_cores = os.cpu_count()
    # if num_cores > 1:
    #     num_threads = num_cores - 1
    # else:
    #     num_threads = 1

    ## when multiple programs are run on parallel
    total_cores = os.cpu_count()
    cpu_usage = psutil.cpu_percent(interval=1)
    free_cores = total_cores - round((cpu_usage/100) * total_cores)
    while free_cores <= 1:
        time.sleep(20)
    print(str(free_cores) + ' cores available now. start running program')
    num_threads = free_cores - 1

    openai_rate_limit = 90000
    semaphore = Semaphore(openai_rate_limit*0.9 // num_threads) 


    with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        results_dict = defaultdict(list)
        sample_questions_dict = defaultdict(list)

        while df_filtered_active_v2['ap_ans'].apply(lambda x: x is None or len(x) < m).any():
            for idx, row in df_filtered_active_v2[df_filtered_active_v2['ap_ans'].apply(lambda x: x is None or len(x) < m)].iterrows():
                try:
                    results, sample_questions = executor.submit(apply_produce_k_results_ap, row, k, semaphore, m).result()
                    for i in range(len(results)):
                        results_dict[idx].append(results[i])
                        sample_questions_dict[idx].append(sample_questions[i])

                        # Save results and corresponding sample_question after each API call
                        pd.Series([idx, results[i], sample_questions[i], row['question']], index=['index', 'ap_ans', 'sample_question', 'question']).to_csv('results.csv', mode='a', header=False)

                    # Update the DataFrame
                    df_filtered_active_v2.at[idx, 'ap_ans'] = results_dict[idx]
                    df_filtered_active_v2.at[idx, 'sample_question'] = sample_questions_dict[idx]
                    
                except Exception as e:
                    print(f"Error when processing row {idx}: {e}. Retrying...")


    df_results = pd.DataFrame([(key, v) for key, values in results_dict.items() for value in values for v in value], columns=['index', 'ap_ans'])
    df_sample_questions = pd.DataFrame([(key, v) for key, values in sample_questions_dict.items() for value in values for v in value], columns=['index', 'sample_question'])


    df_filtered_active_v2 = df_filtered_active_v2.reset_index().merge(df_results, on='index').merge(df_sample_questions, on='index').set_index('index')


7 cores available now. start running program
API error: Rate limit reached for default-gpt-3.5-turbo in organization org-tVk7Sf4AMDs1MDyWpgqxIiyS on tokens per min. Limit: 90000 / min. Current: 86338 / min. Contact us through our help center at help.openai.com if you continue to have issues.
API error: Rate limit reached for default-gpt-3.5-turbo in organization org-tVk7Sf4AMDs1MDyWpgqxIiyS on tokens per min. Limit: 90000 / min. Current: 85710 / min. Contact us through our help center at help.openai.com if you continue to have issues.
API error: Rate limit reached for default-gpt-3.5-turbo in organization org-tVk7Sf4AMDs1MDyWpgqxIiyS on tokens per min. Limit: 90000 / min. Current: 85259 / min. Contact us through our help center at help.openai.com if you continue to have issues.
API error: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)
API error: Rate limit reached for default-gpt-3.5-turbo in organization org-tVk7Sf4AMDs1MDyW

In [41]:
df_filtered_active_v2 = pd.read_pickle('filtered_events_active_prompt.pkl')
df_filtered_active_v2 = df_filtered_active_v2.groupby(['id']).first().drop(columns = ['ap_ans_y', 'sample_question_y'], axis=1)

df_filtered_active_v2.to_pickle('filtered_events_active_prompt.pkl')

In [65]:
# uncertainty defined as rate of disagreements
# get the majority answer, and get the number of answers that is different from majority answer, divided by total number of answers
# return both the uncertainty calculated, and the most common answer
# v2 to take care of invalid (None) answers after processing
def calculate_uncertainty_v2(l_answers):
    l_valid_answers = [x for x in l_answers if x is not None]
    if len(l_valid_answers) == 0:
        return (1, None)
    else:
        value, count = Counter(l_valid_answers).most_common()[0]
        return (1 - count/len(l_valid_answers), value)
    

# based on list of uncertainties calculated from calculate_uncertainty_v2,
# return the index of the best example
def pick_best_example(l_uncertainties):
    # in cases where there are multiple examples that result in equally lowest uncertainty values
    # do majority vote to take any of the example that results in the same majoirty answer

    l_uncertainties.sort(key=lambda x: x[0])
    min_float = l_uncertainties[0][0]
    min_tuples = [tup for tup in l_uncertainties if tup[0] == min_float]
    if len(min_tuples) == 1:
        return [i for i, tup in enumerate(l_uncertainties) if tup[0] == min_float][0]
    else:
        # create a dictionary to count the occurrences of each answer (as majority answer from top k inferences for that example)
        ans_counts = {}
        for _, ans in min_tuples:
            if ans in ans_counts:
                ans_counts[ans] += 1
            else:
                ans_counts[ans] = 1

        # get the answer with the most occurrences
        majority_answer = max(ans_counts, key=ans_counts.get)

        # get the indices in the original list of the tuples that have the minimum float value and the majority string
        indices = [i for i, tup in enumerate(l_uncertainties) if tup[0] == min_float and tup[1] == majority_answer]

        # return any among the selected indices
        return random.choice(indices)
    

In [68]:
df_filtered_active_v2['processed_ap_ans'] = df_filtered_active_v2.apply(lambda row: [process_ans(x, row['qtype']) for x in row['ap_ans_x']],
                                                                        axis = 1)
df_filtered_active_v2['processed_ap_number'] = df_filtered_active_v2.apply(lambda row: [get_majority_answer(x, row['qtype'], row['choices']) for x in row['processed_ap_ans']],
                                                                           axis = 1)
df_filtered_active_v2['ap_uncertainty'] = df_filtered_active_v2['processed_ap_ans'].apply(lambda l: [calculate_uncertainty_v2(x) for x in l])
df_filtered_active_v2['ap_selected_index'] = df_filtered_active_v2['ap_uncertainty'].apply(lambda x: pick_best_example(x))
l_whether_accurate = df_filtered_active_v2.apply(lambda row: check_whether_accurate(row['processed_ap_number'][row['ap_selected_index']], row['acceptable pred lower boundary'], row['acceptable pred upper boundary']), axis = 1)
ap_acc = sum(l_whether_accurate)/len(l_whether_accurate)
ap_acc

0.19387755102040816

In [70]:
df_filtered_active_v2.to_pickle('outputs/filtered_events_active_prompt.pkl')