In [1]:
import openai
import pandas as pd
import tqdm
import glob

import warnings
warnings.filterwarnings("ignore")

In [2]:
with open("../apikey/apikey.txt", "r") as f:
    openai.api_key = f.readline().replace("\n", "")

In [3]:
def query_prompt(prompt, max_tokens=5000):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-16k",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=0,
    )
    return response["choices"][0]["message"]["content"]

In [4]:
def get_df(fname):
    def f(categories):
        for cat in categories:
            if "science" in cat:
                return True
            if "physics" in cat:
                return True
            if "chemistry" in cat:
                return True
        return False
    
    def text_preprocess(text):
        return text.replace("===", "\n").replace("==", "\n")

    df = pd.read_parquet(fname)
    df_science = df[df["categories"].apply(f)]
    df_science["text"] = "title: " + df_science["title"] + "\n" + df_science["text"].apply(text_preprocess)
    return df_science.sample(len(df_science)//20)

In [5]:
files = glob.glob("../data/wikipedia/*.parquet")

In [6]:
import time

In [7]:
import pickle

In [8]:
import json

In [13]:
from datetime import datetime as dt
import os

In [None]:
texts = []

In [14]:
import traceback 
batch_size = 1

def make_prompt(series):
    prompt = f"""
# 依頼
You are a professor at a science university and are creating a test for your students.
Using the given text, create a question in which you select the most appropriate statement from the five options in the question text. Also, extract the evidence for your answer.
The output should be an array in json format, with "prompt" as the problem statement, "A," "B," "C," "D," and "E" as choices, "answer" as the answer choice (one of A through E), and "basis" as the rationale. Please make sure that the answer choices are not all the same, e.g., all five answers are A.
In addition, five questions must be created per text. This means that the total number of questions created will be 5.

# text
## text 1 
title: {series['title']}

{series['text'].split("==")[0]}


# attention
Please create one question per text. So the total number of problems created will be five.
"""
    return prompt

def f(series):
    if series["A"] != series["A"]:
        if type(series["choices"]) == dict:
            for key in ["A", "B", "C", "D", "E"]:
                series[key] = series["choices"][key]
        elif type(series["choices"] == list):
            for i, key in enumerate(["A", "B", "C", "D", "E"]):
                series[key] = series["choices"][i]
    return series

now_date = dt.now().strftime("%Y%m%d%H%M%S")

for file in files:
    if os.path.basename(file) in ["a.parquet", "all.parquet"]:
        print(f"pass: {file}")d
        continue
    df_science = get_df(file)
    
    for i in tqdm.tqdm(range(len(df_science)), desc=file):
        try:
            series = df_science.iloc[i]
            prompt = make_prompt(series)
            text = query_prompt(prompt)
            texts_json = json.loads(text)
            for text_json in texts_json:
                text_json["wiki_id"] = series["id"]
                text_json["original_text"] = series["text"]
                texts.append(text_json)
        except Exception as e:
            print(e)
            traceback.print_exc()
            print(text)
        if i % 100 == 0:
            df_texts = pd.DataFrame(texts)
            df_texts = df_texts.apply(f, axis=1)

            df_texts.to_csv(f"output_gpt3.5_generate/{now_date}.csv")

pass: ../data/wikipedia/a.parquet
pass: ../data/wikipedia/all.parquet


../data/wikipedia/b.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 82/82 [24:42<00:00, 18.08s/it]
../data/wikipedia/c.parquet:   1%|█▌                                                                                                                                                                                                               | 1/129 [00:22<48:34, 22.77s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/open

This model's maximum context length is 16385 tokens. However, you requested 20920 tokens (15920 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is configuration entropy?",
    "A": "The portion of a system's entropy related to the positions of its constituent particles",
    "B": "The portion of a system's entropy related to the velocity or momentum of its constituent particles",
    "C": "The portion of a system's entropy related to the energy of its constituent particles",
    "D": "The portion of a system's entropy related to the spin configurations of its constituent particles",
    "E": "The portion of a system's entropy related to the number of conformations of a molecule",
    "answer": "A",
    "basis": "The text states that configuration entropy is the portion of a system's entropy that is related to discrete representative positions of its constituent particles."
  },
  {
    "prompt": "How is configu

../data/wikipedia/c.parquet:   6%|████████████▉                                                                                                                                                                                                    | 8/129 [02:07<33:58, 16.84s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 50 column 106 (char 2156)
../data/wikipedia/c.parquet:   7%|██████████████▌                                                                                                          

Expecting ',' delimiter: line 50 column 106 (char 2156)
[
  {
    "prompt": "When did the TV series 'Counterpart' premiere?",
    "A": "December 10, 2016",
    "B": "December 10, 2017",
    "C": "December 10, 2018",
    "D": "December 10, 2019",
    "E": "December 10, 2020",
    "answer": "B",
    "basis": "The prompt states that the TV series 'Counterpart' premiered on December 10, 2017."
  },
  {
    "prompt": "Who is the main actor in the TV series 'Counterpart'?",
    "A": "J. K. Simmons",
    "B": "Olivia Williams",
    "C": "Harry Lloyd",
    "D": "Nazanin Boniadi",
    "E": "Sara Serraiocco",
    "answer": "A",
    "basis": "The prompt mentions that J. K. Simmons stars in the TV series 'Counterpart' as Howard Silk."
  },
  {
    "prompt": "What is the premise of the TV series 'Counterpart'?",
    "A": "A scientist creates parallel Earths in 1987.",
    "B": "A flu pandemic kills millions in the Prime world.",
    "C": "Counterparts are used as spies and sleeper agents.",
    "D"

../data/wikipedia/c.parquet:   9%|███████████████████▎                                                                                                                                                                                            | 12/129 [03:18<33:32, 17.20s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, your messages resulted in 16567 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is combinatorial optimization?",
    "A": "Finding an optimal object from a finite set of objects",
    "B": "Finding the shortest path between two points",
    "C": "Determining the best allocation of resources",
    "D": "Solving real-world instances of mathematical problems",
    "E": "Developing the best airline network of spokes and destinations",
    "answer": "A",
    "basis": "The text states that combinatorial optimization is the process of finding an optimal object from a finite set of objects."
  },
  {
    "prompt": "Which field is combinatorial optimization related to?",
    "A": "Artificial intelligence",
    "B": "Physics",
    "C": "Chemistry",
    "D": "Biology",
    "E": "Geology",
    "answer": "A",
    "basis": "The text mentions that combinatorial optimization is related to artificial intelligence,

../data/wikipedia/c.parquet:  30%|██████████████████████████████████████████████████████████████▉                                                                                                                                                 | 39/129 [11:05<27:48, 18.54s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, you requested 17017 tokens (12017 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
    {
        "prompt": "What is the purpose of the CompTox Chemicals Dashboard?",
        "A": "To provide public access to information from EPA Action Plans",
        "B": "To identify chemicals that require further testing",
        "C": "To reduce the use of animals in chemical testing",
        "D": "To provide data for analytical scientists involved in structure identification",
        "E": "To incorporate data from several previous EPA databases into one package",
        "answer": "C",
        "basis": "The text states that the CompTox Chemicals Dashboard is used by EPA and other scientists to help identify chemicals that require further testing and reduce the use of animals in chemical testing."
    },
    {
        "prompt": "What types of data are provided by the CompTox Chemicals 

Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 10 column 83 (char 1660)
../data/wikipedia/c.parquet:  32%|██████████████████████████████████████████████████████████████████                                                                                                                                              | 41/129 [11:31<24:49, 16.92s/it]

Expecting ',' delimiter: line 10 column 83 (char 1660)
[
  {
    "prompt": "What is the definition of cocrystals?",
    "A": "Cocrystals are solids that are crystalline, single-phase materials composed of two or more different molecular or ionic compounds.",
    "B": "Cocrystals are solids that are crystalline, single-phase materials composed of two or more different molecular or ionic compounds generally in a stoichiometric ratio which are neither solvates nor simple salts.",
    "C": "Cocrystals are solids that are crystalline, single-phase materials composed of two or more different molecular or ionic compounds generally in a stoichiometric ratio which are neither solvates nor simple salts and consist of two or more components that form a unique crystalline structure having unique properties.",
    "D": "Cocrystals are solids that are crystalline, single-phase materials composed of two or more different molecular or ionic compounds generally in a stoichiometric ratio which are neith

../data/wikipedia/c.parquet:  44%|███████████████████████████████████████████████████████████████████████████████████████████▉                                                                                                                    | 57/129 [16:00<17:49, 14.85s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 20 column 336 (char 1422)
../data/wikipedia/c.parquet:  45%|█████████████████████████████████████████████████████████████████████████████████████████████▌                           

Expecting ',' delimiter: line 20 column 336 (char 1422)
[
  {
    "prompt": "According to the theory of Curvilinear Disparity, which group within a political party tends to be more ideologically extreme?",
    "A": "The party elite",
    "B": "The middle-elite",
    "C": "The non-elite",
    "D": "The party candidates",
    "E": "The convention delegates",
    "answer": "B",
    "basis": "The theory of Curvilinear Disparity states that the middle-elite within a political party tends to be more ideologically extreme than both the party elite and the non-elite. This is mentioned in the text: 'The middle-elite tend to be strongly emotionally and psychologically invested in politics and seek to project their ideological convictions to the party elite and the public at large.'"
  },
  {
    "prompt": "In a two-party system, party elites tend to favor which type of electoral strategy?",
    "A": "Appealing to a broad range of the electorate",
    "B": "Targeting an ideological niche",
    "C

../data/wikipedia/c.parquet:  71%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                           | 92/129 [26:25<11:10, 18.11s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, you requested 17076 tokens (12076 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the purpose of the Christmas Bird Count?",
    "A": "To provide population data for use in science",
    "B": "To compete at how many birds can be killed",
    "C": "To count birds on Christmas instead of killing them",
    "D": "To observe birds for recreational purposes",
    "E": "To identify the longest-running citizen science survey",
    "answer": "A",
    "basis": "The purpose of the Christmas Bird Count is to provide population data for use in science, especially conservation biology."
  },
  {
    "prompt": "Who proposed counting birds on Christmas instead of killing them?",
    "A": "Frank Chapman",
    "B": "National Audubon Society",
    "C": "Bird Studies Canada",
    "D": "Gulf Coast Bird Observatory",
    "E": "Red Nacional de Observadores de Aves",
  

../data/wikipedia/c.parquet:  93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌              | 120/129 [34:18<02:34, 17.16s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, your messages resulted in 19498 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What does physical causal closure assert?",
    "A": "All physical events have a cause outside the physical domain.",
    "B": "Physical events have only physical causes.",
    "C": "Every physical event has a non-physical cause.",
    "D": "Physical events have both physical and mental causes.",
    "E": "Physical events have no causes.",
    "answer": "B",
    "basis": "The text states that physical causal closure says that 'all physical states have pure physical causes' or that 'physical effects have only physical causes.'"
  },
  {
    "prompt": "What is the relationship between physical causal closure and physicalism?",
    "A": "Physical causal closure is a characteristic principle of physicalism.",
    "B": "Physical causal closure contradicts the principles of physicalism.",
    "C": "Physical causal closure is irre

../data/wikipedia/c.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 129/129 [36:43<00:00, 17.08s/it]
../data/wikipedia/d.parquet:   7%|██████████████▎                                                                                                                                                                                                   | 6/88 [01:39<22:47, 16.68s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 18 column 25 (char 1256)
[
    {
        "prompt": "What is the main result of Darboux's theorem?",
        "A": "Any two symplectic manifolds of the same dimension are locally symplectomorphic to one another.",
        "B": "There are no local invariants in symplectic geometry.",
        "C": "The curvature is a local invariant in Riemannian geometry.",
        "D": "The metric can always be made to take the standard form at any given point in Riemannian geometry.",
        "E": "The Darboux theorem can be strengthened to hold on a neighborhood of a submanifold.",
        "answer": "A",
        "basis": "The text states that one of the consequences of Darboux's theorem is that any two symplectic manifolds of the same dimension are locally symplectomorphic to one another."
    },
    {
        "prompt": "What is the consequence of Darboux's theorem for contact geometry?",
        "A": "There are no local invariants in symplectic geometry.",
        "B": "The curva

../data/wikipedia/d.parquet:  20%|██████████████████████████████████████████▊                                                                                                                                                                      | 18/88 [05:09<18:10, 15.58s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, you requested 16738 tokens (11738 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
    {
        "prompt": "What was Du Qinghua's major at Stanford University?",
        "A": "Solid state physics",
        "B": "Aeronautic engineering",
        "C": "Hydrodynamics",
        "D": "Mechanics",
        "E": "Material engineering",
        "answer": "C",
        "basis": "In September 1948, Du turned to Harvard University and studied hydrodynamics under the academic advice of Richard von Mises."
    },
    {
        "prompt": "Where did Du Qinghua teach after returning to China?",
        "A": "Peking University",
        "B": "Tsinghua University",
        "C": "Shanghai Jiao Tong University",
        "D": "Xi'an Jiao Tong University",
        "E": "Zhejiang University",
        "answer": "B",
        "basis": "In 1952, Du was transferred to Tsinghua University and became a tea

../data/wikipedia/d.parquet:  30%|█████████████████████████████████████████████████████████████▊                                                                                                                                                   | 26/88 [06:50<14:36, 14.14s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 34 column 56 (char 2872)
../data/wikipedia/d.parquet:  31%|████████████████████████████████████████████████████████████████▏                                                                 

Invalid \escape: line 34 column 56 (char 2872)
[
    {
        "prompt": "What is Dirichlet's ellipsoidal problem?",
        "A": "It asks under what conditions there can exist an ellipsoidal configuration at all times of a homogeneous rotating fluid mass in which the motion, in an inertial frame, is a linear function of the coordinates.",
        "B": "It asks under what conditions there can exist a spherical configuration at all times of a homogeneous rotating fluid mass in which the motion, in an inertial frame, is a linear function of the coordinates.",
        "C": "It asks under what conditions there can exist a cylindrical configuration at all times of a homogeneous rotating fluid mass in which the motion, in an inertial frame, is a linear function of the coordinates.",
        "D": "It asks under what conditions there can exist a cuboid configuration at all times of a homogeneous rotating fluid mass in which the motion, in an inertial frame, is a linear function of the coordina

../data/wikipedia/d.parquet:  53%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                                 | 47/88 [12:52<11:41, 17.10s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 4 column 33 (char 104)
../data/wikipedia/d.parquet:  55%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉          

Expecting ',' delimiter: line 4 column 33 (char 104)
[
  {
    "prompt": "What is David Annandale predominantly known for?",
    "A": "His contributions to "The Skiffy and Fanty Show" podcast",
    "B": "His numerous fiction contributions to various Warhammer 40,000 series",
    "C": "His standalone novels",
    "D": "His academic achievements",
    "E": "His podcast nominations for the Hugo Award",
    "answer": "B",
    "basis": "The text states that David Annandale is predominantly known for his numerous fiction contributions to various Warhammer 40,000 series."
  },
  {
    "prompt": "Where did David Annandale receive his BA and MA degrees?",
    "A": "University of Alberta",
    "B": "University of Manitoba",
    "C": "University of Toronto",
    "D": "University of British Columbia",
    "E": "University of Calgary",
    "answer": "B",
    "basis": "The text mentions that David Annandale received a BA and an MA from the University of Manitoba."
  },
  {
    "prompt": "What is the

../data/wikipedia/d.parquet:  66%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                       | 58/88 [16:11<08:22, 16.73s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, your messages resulted in 29842 tokens. Please reduce the length of the messages.
[
    {
        "prompt": "What did the Davisson–Germer experiment confirm?",
        "A": "The wave-particle duality of matter",
        "B": "The existence of photons",
        "C": "The diffraction pattern of electrons",
        "D": "The localized nature of particles",
        "E": "The wave mechanics approach of the Schrödinger equation",
        "answer": "A",
        "basis": "The Davisson–Germer experiment confirmed the hypothesis of wave-particle duality, which was proposed by Louis de Broglie."
    },
    {
        "prompt": "Who proposed the wave–particle duality theory?",
        "A": "Clinton Davisson",
        "B": "Lester Germer",
        "C": "Louis de Broglie",
        "D": "Walter M. Elsasser",
        "E": "Max Born",
        "answer": "C",
        "basis": "Louis de Broglie proposed the wave–particle duality theory in 1924."

../data/wikipedia/d.parquet:  84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                 | 74/88 [20:32<04:03, 17.39s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, your messages resulted in 17953 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the purpose of a direct fluorescent antibody (DFA) test?",
    "A": "To detect deposits of immunoglobulins and complement proteins in biopsies",
    "B": "To determine if a subject has a specific viral or bacterial infection",
    "C": "To grow infected nasal wash samples in cell culture",
    "D": "To detect parasitic infections",
    "E": "To test for autoimmune diseases",
    "answer": "B",
    "basis": "The text states that a direct fluorescent antibody test can be used to quickly determine if a subject has a specific viral or bacterial infection."
  },
  {
    "prompt": "How does direct immunofluorescence differ from western blotting?",
    "A": "Direct immunofluorescence uses a direct method of detection, while western blotting uses an indirect method",
    "B": "Direct immunofluorescence uses a secondary anti

../data/wikipedia/d.parquet:  86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                            | 76/88 [20:59<03:18, 16.52s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, you requested 16985 tokens (11985 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the plot of Doctor Blood's Coffin?",
    "A": "A young biochemist returns to his hometown and starts performing heart transplants on dead bodies.",
    "B": "A doctor investigates strange crimes and discovers a laboratory in a disused tin mine.",
    "C": "A nurse assists a village doctor in solving a series of burglaries and disappearances.",
    "D": "A scientist conducts controversial medical experiments in Vienna.",
    "E": "A police sergeant investigates a series of murders in Cornwall.",
    "answer": "B",
    "basis": "The evidence for this answer can be found in the plot summary of the text, which states that strange crimes are occurring in Cornwall, doctors' surgeries are being burgled, and people are disappearing. It is later revealed that the stolen medic

../data/wikipedia/d.parquet:  95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌         | 84/88 [23:05<01:07, 16.93s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 44 column 19 (char 2575)
../data/wikipedia/d.parquet:  97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 44 column 19 (char 2575)
[
    {
        "prompt": "What is a Dirac operator?",
        "A": "A differential operator that is a formal square root of a second-order operator",
        "B": "A differential operator that is a formal square root of a first-order operator",
        "C": "A differential operator that is a formal square root of a third-order operator",
        "D": "A differential operator that is a formal square root of a fourth-order operator",
        "E": "A differential operator that is a formal square root of a fifth-order operator",
        "answer": "A",
        "basis": "In mathematics and quantum mechanics, a Dirac operator is a differential operator that is a formal square root, or half-iterate, of a second-order operator such as a Laplacian."
    },
    {
        "prompt": "What did Paul Dirac originally use the Dirac operator for?",
        "A": "To factorise formally an operator for Minkowski space",
        "B": "To factorise formally an 

../data/wikipedia/d.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 88/88 [24:17<00:00, 16.56s/it]
../data/wikipedia/e.parquet:  26%|█████████████████████████████████████████████████████▌                                                                                                                                                           | 21/82 [06:14<18:42, 18.40s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/e.parquet:  27%|████████████████████████████████████████████████████████                                                                                                                                                         | 22/82 [

'str' object does not support item assignment
{
  "questions": [
    {
      "prompt": "What is the title of the 1998 BBC documentary series on geology?",
      "A": "The Time Travellers",
      "B": "The Deep",
      "C": "Ring of Fire",
      "D": "Journey to the Centre of the Earth",
      "E": "The Roof of the World",
      "answer": "Earth Story",
      "basis": "The given text states that the title of the 1998 BBC documentary series on geology is 'Earth Story'."
    },
    {
      "prompt": "Who presented the 1998 BBC documentary series on geology?",
      "A": "Aubrey Manning",
      "B": "Simon Lamb",
      "C": "David Sington",
      "D": "Alaska",
      "E": "India",
      "answer": "Aubrey Manning",
      "basis": "The given text mentions that the 1998 BBC documentary series on geology was presented by Aubrey Manning."
    },
    {
      "prompt": "When was the two-disc DVD of the series released?",
      "A": "1 November 1998",
      "B": "8 November 1998",
      "C": "15 N

../data/wikipedia/e.parquet:  33%|████████████████████████████████████████████████████████████████████▊                                                                                                                                            | 27/82 [08:05<17:30, 19.11s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, your messages resulted in 17447 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the purpose of enterprise architecture according to the Enterprise IT Design school of thought?",
    "A": "To foster and maintain the learning capabilities of enterprises",
    "B": "To guide the process of planning and designing an enterprise's IT/IS capabilities",
    "C": "To create a greater coherency between the various concerns of an enterprise",
    "D": "To improve the capabilities of the enterprise to improve itself",
    "E": "To support the design and re-design of organizational structures",
    "answer": "B",
    "basis": "According to the text, the purpose of enterprise architecture in the Enterprise IT Design school of thought is to guide the process of planning and designing an enterprise's IT/IS capabilities to meet the desired organizational objectives."
  },
  {
    "prompt": "Which organization i

../data/wikipedia/e.parquet:  66%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                       | 54/82 [14:44<06:18, 13.52s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 37 column 17 (char 1531)
../data/wikipedia/e.parquet:  67%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 37 column 17 (char 1531)
[
  {
    "prompt": "What is the Empirical Valence Bond (EVB) approach used for?",
    "A": "Calculating free-energies of a chemical reaction in condensed-phase",
    "B": "Modeling the potential energy surface of a reaction",
    "C": "Performing sampling using molecular dynamics or Monte Carlo simulations",
    "D": "Studying enzyme evolution and assisting in enzyme design",
    "E": "Parameterizing the coupling parameter using a reference reaction",
    "answer": "A",
    "basis": "The text states that the EVB approach is an approximation for calculating free-energies of a chemical reaction in condensed-phase."
  },
  {
    "prompt": "Who developed the Empirical Valence Bond (EVB) approach?",
    "A": "Arieh Warshel",
    "B": "Marcus",
    "C": "Gregory A. Voth",
    "D": "Molaris",
    "E": "Q",
    "answer": "A",
    "basis": "The text states that the EVB approach was first developed by Israeli chemist Arieh Warshel."
  },
  {
    "p

../data/wikipedia/e.parquet:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                      | 67/82 [18:44<04:42, 18.84s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, your messages resulted in 17843 tokens. Please reduce the length of the messages.
[
    {
        "prompt": "According to elite theory, power in contemporary society is primarily held by:",
        "A": "members of the economic elite and policy-planning networks",
        "B": "democratically elected officials",
        "C": "counter-elites within excluded groups",
        "D": "non-elites with diverse backgrounds",
        "E": "pluralistic social groups",
        "answer": "A",
        "basis": "The text states that elite theory posits that power is held by a small minority consisting of members of the economic elite and policy-planning networks."
    },
    {
        "prompt": "Which of the following is a characteristic of elite theory?",
        "A": "Power is decentralized and dispersed among various social groups",
        "B": "Elites have diverse backgrounds and positions",
        "C": "Non-elites have significant i

../data/wikipedia/e.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 82/82 [22:48<00:00, 16.68s/it]
../data/wikipedia/f.parquet:  40%|████████████████████████████████████████████████████████████████████████████████████▎                                                                                                                            | 25/62 [06:59<10:35, 17.16s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 24 column 24 (char 1658)
[
    {
        "prompt": "What is the force of mortality?",
        "A": "The instantaneous rate of mortality at a certain age measured on an annualized basis",
        "B": "The probability of a person dying from age x to x + 1",
        "C": "The conditional probability of a person who has attained age (x) dying between ages x and x + Δx",
        "D": "The cumulative distribution function of the continuous age-at-death random variable",
        "E": "The probability density function of the continuous age-at-death random variable",
        "answer": "A",
        "basis": "The prompt states that the force of mortality represents the instantaneous rate of mortality at a certain age measured on an annualized basis."
    },
    {
        "prompt": "How is the force of mortality related to the probability density function?",
        "A": "The force of mortality is equal to the probability density function",
        "B": "The force of mortali

../data/wikipedia/f.parquet:  74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                      | 46/62 [13:01<04:14, 15.88s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, you requested 16585 tokens (11585 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the fundamental difference between conventional batteries and flow batteries?",
    "A": "Energy is stored in the electrode material in conventional batteries, while in flow batteries it is stored in the electrolyte.",
    "B": "Energy is stored in the electrolyte in conventional batteries, while in flow batteries it is stored in the electrode material.",
    "C": "Conventional batteries use solid electroactive materials, while flow batteries use liquid electroactive materials.",
    "D": "Conventional batteries use liquid electrolytes, while flow batteries use solid electrolytes.",
    "E": "There is no fundamental difference between conventional batteries and flow batteries.",
    "answer": "A",
    "basis": "The text states that the fundamental difference between 

../data/wikipedia/f.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 62/62 [17:08<00:00, 16.59s/it]
../data/wikipedia/g.parquet:  68%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                   | 46/68 [12:37<05:18, 14.46s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 4 column 21 (char 93)
[
    {
        "prompt": "What is the vector space of a single qubit?",
        "A": "V_1 = \mathbb{C}^2",
        "B": "V_N = \left(\mathbb{C}^2\right)^{\otimes N}\cong \mathbb{C}^{2^N}",
        "C": "V_N = \left(\mathbb{C}^2\right)^{\otimes N}",
        "D": "V_N = \mathbb{C}^{2^N}",
        "E": "V_1 = \left(\mathbb{C}^2\right)^{\otimes N}",
        "answer": "A",
        "basis": "The text states that the vector space of a single qubit is V_1 = \mathbb{C}^2."
    },
    {
        "prompt": "How many generalized Pauli matrices are there for a group of N qubits?",
        "A": "4^N - 1",
        "B": "4^N",
        "C": "4^N + 1",
        "D": "2^N - 1",
        "E": "2^N",
        "answer": "A",
        "basis": "The text states that there are 4^N such generalized Pauli matrices if we include the identity I = \bigotimes_{m=1}^N I^{(m)} and 4^N - 1 if we do not."
    },
    {
        "prompt": "What are the traditional Pauli matrices the 

../data/wikipedia/g.parquet:  75%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                    | 51/68 [14:13<05:17, 18.66s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 10 column 134 (char 522)
../data/wikipedia/g.parquet:  76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting ',' delimiter: line 10 column 134 (char 522)
[
    {
        "prompt": "What does GOFAI stand for?",
        "A": "Good Old-Fashioned Artificial Intelligence",
        "B": "General Organic Food and Agriculture Institute",
        "C": "Great Opportunities for Artificial Intelligence",
        "D": "Global Organization for Artificial Intelligence",
        "E": "Good Old-Fashioned Algorithmic Intelligence",
        "answer": "A",
        "basis": "GOFAI stands for 'Good Old-Fashioned Artificial Intelligence', as mentioned in the text: 'GOFAI is an acronym for "Good Old-Fashioned Artificial Intelligence" invented by the philosopher John Haugeland in his 1985 book Artificial Intelligence: The Very Idea.'"
    },
    {
        "prompt": "Which of the following is NOT an extension of symbolic AI beyond GOFAI?",
        "A": "Probabilistic reasoning",
        "B": "Non-monotonic reasoning",
        "C": "Multi-agent systems",
        "D": "Deep learning",
        "E": "Neuro-symbo

../data/wikipedia/g.parquet:  97%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊      | 66/68 [19:06<00:42, 21.43s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/g.parquet:  99%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉   | 67/68 [19:32<00:22, 22.71s/it]

'str' object does not support item assignment
{
  "questions": [
    {
      "prompt": "What is the gyroradius?",
      "A": "The radius of the circular motion of a charged particle in the presence of a uniform magnetic field.",
      "B": "The angular frequency of the circular motion of a charged particle in the presence of a uniform magnetic field.",
      "C": "The radius of the circular motion of a charged particle in the absence of a magnetic field.",
      "D": "The angular frequency of the circular motion of a charged particle in the absence of a magnetic field.",
      "E": "The radius of the circular motion of a charged particle in the presence of a non-uniform magnetic field.",
      "answer": "A",
      "basis": "The gyroradius is defined as the radius of the circular motion of a charged particle in the presence of a uniform magnetic field."
    },
    {
      "prompt": "What is the formula for the non-relativistic gyroradius in SI units?",
      "A": "r_{g} = \\frac{m v_{\\

../data/wikipedia/g.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 68/68 [19:48<00:00, 17.48s/it]
../data/wikipedia/h.parquet:  17%|████████████████████████████████████▍                                                                                                                                                                            | 11/63 [03:10<14:41, 16.95s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/open

This model's maximum context length is 16385 tokens. However, your messages resulted in 24352 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is Henry Hasse best known for?",
    "A": "Being an American science fiction author",
    "B": "Being a fan of science fiction",
    "C": "Co-authoring Ray Bradbury's first published story",
    "D": "Writing the novelette 'He Who Shrank'",
    "E": "Editing 'Adventures in Time and Space'",
    "answer": "C",
    "basis": "The text states that Henry Hasse is best known for being the co-author of Ray Bradbury's first professionally published story, 'Pendulum'."
  },
  {
    "prompt": "Which of the following stories did Henry Hasse co-author with Ray Bradbury?",
    "A": "'The Pendulum'",
    "B": "'Gabriel's Horn'",
    "C": "'Final Victim'",
    "D": "'He Who Shrank'",
    "E": "'Adventures in Time and Space'",
    "answer": "A",
    "basis": "The text mentions that Henry Hasse co-authored the story 'Pendulum' with Ray

../data/wikipedia/h.parquet:  57%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                         | 36/63 [09:44<06:41, 14.89s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 34 column 13 (char 1592)
../data/wikipedia/h.parquet:  59%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋       

Invalid \escape: line 34 column 13 (char 1592)
[
  {
    "prompt": "What does the Hadamard–Rybczynski equation give?",
    "A": "The terminal velocity of a spherical bubble",
    "B": "The density of the ambient fluid",
    "C": "The viscosity of the bubble",
    "D": "The gravitational acceleration",
    "E": "The resultant velocity of the bubble",
    "answer": "A",
    "basis": "The Hadamard–Rybczynski equation gives the terminal velocity of a slowly moving spherical bubble through an ambient fluid."
  },
  {
    "prompt": "Which forces are neglected in the derivation of the Hadamard–Rybczynski equation?",
    "A": "Buoyancy force and drag force",
    "B": "Surface tension force and inertia force",
    "C": "Gravitational force and drag force",
    "D": "Buoyancy force and gravitational force",
    "E": "Surface tension force and gravitational force",
    "answer": "B",
    "basis": "The Hadamard–Rybczynski equation can be derived from the Navier–Stokes equations by considering only

../data/wikipedia/h.parquet:  94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋             | 59/63 [16:42<01:06, 16.66s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/h.parquet:  95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████          | 60/63 [16:46<00:38, 12.86s/it]

'str' object does not support item assignment
{"prompt": "What is the main focus of molecular biology?", "A": "The study of DNA structure", "B": "The study of protein folding", "C": "The study of macromolecules", "D": "The study of genetic mutations", "E": "The study of cellular organization", "answer": "C", "basis": "The text states that molecular biology focuses on characterizing the structure, function, and relationships between nucleic acids and proteins, which are two types of macromolecules."}


../data/wikipedia/h.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [17:37<00:00, 16.79s/it]
../data/wikipedia/i.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 75/75 [22:10<00:00, 17.74s/it]
../data/wikipedia/j.parquet:  43%|██████████████████████████████████████████████████████████████████████████████████████████▊                                                                                                                      | 33/76 [09:33<11:54, 16.61s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not 

'str' object does not support item assignment
{"prompt": "What is the title of the film?", "A": "Jurassic World: Fallen Kingdom", "B": "Jurassic Park", "C": "Jurassic World Dominion", "D": "Jurassic World: Dominion", "E": "Jurassic World: Dominion", "answer": "E", "basis": "The title of the film is 'Jurassic World Dominion', as mentioned in the text."}


../data/wikipedia/j.parquet:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                              | 65/76 [18:32<02:59, 16.28s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, you requested 17597 tokens (12597 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "Which organization publishes the Journal of Education for Sustainable Development?",
    "A": "SAGE Publications",
    "B": "Centre for Environment Education",
    "C": "Committee on Publication Ethics",
    "D": "ProQuest",
    "E": "J-Gate",
    "answer": "A",
    "basis": "The text states that the Journal of Education for Sustainable Development is published by SAGE Publications, India."
  },
  {
    "prompt": "What is the main focus of the Journal of Education for Sustainable Development?",
    "A": "Education for Sustainable Development",
    "B": "Environmental Science",
    "C": "Technology Collection",
    "D": "Engineering",
    "E": "Earth Sciences",
    "answer": "A",
    "basis": "The text mentions that the journal is a forum for discussion and dialogues in the e

../data/wikipedia/j.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [21:28<00:00, 16.95s/it]
../data/wikipedia/k.parquet:  46%|███████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                                 | 16/35 [04:45<05:37, 17.74s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 28 column 17 (char 1509)
[
    {
        "prompt": "What is the Keldysh formalism used for?",
        "A": "Describing the quantum mechanical evolution of a system in a non-equilibrium state",
        "B": "Describing the quantum mechanical evolution of a system in an equilibrium state",
        "C": "Describing the quantum mechanical evolution of a system subject to time varying external fields",
        "D": "Describing the quantum mechanical evolution of a system in a superposition state",
        "E": "Describing the quantum mechanical evolution of a system in a ground state",
        "answer": "A",
        "basis": "The Keldysh formalism is a general framework for describing the quantum mechanical evolution of a system in a non-equilibrium state or systems subject to time varying external fields."
    },
    {
        "prompt": "What is the main mathematical object in the Keldysh formalism?",
        "A": "Non-equilibrium Green's function",
        "B": "Equi

../data/wikipedia/k.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 35/35 [10:23<00:00, 17.82s/it]
../data/wikipedia/l.parquet:   5%|███████████▌                                                                                                                                                                                                      | 5/91 [01:31<25:03, 17.48s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 14 column 22 (char 800)
[
  {
    "prompt": "What is the LeRoy radius?",
    "A": "The internuclear distance between two atoms at which LeRoy-Bernstein theory becomes valid",
    "B": "The distance at which electron exchange terms are prominent",
    "C": "The distance at which atoms and molecules interact through classical physics",
    "D": "The distance at which the internuclear potential can be reasonably approximated by charge independent atomic distributions",
    "E": "The distance at which the vibrational levels can be well described by LeRoy-Bernstein theory",
    "answer": "A",
    "basis": "The LeRoy radius is defined as the internuclear distance between two atoms at which LeRoy-Bernstein theory becomes valid."
  },
  {
    "prompt": "What is the expression for the LeRoy radius?",
    "A": "R_{LR} = 2[\langle r_A^2 \rangle ^{1/2} + \langle r_B^2 \rangle ^{1/2}]",
    "B": "R_{LR} = \mathfrak{D} - C_{n}/r^{n}",
    "C": "R_{LR} = \mathfrak{D}-X_n(C_n)[v_

../data/wikipedia/l.parquet:  58%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                       | 53/91 [15:42<10:55, 17.24s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, you requested 18298 tokens (13298 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "Who was elected as a fellow of the Royal Society in 1771?",
    "A": "John Wynn Baker",
    "B": "Peter Biron, Duke of Courland",
    "C": "Alexander Dalrymple",
    "D": "William Duncan",
    "E": "Robert Erskine",
    "answer": "A",
    "basis": "The text mentions 'John Wynn Baker' as one of the fellows elected in 1771."
  },
  {
    "prompt": "Which fellow of the Royal Society elected in 1771 was a Scottish engineer?",
    "A": "John Wynn Baker",
    "B": "Peter Biron, Duke of Courland",
    "C": "Alexander Dalrymple",
    "D": "William Duncan",
    "E": "Robert Erskine",
    "answer": "E",
    "basis": "The text mentions 'Robert Erskine' as a Scottish engineer and one of the fellows elected in 1771."
  },
  {
    "prompt": "Who among the following was a physician and spe

../data/wikipedia/l.parquet:  85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                | 77/91 [22:39<04:22, 18.75s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, your messages resulted in 18478 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "Who is Joseph Adama's brother?",
    "A": "Sam Adama",
    "B": "Daniel Graystone",
    "C": "William Adama",
    "D": "Larry",
    "E": "Tomas Vergis",
    "answer": "A",
    "basis": "Joseph Adama's brother is Sam Adama, as mentioned in the text: 'His brother is Samael \"Sam\" Adama.'"
  },
  {
    "prompt": "Who is portrayed by Paula Malcomson?",
    "A": "Joseph Adama",
    "B": "Sam Adama",
    "C": "Amanda Graystone",
    "D": "Daniel Graystone",
    "E": "Zoe Graystone",
    "answer": "C",
    "basis": "Amanda Graystone is portrayed by Paula Malcomson, as mentioned in the text: 'Amanda Graystone is portrayed by Paula Malcomson.'"
  },
  {
    "prompt": "Who is the CEO of Graystone Industries?",
    "A": "Joseph Adama",
    "B": "Sam Adama",
    "C": "Amanda Graystone",
    "D": "Daniel Graystone",
    "E": "Zoe Grays

Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 34 column 17 (char 2639)
../data/wikipedia/l.parquet:  87%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                           | 79/91 [23:10<03:40, 18.39s/it]

Invalid \escape: line 34 column 17 (char 2639)
[
  {
    "prompt": "What is the Lifson–Roig model?",
    "A": "A model for the alpha helix-random coil transition of polypeptides",
    "B": "A model for the helix-coil transition of nucleic acids",
    "C": "A model for the branched polymer structure",
    "D": "A model for the tertiary structure of proteins",
    "E": "A model for the statistical mechanics of homopolymers",
    "answer": "A",
    "basis": "The Lifson–Roig model is a helix-coil transition model applied to the alpha helix-random coil transition of polypeptides."
  },
  {
    "prompt": "What is the main difference between the Lifson–Roig model and the Zimm–Bragg model?",
    "A": "The Lifson–Roig model considers three consecutive residues, while the Zimm–Bragg model considers two consecutive residues.",
    "B": "The Lifson–Roig model uses a 4x4 transfer matrix, while the Zimm–Bragg model uses a 2x2 transfer matrix.",
    "C": "The Lifson–Roig model is used for nucleic aci

../data/wikipedia/l.parquet:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                    | 82/91 [24:04<02:47, 18.64s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, you requested 17134 tokens (12134 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the main function of a library?",
    "A": "To provide physical and digital access to materials",
    "B": "To offer quiet areas for studying",
    "C": "To organize collections of documents",
    "D": "To serve as a community hub",
    "E": "To provide information literacy skills training",
    "answer": "A",
    "basis": "The text states that the main function of a library is to provide physical (hard copies) or digital access (soft copies) materials."
  },
  {
    "prompt": "What is the origin of the term 'library'?",
    "A": "It is derived from the Latin word for 'book'",
    "B": "It is derived from the Ancient Greek word for 'book container'",
    "C": "It is derived from the French word for 'book'",
    "D": "It is derived from the German word for 'book'",
  

../data/wikipedia/l.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 91/91 [26:46<00:00, 17.65s/it]
../data/wikipedia/m.parquet:  36%|██████████████████████████████████████████████████████████████████████████▋                                                                                                                                     | 47/131 [15:22<26:28, 18.91s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Expecting ',' delimiter: line 43 column 26 (char 2180)
[
  {
    "prompt": "What is the purpose of military terminology?",
    "A": "To depoliticise discussion about military operations",
    "B": "To humanize military personnel",
    "C": "To provide a precise description of military operations",
    "D": "To promote operational security",
    "E": "To distort the meaning of military operations",
    "answer": "A",
    "basis": "The text states that military terms serve to depoliticise discussion about operations."
  },
  {
    "prompt": "Why has the development of common military terminology become more important?",
    "A": "Due to the need for joint operations between different military services",
    "B": "Due to the increased complexity of international alliances and operations",
    "C": "Due to the need for extended air defense",
    "D": "Due to the criticism of military terms",
    "E": "Due to the need for operational security",
    "answer": "B",
    "basis": "The text ment

../data/wikipedia/m.parquet:  66%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                       | 86/131 [27:43<15:31, 20.69s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
[
  {
    "prompt": "What is the mismatch negativity (MMN) or mismatch field (MMF)?",
    "A": "A component of the event-related potential (ERP) to an odd stimulus in a sequence of stimuli",
    "B": "A component of the event-related potential (ERP) to a frequent stimulus in a sequence of stimuli",
    "C": "A component of the event-related potential (ERP) to a random stimulus in a sequence of stimuli",
    "D": "A component of the event-related potential (ERP) to a predictable stimulus in a sequence of stimuli",
    "E": "A component of the event-related potential (ERP) to a unique stimulus in a sequence of stimuli",
    "answer": "A",
    "basis": "The text states that the mismatch negativity (MMN) or mismatch field (MMF) is a component of the event-related potential (ERP) to an odd stimulus in a sequence of stimuli."
  },
  {
    "prompt": "Which sensory syst

../data/wikipedia/m.parquet:  76%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                 | 100/131 [37:07<10:35, 20.50s/it]

Invalid \escape: line 14 column 18 (char 689)
[
    {
        "prompt": "What is magnetic helicity?",
        "A": "A measure of the linkage, twist, and writhe of a magnetic field",
        "B": "The extent to which the field lines wrap and coil around one another",
        "C": "The helicity of a magnetic vector potential",
        "D": "The helicity of the magnetic field confined to a volume",
        "E": "The current helicity of the magnetic field",
        "answer": "A",
        "basis": "In the given text, it is mentioned that magnetic helicity is a measure of the linkage, twist, and writhe of a magnetic field."
    },
    {
        "prompt": "What is the mathematical definition of magnetic helicity?",
        "A": "H^{\mathbf f} = \int_V {\mathbf f} \cdot \left( abla\times{\mathbf f}\right)\ dV",
        "B": "H^{\mathbf M} = \int_V {\mathbf A}\cdot{\mathbf B}\ dV",
        "C": "H^{\mathbf J} = \int_V {\mathbf B}\cdot{\mathbf J}\ dV",
        "D": "H^{\mathbf M} = \int_V \left(

Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 14 column 18 (char 689)
../data/wikipedia/m.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 131/131 [47:02<00:00, 21.55s/it]
../data/wikipedia/n.parquet:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

'str' object does not support item assignment
{"prompt": "What is the definition of a non-governmental organization (NGO)?", "A": "An organization that is formed independent from government", "B": "An organization that is formed by the government", "C": "An organization that is involved in humanitarianism or the social sciences", "D": "An organization that is funded by the United Nations", "E": "An organization that is involved in lobbying for corporations", "answer": "A", "basis": "The text states that a non-governmental organization (NGO) is an organization that is formed independent from government."}



../data/wikipedia/number.parquet:  22%|█████████████████████████████████████████████▎                                                                                                                                                              | 10/45 [03:04<10:26, 17.89s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 50 column 138 (char 3159)
../data/wikipedia/number.parquet:  24%|█████████████████████████████████████████████████▊                                                                 

Expecting ',' delimiter: line 50 column 138 (char 3159)
[
    {
        "prompt": "In 1947, which event demonstrated that prehistoric peoples could have traveled from South America?",
        "A": "The Kon-Tiki raft journey across the Pacific Ocean",
        "B": "The first living things sent into space",
        "C": "The discovery of the kaon particle",
        "D": "The introduction of the first antithyroid drug",
        "E": "The invention of the clavioline",
        "answer": "A",
        "basis": "The event that demonstrated that prehistoric peoples could have traveled from South America in 1947 was the Kon-Tiki raft journey across the Pacific Ocean. This is mentioned in the text: 'August 7 – Thor Heyerdahl's balsa-wood raft, the Kon-Tiki, smashes into the reef at Raroia in the Tuamotu Islands after a 101-day, 4300-mile (6900-km) journey across the Pacific Ocean, demonstrating that prehistoric peoples could have traveled from South America.'"
    },
    {
        "prompt": "In 1

../data/wikipedia/number.parquet: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 45/45 [14:18<00:00, 19.08s/it]
../data/wikipedia/o.parquet:  20%|██████████████████████████████████████████                                                                                                                                                                        | 7/35 [02:04<08:14, 17.65s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 34 column 26 (char 2069)
[
    {
        "prompt": "What are the Onsager reciprocal relations?",
        "A": "The equality of certain ratios between flows and forces in thermodynamic systems out of equilibrium",
        "B": "The observation that temperature differences at constant pressure can cause matter flow",
        "C": "The equality of the Peltier and Seebeck coefficients of a thermoelectric material",
        "D": "The equality of the direct and reverse piezoelectric coefficients",
        "E": "The principle of detailed balance in kinetic systems",
        "answer": "A",
        "basis": "The text states that the Onsager reciprocal relations express the equality of certain ratios between flows and forces in thermodynamic systems out of equilibrium."
    },
    {
        "prompt": "What is the limitation of the Onsager reciprocal relations?",
        "A": "They do not apply when external magnetic fields or Coriolis forces are present",
        "B": "They

../data/wikipedia/o.parquet:  49%|█████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                           | 17/35 [04:56<05:09, 17.18s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
[
  {
    "prompt": "What is the purpose of an ontology in information science?",
    "A": "To represent entities, ideas, and events with their properties and relations",
    "B": "To create a controlled vocabulary of jargon between different languages",
    "C": "To enable economic decision-making through simulations and models",
    "D": "To limit complexity and organize data into information and knowledge",
    "E": "To improve problem solving within a specific domain",
    "answer": "D",
    "basis": "The text states that ontologies in information science are used to limit complexity and organize data into information and knowledge."
  },
  {
    "prompt": "What is the relationship between ontology and philosophy?",
    "A": "Ontology is a branch of philosophy",
    "B": "Ontology intersects with philosophy in areas such as metaphysics and epistemology",
   

../data/wikipedia/o.parquet:  57%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                         | 20/35 [10:49<15:14, 60.97s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 24 column 17 (char 1539)
../data/wikipedia/o.parquet:  60%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍    

Invalid \escape: line 24 column 17 (char 1539)
[
  {
    "prompt": "What is the advantage of orbital-free density functional theory?",
    "A": "It is more accurate than Kohn–Sham density functional theory models.",
    "B": "It is based on the Thomas–Fermi model.",
    "C": "It can be applied to large systems.",
    "D": "It provides guidance on how to find the functional of the electron density.",
    "E": "It accurately approximates the interacting kinetic energy.",
    "answer": "C",
    "basis": "The text states that orbital-free density functional theory has the advantage of being fast, so that it can be applied to large systems."
  },
  {
    "prompt": "What are the two terms that are not known exactly in the density functional?",
    "A": "The electronic kinetic energy and the exchange–correlation energy.",
    "B": "The electron density and the exchange–correlation energy.",
    "C": "The electronic kinetic energy and the electron density.",
    "D": "The electron density and 

../data/wikipedia/o.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 35/35 [15:49<00:00, 27.13s/it]
../data/wikipedia/other.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:55<00:00, 18.65s/it]
../data/wikipedia/p.parquet:   0%|                                                                                                                                                                                                                         | 0/111 [00:00<?, ?it/s]

This model's maximum context length is 16385 tokens. However, your messages resulted in 18381 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the chemical formula of (Diacetoxyiodo)benzene?",
    "A": "CHI(OCCH)",
    "B": "CHI(OCCH3)2",
    "C": "CHI(OCCH3)3",
    "D": "CHI(OCCH2F)2",
    "E": "CHI(OCCH2F)3",
    "answer": "A",
    "basis": "The text states that (Diacetoxyiodo)benzene has the formula CHI(OCCH)."
  },
  {
    "prompt": "How is (Diacetoxyiodo)benzene prepared?",
    "A": "By reacting iodobenzene with a mixture of acetic acid and peracetic acid",
    "B": "By reacting iodosobenzene and glacial acetic acid",
    "C": "By reacting iodine, acetic acid, and benzene with sodium perborate",
    "D": "By reacting iodine, acetic acid, and benzene with potassium peroxydisulfate",
    "E": "By reacting iodine, acetic acid, and benzene with potassium iodide",
    "answer": "A",
    "basis": "The text mentions that (Diacetoxyiodo)benzene can be prepare

Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Expecting ',' delimiter: line 30 column 138 (char 2238)
[
  {
    "prompt": "What is the main goal of teaching modern physical education?",
    "A": "To expose children and teens to a wide variety of exercise and healthy activities.",
    "B": "To teach skills to maintain a lifetime of fitness as well as health.",
    "C": "To encourage self-reporting and monitoring of exercise.",
    "D": "To individualize duration, intensity, and type of activity.",
    "E": "To focus feedback on the work, rather than the result.",
    "answer": "B",
    "basis": "The main goals in teaching modern physical education are to teach skills to maintain a lifetime of fitness as well as health. This is mentioned in the text: 'The main goals in teaching modern physical education are: To expose children and teens to a wide variety of exercise and healthy activities. To teach skills to maintain a lifetime of fitness as well as health.'"
  },
  {
    "prompt": "What are some benefits of physical education?",
  

../data/wikipedia/p.parquet:  36%|██████████████████████████████████████████████████████████████████████████▉                                                                                                                                     | 40/111 [12:48<21:11, 17.90s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, you requested 16971 tokens (11971 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is P. L. Travers best known for?",
    "A": "Writing children's books",
    "B": "Being a professional Shakespearean actress",
    "C": "Working for the British Ministry of Information",
    "D": "Being an Australian-British writer",
    "E": "Being an Australian-British actress",
    "answer": "A",
    "basis": "The text states that P. L. Travers is best known for the Mary Poppins series of books, which are usually classified as children's books."
  },
  {
    "prompt": "Where was P. L. Travers born?",
    "A": "London, England",
    "B": "Maryborough, Queensland, Australia",
    "C": "Sydney, Australia",
    "D": "Brisbane, Australia",
    "E": "Allora, Queensland, Australia",
    "answer": "B",
    "basis": "The text states that P. L. Travers was born in Maryborough,

../data/wikipedia/p.parquet:  50%|███████████████████████████████████████████████████████████████████████████████████████████████████████                                                                                                         | 55/111 [17:04<17:46, 19.05s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/p.parquet:  50%|████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                                       | 56/111 [17:09<13:30, 14.74s/it]

'str' object does not support item assignment
{
  "prompt": "Which of the following is a feature of Phyloscan?",
  "A": "It requires users to log in to access the web service.",
  "B": "It provides a statistically sensitive scan of DNA sequence data.",
  "C": "It uses the MONKEY software to model evolutionary relationships.",
  "D": "It combines statistics across multiple gene promoter regions.",
  "E": "It is a web service for protein sequence analysis.",
  "answer": "B",
  "basis": "The text states that Phyloscan provides a statistically sensitive scan of user-supplied DNA sequence data."
}


../data/wikipedia/p.parquet:  59%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                    | 66/111 [20:02<12:50, 17.12s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, you requested 19684 tokens (14684 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is Peng Shepherd's first novel?",
    "A": "The Book of M",
    "B": "The Future Library",
    "C": "The Cartographers",
    "D": "The Book of M: Kıyamet Başlıyor",
    "E": "Le Livre de M",
    "answer": "A",
    "basis": "The text states that Peng Shepherd's debut novel is The Book of M."
  },
  {
    "prompt": "Where was Peng Shepherd born and raised?",
    "A": "Phoenix, Arizona",
    "B": "New York City, New York",
    "C": "London, England",
    "D": "Paris, France",
    "E": "Beijing, China",
    "answer": "A",
    "basis": "The text mentions that Peng Shepherd was born and raised in Phoenix, Arizona."
  },
  {
    "prompt": "Which university did Peng Shepherd attend for her Bachelor's degree?",
    "A": "Arizona State University",
    "B": "New York University",

../data/wikipedia/p.parquet:  66%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                       | 73/111 [22:24<16:43, 26.41s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/p.parquet:  67%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                     | 74/111 [22:44<15:02, 24.40s/it]

'str' object does not support item assignment
{
  "questions": [
    {
      "prompt": "What is polyaddition?",
      "A": "A polymerization reaction that forms polymers via individual independent addition reactions",
      "B": "A reaction between functional groups on molecules with high degrees of polymerization",
      "C": "A reaction between functional groups on molecules with low degrees of polymerization",
      "D": "A reaction that forms polymers via chain polymerization",
      "E": "A reaction that forms polymers via polycondensation",
      "answer": "A",
      "basis": "The text states that polyaddition is a polymerization reaction that forms polymers via individual independent addition reactions."
    },
    {
      "prompt": "What type of species is formed in polyaddition?",
      "A": "Dimers",
      "B": "Trimers",
      "C": "Oligomers",
      "D": "Polymers",
      "E": "Monomers",
      "answer": "D",
      "basis": "The text mentions that polyaddition forms species

../data/wikipedia/p.parquet:  88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                        | 98/111 [30:26<03:46, 17.40s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/p.parquet:  89%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                      | 99/111 [30:30<02:41, 13.44s/it]

'str' object does not support item assignment
{"prompt": "What is public choice theory?", "A": "The use of economic tools to deal with traditional problems of political science", "B": "The study of political behavior", "C": "The study of self-interested agents and their interactions in politics", "D": "The study of social choice theory", "E": "The study of constitutional economics", "answer": "C", "basis": "According to the text, public choice theory is the subset of positive political theory that studies self-interested agents (voters, politicians, bureaucrats) and their interactions."}


../data/wikipedia/p.parquet:  91%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                  | 101/111 [31:09<02:42, 16.22s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 50 column 195 (char 2687)
../data/wikipedia/p.parquet:  92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting ',' delimiter: line 50 column 195 (char 2687)
[
    {
        "prompt": "What is the title of Friedrich Schelling's 1809 work?",
        "A": "Philosophical Inquiries into the Essence of Human Freedom",
        "B": "The Problem of Radical Evil",
        "C": "The Unification of View",
        "D": "The Existence of Evil",
        "E": "The Emergence into Reason",
        "answer": "A",
        "basis": "The text states that Friedrich Schelling's 1809 work is titled 'Philosophical Inquiries into the Essence of Human Freedom.'"
    },
    {
        "prompt": "What is the main theme of Friedrich Schelling's work?",
        "A": "The problem of evil",
        "B": "The existence of God",
        "C": "The nature of freedom",
        "D": "The influence of Spinoza",
        "E": "The critique of Hegel",
        "answer": "C",
        "basis": "The text states that the main theme of Friedrich Schelling's work is 'to give an account of human freedom.'"
    },
    {
        "prompt"

../data/wikipedia/p.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 111/111 [33:48<00:00, 18.28s/it]
../data/wikipedia/q.parquet:  36%|████████████████████████████████████████████████████████████████████████████▎                                                                                                                                     | 4/11 [01:18<02:11, 18.85s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 34 column 11 (char 1726)
[
  {
    "prompt": "What is Quickprop?",
    "A": "An iterative method for determining the minimum of the loss function of an artificial neural network",
    "B": "A classification algorithm for artificial neural networks",
    "C": "A method for approximating the gradient step of an artificial neuron",
    "D": "A learning algorithm for backpropagation networks",
    "E": "A method for describing the loss function as an upwardly open parabola",
    "answer": "A",
    "basis": "The text states that Quickprop is an iterative method for determining the minimum of the loss function of an artificial neural network."
  },
  {
    "prompt": "What is the assumption made in Quickprop?",
    "A": "The loss function is locally approximately square",
    "B": "The loss function is linear",
    "C": "The loss function is convex",
    "D": "The loss function is differentiable",
    "E": "The loss function is constant",
    "answer": "A",
    "basis": 

Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 14 column 13 (char 918)
../data/wikipedia/q.parquet:  55%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                               | 6/11 [02:16<02:02, 24.50s/it]

Invalid \escape: line 14 column 13 (char 918)
[
  {
    "prompt": "What is quantum excitation in circular accelerators or storage rings?",
    "A": "The effect of radiation emitted by electrons moving through a magnetic field",
    "B": "The effect of discrete packets of photons emitted by charged particles",
    "C": "The effect of a random walk or diffusion process caused by photon emission",
    "D": "The effect of quantum mechanics on the distribution of emitted photons",
    "E": "The effect of a bending magnet on the energy spectrum of electrons",
    "answer": "C",
    "basis": "The given text states that quantum excitation is the effect in circular accelerators or storage rings whereby the discreteness of photon emission causes the charged particles (typically electrons) to undergo a random walk or diffusion process."
  },
  {
    "prompt": "How is the spectrum of a bending magnet described in the context of quantum excitation?",
    "A": "S(\xi)=\frac{9\sqrt{3}}{8\pi}\xi\int_0

../data/wikipedia/q.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11/11 [04:00<00:00, 21.83s/it]
../data/wikipedia/r.parquet:  78%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                             | 62/79 [18:27<05:18, 18.75s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/open

This model's maximum context length is 16385 tokens. However, your messages resulted in 17551 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is Reynolds 531?",
    "A": "A brand name for a type of steel tubing",
    "B": "A type of race car chassis",
    "C": "A type of aircraft component",
    "D": "A type of bicycle frame tubing",
    "E": "A type of medium-carbon steel",
    "answer": "D",
    "basis": "The text states that Reynolds 531 is a brand name for a manganese-molybdenum, medium-carbon steel tubing that was used in bicycle frame tubing."
  },
  {
    "prompt": "Why did Reynolds 531 become the tubing of choice for most frame builders?",
    "A": "It had a wide range of butting, diameters, and thicknesses of tubes",
    "B": "It was the only available tubing for frame builders",
    "C": "It was the cheapest option for frame builders",
    "D": "It had the best heat treatment and cold-forming cycles",
    "E": "It was the lightest tubing available"

../data/wikipedia/r.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 79/79 [23:17<00:00, 17.69s/it]
../data/wikipedia/s.parquet:  11%|███████████████████████▏                                                                                                                                                                                        | 22/197 [06:41<49:48, 17.08s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/s.parquet:  12%|████████████████████████▎                                                                                                                                                                                       | 23/197 [

'str' object does not support item assignment
{"prompt": "What is Stanisław Lem best known for?", "A": "Being a philosopher", "B": "Being a poet", "C": "Being a science fiction writer", "D": "Being a mathematician", "E": "Being a historian", "answer": "C", "basis": "Lem is best known as a science fiction writer. The text states that he was a Polish writer of science fiction and essays on various subjects."}


../data/wikipedia/s.parquet:  25%|███████████████████████████████████████████████████▋                                                                                                                                                            | 49/197 [15:19<45:55, 18.62s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/s.parquet:  25%|████████████████████████████████████████████████████▊                                                                                                                                                           | 50/197 [15:34<43:21, 17.70s/it]

'str' object does not support item assignment
{
  "questions": [
    {
      "prompt": "What is the diameter of Solis Planum on Mars?",
      "A": "1811.23 km",
      "B": "26.4 S",
      "C": "270.33 E",
      "D": "1973",
      "E": "Phoenicis Lacus quadrangle",
      "answer": "A",
      "basis": "The text states that Solis Planum has a diameter of 1811.23 km."
    },
    {
      "prompt": "What is the center latitude of Solis Planum on Mars?",
      "A": "1811.23 km",
      "B": "26.4 S",
      "C": "270.33 E",
      "D": "1973",
      "E": "Phoenicis Lacus quadrangle",
      "answer": "B",
      "basis": "The text states that the center latitude of Solis Planum is 26.4 S."
    },
    {
      "prompt": "What is the center longitude of Solis Planum on Mars?",
      "A": "1811.23 km",
      "B": "26.4 S",
      "C": "270.33 E",
      "D": "1973",
      "E": "Phoenicis Lacus quadrangle",
      "answer": "C",
      "basis": "The text states that the center longitude of Solis Planum is 

../data/wikipedia/s.parquet:  28%|██████████████████████████████████████████████████████████                                                                                                                                                      | 55/197 [17:14<46:44, 19.75s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, you requested 18422 tokens (13422 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
    {
        "prompt": "Which of the following is NOT a method of soil stabilization?",
        "A": "Enzymes",
        "B": "Surfactants",
        "C": "Calcium chloride",
        "D": "Geogrids",
        "E": "Bitumen emulsions",
        "answer": "D",
        "basis": "The text mentions various methods of soil stabilization, including enzymes, surfactants, calcium chloride, and bitumen emulsions. However, geogrids are mentioned as a mechanical soil stabilization technique, not a method of soil stabilization."
    },
    {
        "prompt": "What is the purpose of using magnesium chloride for soil stabilization?",
        "A": "To increase the weight-bearing capabilities of subsoils",
        "B": "To prevent road failure from water penetration",
        "C": "To improve the load-bearing ca

../data/wikipedia/s.parquet:  37%|████████████████████████████████████████████████████████████████████████████                                                                                                                                    | 72/197 [22:16<37:57, 18.22s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 14 column 15 (char 814)
../data/wikipedia/s.parquet:  37%|█████████████████████████████████████████████████████████████████████████████                                                      

Invalid \escape: line 14 column 15 (char 814)
[
    {
        "prompt": "What are spherical multipole moments?",
        "A": "Coefficients in a series expansion of a potential that varies inversely with the distance to a source",
        "B": "The electric potential due to a point charge",
        "C": "The magnetic potential due to a point charge",
        "D": "The gravitational potential due to a point charge",
        "E": "The coefficients in a series expansion of a potential that varies directly with the distance to a source",
        "answer": "A",
        "basis": "The text states that spherical multipole moments are the coefficients in a series expansion of a potential that varies inversely with the distance to a source."
    },
    {
        "prompt": "What is the formula for the electric potential due to a point charge?",
        "A": "\Phi(\mathbf{r}) = \frac{q}{4\pi\varepsilon} \frac{1}{R}",
        "B": "\Phi(\mathbf{r}) = \frac{q}{4\pi\varepsilon} \frac{1}{\sqrt{r^2 + r

../data/wikipedia/s.parquet:  42%|██████████████████████████████████████████████████████████████████████████████████████▌                                                                                                                         | 82/197 [25:46<33:06, 17.27s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 40 column 143 (char 2653)
../data/wikipedia/s.parquet:  42%|███████████████████████████████████████████████████████████████████████████████████████▋                                 

Expecting ',' delimiter: line 40 column 143 (char 2653)
[
  {
    "prompt": "What is the plot of the film Schoolgirl Apocalypse?",
    "A": "A Japanese schoolgirl witnesses the mass murder of her female friends and family members and sets out to survive.",
    "B": "A group of men in a small town turn into zombies and begin to attack the women.",
    "C": "A woman named Aoi offers support and help to the protagonist Sakura.",
    "D": "Sakura finds a red-headed boy named Billy who helps her survive the zombie apocalypse.",
    "E": "Sakura loses her grip on reality and decides to give up and die.",
    "answer": "A",
    "basis": "The evidence for this answer can be found in the plot description: 'When all the men in her small town turn into zombies and begin to attack the women, Japanese schoolgirl Sakura (Higarino) finds her life turned upside down. The trauma of witnessing the mass murder of her female friends and family members pushes her toward madness. Having few skills and armed

../data/wikipedia/s.parquet:  55%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                             | 108/197 [33:38<26:49, 18.08s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 40 column 90 (char 1761)
../data/wikipedia/s.parquet:  55%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌               

Invalid \escape: line 40 column 90 (char 1761)
[
    {
        "prompt": "What is specific kinetic energy?",
        "A": "The kinetic energy of an object per unit of mass",
        "B": "The potential energy of an object per unit of mass",
        "C": "The total energy of an object per unit of mass",
        "D": "The work done on an object per unit of mass",
        "E": "The heat energy of an object per unit of mass",
        "answer": "A",
        "basis": "Specific kinetic energy is defined as the kinetic energy of an object per unit of mass."
    },
    {
        "prompt": "What is the formula for specific kinetic energy?",
        "A": "e_k = \frac{1}{2} v^2",
        "B": "e_k = \frac{1}{2} m v^2",
        "C": "e_k = m v^2",
        "D": "e_k = \frac{1}{2} m^2 v^2",
        "E": "e_k = \frac{1}{2} m v",
        "answer": "A",
        "basis": "The formula for specific kinetic energy is e_k = \frac{1}{2} v^2."
    },
    {
        "prompt": "What are the units of specific kine

../data/wikipedia/s.parquet:  70%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                               | 137/197 [42:34<18:22, 18.37s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/s.parquet:  70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                              | 138/197 [42:53<18:06, 18.41s/it]

'str' object does not support item assignment
{
  "questions": [
    {
      "prompt": "What is the astrophysical S-factor in nuclear physics?",
      "A": "A rescaling of a nuclear reaction's total cross section to account for the Coulomb repulsion between the charged reactants",
      "B": "The energy of a nuclear fusion reaction that occurs in the cores of stars",
      "C": "The dimensionless Sommerfeld parameter in a nuclear reaction",
      "D": "The product of the charges of the reactants in a nuclear reaction",
      "E": "The relative incident velocity in a nuclear reaction",
      "answer": "A",
      "basis": "The text states that the astrophysical S-factor is a rescaling of a nuclear reaction's total cross section to account for the Coulomb repulsion between the charged reactants."
    },
    {
      "prompt": "What does the astrophysical S-factor factor out in the cross section of a nuclear reaction?",
      "A": "The energy",
      "B": "The dimensionless Sommerfeld param

../data/wikipedia/s.parquet:  93%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎              | 183/197 [56:31<04:23, 18.80s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, your messages resulted in 16783 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is seismic anisotropy?",
    "A": "The variation in values with direction at a point",
    "B": "The variation in values between two or more points",
    "C": "The dependence of seismic velocity on direction or angle",
    "D": "The variation in seismic wave velocity with depth",
    "E": "The variation in seismic wave velocity with frequency",
    "answer": "C",
    "basis": "Seismic anisotropy is defined as the dependence of seismic velocity on direction or angle."
  },
  {
    "prompt": "What are the two main types of anisotropy?",
    "A": "Vertical and horizontal anisotropy",
    "B": "Transverse and longitudinal anisotropy",
    "C": "Isotropic and anisotropic anisotropy",
    "D": "Polar and non-polar anisotropy",
    "E": "VTI and HTI anisotropy",
    "answer": "E",
    "basis": "The two main types of anisotrop

../data/wikipedia/s.parquet:  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍          | 187/197 [57:28<02:53, 17.38s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, your messages resulted in 18540 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is an annular solar eclipse?",
    "A": "A solar eclipse that occurs when the Moon passes between Earth and the Sun",
    "B": "A solar eclipse that occurs when the Moon's apparent diameter is smaller than the Sun's",
    "C": "A solar eclipse that appears as a partial eclipse over a region of the Earth",
    "D": "A solar eclipse that causes the Sun to look like an annulus (ring)",
    "E": "A solar eclipse that is visible from Gilbert and Ellice Islands, Mexico, and the United States",
    "answer": "D",
    "basis": "According to the text, an annular solar eclipse occurs when the Moon's apparent diameter is smaller than the Sun's, blocking most of the Sun's light and causing the Sun to look like an annulus (ring)."
  },
  {
    "prompt": "Where was annularity visible during the solar eclipse of April 7, 1940?",
    

../data/wikipedia/s.parquet:  99%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 196/197 [1:00:16<00:21, 21.07s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/s.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 197/197 [1:00:23<00:00, 18.39s/it]

'str' object does not support item assignment
{"prompt": "What is the main theme of Slaughterhouse-Five?", 
 "A": "The horrors of war and the impact on individuals", 
 "B": "The power of religion and faith", 
 "C": "The concept of free will and determinism", 
 "D": "The importance of mental health and well-being", 
 "E": "The role of postmodernism in literature", 
 "answer": "A", 
 "basis": "The text states that Slaughterhouse-Five is an anti-war novel that attempts to come to terms with war through the eyes of the protagonist, Billy Pilgrim. It explores the horrors of war and its impact on individuals, as well as the lasting effects of trauma and the desensitization to death. This aligns with option A, which states that the main theme is the horrors of war and the impact on individuals."}



../data/wikipedia/t.parquet:  21%|███████████████████████████████████████████▉                                                                                                                                                                    | 45/213 [13:43<49:36, 17.72s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)

This model's maximum context length is 16385 tokens. However, you requested 21213 tokens (16213 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the title of the 1933 French-German science fiction film directed by Curtis Bernhardt?",
    "A": "The Tunnel",
    "B": "The Tunnel (1933 French-language film)",
    "C": "The Tunnel (1933 German-language film)",
    "D": "The Tunnel (1935 English-language film)",
    "E": "The Tunnel (1933 French-German-language film)",
    "answer": "B",
    "basis": "The prompt states that the film is a 1933 French-German science fiction film directed by Curtis Bernhardt, and the evidence in the text confirms that the title of the film is 'The Tunnel (1933 French-language film).'"
  },
  {
    "prompt": "Who is the main actor in the film?",
    "A": "Jean Gabin",
    "B": "Madeleine Renaud",
    "C": "Robert Le Vigan",
    "D": "Edmond Van Daële",
    "E": "André Nox",
    "answe

../data/wikipedia/t.parquet:  29%|████████████████████████████████████████████████████████████▌                                                                                                                                                   | 62/213 [18:20<39:29, 15.69s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 33 column 54 (char 1203)
../data/wikipedia/t.parquet:  30%|█████████████████████████████████████████████████████████████▌                                                                    

Invalid \escape: line 33 column 54 (char 1203)
[
    {
        "prompt": "What is the main application of topological recursion?",
        "A": "Enumerative geometry",
        "B": "Random matrix theory",
        "C": "Knot theory",
        "D": "Mathematical physics",
        "E": "String theory",
        "answer": "A",
        "basis": "The text states that topological recursion has applications in enumerative geometry."
    },
    {
        "prompt": "What is the origin of topological recursion?",
        "A": "Random matrices",
        "B": "Algebraic geometry",
        "C": "Enumerative geometry",
        "D": "String theory",
        "E": "Knot theory",
        "answer": "A",
        "basis": "The text mentions that the topological recursion was first discovered in random matrices."
    },
    {
        "prompt": "What is the recursive definition of infinite sequences of symmetric meromorphic n-forms in topological recursion?",
        "A": "2g-2+n",
        "B": "2-2g+n",
      

../data/wikipedia/t.parquet:  45%|█████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                                                  | 96/213 [28:06<37:01, 18.99s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/t.parquet:  46%|██████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                                                 | 97/213 [28:20<33:54, 17.54s/it]

'str' object does not support item assignment
{
  "questions": [
    {
      "prompt": "What is the title of the 1957 science fiction novel by A. E. Van Vogt?",
      "A": "The Great Judge",
      "B": "The Mind Cage",
      "C": "The Tortuous Plot",
      "D": "The Enemy of the State",
      "E": "The Jig-saw Puzzle",
      "answer": "B",
      "basis": "The text explicitly states that the title of the novel is 'The Mind Cage'."
    },
    {
      "prompt": "Who risks his career to defend Wade Trask in the novel?",
      "A": "David Marin",
      "B": "Floyd C. Gale",
      "C": "A. E. Van Vogt",
      "D": "Simon & Schuster",
      "E": "Wade Trask",
      "answer": "A",
      "basis": "The text mentions that David Marin risks his career to defend Wade Trask."
    },
    {
      "prompt": "What is David Marin accused of after his brain is switched with Wade Trask?",
      "A": "Sedition",
      "B": "Torture",
      "C": "Imagination",
      "D": "Missing pieces",
      "E": "Body sw

../data/wikipedia/t.parquet:  47%|█████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                                                             | 100/213 [29:12<33:02, 17.54s/it]

Expecting ',' delimiter: line 10 column 153 (char 654)
[
    {
        "prompt": "What is the main plot of the film 'The Last Mimzy'?",
        "A": "A scientist sends high tech devices back in time to modern day Seattle",
        "B": "Two children discover high tech devices that resemble toys",
        "C": "The children gain genius-level intellects and psionic powers",
        "D": "The Mimzy absorbs a tear from Emma, which contains her DNA",
        "E": "Mimzy provides the genetic information required to restore humanity",
        "answer": "B",
        "basis": "The main plot of the film 'The Last Mimzy' is that two children discover high tech devices that resemble toys. The text states, 'The "toys" are initially incomprehensible to them, other than one which appears to be a stuffed rabbit. The children keep their discovery secret from their parents.'"
    },
    {
        "prompt": "Who is the only one who can use the 'spinners' in the film 'The Last Mimzy'?",
        "A": "Noah

Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 10 column 153 (char 654)
../data/wikipedia/t.parquet:  53%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                                 | 113/213 [33:05<26:15, 15.75s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib

Invalid \uXXXX escape: line 14 column 15 (char 720)
[
  {
    "prompt": "What is thrust in high energy physics?",
    "A": "A property used to characterize the collision of high energy particles in a collider",
    "B": "A property used to measure the coherence of the group of particles resulting from one collision",
    "C": "A property used to quantify the momentum of particle i",
    "D": "A property used to define the thrust axis",
    "E": "A property used to analyze high-energy collisions as two-body processes",
    "answer": "A",
    "basis": "The text states that thrust is a property used to characterize the collision of high energy particles in a collider."
  },
  {
    "prompt": "What is the definition of thrust in high energy physics?",
    "A": "T= \underset{|n|=1}{\operatorname{max}} \bigg[\frac{\sum_i|p_i.n|}{\sum_i|p_i|}\bigg]",
    "B": "Thrust is the sum of the momentum of all final particles resulting from the collision",
    "C": "Thrust is the sum of the momentum of

../data/wikipedia/t.parquet:  85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                              | 182/213 [53:38<09:52, 19.12s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 50 column 132 (char 2054)
../data/wikipedia/t.parquet:  86%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting ',' delimiter: line 50 column 132 (char 2054)
[
  {
    "prompt": "What is the title of the novel?",
    "A": "The Dimension Riders",
    "B": "Doctor Who Magazine #206",
    "C": "Alternate Universe cycle",
    "D": "No Future",
    "E": "Seventh Doctor novels",
    "answer": "A",
    "basis": "The evidence for the answer can be found in the first sentence of the text: 'The Dimension Riders is an original novel written by Daniel Blythe and based on the long-running British science fiction television series Doctor Who.'"
  },
  {
    "prompt": "Who are the main characters in the novel?",
    "A": "Daniel Blythe",
    "B": "Ace and Bernice",
    "C": "The Seventh Doctor",
    "D": "Doctor Who Magazine",
    "E": "Virgin New Adventures",
    "answer": "B",
    "basis": "The evidence for the answer can be found in the first sentence of the text: 'It features the Seventh Doctor, Ace and Bernice.'"
  },
  {
    "prompt": "What is the genre of the novel?",
    "A": "British novels"

../data/wikipedia/t.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [1:03:02<00:00, 17.76s/it]
../data/wikipedia/u.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [05:58<00:00, 19.91s/it]
../data/wikipedia/v.parquet:  83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                    | 24/29 [07:32<01:34, 18.84s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py

This model's maximum context length is 16385 tokens. However, your messages resulted in 44243 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the plot of the novel Valentine Pontifex?",
    "A": "The Metamorphs try to drive people from their native world Majipoor by spreading ecological problems.",
    "B": "Valentine Pontifex is a novel about a group of scientists trying to save their planet from an ecological disaster.",
    "C": "Valentine Pontifex is a novel about a young boy who discovers his magical powers.",
    "D": "The novel follows the journey of a group of explorers searching for a lost civilization.",
    "E": "Valentine Pontifex is a novel about a love triangle set in a futuristic society.",
    "answer": "A",
    "basis": "The evidence for this answer can be found in the plot summary, which states that the Metamorphs try to drive people from their native world Majipoor by spreading ecological problems."
  },
  {
    "prompt": "What did Dave

../data/wikipedia/v.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [08:39<00:00, 17.92s/it]
../data/wikipedia/w.parquet:   0%|                                                                                                                                                                                                                          | 0/44 [00:00<?, ?it/s]

This model's maximum context length is 16385 tokens. However, you requested 19354 tokens (14354 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What was the first nova to have the degree of linear polarization of its light measured?",
    "A": "V446 Herculis",
    "B": "Olaf Hassel",
    "C": "Hercules",
    "D": "Aquila",
    "E": "1960",
    "answer": "V446 Herculis",
    "basis": "The text states that V446 Herculis was the first nova to have the degree of linear polarization of its light measured."
  },
  {
    "prompt": "When was V446 Herculis observed as a 5th magnitude star?",
    "A": "7 March 1960",
    "B": "Early morning hours",
    "C": "3 days past peak brightness",
    "D": "2 magnitudes",
    "E": "Olaf Hassel",
    "answer": "7 March 1960",
    "basis": "The text mentions that V446 Herculis was observed as a 5th magnitude star in the early morning hours of 7 March 1960."
  },
  {
    "prompt": "What i

Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_5080/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

'str' object does not support item assignment
{"prompt": "What is wetting?", "A": "The ability of a liquid to maintain contact with a solid surface", "B": "The process of a liquid spreading across a solid surface", "C": "The interaction between a liquid and a solid surface", "D": "The bonding or adherence of two materials", "E": "The ability of a liquid to minimize contact with a solid surface", "answer": "A", "basis": "According to the text, wetting is defined as the ability of a liquid to maintain contact with a solid surface."}


../data/wikipedia/w.parquet:  43%|██████████████████████████████████████████████████████████████████████████████████████████▎                                                                                                                      | 19/44 [05:16<08:05, 19.42s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/w.parquet:  45%|███████████████████████████████████████████████████████████████████████████████████████████████                                                                                                                  | 20/44 [05:20<05:56, 14.87s/it]

'str' object does not support item assignment
{"prompt": "What is the main focus of world-systems theory?", "A": "The rise and fall of states", "B": "Income inequality", "C": "Social unrest", "D": "Imperialism", "E": "The world-system as the primary unit of social analysis", "answer": "E", "basis": "The text states that world-systems theory emphasizes the world-system (and not nation states) as the primary unit of social analysis."}


../data/wikipedia/w.parquet:  77%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                               | 34/44 [09:28<02:53, 17.39s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 10 column 306 (char 1042)
../data/wikipedia/w.parquet:  80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting ',' delimiter: line 10 column 306 (char 1042)
[
    {
        "prompt": "What is the Weierstrass transform?",
        "A": "The Weierstrass transform is a smoothed version of a function obtained by averaging the values of the function weighted with a Gaussian centered at x.",
        "B": "The Weierstrass transform is a method to approximate a given integrable function arbitrarily well with analytic functions.",
        "C": "The Weierstrass transform is a low-pass filter that reduces higher frequencies more than lower ones.",
        "D": "The Weierstrass transform is a linear and translation-invariant assignment of a new function to each function.",
        "E": "The Weierstrass transform is a convolution transform defined via the heat equation.",
        "answer": "A",
        "basis": "The Weierstrass transform is defined as a smoothed version of a function obtained by averaging the values of the function weighted with a Gaussian centered at x. This is mentioned in the te

../data/wikipedia/w.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 44/44 [12:35<00:00, 17.17s/it]
../data/wikipedia/x.parquet:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                      | 2/3 [00:43<00:21, 21.47s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_5080/1800176891.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Expecting ',' delimiter: line 33 column 53 (char 1075)
[
    {
        "prompt": "What is the genre of the novel Xavras Wyżryn?",
        "A": "Alternate history",
        "B": "Science fiction",
        "C": "Military fiction",
        "D": "Historical fiction",
        "E": "Romantic fiction",
        "answer": "C",
        "basis": "The text states that Xavras Wyżryn is a member of the military science fiction genre."
    },
    {
        "prompt": "When was Xavras Wyżryn first published?",
        "A": "1997",
        "B": "2004",
        "C": "1981",
        "D": "1920",
        "E": "1935",
        "answer": "A",
        "basis": "The text mentions that Xavras Wyżryn was first published in 1997."
    },
    {
        "prompt": "Who is the main character of Xavras Wyżryn?",
        "A": "Ian Smith",
        "B": "Colonel Xavras Wyżryn",
        "C": "Jewriej",
        "D": "Sienkiewicz's Triple",
        "E": "Two Bodyguards",
        "answer": "B",
        "basis": "The text desc


../data/wikipedia/y.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [02:28<00:00, 18.61s/it]
../data/wikipedia/z.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [02:17<00:00, 17.24s/it]


In [22]:
series['original_text']

'title: Nebula Winners Fourteen\nNebula Winners Fourteen is an anthology of award winning science fiction short works edited by Frederik Pohl. It was first published in hardcover by Harper & Row in August 1980. The first British edition was published in hardcover by W. H. Allen in April 1981. Paperback editions followed from Star in the U.K. in March 1982 and Bantam Books in the U.S. in July 1982. \nSummary\n The book collects pieces that won or were nominated for the Nebula Awards for novel, novella, novelette and short story for the year 1979 and a few other pieces related to the awards, together with a piece by 1979 Grand Master award winner L. Sprague de Camp and an introduction by the editor. Not all nominees for the various awards are included. \nContents\n *"Introduction: A Guide to the Perplexed" (Frederik Pohl) *"The Persistence of Vision" [Best Novella winner, 1979] (John Varley) *"Stone" [Best Short Story winner, 1979] (Edward Bryant) *"A Glow of Candles, a Unicorn\'s Eye" [

In [23]:
for i in range(10):
    series = df_texts.sample(1).iloc[0]
    print(f"{series['prompt']} \n text: \n {series['original_text'].split('==')[0]}")
    print("----------------")

What is the maximum sustainable yield (MSY) usually higher than? 
 text: 
 title: Maximum sustainable yield
In population ecology and economics, maximum sustainable yield (MSY) is theoretically, the largest yield (or catch) that can be taken from a species' stock over an indefinite period. Fundamental to the notion of sustainable harvest, the concept of MSY aims to maintain the population size at the point of maximum growth rate by harvesting the individuals that would normally be added to the population, allowing the population to continue to be productive indefinitely. Under the assumption of logistic growth, resource limitation does not constrain individuals' reproductive rates when populations are small, but because there are few individuals, the overall yield is small. At intermediate population densities, also represented by half the carrying capacity, individuals are able to breed to their maximum rate. At this point, called the maximum sustainable yield, there is a surplus of i

In [40]:
df_texts.to_csv(f"output_gpt3.5_generate/{now_date}.csv")

In [20]:
pd.DataFrame(texts)["A"].isnull().sum()

69

In [41]:
df_science = get_df(files[0])
df_science[df_science["id"] == '29637793']

Unnamed: 0,id,title,text,categories


In [42]:
df_science

Unnamed: 0,id,title,text,categories
368626,1908395,Artificial brain,title: Artificial brain\nAn artificial brain (...,"[Computational neuroscience, Robotics, Emergin..."
34268,10410698,Abdul Amir al-Jamri,title: Abdul Amir al-Jamri\nSheikh Abdul Amir ...,"[1938 births, 2006 deaths, Deaths from kidney ..."
233077,1958222,Amyloid beta,title: Amyloid beta\nAmyloid beta (Aβ or Abeta...,"[Peptides, Molecular neuroscience, Alzheimer's..."
9870,3621668,A Woman of the Iron People,title: A Woman of the Iron People\nA Woman of ...,"[1991 American novels, 1991 science fiction no..."
139719,38366604,Albert Spaier,title: Albert Spaier\nAlbert Spaier (9 July 18...,"[1883 births, 1934 deaths, Writers from Iași, ..."
...,...,...,...,...
62685,4474244,Actuarial reserves,"title: Actuarial reserves\nIn insurance, an ac...","[Actuarial science, Capital requirement de:Dec..."
357456,4260564,Arrival II,title: Arrival II\nArrival ll (alternatively t...,"[1998 films, 1998 science fiction films, Ameri..."
391514,32894329,Astrobiophysics,title: Astrobiophysics\nAstrobiophysics is a f...,"[Astrophysics, Biophysics .]"
10000,2824171,A World of Difference (novel),title: A World of Difference (novel)\nA World ...,"[1990 American novels, Novels set during the C..."
