In [1]:
import openai
import pandas as pd
import tqdm
import glob

import warnings
warnings.filterwarnings("ignore")

In [2]:
with open("../apikey/apikey.txt", "r") as f:
    openai.api_key = f.readline().replace("\n", "")

In [3]:
def query_prompt(prompt, max_tokens=4000):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-16k",
        messages=[
            {"role": "system", "content": "You are a professor at a science university and creating a exam for your students."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens,
        temperature=0,
    )
    return response["choices"][0]["message"]["content"]

In [4]:
def get_df(fname):
    def f(categories):
        for cat in categories:
            for word in [
                "geology",
                "physics",
                "chemistry",
                "mathematical",
                "biology",
                "astronomy",
                "ecology",
                "genetics",
                "statistics",
                "theoretical"
            ]:
                if word.lower() in cat.lower():
                    return True
            
        return False
    
    def text_preprocess(text):
        return text.replace("===", "\n").replace("==", "\n")

    df = pd.read_parquet(fname)
    df_science = df[df["categories"].apply(f)]
    df_science["text"] = "title: " + df_science["title"] + "\n" + df_science["text"].apply(text_preprocess)
    return df_science.sample(len(df_science)//5)

In [5]:
files = glob.glob("../data/wikipedia/*.parquet")

In [6]:
import time

In [7]:
import pickle

In [8]:
import json

In [9]:
from datetime import datetime as dt
import os

In [10]:
texts = []

In [11]:
import traceback 
batch_size = 1

def make_prompt(series):
    prompt = f"""
Using the given text about science, create multiple questions based on scientific principles. Also, extract the evidence for your answer.
The output should be an array of json format, with "prompt" as the question statement, "A," "B," "C," "D," and "E" as choices, "answer" as the answer choice (one of A through E), and "basis" as the rationale.

# text
title: {series['title']}

{series['text']}

# attention
- create 1 question.
"""
    return prompt

def f(series):
    if series["A"] != series["A"]:
        if type(series["choices"]) == dict:
            for key in ["A", "B", "C", "D", "E"]:
                series[key] = series["choices"][key]
        elif type(series["choices"] == list):
            for i, key in enumerate(["A", "B", "C", "D", "E"]):
                series[key] = series["choices"][i]
    return series

now_date = dt.now().strftime("%Y%m%d%H%M%S")

first = True
for file in files:
    if os.path.basename(file) in ["all.parquet"]:
        print(f"pass: {file}")
        continue
    df_science = get_df(file)
    
    for i in tqdm.tqdm(range(len(df_science)), desc=file):
        try:
            series = df_science.iloc[i]
            prompt = make_prompt(series)
            text = query_prompt(prompt)
            texts_json = json.loads(text)
            if first:
                print(texts_json)
                first = False
            if type(texts_json) == dict:
                text_json["wiki_id"] = series["id"]
                text_json["original_text"] = series["text"]
                texts.append(text_json)
            else:
                for text_json in texts_json:
                    text_json["wiki_id"] = series["id"]
                    text_json["original_text"] = series["text"]
                    texts.append(text_json)
        except Exception as e:
            print(e)
            traceback.print_exc()
            print(text)
        if i % 20 == 0:
            df_texts = pd.DataFrame(texts)
            df_texts = df_texts.apply(f, axis=1)

            df_texts.to_csv(f"output_gpt3.5_generate/{now_date}.csv")

../data/wikipedia/a.parquet:   0%| | 1/504 [00:04<38:44,

[{'prompt': 'Who became the editor-in-chief of the Annals of Mathematical Statistics in 1938?', 'A': 'Samuel Wilks', 'B': 'Fisher', 'C': 'Neyman', 'D': 'Cramér', 'E': 'Hotelling', 'answer': 'A', 'basis': 'According to the given text, Samuel Wilks became the editor-in-chief of the Annals of Mathematical Statistics in 1938.'}]


../data/wikipedia/a.parquet:  32%|▎| 160/504 [14:30<28:0Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", l

This model's maximum context length is 16385 tokens. However, your messages resulted in 17384 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is arrow pushing used to describe in organic chemistry reaction mechanisms?",
    "A": "The movement of atoms",
    "B": "The movement of electron density",
    "C": "The formation of new bonds",
    "D": "The breaking of covalent bonds",
    "E": "The distribution of positive and negative charges",
    "answer": "B",
    "basis": "Arrow pushing is used to show the movement of electron density, which indirectly shows the movement of atoms themselves. The text states, 'The arrows illustrate the movement of electrons as bonds between atoms are broken and formed.'"
  }
]


../data/wikipedia/a.parquet:  38%|▍| 189/504 [16:55<25:2Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 10 column 122 (char 386)
../data/wikipedia/a.parquet:  38%|▍| 190/504 [17:01<26:3

Expecting ',' delimiter: line 10 column 122 (char 386)
[
  {
    "prompt": "What is the meaning of the name 'Adeopapposaurus'?",
    "A": "Far eating lizard",
    "B": "Long necked dinosaur",
    "C": "Massospondylus relative",
    "D": "Keratinous beak dinosaur",
    "E": "Early Jurassic dinosaur",
    "answer": "A",
    "basis": "The name 'Adeopapposaurus' means 'far eating lizard', as mentioned in the text: 'Adeopapposaurus (meaning "far eating lizard", in reference to its long neck)'."
  }
]


../data/wikipedia/a.parquet:  38%|▍| 194/504 [17:27<36:0Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", l

This model's maximum context length is 16385 tokens. However, you requested 17894 tokens (13894 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is Alexandre Kirillov known for?",
    "A": "His works in the field of representation theory",
    "B": "His works in the field of topological groups",
    "C": "His works in the field of Lie groups",
    "D": "Introducing the orbit method into representation theory",
    "E": "All of the above",
    "answer": "E",
    "basis": "The text states that Alexandre Kirillov is known for his works in the fields of representation theory, topological groups, and Lie groups. It also mentions that he introduced the orbit method into representation theory."
  }
]


../data/wikipedia/a.parquet:  41%|▍| 205/504 [18:19<26:0Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", l

This model's maximum context length is 16385 tokens. However, you requested 16998 tokens (12998 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the scope of the journal Astrobiology?",
    "A": "Research on the origin and evolution of life on Earth",
    "B": "Research on the distribution and future of life across the universe",
    "C": "Research on the impact of climate change on ecosystems",
    "D": "Research on the history of space exploration",
    "E": "Research on the effects of gravity on living organisms",
    "answer": "B",
    "basis": "The text states that the scope of the journal Astrobiology includes research on the origin, evolution, distribution, and future of life across the universe."
  }
]


../data/wikipedia/a.parquet:  41%|▍| 208/504 [18:29<20:2Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 6 column 29 (char 298)
../data/wikipedia/a.parquet:  41%|▍| 209/504 [18:36<23:5

Expecting ',' delimiter: line 6 column 29 (char 298)
[
  {
    "prompt": "What is the relationship between antarafacial and suprafacial processes in organic chemistry?",
    "A": "They involve the same face of the π system or isolated orbital.",
    "B": "They involve opposite faces of the π system or isolated orbital.",
    "C": "They involve two "interior" lobes of a σ bond.",
    "D": "They involve two "exterior" lobes of a σ bond.",
    "E": "They involve one "interior" lobe and one "exterior" lobe of a σ bond.",
    "answer": "B",
    "basis": "The text states that the relationship is antarafacial when opposite faces of the π system or isolated orbital are involved in the process, and suprafacial when the same face of the π system or isolated orbital are involved."
  }
]


../data/wikipedia/a.parquet:  43%|▍| 215/504 [19:05<23:1Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", l

This model's maximum context length is 16385 tokens. However, you requested 18194 tokens (14194 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What was Andrew Browder's profession?",
    "A": "Chemist",
    "B": "Biologist",
    "C": "Mathematician",
    "D": "Physicist",
    "E": "Astronomer",
    "answer": "C",
    "basis": "Andrew Browder was an American mathematician at Brown University."
  }
]


../data/wikipedia/a.parquet:  56%|▌| 280/504 [25:24<22:5Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", l

This model's maximum context length is 16385 tokens. However, your messages resulted in 17503 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is autophosphorylation?",
    "A": "The phosphorylation of a kinase by another kinase",
    "B": "The phosphorylation of a kinase by itself",
    "C": "The phosphorylation of a kinase by a phosphatase",
    "D": "The phosphorylation of a kinase by a ligand",
    "E": "The phosphorylation of a kinase by a substrate",
    "answer": "B",
    "basis": "Autophosphorylation is generally defined as the phosphorylation of the kinase by itself."
  }
]


../data/wikipedia/a.parquet:  74%|▋| 374/504 [35:21<13:5Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", l

This model's maximum context length is 16385 tokens. However, your messages resulted in 18468 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the role of urease in the breakdown of urea?",
    "A": "Urease catalyzes the hydrolysis of urea to ammonia and carbon dioxide.",
    "B": "Urease prevents the hydrolysis of urea and keeps it in its original form.",
    "C": "Urease converts urea into a stable form that can be easily absorbed by plants.",
    "D": "Urease reacts with water to form ammonium and hydroxide ions.",
    "E": "Urease increases the pH of the soil and promotes ammonia volatilization.",
    "answer": "A",
    "basis": "The text states that urease is a naturally occurring enzyme that catalyzes the hydrolysis of urea to unstable carbamic acid, which rapidly decomposes to form ammonia and carbon dioxide."
  }
]


../data/wikipedia/a.parquet:  78%|▊| 391/504 [36:59<10:0Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", l

This model's maximum context length is 16385 tokens. However, you requested 16693 tokens (12693 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the main focus of Arturo Zychlinsky's research?",
    "A": "Bacterial pathogens",
    "B": "Neutrophil Extracellular Traps (NETs)",
    "C": "Chromatin",
    "D": "Toll Like Receptors",
    "E": "Autoimmunity",
    "answer": "B",
    "basis": "The text states that Arturo Zychlinsky's research focuses on Neutrophil Extracellular Traps (NETs), which he discovered together with Volker Brinkmann."
  }
]


../data/wikipedia/a.parquet:  90%|▉| 452/504 [46:45<05:0Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", l

This model's maximum context length is 16385 tokens. However, your messages resulted in 21203 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is Angela Dean's area of specialization?",
    "A": "Design of experiments",
    "B": "Mathematics",
    "C": "Physical and Engineering Sciences",
    "D": "Statistics",
    "E": "Data analysis",
    "answer": "A",
    "basis": "The text states that Angela Dean specializes in the design of experiments."
  }
]


../data/wikipedia/a.parquet: 100%|█| 504/504 [52:11<00:0


pass: ../data/wikipedia/all.parquet


../data/wikipedia/b.parquet:  84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                | 310/367 [34:51<05:31,  5.82s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 18426 tokens (14426 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the size of Bromsgrove Road Cutting?",
    "A": "0.2 hectare",
    "B": "0.5 acre",
    "C": "1 hectare",
    "D": "1 acre",
    "E": "2 hectares",
    "answer": "B",
    "basis": "The text states that Bromsgrove Road Cutting is a 0.2 hectare (0.5 acre) geological site of Special Scientific Interest."
  }
]


../data/wikipedia/b.parquet:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                         | 322/367 [35:56<04:20,  5.79s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 1 column 758 (char 757)
../data/wikipedia/b.parquet:  88%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 1 column 758 (char 757)
{"prompt": "What is Bayesian inference?", "A": "A method of statistical inference that uses Bayes' theorem to update the probability for a hypothesis as more evidence becomes available.", "B": "A technique in statistics that computes the posterior probability according to Bayes' theorem.", "C": "A method of updating beliefs based on new evidence.", "D": "A mathematical model used to predict future outcomes based on past data.", "E": "None of the above", "answer": "B", "basis": "Bayesian inference is a technique in statistics that uses Bayes' theorem to update the probability for a hypothesis as more evidence becomes available. This is stated in the given text: 'Bayesian inference computes the posterior probability according to Bayes' theorem: P(H \mid E) = \frac{P(E \mid H) \cdot P(H)}{P(E)}'."}


../data/wikipedia/b.parquet:  92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                 | 336/367 [37:15<03:00,  5.83s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 19957 tokens (15957 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the main purpose of the Bayesian structural time series (BSTS) model?",
    "A": "Feature selection",
    "B": "Time series forecasting",
    "C": "Nowcasting",
    "D": "Inferring causal impact",
    "E": "All of the above",
    "answer": "E",
    "basis": "The text states that the BSTS model is used for feature selection, time series forecasting, nowcasting, inferring causal impact, and other applications."
  }
]


../data/wikipedia/b.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 367/367 [40:15<00:00,  6.58s/it]
../data/wikipedia/c.parquet:   1%|███                                                                                                                                                                                                              | 9/617 [00:48<50:00,  4.93s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
  

Invalid \escape: line 10 column 73 (char 407)
[
  {
    "prompt": "What is the Cauchy–Schwarz inequality?",
    "A": "An inequality for sums of vectors",
    "B": "An inequality for integrals of vectors",
    "C": "An inequality for dot products of vectors",
    "D": "An inequality for cross products of vectors",
    "E": "An inequality for norms of vectors",
    "answer": "C",
    "basis": "The Cauchy–Schwarz inequality states that for all vectors \mathbf{u} and \mathbf{v} of an inner product space, the dot product of the two vectors squared is less than or equal to the product of the norms of the vectors squared. In other words, \left(\mathbf{u} \cdot \mathbf{v}\right)^2 \leq \|\mathbf{u}\|^2 \|\mathbf{v}\|^2."
  }
]


../data/wikipedia/c.parquet:  32%|█████████████████████████████████████████████████████████████████▊                                                                                                                                             | 196/617 [19:18<39:59,  5.70s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 10 column 368 (char 807)
../data/wikipedia/c.parquet:  32%|██████████████████████████████████████████████████████████████████                                                               

Invalid \escape: line 10 column 368 (char 807)
[
  {
    "prompt": "What is a complete set of invariants in mathematics?",
    "A": "A collection of maps that classify objects based on their equivalence",
    "B": "A collection of equations that define the coinvariants",
    "C": "A set of functions that determine the image of the maps",
    "D": "A set of matrices with their corresponding eigenvalues",
    "E": "A set of defining equations for the coinvariants",
    "answer": "A",
    "basis": "A complete set of invariants for a classification problem is a collection of maps that classify objects based on their equivalence. The text states, 'In mathematics, a complete set of invariants for a classification problem is a collection of maps :f_i : X \to Y_i (where X is the collection of objects being classified, up to some equivalence relation \sim, and the Y_i are some sets), such that x \sim x' if and only if f_i(x) = f_i(x') for all i.'"
  }
]


../data/wikipedia/c.parquet:  33%|████████████████████████████████████████████████████████████████████                                                                                                                                           | 203/617 [20:07<43:06,  6.25s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 10 column 239 (char 584)
../data/wikipedia/c.parquet:  33%|████████████████████████████████████████████████████████████████████▍                                                    

Expecting ',' delimiter: line 10 column 239 (char 584)
[
  {
    "prompt": "What does the phrase 'correlation does not imply causation' mean?",
    "A": "Correlation always implies causation",
    "B": "Correlation sometimes implies causation",
    "C": "Correlation never implies causation",
    "D": "Correlation implies causation in certain cases",
    "E": "None of the above",
    "answer": "C",
    "basis": "The phrase 'correlation does not imply causation' means that a correlation between two events or variables does not necessarily indicate a cause-and-effect relationship between them. This is stated in the text: 'The phrase "correlation does not imply causation" refers to the inability to legitimately deduce a cause-and-effect relationship between two events or variables solely on the basis of an observed association or correlation between them.'"
  }
]


../data/wikipedia/c.parquet:  41%|███████████████████████████████████████████████████████████████████████████████████▊                                                                                                                           | 250/617 [24:55<39:05,  6.39s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 21531 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the main reason for designating Chalbury Hill and Quarry as a Site of Special Scientific Interest?",
    "A": "The presence of grassland",
    "B": "The disused limestone quarry",
    "C": "The area covered by Chalbury Hill Fort",
    "D": "The geological features",
    "E": "The biological features",
    "answer": "D",
    "basis": "The text mentions that Chalbury Hill and Quarry is a biological and geological Site of Special Scientific Interest. It consists of grassland and a disused limestone quarry, but the main reason for its designation is the geological features."
  }
]


../data/wikipedia/c.parquet:  48%|███████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                                           | 296/617 [29:13<30:49,  5.76s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 10 column 154 (char 441)
../data/wikipedia/c.parquet:  48%|███████████████████████████████████████████████████████████████████████████████████████████████████▋                     

Expecting ',' delimiter: line 10 column 154 (char 441)
[
  {
    "prompt": "What is the main characteristic of Chernozem soil?",
    "A": "High moisture storage capacity",
    "B": "High percentage of phosphorus",
    "C": "High percentage of ammonia compounds",
    "D": "Black color",
    "E": "High percentage of humus",
    "answer": "E",
    "basis": "The main characteristic of Chernozem soil is that it contains a high percentage of humus (4% to 16%). The text states, 'Chernozem (from ; "black ground"), also called black soil, is a black-colored soil containing a high percentage of humus (4% to 16%) and high percentages of phosphorus and ammonia compounds.'"
  }
]


../data/wikipedia/c.parquet:  61%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                                | 376/617 [36:41<25:41,  6.40s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 8 column 46 (char 361)
../data/wikipedia/c.parquet:  61%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍    

Invalid \escape: line 8 column 46 (char 361)
[
  {
    "prompt": "What is the purpose of the COMET experiment?",
    "A": "To study muon decay to an electron and neutrino",
    "B": "To look for neutrinoless muon to electron conversion",
    "C": "To investigate neutrino oscillations",
    "D": "To measure the branching ratio of muon to electron conversion",
    "E": "To study the supersymmetric \tilde{\chi_{0} }",
    "answer": "B",
    "basis": "The purpose of the COMET experiment is to look for neutrinoless muon to electron conversion, where the electron flies away with an energy of 104.8 MeV. This is mentioned in the given text: 'COMET seeks to look for neutrinoless muon to electron conversion, where the electron flies away with an energy of 104.8 MeV.'"
  }
]


../data/wikipedia/c.parquet:  63%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                             | 387/617 [37:51<21:52,  5.71s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 19367 tokens (15367 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the main factor that affects the behavior of chevron folds?",
    "A": "Thickness of low competence layers",
    "B": "Thickness of high competence layers",
    "C": "Regular thickness in high competence layers",
    "D": "Irregular thickness in low competence layers",
    "E": "Length of the bed",
    "answer": "C",
    "basis": "According to the text, the stability of chevron folding strictly requires regular thickness in the high-competence layers. Therefore, the main factor that affects the behavior of chevron folds is the regular thickness in high competence layers."
  }
]


../data/wikipedia/c.parquet:  78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                             | 483/617 [46:56<12:42,  5.69s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 20902 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What are the two main classes of charged particle beams?",
    "A": "Unbunched beams and bunched beams",
    "B": "Electron beams and proton beams",
    "C": "Ion beams and electron beams",
    "D": "Proton beams and ion beams",
    "E": "Coasting beams and DC beams",
    "answer": "A",
    "basis": "According to the text, charged particle beams can be split into two main classes: unbunched beams (coasting beams or DC beams) and bunched beams."
  }
]


../data/wikipedia/c.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 617/617 [1:00:27<00:00,  5.88s/it]
../data/wikipedia/d.parquet:  52%|████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                                                   | 168/322 [15:36<11:47,  4.59s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/op

This model's maximum context length is 16385 tokens. However, you requested 17676 tokens (13676 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the capital of County Down, Northern Ireland?",
    "A": "Downpatrick",
    "B": "Belfast",
    "C": "Newry",
    "D": "Mourne",
    "E": "Dundrum",
    "answer": "A",
    "basis": "According to the text, Downpatrick is the county town of Down and the joint headquarters of Newry, Mourne and Down District Council."
  }
]


../data/wikipedia/d.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 322/322 [29:41<00:00,  5.53s/it]
../data/wikipedia/e.parquet:  26%|██████████████████████████████████████████████████████▌                                                                                                                                                         | 95/362 [08:30<26:13,  5.90s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/op

This model's maximum context length is 16385 tokens. However, you requested 18176 tokens (14176 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the primary role of dominant Xylocopa virginica females in the nest?",
    "A": "Reproduction",
    "B": "Foraging",
    "C": "Nest construction",
    "D": "All of the above",
    "E": "None of the above",
    "answer": "D",
    "basis": "According to the text, dominant X. virginica females are responsible for a full gamut of activities including reproduction, foraging, and nest construction."
  }
]


../data/wikipedia/e.parquet:  36%|██████████████████████████████████████████████████████████████████████████▉                                                                                                                                    | 131/362 [11:26<17:22,  4.51s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 10 column 283 (char 814)
../data/wikipedia/e.parquet:  36%|███████████████████████████████████████████████████████████████████████████▍                                                     

Invalid \escape: line 10 column 283 (char 814)
[
  {
    "prompt": "What is the definition of the empirical distribution function?",
    "A": "A process that describes the proportion of objects in a system in a given state.",
    "B": "A process which counts the number of objects in a given state.",
    "C": "A sequence of random variables that converge to the cumulative distribution function.",
    "D": "A map on measurable functions that converges to a normal random variable.",
    "E": "A process that converges weakly to a certain Gaussian process.",
    "answer": "C",
    "basis": "The definition of the empirical distribution function is given as: For X1, X2, ... Xn independent and identically-distributed random variables in R with common cumulative distribution function F(x), the empirical distribution function is defined by :F_n(x)=\frac{1}{n}\sum_{i=1}^n I_{(-\infty,x]}(X_i), where IC is the indicator function of the set C. For every (fixed) x, Fn(x) is a sequence of random vari

../data/wikipedia/e.parquet:  90%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                    | 327/362 [29:27<03:49,  6.57s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 17410 tokens (13410 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the difference between acute ecotoxicity and chronic ecotoxicity?",
    "A": "Acute ecotoxicity refers to the detrimental effects resulting from a hazardous exposure for no more than 15 days, while chronic ecotoxicity refers to the detrimental effects resulting from a hazardous exposure of 15 days to possibly years.",
    "B": "Acute ecotoxicity is lethal, while chronic ecotoxicity is not lethal but decreases cellular biochemical functions.",
    "C": "Acute ecotoxicity is caused by natural pollutants, while chronic ecotoxicity is caused by synthetic pollutants.",
    "D": "Acute ecotoxicity affects animals, vegetation, and microbes, while chronic ecotoxicity affects only animals.",
    "E": "Acute ecotoxicity is associated with particular drug-receptor actions, whil

../data/wikipedia/e.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 362/362 [32:54<00:00,  5.45s/it]
../data/wikipedia/f.parquet:  52%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                                  | 123/235 [11:15<10:27,  5.61s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
  

Invalid \escape: line 10 column 170 (char 587)
[
    {
        "prompt": "What is the Fekete problem?",
        "A": "Finding the points on a 2-sphere with minimal s-energy",
        "B": "Finding the points on a 2-sphere with maximal s-energy",
        "C": "Finding the points on a 2-sphere with minimal logarithmic energy",
        "D": "Finding the points on a 2-sphere with maximal logarithmic energy",
        "E": "None of the above",
        "answer": "A",
        "basis": "The Fekete problem is defined as finding the points x1,...,xN on the 2-sphere for which the s-energy is minimal. The s-energy is defined as the sum of \|x_i - x_j \|^{-s} for s > 0 and the sum of \log \|x_i - x_j \|^{-1} for s = 0."
    }
]


../data/wikipedia/f.parquet:  69%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                               | 163/235 [15:06<06:07,  5.10s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 31012 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What are fault blocks?",
    "A": "Large areas of bedrock broken up into blocks by faults",
    "B": "Crustal blocks broken off from tectonic plates",
    "C": "Terranes that have a specific geologic definition",
    "D": "Blocks characterized by relatively uniform lithology",
    "E": "Blocks created by tectonic and localized stresses in Earth's crust",
    "answer": "E",
    "basis": "The text states that fault blocks are very large blocks of rock created by tectonic and localized stresses in Earth's crust."
  }
]


../data/wikipedia/f.parquet:  71%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                            | 167/235 [15:25<05:44,  5.06s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 7 column 24 (char 233)
../data/wikipedia/f.parquet:  71%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 7 column 24 (char 233)
[
  {
    "prompt": "What is the freshman's dream?",
    "A": "The erroneous equation (x+y)^n=x^n+y^n",
    "B": "The correct equation (x+y)^n=x^2+2xy+y^2",
    "C": "The theorem that says (x + y)p = xp + yp",
    "D": "The equation \sqrt{x^2+y^2} = \sqrt{x^2}+\sqrt{y^2}",
    "E": "The equation (x + y)2 = x2 + 2xy + y2",
    "answer": "A",
    "basis": "The text states that the freshman's dream is a name sometimes given to the erroneous equation (x+y)^n=x^n+y^n, where n is a real number (usually a positive integer greater than 1) and x,y are non-zero real numbers."
  }
]


../data/wikipedia/f.parquet:  74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                      | 173/235 [15:57<05:39,  5.48s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 17467 tokens (13467 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is frequency separation within astrophysics?",
    "A": "The spacing in frequency between adjacent modes of oscillation",
    "B": "The time required for a sound wave to travel to the center of the Sun and return",
    "C": "The difference in frequency between modes of different angular degree",
    "D": "The difference in frequency between modes of different azimuthal order",
    "E": "The difference in frequency between modes of different radial order",
    "answer": "A",
    "basis": "The text states that frequency separation refers to the spacing in frequency between adjacent modes of oscillation, having the same angular degree (l) but different radial order (n)."
  }
]


../data/wikipedia/f.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 235/235 [21:16<00:00,  5.43s/it]
../data/wikipedia/g.parquet:   6%|███████████▊                                                                                                                                                                                                    | 20/351 [01:46<26:21,  4.78s/it]

This model's maximum context length is 16385 tokens. However, your messages resulted in 35699 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "When was Geologiska föreningen founded?",
    "A": "1871",
    "B": "1872",
    "C": "1873",
    "D": "1874",
    "E": "1875",
    "answer": "A",
    "basis": "Geologiska föreningen was founded in Sweden in 1871."
  }
]


Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.hand

Expecting ',' delimiter: line 10 column 229 (char 597)
[
  {
    "prompt": "What was the main finding of Griffith's experiment?",
    "A": "Bacteria can transfer genetic information",
    "B": "Bacteria can synthesize a polysaccharide capsule",
    "C": "Bacteria can be killed by heat",
    "D": "Bacteria can be classified into different types",
    "E": "Bacteria can defeat the host's immune system",
    "answer": "A",
    "basis": "Griffith's experiment suggested that bacteria are capable of transferring genetic information through a process known as transformation. This is stated in the text: 'Griffith concluded that the type II-R had been "transformed" into the lethal III-S strain by a "transforming principle" that was somehow part of the dead III-S strain bacteria.'"
  }
]


../data/wikipedia/g.parquet:  22%|██████████████████████████████████████████████▏                                                                                                                                                                 | 78/351 [06:26<21:57,  4.83s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 22915 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is a glow discharge?",
    "A": "A plasma formed by the passage of electric current through a gas",
    "B": "A radiation source such as ultraviolet light or Cosmic rays",
    "C": "The process of ionizing gas molecules",
    "D": "The process of exciting atoms in a gas",
    "E": "The process of sputtering particles from a cathode",
    "answer": "A",
    "basis": "According to the text, a glow discharge is a plasma formed by the passage of electric current through a gas."
  }
]


../data/wikipedia/g.parquet:  51%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                     | 179/351 [15:15<12:55,  4.51s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 4 column 61 (char 137)
../data/wikipedia/g.parquet:  51%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▏                        

Invalid \escape: line 4 column 61 (char 137)
[
  {
    "prompt": "What is the definition of a general Dirichlet series?",
    "A": "An infinite series that takes the form of a_n e^{-\lambda_n s}",
    "B": "An infinite series that takes the form of a_n / n^s",
    "C": "An infinite series that takes the form of a_n (e^{-s})^n",
    "D": "An infinite series that takes the form of a_n e^{-\lambda_n s} where \lambda_n is a strictly increasing sequence of nonnegative real numbers",
    "E": "An infinite series that takes the form of a_n e^{-\lambda_n s} where \lambda_n is a strictly decreasing sequence of nonnegative real numbers",
    "answer": "D",
    "basis": "The definition of a general Dirichlet series is given in the text as: 'In the field of mathematical analysis, a general Dirichlet series is an infinite series that takes the form of : \sum_{n=1}^\infty a_n e^{-\lambda_n s}, where a_n, s are complex numbers and \\{\lambda_n\\} is a strictly increasing sequence of nonnegative real 

../data/wikipedia/g.parquet:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                         | 308/351 [26:36<04:09,  5.81s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 19518 tokens (15518 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What was the ancient Roman treatment for uterine prolapse?",
    "A": "Surgical removal of the uterus and bladder",
    "B": "Suspending the patient upside down from a ladder",
    "C": "Wrapping magic stones in the skin of sacrificed animals",
    "D": "Administering herbs and drugs",
    "E": "Applying ligatures to the groin and armpits",
    "answer": "B",
    "basis": "According to the text, ancient Roman gynecologists treated uterine prolapse by suspending the patient upside down from a ladder."
  }
]


../data/wikipedia/g.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 351/351 [30:19<00:00,  5.18s/it]
../data/wikipedia/h.parquet:   5%|█████████▌                                                                                                                                                                                                      | 14/306 [01:19<29:17,  6.02s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/op

This model's maximum context length is 16385 tokens. However, your messages resulted in 17119 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is a halo nucleus in nuclear physics?",
    "A": "A nucleus with a core surrounded by a halo of orbiting protons or neutrons",
    "B": "A nucleus with a core surrounded by a halo of electrons",
    "C": "A nucleus with a core surrounded by a halo of photons",
    "D": "A nucleus with a core surrounded by a halo of positrons",
    "E": "A nucleus with a core surrounded by a halo of neutrinos",
    "answer": "A",
    "basis": "According to the given text, a halo nucleus in nuclear physics is an atomic nucleus that has a core nucleus surrounded by a 'halo' of orbiting protons or neutrons."
  }
]


../data/wikipedia/h.parquet:  16%|████████████████████████████████▋                                                                                                                                                                               | 48/306 [04:22<20:37,  4.79s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 16586 tokens (12586 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "Who posed the question about the stability of group homomorphisms?",
    "A": "Stanisław Ulam",
    "B": "Donald H. Hyers",
    "C": "Themistocles M. Rassias",
    "D": "S.-M. Jung",
    "E": "T. Aoki",
    "answer": "A",
    "basis": "The stability problem of functional equations originated from a question of Stanisław Ulam, posed in 1940."
  }
]


../data/wikipedia/h.parquet:  33%|████████████████████████████████████████████████████████████████████▎                                                                                                                                          | 101/306 [08:58<16:08,  4.72s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ':' delimiter: line 5 column 10 (char 94)
../data/wikipedia/h.parquet:  33%|█████████████████████████████████████████████████████████████████████                                                       

Expecting ':' delimiter: line 5 column 10 (char 94)
{
  "prompt": "When did X-ray solar studies begin?",
  "A": "1920s",
  "B": "1940s",
  "1950s",
  "1960s",
  "1970s",
  "answer": "B",
  "basis": "According to the text, X-ray solar studies began in 1949 when Herbert Friedman started studying X-ray solar emissions."
}


../data/wikipedia/h.parquet:  37%|█████████████████████████████████████████████████████████████████████████████                                                                                                                                  | 114/306 [10:13<17:42,  5.53s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 4 column 56 (char 113)
../data/wikipedia/h.parquet:  38%|█████████████████████████████████████████████████████████████████████████████▊                                                     

Invalid \escape: line 4 column 56 (char 113)
[
  {
    "prompt": "What is a homothetic vector field?",
    "A": "A vector field that satisfies the condition: \(\mathcal{L}_X g_{ab}=2c g_{ab}\)",
    "B": "A vector field that satisfies the condition: \(\mathcal{L}_X g_{ab}=c g_{ab}\)",
    "C": "A vector field that satisfies the condition: \(\mathcal{L}_X g_{ab}=0\)",
    "D": "A vector field that satisfies the condition: \(\mathcal{L}_X g_{ab}=g_{ab}\)",
    "E": "A vector field that satisfies the condition: \(\mathcal{L}_X g_{ab}=3c g_{ab}\)",
    "answer": "A",
    "basis": "The given text states that a homothetic vector field satisfies the condition \(\mathcal{L}_X g_{ab}=2c g_{ab}\), where c is a real constant."
  }
]


../data/wikipedia/h.parquet:  60%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                   | 183/306 [16:16<11:15,  5.49s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 21178 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What are the two main formations encompassed by the Hamilton Group?",
    "A": "Mahantango Formation and Marcellus Shale",
    "B": "Millboro Shale and Millboro Formation",
    "C": "Tully Limestone and Stafford Limestone",
    "D": "Geneseo Shale and Moscow Formation",
    "E": "Tioga Bentonites and Needmore Shale",
    "answer": "A",
    "basis": "The text states that the Hamilton Group is mainly composed of marine shale with some sandstone and consists of two main formations: the Mahantango Formation and the Marcellus Shale."
  }
]


../data/wikipedia/h.parquet:  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊          | 291/306 [26:14<01:41,  6.77s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 16868 tokens (12868 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the Hill limit in solid-state physics?",
    "A": "The critical distance between actinide or rare-earth atoms",
    "B": "The radius of the f-orbital",
    "C": "The distance at which the overlap of f-orbitals becomes negligible",
    "D": "The distance at which f electrons become localized on ion sites",
    "E": "The distance at which f electrons can move through the lattice",
    "answer": "C",
    "basis": "The Hill limit in solid-state physics is defined as the distance at which the overlap of f-orbitals becomes negligible. This means that if two atoms of the lattice are separated by a distance greater than the Hill limit, the overlap of their f-orbitals becomes insignificant."
  }
]


../data/wikipedia/h.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 306/306 [27:30<00:00,  5.39s/it]
../data/wikipedia/i.parquet:  19%|███████████████████████████████████████▉                                                                                                                                                                        | 54/281 [05:00<19:04,  5.04s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/op

This model's maximum context length is 16385 tokens. However, you requested 19411 tokens (15411 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the main objective of the International Astrostatistics Association (IAA)?",
    "A": "To promote innovation in all aspects of academic scientific research",
    "B": "To foster collaboration between statisticians and astrophysicists",
    "C": "To create an interdisciplinary community around data-driven problems in Astronomy",
    "D": "To manage the IAA Council",
    "E": "To organize the biannual ISI World Statistics Congress",
    "answer": "B",
    "basis": "The text states that the foremost objective of the IAA is to foster collaboration between statisticians and astrophysicists."
  }
]


../data/wikipedia/i.parquet:  40%|███████████████████████████████████████████████████████████████████████████████████▏                                                                                                                           | 113/281 [10:08<16:22,  5.85s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 21097 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "When was the Italian Mathematical Union founded?",
    "A": "December 7, 1922",
    "B": "March 31, 1922",
    "C": "December 7, 1920",
    "D": "December 7, 1928",
    "E": "March 31, 1928",
    "answer": "A",
    "basis": "According to the text, the Italian Mathematical Union was founded on December 7, 1922, as mentioned in the sentence: 'It was founded on December 7, 1922, by Luigi Bianchi, Vito Volterra, and most notably, Salvatore Pincherle, who became the Union's first President.'"
  }
]


../data/wikipedia/i.parquet:  41%|█████████████████████████████████████████████████████████████████████████████████████▍                                                                                                                         | 116/281 [10:19<13:15,  4.82s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 18291 tokens (14291 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "Who are the pioneers of the International Workshop on Nitride Semiconductors?",
    "A": "Isamu Akasaki and Hiroshi Amano",
    "B": "Hiroshi Fujioka and Alan Doolittle",
    "C": "Tomas Palacios and Hiroshi Amano",
    "D": "Isamu Akimoto and Alan Doolittle",
    "E": "Hiroshi Amano and Tomas Palacios",
    "answer": "A",
    "basis": "The pioneers of the International Workshop on Nitride Semiconductors are Isamu Akasaki and Hiroshi Amano, who are Nobel laureates in physics (2014)."
  }
]


../data/wikipedia/i.parquet:  49%|████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                                          | 137/281 [11:58<12:58,  5.41s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 19169 tokens (15169 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the mission of Island Conservation?",
    "A": "To prevent extinctions by removing invasive species from islands",
    "B": "To promote sustainable development goals",
    "C": "To conduct field research on marine systems",
    "D": "To protect endangered species on the IUCN's Red List",
    "E": "To develop plans for island restoration projects",
    "answer": "A",
    "basis": "The mission of Island Conservation is to prevent extinctions by removing invasive species from islands. This is stated in the text: 'Island Conservation is a non-profit organization with the mission to prevent extinctions by removing invasive species from islands.'"
  }
]


../data/wikipedia/i.parquet:  61%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                | 172/281 [15:05<09:28,  5.21s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 16487 tokens (12487 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the state tree of Arizona?",
    "A": "Blue palo verde",
    "B": "Saguaro blossom",
    "C": "Arizona cypress",
    "D": "Desert spoon",
    "E": "Ocotillo",
    "answer": "A",
    "basis": "The state tree of Arizona is the blue palo verde. This is mentioned in the text under the section 'Symbols of the State of Arizona'."
  }
]


../data/wikipedia/i.parquet:  85%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                               | 239/281 [20:49<03:57,  5.65s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 29269 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the role of hydrocarbons in the recognition of individuals in social insects?",
    "A": "Hydrocarbons help social insects build nests.",
    "B": "Hydrocarbons allow social insects to communicate with other colonies.",
    "C": "Hydrocarbons enable social insects to recognize individuals from their own colony.",
    "D": "Hydrocarbons help social insects in reproduction.",
    "E": "Hydrocarbons have no role in the recognition of individuals in social insects.",
    "answer": "C",
    "basis": "According to the text, hydrocarbons are a blend of specific signatures that emanate from the individual and are used for recognition within the same colony. The mixture of hydrocarbons, known as the insect's label, is compared with the individual's internal colony odor or template to determine recognition."
  }
]


../data/wikipedia/i.parquet:  87%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                          | 245/281 [21:14<02:46,  4.64s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 16474 tokens (12474 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the field of isotope electrochemistry concerned with?",
    "A": "Electrochemical separation of isotopes",
    "B": "Estimation of isotopic exchange equilibrium constants",
    "C": "Kinetic isotope effect",
    "D": "Isotope sensors",
    "E": "All of the above",
    "answer": "E",
    "basis": "The text states that isotope electrochemistry is concerned with various topics like electrochemical separation of isotopes, electrochemical estimation of isotopic exchange equilibrium constants, electrochemical kinetic isotope effect, and electrochemical isotope sensors."
  }
]


../data/wikipedia/i.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 281/281 [24:14<00:00,  5.18s/it]
../data/wikipedia/j.parquet:   8%|████████████████▎                                                                                                                                                                                               | 19/242 [01:24<16:39,  4.48s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/op

This model's maximum context length is 16385 tokens. However, your messages resulted in 17264 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What are Jerome Goldstein's main interests?",
    "A": "Partial differential equations",
    "B": "Operator theory",
    "C": "Stochastic analysis",
    "D": "Fluid dynamics",
    "E": "All of the above",
    "answer": "E",
    "basis": "According to the given text, Jerome Goldstein's main interests are partial differential equations, operator theory, stochastic analysis, fluid dynamics, quantum theory, and mathematical finance."
  }
]


../data/wikipedia/j.parquet:  91%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                  | 221/242 [17:01<01:48,  5.18s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 33026 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is Jun-Muk Hwang's area of specialization?",
    "A": "Algebraic geometry",
    "B": "Complex differential geometry",
    "C": "Physics",
    "D": "Mathematical sciences",
    "E": "None of the above",
    "answer": "B",
    "basis": "The text states that Jun-Muk Hwang specializes in algebraic geometry and complex differential geometry."
  }
]


../data/wikipedia/j.parquet:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊     | 236/242 [18:05<00:28,  4.67s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 17944 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What are the research areas of Joachim von zur Gathen?",
    "A": "Computational complexity",
    "B": "Cryptography",
    "C": "Finite fields",
    "D": "Computer algebra",
    "E": "All of the above",
    "answer": "E",
    "basis": "The text states that Joachim von zur Gathen's research spans several areas in mathematics and computer science, including computational complexity, cryptography, finite fields, and computer algebra."
  }
]


../data/wikipedia/j.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 242/242 [18:30<00:00,  4.59s/it]
../data/wikipedia/k.parquet:  12%|███████████████████████▉                                                                                                                                                                                        | 16/139 [01:17<09:37,  4.70s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
  

Expecting ',' delimiter: line 7 column 36 (char 307)
[
  {
    "prompt": "What is the main idea behind Kaluza-Klein theory?",
    "A": "The existence of a fifth dimension beyond the four dimensions of space and time",
    "B": "The unification of gravitation and electromagnetism",
    "C": "The precursor to string theory",
    "D": "The introduction of the "cylinder condition" hypothesis",
    "E": "The quantum interpretation of the fifth dimension",
    "answer": "A",
    "basis": "The main idea behind Kaluza-Klein theory is the existence of a fifth dimension beyond the four dimensions of space and time. This is mentioned in the text: 'In physics, Kaluza–Klein theory (KK theory) is a classical unified field theory of gravitation and electromagnetism built around the idea of a fifth dimension beyond the common 4D of space and time and considered an important precursor to string theory.'"
  }
]


../data/wikipedia/k.parquet:  21%|███████████████████████████████████████████▍                                                                                                                                                                    | 29/139 [02:31<09:36,  5.24s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 10 column 98 (char 560)
../data/wikipedia/k.parquet:  22%|████████████████████████████████████████████▉                                                                                     

Invalid \escape: line 10 column 98 (char 560)
[
  {
    "prompt": "What is a kernel smoother?",
    "A": "A statistical technique to estimate a real valued function as the weighted average of neighboring observed data",
    "B": "A technique to fit locally linear functions",
    "C": "A method to estimate the value of Y(X0) by averaging the values of m nearest neighbors",
    "D": "A technique to fit polynomial functions",
    "E": "A technique to estimate a continuous function of X",
    "answer": "A",
    "basis": "A kernel smoother is a statistical technique to estimate a real valued function f: \mathbb{R}^p \to \mathbb{R} as the weighted average of neighboring observed data. The weight is defined by the kernel, such that closer points are given higher weights. The estimated function is smooth, and the level of smoothness is set by a single parameter."
  }
]


../data/wikipedia/k.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 139/139 [11:20<00:00,  4.90s/it]
../data/wikipedia/l.parquet:   7%|█████████████▌                                                                                                                                                                                                  | 30/458 [02:26<37:11,  5.21s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/op

This model's maximum context length is 16385 tokens. However, your messages resulted in 25853 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "Which prize recognizes outstanding achievements in the field of atomic or surface physics?",
    "A": "David Adler Lectureship Award in the Field of Materials Physics",
    "B": "Will Allis Prize for the Study of Ionized Gases",
    "C": "Leroy Apker Award",
    "D": "Davisson–Germer Prize in Atomic or Surface Physics",
    "E": "Hans A. Bethe Prize",
    "answer": "D",
    "basis": "The Davisson–Germer Prize in Atomic or Surface Physics is an annual prize for 'outstanding work in atomic physics or surface physics'. The prize is named after Clinton Davisson and Lester Germer, who first measured electron diffraction."
  }
]


../data/wikipedia/l.parquet:  11%|██████████████████████▋                                                                                                                                                                                         | 50/458 [03:57<31:06,  4.57s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 31698 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "When did USM Alger win their first trophy?",
    "A": "1937",
    "B": "1963",
    "C": "1995",
    "D": "2013",
    "E": "2018",
    "answer": "B",
    "basis": "According to the text, USM Alger won their first trophy in 1963."
  }
]


../data/wikipedia/l.parquet:  12%|████████████████████████▌                                                                                                                                                                                       | 54/458 [04:08<23:19,  3.46s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 16464 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "Where did Lewis Salter receive his undergraduate education?",
    "A": "University of Oklahoma",
    "B": "University of Oxford",
    "C": "Knox College",
    "D": "Bandung Institute of Technology",
    "E": "Argonne National Laboratory",
    "answer": "A",
    "basis": "According to the text, Lewis Salter received his undergraduate education at the University of Oklahoma."
  }
]


../data/wikipedia/l.parquet:  15%|███████████████████████████████▎                                                                                                                                                                                | 69/458 [05:16<29:42,  4.58s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 19842 tokens (15842 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "Which Apollo mission returned a few dozen pounds/kilos of lunar material?",
    "A": "Apollo 11",
    "B": "Apollo 17",
    "C": "Apollo 15",
    "D": "Apollo 13",
    "E": "Apollo 12",
    "answer": "A",
    "basis": "The text states that the Apollo 11 mission to the surface of the Moon returned a few dozen pounds/kilos of lunar material."
  }
]


../data/wikipedia/l.parquet:  16%|████████████████████████████████▋                                                                                                                                                                               | 72/458 [05:24<23:27,  3.65s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 19064 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "Who is the author of the book 'Silent Spring'?",
    "A": "Edward Abbey",
    "B": "Rachel Carson",
    "C": "Henry Beston",
    "D": "Marilyn A. Brown",
    "E": "Lester R. Brown",
    "answer": "B",
    "basis": "The author of the book 'Silent Spring' is Rachel Carson."
  }
]


../data/wikipedia/l.parquet:  17%|████████████████████████████████████▎                                                                                                                                                                           | 80/458 [06:05<33:55,  5.39s/it]

This model's maximum context length is 16385 tokens. However, you requested 20065 tokens (16065 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is lipophilic efficiency (LiPE) used for in drug design and drug discovery?",
    "A": "To evaluate the quality of research compounds",
    "B": "To estimate druglikeness",
    "C": "To link potency and lipophilicity",
    "D": "All of the above",
    "E": "None of the above",
    "answer": "D",
    "basis": "The text states that lipophilic efficiency (LiPE) is used to evaluate the quality of research compounds, estimate druglikeness, and link potency and lipophilicity in an attempt to estimate druglikeness."
  }
]


Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.hand

This model's maximum context length is 16385 tokens. However, you requested 17116 tokens (13116 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "Which formation is from the Carboniferous period?",
    "A": "Amsden Formation",
    "B": "Bearpaw Formation",
    "C": "Climbing Arrow Formation",
    "D": "Deadwood Formation",
    "E": "Fort Union Formation",
    "answer": "A",
    "basis": "The Amsden Formation is listed as being from the Carboniferous period."
  }
]


../data/wikipedia/l.parquet:  32%|█████████████████████████████████████████████████████████████████▌                                                                                                                                             | 145/458 [11:21<28:55,  5.54s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 47987 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the purpose of the light cone in defining the concept of causality?",
    "A": "To describe the path of light in spacetime",
    "B": "To determine the speed of light in different frames of reference",
    "C": "To classify events in spacetime into distinct categories",
    "D": "To visualize the expansion of the universe",
    "E": "To explain the behavior of black holes",
    "answer": "C",
    "basis": "The purpose of the light cone in defining the concept of causality is to classify events in spacetime into distinct categories. The light cone separates events that can influence each other from events that cannot. It helps determine the set of events that lie on or inside the past or future light cone of a given event, which represents the events that can send or receive a signal that would have time to reach and

../data/wikipedia/l.parquet:  33%|████████████████████████████████████████████████████████████████████▏                                                                                                                                          | 151/458 [11:47<26:04,  5.10s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 20369 tokens (16369 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the main difference between loop-mediated isothermal amplification (LAMP) and polymerase chain reaction (PCR)?",
    "A": "LAMP requires a thermal cycler, while PCR does not.",
    "B": "LAMP uses a constant temperature, while PCR uses alternating temperature steps.",
    "C": "LAMP requires a reverse transcription step, while PCR does not.",
    "D": "LAMP uses two sets of primers, while PCR uses four primers.",
    "E": "LAMP produces a single band on a gel, while PCR produces a ladder pattern.",
    "answer": "B",
    "basis": "The text states that LAMP is carried out at a constant temperature, while PCR is carried out with a series of alternating temperature steps or cycles."
  }
]


../data/wikipedia/l.parquet:  36%|█████████████████████████████████████████████████████████████████████████▋                                                                                                                                     | 163/458 [12:43<20:28,  4.16s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 18785 tokens (14785 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "Which planet has a list of geological features that includes craters, mountains, rocks, and plains?",
    "A": "Mercury",
    "B": "Venus",
    "C": "Mars",
    "D": "Jupiter",
    "E": "Saturn",
    "answer": "C",
    "basis": "The list of geological features on Mars includes craters, mountains, rocks, and plains."
  }
]


../data/wikipedia/l.parquet:  55%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                             | 251/458 [20:06<14:45,  4.28s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 22114 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is Lisa Alvarez-Cohen's area of expertise?",
    "A": "Engineering and applied science",
    "B": "Environmental microbiology and ecology",
    "C": "Chemistry",
    "D": "Civil and environmental engineering",
    "E": "Microbial degradation of environmental contaminants",
    "answer": "B",
    "basis": "In the given text, it is mentioned that Lisa Alvarez-Cohen works in environmental microbiology and ecology."
  }
]


../data/wikipedia/l.parquet:  64%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                          | 293/458 [23:25<13:17,  4.83s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 20724 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "Which textbook is recommended for advanced undergraduate and graduate level classical mechanics?",
    "A": "Chapters 1–21",
    "B": "Course of Theoretical Physics Volume 3 - Quantum Mechanics: Non-Relativistic Theory",
    "C": "A Modern Approach to Quantum Mechanics",
    "D": "Classical Mechanics",
    "E": "None of the above",
    "answer": "D",
    "basis": "The textbook recommended for advanced undergraduate and graduate level classical mechanics is 'Classical Mechanics'."
  }
]


../data/wikipedia/l.parquet:  73%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                       | 336/458 [26:44<07:52,  3.88s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 30265 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "In which year did LA Galaxy win the CONCACAF Champions' Cup?",
    "A": "1997",
    "B": "1999",
    "C": "2000",
    "D": "2003",
    "E": "2006",
    "answer": "C",
    "basis": "The LA Galaxy won the CONCACAF Champions' Cup in 2000."
  }
]


Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.hand

This model's maximum context length is 16385 tokens. However, your messages resulted in 30023 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "In which year did LA Galaxy win the CONCACAF Champions' Cup?",
    "A": "1997",
    "B": "1999",
    "C": "2000",
    "D": "2003",
    "E": "2006",
    "answer": "C",
    "basis": "The LA Galaxy won the CONCACAF Champions' Cup in 2000."
  }
]


../data/wikipedia/l.parquet:  80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                         | 366/458 [29:03<07:41,  5.01s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 18203 tokens (14203 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "Which team has the highest win percentage in the Champions League Twenty20?",
    "A": "Chennai Super Kings",
    "B": "Mumbai Indians",
    "C": "Trinidad and Tobago",
    "D": "Royal Challengers Bangalore",
    "E": "Kolkata Knight Riders",
    "answer": "A",
    "basis": "The team with the highest win percentage is Chennai Super Kings with 61.36%. This is evident from the 'Results summary' section of the given text."
  }
]


../data/wikipedia/l.parquet:  84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                 | 385/458 [30:38<06:02,  4.97s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 18220 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "Where did Lincoln Stein complete his Doctor of Medicine and PhD?",
    "A": "Harvard Medical School",
    "B": "Massachusetts Institute of Technology",
    "C": "Cold Spring Harbor Laboratory",
    "D": "Ontario Institute for Cancer Research",
    "E": "Whitehead Institute of Biomedical Research",
    "answer": "A",
    "basis": "The text states that Lincoln Stein completed his Doctor of Medicine at Harvard Medical School and a PhD in Cell Biology at Harvard University."
  }
]


../data/wikipedia/l.parquet:  84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                | 387/458 [30:42<04:27,  3.76s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 16582 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is Lance Fortnow known for?",
    "A": "Results in computational complexity and interactive proof systems",
    "B": "Discovering the P versus NP problem",
    "C": "Developing quantum computing",
    "D": "Contributing to game theory",
    "E": "Studying genome sequencing",
    "answer": "A",
    "basis": "The text states that Lance Fortnow is known for major results in computational complexity and interactive proof systems."
  }
]


../data/wikipedia/l.parquet:  90%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                    | 413/458 [32:39<03:11,  4.25s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 17283 tokens (13283 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "Which player has the most league appearances in the given list?",
    "A": "Bertil Bäckvall",
    "B": "Sven Käll",
    "C": "Paulinho Guará",
    "D": "Patrik Andersson",
    "E": "Sören Mannberg",
    "answer": "B",
    "basis": "The player with the most league appearances in the given list is Sven Käll, with 99 appearances."
  }
]


../data/wikipedia/l.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 458/458 [36:21<00:00,  4.76s/it]
../data/wikipedia/m.parquet:   4%|████████▊                                                                                                                                                                                                       | 23/543 [02:15<49:23,  5.70s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/op

This model's maximum context length is 16385 tokens. However, you requested 17516 tokens (13516 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "When was the Men's Volleyball at the Games of the Small States of Europe first introduced?",
    "A": "1987",
    "B": "1989",
    "C": "1991",
    "D": "1993",
    "E": "1995",
    "answer": "A",
    "basis": "According to the text, the Men's Volleyball at the Games of the Small States of Europe was first introduced in 1987."
  }
]


../data/wikipedia/m.parquet:   7%|██████████████▏                                                                                                                                                                                                 | 37/543 [03:34<56:27,  6.70s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 19029 tokens (15029 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is Modigliani risk-adjusted performance (M2)?",
    "A": "A measure of the risk-adjusted returns of an investment portfolio",
    "B": "A measure of the risk-free rate of return",
    "C": "A measure of the volatility of the market",
    "D": "A measure of the excess return of a benchmark portfolio",
    "E": "A measure of the average risk-free rate for a period",
    "answer": "A",
    "basis": "The text states that Modigliani risk-adjusted performance (M2) is a measure of the risk-adjusted returns of some investment portfolio. It measures the returns of the portfolio, adjusted for the risk of the portfolio relative to that of some benchmark (e.g., the market)."
  }
]


../data/wikipedia/m.parquet:  25%|███████████████████████████████████████████████████                                                                                                                                                            | 134/543 [12:11<36:19,  5.33s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
[
  {
    "prompt": "What is Mark Alford's area of research?",
    "A": "Quantum mechanics",
    "B": "Astrophysics",
    "C": "Particle physics",
    "D": "Thermodynamics",
    "E": "None of the above",
    "answer": "B",
    "basis": "The text states that Mark Alford researches dense matter inside neutron stars."
  }
]


../data/wikipedia/m.parquet:  32%|██████████████████████████████████████████████████████████████████▋                                                                                                                                            | 175/543 [21:31<31:54,  5.20s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 10 column 202 (char 506)
../data/wikipedia/m.parquet:  32%|███████████████████████████████████████████████████████████████████                                                      

Expecting ',' delimiter: line 10 column 202 (char 506)
[
  {
    "prompt": "What is the term used to describe the connection between the components of mechanically interlocked molecular architectures?",
    "A": "Supramolecular bond",
    "B": "Covalent bond",
    "C": "Chemical bond",
    "D": "Mechanical bond",
    "E": "Residual bond",
    "answer": "D",
    "basis": "The term used to describe the connection between the components of mechanically interlocked molecular architectures is a mechanical bond. This is mentioned in the text: 'The terminology "mechanical bond" has been coined to describe the connection between the components of mechanically interlocked molecular architectures.'"
  }
]


../data/wikipedia/m.parquet:  34%|██████████████████████████████████████████████████████████████████████▌                                                                                                                                        | 185/543 [22:20<27:33,  4.62s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 32690 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "Who was the director of MURA during its formative years?",
    "A": "Donald Kerst",
    "B": "Keith Symon",
    "C": "Tihiro Ohkawa",
    "D": "John F. Kennedy",
    "E": "Lyndon B. Johnson",
    "answer": "A",
    "basis": "In its formative years, Donald Kerst was the director of MURA."
  }
]


../data/wikipedia/m.parquet:  55%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                                             | 297/543 [32:27<18:05,  4.41s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 17684 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the Molucca Sea Plate classified as?",
    "A": "A convergent plate",
    "B": "A divergent plate",
    "C": "A transform plate",
    "D": "A microplate",
    "E": "A subduction plate",
    "answer": "D",
    "basis": "The Molucca Sea Plate has been classified by scientists as a fully subducted microplate that is part of the Molucca Sea Collision Complex."
  }
]


../data/wikipedia/m.parquet:  72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                          | 389/543 [40:12<11:22,  4.43s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 10 column 202 (char 723)
../data/wikipedia/m.parquet:  72%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting ',' delimiter: line 10 column 202 (char 723)
[
  {
    "prompt": "What is the original use of the term 'modulo' in mathematics?",
    "A": "To assert that two distinct mathematical objects can be regarded as equivalent",
    "B": "To refer to the remainder of the numerical division of two numbers",
    "C": "To map a functor to a category by highlighting or defining remainders",
    "D": "To declare things equivalent that otherwise would be considered distinct",
    "E": "To factor out a normal subgroup (or an ideal) from a group (or ring)",
    "answer": "B",
    "basis": "The original use of the term 'modulo' in mathematics is to refer to the remainder of the numerical division of two numbers. This is stated in the text: 'Gauss originally intended to use "modulo" as follows: given the integers a, b and n, the expression a ≡ b (mod n) (pronounced "a is congruent to b modulo n") means that a − b is an integer multiple of n, or equivalently, a and b both leave the same remaind

../data/wikipedia/m.parquet:  83%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                   | 449/543 [45:31<07:18,  4.66s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 21280 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the function of membrane-bound polyribosomes?",
    "A": "Produce proteins for intracellular use",
    "B": "Produce proteins for exocytosis",
    "C": "Produce proteins for endocytosis",
    "D": "Produce proteins for cell division",
    "E": "Produce proteins for DNA replication",
    "answer": "B",
    "basis": "According to the text, membrane-bound polyribosomes usually produce proteins that are used within the cell membrane or are expelled from the cell via exocytosis."
  }
]


../data/wikipedia/m.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 543/543 [53:23<00:00,  5.90s/it]
../data/wikipedia/n.parquet:  32%|██████████████████████████████████████████████████████████████████▉                                                                                                                                             | 84/261 [06:45<15:00,  5.09s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
  

Invalid \escape: line 10 column 241 (char 700)
[
  {
    "prompt": "What is the definition of a negligible function in mathematics?",
    "A": "A function that approaches zero as x approaches infinity",
    "B": "A function that approaches zero as x approaches zero",
    "C": "A function that approaches infinity as x approaches zero",
    "D": "A function that approaches infinity as x approaches infinity",
    "E": "A function that has a constant value regardless of the value of x",
    "answer": "A",
    "basis": "According to the given text, a negligible function is defined as a function that approaches zero as x approaches infinity. This is stated in the first sentence of the text: 'In mathematics, a negligible function is a function \mu:\mathbb{N}\to\mathbb{R} such that for every positive integer c there exists an integer Nc such that for all x > Nc, :|\mu(x)|<\frac{1}{x^c}.'"
  }
]


../data/wikipedia/n.parquet:  54%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                               | 141/261 [11:36<09:19,  4.66s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 50112 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the characteristic feature of napoleonite?",
    "A": "Rounded lumps",
    "B": "Concentric zones of light and dark colors",
    "C": "Radial arrangement of crystals",
    "D": "All of the above",
    "E": "None of the above",
    "answer": "D",
    "basis": "The text states that napoleonite is characterized by rounded lumps, concentric zones of light and dark colors, and a radial arrangement of crystals."
  }
]


../data/wikipedia/n.parquet:  76%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                 | 199/261 [16:20<05:38,  5.46s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 18024 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the main feature that distinguishes geophysics from geology?",
    "A": "Direct access to the rock",
    "B": "Remote sensing",
    "C": "Data acquisition",
    "D": "Geological interpretation",
    "E": "Data processing",
    "answer": "B",
    "basis": "In the text, it is mentioned that the main feature that distinguishes geophysics from geology is that it involves remote sensing. Various physical phenomena are used to probe below the surface where scientists cannot directly access the rock."
  }
]


../data/wikipedia/n.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 261/261 [21:46<00:00,  5.01s/it]
../data/wikipedia/number.parquet:  79%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                          | 34/43 [02:58<00:45,  5.07s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/op

This model's maximum context length is 16385 tokens. However, your messages resulted in 18007 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "Which player scored the most tries in the 2015 Rugby World Cup?",
    "A": "Julian Savea",
    "B": "Nehe Milner-Skudder",
    "C": "Juan Imhoff",
    "D": "Bryan Habana",
    "E": "JP Pietersen",
    "answer": "A",
    "basis": "The player who scored the most tries in the 2015 Rugby World Cup was Julian Savea."
  }
]


../data/wikipedia/number.parquet: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 43/43 [03:44<00:00,  5.22s/it]
../data/wikipedia/o.parquet:   6%|████████████▋                                                                                                                                                                                                    | 9/148 [00:49<13:13,  5.71s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
[
  {
    "prompt": "What is the purpose of an Oncology Information System (OIS)?",
    "A": "To manage departmental, administrative, and clinical activities in cancer care",
    "B": "To track and manage radiology requests and workflow",
    "C": "To manage patient records more generally",
    "D": "To support medical information management in all healthcare departments",
    "E": "To provide financial control and billing services for cancer patients",
    "answer": "A",
    "basis": "The purpose of an Oncology Information System (OIS) is to manage departmental, administrative, and clinical activities in cancer care. The OIS aggregates information into a complete oncology-specific electronic health record and supports the delivery of integrated care and long-term treatment for cancer patients."
  }
]


../data/wikipedia/o.parquet:  24%|█████████████████████████████████████████████████▏                                                                                                                                                              | 35/148 [08:03<09:15,  4.91s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 18728 tokens (14728 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the main cause of ocean acidification in the Great Barrier Reef?",
    "A": "Rise in sea surface temperature",
    "B": "Decrease in aragonite levels",
    "C": "Increase in dissolved inorganic carbon",
    "D": "Rise in atmospheric carbon dioxide",
    "E": "Decrease in pH of the ocean",
    "answer": "D",
    "basis": "The main cause of ocean acidification in the Great Barrier Reef is the rise in atmospheric carbon dioxide, which is taken up by the ocean. This process increases the acidity of the water, leading to a decrease in pH."
  }
]


../data/wikipedia/o.parquet:  78%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                              | 115/148 [14:33<02:43,  4.95s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
[
  {
    "prompt": "During which period were the coal seams in the Oldham Coalfield laid down?",
    "A": "Carboniferous period",
    "B": "Middle Ages",
    "C": "Industrial Revolution",
    "D": "19th century",
    "E": "20th century",
    "answer": "A",
    "basis": "The text states that the coal seams in the Oldham Coalfield were laid down in the Carboniferous period."
  }
]


../data/wikipedia/o.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 148/148 [22:06<00:00,  8.96s/it]
../data/wikipedia/other.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:49<00:00,  4.92s/it]
../data/wikipedia/p.parquet:  10%|█████████████████████▊                                                                                                                                                                                          | 53/506 [04:54<41:03,  5.44s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.

This model's maximum context length is 16385 tokens. However, your messages resulted in 17776 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the Pardy Island Formation?",
    "A": "A formation found in Newfoundland",
    "B": "A formation found in Labrador",
    "C": "A formation found in both Newfoundland and Labrador",
    "D": "A formation found in Prince Edward Island",
    "E": "A formation found in Nova Scotia",
    "answer": "A",
    "basis": "The text explicitly states that the Pardy Island Formation is a formation cropping out in Newfoundland."
  }
]


../data/wikipedia/p.parquet:  11%|███████████████████████▍                                                                                                                                                                                        | 57/506 [05:12<38:41,  5.17s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 17788 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the pEDA parameter?",
    "A": "A scale used to measure the pi-electron donating ability of a substituent",
    "B": "A scale used to measure the pi-electron withdrawing ability of a substituent",
    "C": "A scale used to measure the resonance effect of a substituent",
    "D": "A scale used to measure the mesomeric effect of a substituent",
    "E": "A scale used to measure the substituent constants",
    "answer": "A",
    "basis": "The pEDA parameter is described as a pi-electron substituent effect scale, specifically measuring the pi-electron donating ability of a substituent. The more positive the value of pEDA, the more pi-electron donating the substituent is."
  }
]


../data/wikipedia/p.parquet:  25%|███████████████████████████████████████████████████▌                                                                                                                                                           | 126/506 [11:24<37:57,  5.99s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 10 column 226 (char 539)
../data/wikipedia/p.parquet:  25%|███████████████████████████████████████████████████▉                                                                     

Expecting ',' delimiter: line 10 column 226 (char 539)
[
  {
    "prompt": "What is the term used to describe an ancestral character shared by all members of a clade, which does not distinguish the clade from other clades?",
    "A": "Plesiomorphy",
    "B": "Symplesiomorphy",
    "C": "Apomorphy",
    "D": "Synapomorphy",
    "E": "Autapomorphy",
    "answer": "B",
    "basis": "The term used to describe an ancestral character shared by all members of a clade, which does not distinguish the clade from other clades is symplesiomorphy. The text states, 'In phylogenetics, a plesiomorphy ("near form") and symplesiomorphy are synonyms for an ancestral character shared by all members of a clade, which does not distinguish the clade from other clades.'"
  }
]


../data/wikipedia/p.parquet:  34%|██████████████████████████████████████████████████████████████████████▊                                                                                                                                        | 173/506 [15:32<29:45,  5.36s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 30625 tokens. Please reduce the length of the messages.
[
    {
        "prompt": "What is the Porter Shale?",
        "A": "A geologic formation in Washington (state)",
        "B": "A fossil dating technique",
        "C": "A Paleogene period artifact",
        "D": "A stratigraphic unit in Washington (state)",
        "E": "A Paleontology study in Washington (state)",
        "answer": "A",
        "basis": "The text explicitly states that the Porter Shale is a geologic formation in Washington (state)."
    }
]


../data/wikipedia/p.parquet:  35%|████████████████████████████████████████████████████████████████████████▊                                                                                                                                      | 178/506 [15:51<24:22,  4.46s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 18020 tokens (14020 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the purpose of using potassium fluoride on alumina in organic synthesis?",
    "A": "To induce alkylation reactions",
    "B": "To catalyze oxidation reactions",
    "C": "To promote reduction reactions",
    "D": "To inhibit polymerization reactions",
    "E": "To enhance esterification reactions",
    "answer": "A",
    "basis": "The purpose of using potassium fluoride on alumina in organic synthesis is to induce alkylation reactions. This is mentioned in the given text: 'It is a base which is used in organic synthesis. It was originally introduced in 1979 by Ando et al. for inducing alkylation reactions.'"
  }
]


../data/wikipedia/p.parquet:  38%|██████████████████████████████████████████████████████████████████████████████▉                                                                                                                                | 193/506 [17:03<30:15,  5.80s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 16445 tokens (12445 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the paternal age effect?",
    "A": "The statistical relationship between the father's age at conception and biological effects on the child.",
    "B": "The statistical relationship between the mother's age at conception and biological effects on the child.",
    "C": "The statistical relationship between the parents' age at conception and biological effects on the child.",
    "D": "The statistical relationship between the child's age at conception and biological effects on the father.",
    "E": "The statistical relationship between the child's age at conception and biological effects on the mother.",
    "answer": "A",
    "basis": "The text states that the paternal age effect is the statistical relationship between the father's age at conception and biological e

../data/wikipedia/p.parquet:  42%|███████████████████████████████████████████████████████████████████████████████████████▌                                                                                                                       | 214/506 [18:45<25:40,  5.28s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 4 column 26 (char 132)
../data/wikipedia/p.parquet:  42%|███████████████████████████████████████████████████████████████████████████████████████▉                                           

Invalid \escape: line 4 column 26 (char 132)
[
  {
    "prompt": "What is the formula for the ponderomotive energy in terms of the laser intensity I?",
    "A": "U_p = {e^2 E^2 \over 4m \omega_0^2}",
    "B": "U_p = {e^2 I \over 2 c \epsilon_0 m \omega_0^2}",
    "C": "U_p = {2e^2 \over c \epsilon_0 m} \cdot {I \over 4\omega_0^2}",
    "D": "U_p = \frac{E^2}{4\omega_0^2}",
    "E": "U_p = 9.33 \cdot I(10^{14} \mathrm{W/cm}^2) \cdot \lambda(\mathrm{\mu m})^2",
    "answer": "C",
    "basis": "The formula for the ponderomotive energy in terms of the laser intensity I is U_p = {2e^2 \over c \epsilon_0 m} \cdot {I \over 4\omega_0^2}. This can be derived from the given equation U_p={e^2 I \over 2 c \epsilon_0 m \omega_0^2} by substituting I=c\epsilon_0 E^2/2 and simplifying."
  }
]


../data/wikipedia/p.parquet:  52%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                   | 263/506 [22:54<24:24,  6.03s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 10 column 197 (char 739)
../data/wikipedia/p.parquet:  52%|████████████████████████████████████████████████████████████████████████████████████████████████████████████             

Expecting ',' delimiter: line 10 column 197 (char 739)
[
  {
    "prompt": "According to the principle of maximum work, what is the relationship between pure chemical reactions and heat evolution?",
    "A": "Pure chemical reactions are always accompanied by heat evolution.",
    "B": "Pure chemical reactions are sometimes accompanied by heat evolution.",
    "C": "Pure chemical reactions are never accompanied by heat evolution.",
    "D": "Pure chemical reactions are accompanied by heat absorption.",
    "E": "Pure chemical reactions are not affected by heat evolution.",
    "answer": "A",
    "basis": "The principle of maximum work states that every pure chemical reaction is accompanied by the evolution of heat. This is mentioned in the text: 'Berthelot's version was essentially: "every pure chemical reaction is accompanied by evolution of heat."'"
  }
]


../data/wikipedia/p.parquet:  55%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                             | 277/506 [24:00<20:18,  5.32s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 17447 tokens (13447 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is pressure?",
    "A": "The force applied perpendicular to the surface of an object per unit area",
    "B": "The force applied parallel to the surface of an object per unit area",
    "C": "The force applied perpendicular to the surface of an object per unit volume",
    "D": "The force applied parallel to the surface of an object per unit volume",
    "E": "None of the above",
    "answer": "A",
    "basis": "According to the text, pressure is defined as the force applied perpendicular to the surface of an object per unit area."
  }
]


../data/wikipedia/p.parquet:  66%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                     | 336/506 [28:58<14:20,  5.06s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 18808 tokens (14808 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
    {
        "prompt": "What is the ultimate abstract principle of actual existence according to Whitehead's Process and Reality?",
        "A": "Creativity",
        "B": "Efficient causality",
        "C": "Singular causality",
        "D": "Nomic causality",
        "E": "Novelty",
        "answer": "A",
        "basis": "The ultimate abstract principle of actual existence for Whitehead is creativity. It is manifest in what can be called 'singular causality' and is a process of becoming, a creative advance into novelty."
    }
]


../data/wikipedia/p.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 506/506 [43:02<00:00,  5.10s/it]
../data/wikipedia/q.parquet:  40%|████████████████████████████████████████████████████████████████████████████████████▌                                                                                                                            | 17/42 [01:30<02:28,  5.93s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
  

Invalid \escape: line 4 column 14 (char 88)
[
  {
    "prompt": "What is the Hamiltonian of the quantum clock model?",
    "A": "-J \left( \sum_{ \langle i, j \rangle} (Z^\dagger_i Z_j + Z_i Z^\dagger_j ) + g \sum_j (X_j + X^\dagger_j) \right)",
    "B": "-J \left( \sum_{ \langle i, j \rangle} (Z^\dagger_i Z_j + Z_i Z^\dagger_j ) - g \sum_j (X_j + X^\dagger_j) \right)",
    "C": "-J \left( \sum_{ \langle i, j \rangle} (Z^\dagger_i Z_j - Z_i Z^\dagger_j ) + g \sum_j (X_j + X^\dagger_j) \right)",
    "D": "-J \left( \sum_{ \langle i, j \rangle} (Z^\dagger_i Z_j - Z_i Z^\dagger_j ) - g \sum_j (X_j + X^\dagger_j) \right)",
    "E": "-J \left( \sum_{ \langle i, j \rangle} (Z^\dagger_i Z_j - Z_i Z^\dagger_j ) - g \sum_j (X_j - X^\dagger_j) \right)",
    "answer": "A",
    "basis": "The Hamiltonian of the quantum clock model is given by H = -J \left( \sum_{ \langle i, j \rangle} (Z^\dagger_i Z_j + Z_i Z^\dagger_j ) + g \sum_j (X_j + X^\dagger_j) \right). This is stated in the text: 'The Hamil

../data/wikipedia/q.parquet:  93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████               | 39/42 [03:32<00:13,  4.66s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 5 column 16 (char 131)
../data/wikipedia/q.parquet:  95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 5 column 16 (char 131)
[
  {
    "prompt": "What is the conductance of a quantum point contact quantized in units of?",
    "A": "2e^2/h",
    "B": "2e^2/\hbar",
    "C": "2e^2",
    "D": "2h",
    "E": "2e",
    "answer": "A",
    "basis": "The conductance of a quantum point contact is quantized in units of 2e^2/h, as mentioned in the text: 'The conductance of a QPC is quantized in units of 2e^2/h, the so-called conductance quantum.'"
  }
]


../data/wikipedia/q.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 42/42 [03:50<00:00,  5.49s/it]
../data/wikipedia/r.parquet:  46%|██████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                                                | 155/338 [12:19<11:45,  3.86s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/op

This model's maximum context length is 16385 tokens. However, your messages resulted in 64369 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is Richard Hammond's profession?",
    "A": "Chemist",
    "B": "Biologist",
    "C": "Physicist",
    "D": "Astronomer",
    "E": "Geologist",
    "answer": "C",
    "basis": "The text states that Richard Hammond is a theoretical physicist and works for the United States Army Research Laboratory."
  }
]


../data/wikipedia/r.parquet:  61%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                | 207/338 [16:23<11:44,  5.38s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, your messages resulted in 48632 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What are the two chemical reactions discovered by Robin Ferrier?",
    "A": "Ferrier rearrangement and Ferrier carbocyclization",
    "B": "Ferrier glycosidation and Ferrier carbocyclization",
    "C": "Ferrier rearrangement and Fischer glycosidation",
    "D": "Ferrier rearrangement and Calvin glycosidation",
    "E": "Ferrier glycosidation and Calvin rearrangement",
    "answer": "A",
    "basis": "The text states that Robin Ferrier discovered two chemical reactions, the Ferrier rearrangement and the Ferrier carbocyclization."
  }
]


Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.hand

This model's maximum context length is 16385 tokens. However, you requested 16777 tokens (12777 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What are the two chemical reactions discovered by Robin Ferrier?",
    "A": "Ferrier rearrangement and Ferrier carbocyclization",
    "B": "Ferrier glycosidation and Ferrier carbocyclization",
    "C": "Ferrier rearrangement and Fischer glycosidation",
    "D": "Ferrier rearrangement and Calvin glycosidation",
    "E": "Ferrier glycosidation and Calvin rearrangement",
    "answer": "A",
    "basis": "The text states that Robin Ferrier discovered two chemical reactions, the Ferrier rearrangement and the Ferrier carbocyclization."
  }
]


../data/wikipedia/r.parquet:  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                       | 300/338 [23:43<03:11,  5.04s/it]

Invalid \escape: line 5 column 23 (char 129)
[
  {
    "prompt": "What is a regular numerical predicate?",
    "A": "A kind of relation over integers",
    "B": "A subset of \mathbb N^r for some arity r",
    "C": "A formal language that is regular",
    "D": "A monadic second order formula",
    "E": "A first order logic formula",
    "answer": "B",
    "basis": "According to the given text, a regular numerical predicate is defined as a subset of \mathbb N^r for some arity r."
  }
]


Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 5 column 23 (char 129)
../data/wikipedia/r.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 338/338 [26:44<00:00,  4.75s/it]
../data/wikipedia/s.parquet:   4%|████████▋                                                                                                                         

Invalid \escape: line 5 column 31 (char 177)
[
  {
    "prompt": "What is the stopping rule in the Sequential Probability Ratio Test (SPRT)?",
    "A": "Continue monitoring when a < S_i < b",
    "B": "Accept H_1 when S_i \geq b",
    "C": "Accept H_0 when S_i \leq a",
    "D": "Both B and C",
    "E": "None of the above",
    "answer": "D",
    "basis": "The stopping rule in SPRT is that if S_i \geq b, H_1 is accepted, and if S_i \leq a, H_0 is accepted. Therefore, the correct answer is both B and C."
  }
]


../data/wikipedia/s.parquet:  22%|█████████████████████████████████████████████                                                                                                                                                                  | 135/621 [10:53<46:19,  5.72s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream

This model's maximum context length is 16385 tokens. However, you requested 17554 tokens (13554 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is a SNPSTR?",
    "A": "A compound genetic marker composed of one or more SNPs and one microsatellite (STR)",
    "B": "A database that contains all SNPSTRs in five model genomes",
    "C": "A type of genetic marker suitable for drawing population genetic inferences",
    "D": "A combination of single nucleotide polymorphisms (SNPs) and microsatellites (STRs)",
    "E": "A type of repetitive DNA sequence",
    "answer": "A",
    "basis": "The text states that a SNPSTR is a compound genetic marker composed of one or more SNPs and one microsatellite (STR)."
  }
]


../data/wikipedia/s.parquet:  60%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                                   | 372/621 [29:58<18:12,  4.39s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 10 column 123 (char 465)
../data/wikipedia/s.parquet:  60%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎    

Invalid \escape: line 10 column 123 (char 465)
[
  {
    "prompt": "What is the electronic Hamiltonian in atomic units for the 'spherium' model?",
    "A": "-(abla_1^2/2) - (abla_2^2/2) + (1/u)",
    "B": "-(abla_1^2/2) + (abla_2^2/2) + (1/u)",
    "C": "(abla_1^2/2) - (abla_2^2/2) + (1/u)",
    "D": "(abla_1^2/2) + (abla_2^2/2) + (1/u)",
    "E": "None of the above",
    "answer": "A",
    "basis": "The given text states that the electronic Hamiltonian in atomic units for the 'spherium' model is given by: \hat{H} = -(abla_1^2/2) - (abla_2^2/2) + (1/u)."
  }
]


../data/wikipedia/s.parquet:  78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                             | 486/621 [39:10<10:17,  4.58s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 43, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 10 column 1294 (char 1772)
../data/wikipedia/s.parquet:  78%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 10 column 1294 (char 1772)
[
    {
        "prompt": "What is the purpose of the water boundary layer in pan frying?",
        "A": "To prevent the proteins on the surface of the meat from binding with the oil or pan surface",
        "B": "To increase the thermal conductivity of the pan",
        "C": "To enhance the flavor of the food",
        "D": "To create a non-stick surface for easy cleaning",
        "E": "To increase the heat transfer between the burner and the food",
        "answer": "A",
        "basis": "In pan frying, the water that exits the meat forms a barrier between the meat and the oil or the surface of the pan. This barrier is critical for the success of pan frying meat. When meat cooks, the proteins on the surface of the meat denature because of the heat. This means that many of the secondary bonds that give the proteins their shape are broken. The protein molecules want to reform those interactions to return to their most thermodynamically 

../data/wikipedia/s.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 621/621 [50:51<00:00,  4.91s/it]
../data/wikipedia/t.parquet:  76%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                  | 276/365 [24:48<07:31,  5.07s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/op

This model's maximum context length is 16385 tokens. However, you requested 19543 tokens (15543 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "According to Lee Smolin's book, 'The Life of the Cosmos', what principle does he apply to the birth of universes?",
    "A": "Natural selection",
    "B": "Quantum mechanics",
    "C": "Black hole collapse",
    "D": "Heat death",
    "E": "Darwinian selective pressures",
    "answer": "A",
    "basis": "In the book, Smolin details his Fecund universes which applies the principle of natural selection to the birth of universes."
  }
]


../data/wikipedia/t.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 365/365 [32:59<00:00,  5.42s/it]
../data/wikipedia/u.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 70/70 [06:29<00:00,  5.56s/it]
../data/wikipedia/v.parquet: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 122/122 [11:32<00:00,  5.68s/it]
../data/wikipedia/w.parquet:  93%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▋        | 155/167 [13:58<01:00,  5.03s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py",

Expecting ',' delimiter: line 10 column 222 (char 411)
[
  {
    "prompt": "When was the Woods Hole Oceanographic Institution established?",
    "A": "1927",
    "B": "1930",
    "C": "1950",
    "D": "1977",
    "E": "1985",
    "answer": "B",
    "basis": "The Woods Hole Oceanographic Institution was established in 1930, as mentioned in the text: 'The committee's recommendation for establishing a permanent independent research laboratory on the East Coast to "prosecute oceanography in all its branches" led to the founding in 1930 of the Woods Hole Oceanographic Institution.'"
  }
]


../data/wikipedia/w.parquet: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 167/167 [15:09<00:00,  5.44s/it]
../data/wikipedia/x.parquet: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [01:11<00:00,  5.14s/it]
../data/wikipedia/y.parquet:   3%|█████▌                                                                                                                                                                                                            | 1/38 [00:07<04:26,  7.21s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create

This model's maximum context length is 16385 tokens. However, you requested 16953 tokens (12953 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "Where did Yash Pal study for his BSc Honours in Physics?",
    "A": "Tata Institute of Fundamental Research",
    "B": "University of the Punjab",
    "C": "Delhi University",
    "D": "Panjab University",
    "E": "Massachusetts Institute of Technology",
    "answer": "B",
    "basis": "According to the text, Yash Pal studied for his BSc Honours in Physics at the Lahore campus of the undivided University of the Punjab."
  }
]


../data/wikipedia/y.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 38/38 [03:17<00:00,  5.19s/it]
../data/wikipedia/z.parquet:  45%|██████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                                                  | 14/31 [01:10<01:18,  4.63s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_41623/2705274036.py", line 42, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_41623/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/op

This model's maximum context length is 16385 tokens. However, your messages resulted in 17178 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the main purpose of a zenith camera?",
    "A": "To observe Earth's gravity field",
    "B": "To capture images of stars near the zenith",
    "C": "To measure the tilt of the telescope axis",
    "D": "To determine the geodetic coordinates",
    "E": "To track and scan celestial objects",
    "answer": "A",
    "basis": "The text states that a zenith camera is used primarily for the local surveys of Earth's gravity field."
  }
]


../data/wikipedia/z.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [02:34<00:00,  4.98s/it]


In [12]:
 df_texts.to_csv(f"output_gpt3.5_generate/{now_date}.csv")