In [4]:
import openai
import pandas as pd
import tqdm
import glob

import warnings
warnings.filterwarnings("ignore")

In [5]:
with open("../apikey/apikey.txt", "r") as f:
    openai.api_key = f.readline().replace("\n", "")

In [6]:
def query_prompt(prompt, max_tokens=4000):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-16k",
        messages=[
            {"role": "system", "content": "You are a professor at a science university and creating a exam for your students."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens,
        temperature=0,
    )
    return response["choices"][0]["message"]["content"]

In [7]:
def get_df(fname):
    def f(categories):
        for cat in categories:
            for word in [
                "geology",
                "physics",
                "chemistry",
                "mathematical",
                "biology",
                "astronomy",
                "ecology",
                "genetics",
                "statistics",
                "theoretical"
            ]:
                if word.lower() in cat.lower():
                    return True
            
        return False
    
    def text_preprocess(text):
        return text.replace("===", "\n").replace("==", "\n")

    df = pd.read_parquet(fname)
    df_science = df[df["categories"].apply(f)]
    df_science["text"] = "title: " + df_science["title"] + "\n" + df_science["text"].apply(text_preprocess)
    return df_science.sample(len(df_science)//40)

In [8]:
files = glob.glob("../data/wikipedia/*.parquet")

In [9]:
import time

In [10]:
import pickle

In [11]:
import json

In [12]:
from datetime import datetime as dt
import os

In [13]:
texts = []

In [14]:
import traceback 
batch_size = 1

def make_prompt(series):
    prompt = f"""
You are an expert AI assistant who specializes in answering multiple-choice questions. You may use the context below if it helps you to answer the following multiple-choice question.
The output should be an array of json format, with "prompt" as the question statement, "A," "B," "C," "D," and "E" as choices, "answer" as the answer choice (one of A through E).

Context:
{series['text']}

"""
    return prompt

def f(series):
    if series["A"] != series["A"]:
        if type(series["choices"]) == dict:
            for key in ["A", "B", "C", "D", "E"]:
                series[key] = series["choices"][key]
        elif type(series["choices"] == list):
            for i, key in enumerate(["A", "B", "C", "D", "E"]):
                series[key] = series["choices"][i]
    return series

now_date = dt.now().strftime("%Y%m%d%H%M%S")

first = True
for file in files:
    if os.path.basename(file) in ["all.parquet"]:
        print(f"pass: {file}")
        continue
    df_science = get_df(file)
    
    for i in tqdm.tqdm(range(len(df_science)), desc=file):
        try:
            series = df_science.iloc[i]
            prompt = make_prompt(series)
            text = query_prompt(prompt)
            texts_json = json.loads(text)
            if first:
                print(texts_json)
                first = False
            if type(texts_json) == dict:
                text_json["wiki_id"] = series["id"]
                text_json["original_text"] = series["text"]
                texts.append(text_json)
            else:
                for text_json in texts_json:
                    text_json["wiki_id"] = series["id"]
                    text_json["original_text"] = series["text"]
                    texts.append(text_json)
        except Exception as e:
            print(e)
            traceback.print_exc()
            print(text)
        if i % 20 == 0:
            df_texts = pd.DataFrame(texts)
            df_texts = df_texts.apply(f, axis=1)

            df_texts.to_csv(f"output_gpt3.5_generate/{now_date}.csv")

../data/wikipedia/a.parquet:   2%|███▎                                                                                                                                                                                                              | 1/63 [00:09<09:55,  9.61s/it]

[{'prompt': 'What is an albedo feature in planetary geology?', 'A': 'A large area on the surface of a planet that shows a contrast in brightness or darkness with adjacent areas', 'B': 'A small area on the surface of a planet that shows a contrast in brightness or darkness with adjacent areas', 'C': 'A feature on the surface of a planet that is only visible through space probes', 'D': 'A feature on the surface of a planet that is only visible through optical telescopes', 'E': 'A feature on the surface of a planet that is only visible through ground-based telescopes using adaptive optics', 'answer': 'A'}]


../data/wikipedia/a.parquet:  22%|██████████████████████████████████████████████▍                                                                                                                                                                  | 14/63 [01:32<05:08,  6.30s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/a.parquet:  24%|█████████████████████████████████████████████████▊                                                      

Expecting value: line 1 column 1 (char 0)
Which of the following is NOT a type of assay based on the nature of the assay process?

A) End point assay
B) Kinetic assay
C) High throughput assay
D) Multiplex assay
E) Ligand binding assay

Answer: E) Ligand binding assay


../data/wikipedia/a.parquet:  32%|██████████████████████████████████████████████████████████████████▎                                                                                                                                              | 20/63 [02:13<04:57,  6.93s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/a.parquet:  33%|█████████████████████████████████████████████████████████████████████▋                                  

Expecting value: line 1 column 1 (char 0)
Which of the following statements about gene synthesis is true?

A. Gene synthesis requires template DNA for the construction and assembly of genes.
B. Artificial gene synthesis allows for the synthesis of DNA molecules with no limits on the nucleotide sequence or size.
C. Oligonucleotide synthesis can produce DNA sequences longer than a few hundred base pairs.
D. The error frequency in gene synthesis decreases with longer oligonucleotides.
E. Gene synthesis methods do not require the usage of chemically synthesized oligonucleotides.


../data/wikipedia/a.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [07:11<00:00,  6.85s/it]


pass: ../data/wikipedia/all.parquet


../data/wikipedia/b.parquet:  73%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                       | 33/45 [03:53<01:22,  6.86s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/b.parquet:  76%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which team holds the record for the longest winning streak in NCAA Division I women's basketball? 

A) UConn
B) Stanford
C) Baylor
D) Tennessee
E) Notre Dame

Answer: A) UConn


../data/wikipedia/b.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 45/45 [05:08<00:00,  6.85s/it]
../data/wikipedia/c.parquet:  82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                      | 63/77 [08:36<01:43,  7.40s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
   

Expecting value: line 1 column 1 (char 0)
Which of the following statements about the Central Asian Orogenic Belt is true?

A) The Central Asian Orogenic Belt is primarily composed of sedimentary rocks.
B) The Central Asian Orogenic Belt is located between the East European Craton and the South China Craton.
C) The Central Asian Orogenic Belt is one of the smallest orogenic belts in the world.
D) The Central Asian Orogenic Belt is not known for its mineral resources.
E) The formation history of the Central Asian Orogenic Belt is well understood and agreed upon by scientists.

Answer: B) The Central Asian Orogenic Belt is located between the East European Craton and the South China Craton.


../data/wikipedia/c.parquet:  91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                   | 70/77 [09:31<00:52,  7.47s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 38, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_29328/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)

This model's maximum context length is 16385 tokens. However, your messages resulted in 16473 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is callus in plant biology?",
    "A": "A mass of organized plant parenchyma cells",
    "B": "A type of plant hormone",
    "C": "A type of plant tissue culture medium",
    "D": "A type of plant growth regulator",
    "E": "A type of plant cell death",
    "answer": "A"
  }
]


../data/wikipedia/c.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 77/77 [10:19<00:00,  8.05s/it]
../data/wikipedia/d.parquet:  52%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                                   | 21/40 [02:27<02:29,  7.89s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
   

Expecting value: line 1 column 1 (char 0)
Which of the following is NOT a simplifying assumption of dynamic discrete choice models?

A) Flow utility is additively separable and linear in parameters
B) The optimization problem can be written as a Bellman equation
C) The states follow a Markov chain
D) The distribution of unobserved factors is assumed to be Type I extreme value
E) The decision process is uncertain about future transitions in the states and realizations of unobserved factors

Answer: D) The distribution of unobserved factors is assumed to be Type I extreme value


../data/wikipedia/d.parquet:  95%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌          | 38/40 [05:13<00:30, 15.19s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 38, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_29328/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)

This model's maximum context length is 16385 tokens. However, you requested 16866 tokens (12866 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What is the Debye model?",
    "A": "A method for estimating the phonon contribution to the specific heat in a solid",
    "B": "A method for estimating the electron contribution to the specific heat in a solid",
    "C": "A method for estimating the photon contribution to the specific heat in a solid",
    "D": "A method for estimating the phonon contribution to the specific heat in a liquid",
    "E": "None of the above",
    "answer": "A"
  },
  {
    "prompt": "What does the Debye model correctly predict?",
    "A": "The low-temperature dependence of the heat capacity of solids",
    "B": "The high-temperature dependence of the heat capacity of solids",
    "C": "The heat capacity of liquids",
    "D": "The heat capacity of gases",
    "E": "None of the above",
    "answ

../data/wikipedia/d.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40/40 [05:40<00:00,  8.51s/it]
../data/wikipedia/e.parquet:  24%|███████████████████████████████████████████████████                                                                                                                                                              | 11/45 [01:22<04:21,  7.69s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
   

Expecting value: line 1 column 1 (char 0)
Which of the following statements about eukaryotic initiation factor 3 (eIF3) is true?

A) eIF3 is composed of 13 identical subunits.
B) eIF3 is only involved in cap-dependent translation initiation.
C) eIF3 does not interact with other initiation factors.
D) eIF3 plays a role in programmed stop codon readthrough.
E) eIF3 is not conserved across eukaryotes.

Answer: D) eIF3 plays a role in programmed stop codon readthrough.


../data/wikipedia/e.parquet:  71%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                            | 32/45 [03:41<01:13,  5.68s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/e.parquet:  73%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which of the following statements is true about epigenetic priming?

A) Epigenetic priming refers to the modification of a cell's epigenome triggered by external biological triggers or pathways.
B) Epigenetic priming is a reversible process that converts chromatin from euchromatin to heterochromatin.
C) Epigenetic priming has only been investigated in neuroscience research.
D) Epigenetic priming is a permanent modification to a cell's epigenome.
E) Epigenetic priming is a targeted process that affects specific chromatin sites within a cell.

Answer: A) Epigenetic priming refers to the modification of a cell's epigenome triggered by external biological triggers or pathways.


../data/wikipedia/e.parquet:  89%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                       | 40/45 [04:49<00:41,  8.24s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/e.parquet:  91%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which of the following is NOT a mechanism involved in epigenetic modifications?

A) DNA methylation
B) Histone modifications
C) MicroRNA expression
D) Telomere shortening
E) Transcription factor binding


../data/wikipedia/e.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 45/45 [05:41<00:00,  7.59s/it]
../data/wikipedia/f.parquet:  69%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                | 20/29 [01:55<00:53,  6.00s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
   

Expecting value: line 1 column 1 (char 0)
Prompt: Which of the following topics is NOT covered in Fluctuation and Noise Letters?

A) Noise-enhanced phenomena including stochastic resonance
B) Cardiovascular dynamics
C) Quantum fluctuations
D) Statistical physics
E) Artificial intelligence and machine learning

Answer: E) Artificial intelligence and machine learning


../data/wikipedia/f.parquet:  83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                    | 24/29 [02:16<00:28,  5.71s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
[
  {
    "prompt": "What is a fermionic condensate?",
    "A": "A superfluid phase formed by fermionic particles at low temperatures",
    "B": "A superfluid phase formed by bosonic particles at low temperatures",
    "C": "A state of electrons in a superconductor",
    "D": "A state of helium-3 atoms at very low temperatures",
    "E": "A state of rubidium atoms at very low temperatures",
    "answer": "A"
  }
]


../data/wikipedia/f.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [07:46<00:00, 16.10s/it]
../data/wikipedia/g.parquet:   7%|██████████████▋                                                                                                                                                                                                   | 3/43 [00:18<04:01,  6.05s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
   

Expecting value: line 1 column 1 (char 0)
Prompt: Which region in Victoria is primarily made up of Palaeozoic rocks?

A) Northwest
B) Northeast
C) Southwest
D) Southeast
E) Central

Answer: D) Southeast


../data/wikipedia/g.parquet:  30%|███████████████████████████████████████████████████████████████▏                                                                                                                                                 | 13/43 [01:16<03:01,  6.04s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/g.parquet:  33%|████████████████████████████████████████████████████████████████████                                    

Expecting value: line 1 column 1 (char 0)
Which of the following statements is true about the Green-Kubo relations?

A. The Green-Kubo relations give the exact mathematical expression for transport coefficients in terms of integrals of time correlation functions.
B. The Green-Kubo relations are only valid for systems at equilibrium.
C. The Green-Kubo relations are derived from the fluctuation theorem.
D. The Green-Kubo relations are only applicable to linear transport coefficients.
E. The Green-Kubo relations are not applicable to fluctuations far from equilibrium.

Answer: A


../data/wikipedia/g.parquet:  65%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                         | 28/43 [02:48<01:27,  5.83s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/g.parquet:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which gene is proposed to be associated with spirituality according to the God Gene hypothesis?

A) Vesicular monoamine transporter 2 (VMAT2)
B) Serotonin transporter (SERT)
C) Dopamine receptor D4 (DRD4)
D) Monoamine oxidase A (MAOA)
E) Oxytocin receptor (OXTR)

Answer: A) Vesicular monoamine transporter 2 (VMAT2)


../data/wikipedia/g.parquet:  95%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎         | 41/43 [04:07<00:13,  6.57s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/g.parquet:  98%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which of the following statements is true about Gisela Dulko's career statistics?

A) She won a total of 8 singles titles.
B) She won a total of 17 doubles titles.
C) She won a total of 4 mixed doubles titles.
D) She won a total of 30 career titles.
E) She won a total of 185 tournaments.


../data/wikipedia/g.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 43/43 [04:19<00:00,  6.04s/it]
../data/wikipedia/h.parquet:  16%|█████████████████████████████████▏                                                                                                                                                                                | 6/38 [00:51<05:44, 10.78s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 38, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_29328/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/ope

This model's maximum context length is 16385 tokens. However, your messages resulted in 28795 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "Where did Hugh Osborn obtain his PhD?",
    "A": "University of Cambridge",
    "B": "University College London",
    "C": "University of Sussex",
    "D": "Queen Mary University of London",
    "E": "University of Glasgow",
    "answer": "B"
  },
  {
    "prompt": "In which year did Hugh Osborn obtain the first proof of the four-dimensional C-theorem?",
    "A": "1989",
    "B": "1990",
    "C": "2001",
    "D": "2004",
    "E": "2011",
    "answer": "A"
  },
  {
    "prompt": "Who found the nonperturbative proof of the four-dimensional C-theorem?",
    "A": "Hugh Osborn",
    "B": "John Cardy",
    "C": "Zohar Komargodski",
    "D": "Adam Schwimmer",
    "E": "Sigurd Zienau",
    "answer": "C"
  },
  {
    "prompt": "In collaboration with whom did Hugh Osborn obtain explicit expressions for the conformal blocks in four-di

../data/wikipedia/h.parquet:  29%|████████████████████████████████████████████████████████████▌                                                                                                                                                    | 11/38 [01:21<03:35,  7.97s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 38, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_29328/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)

This model's maximum context length is 16385 tokens. However, your messages resulted in 21572 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the traditional ecological knowledge of the Bisa people?",
    "A": "The Bisa people believe that caterpillars have been with them since time immemorial, as gifts from god.",
    "B": "The Bisa people believe that caterpillars are dangerous and should not be consumed.",
    "C": "The Bisa people believe that caterpillars are sacred and should only be consumed during specific rituals.",
    "D": "The Bisa people believe that caterpillars are a valuable source of nutrition and should be consumed regularly.",
    "E": "The Bisa people do not have any traditional ecological knowledge regarding caterpillars.",
    "answer": "A"
  }
]


../data/wikipedia/h.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 38/38 [04:03<00:00,  6.40s/it]
../data/wikipedia/i.parquet:  11%|████████████████████████                                                                                                                                                                                          | 4/35 [00:22<02:56,  5.68s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
   

Expecting value: line 1 column 1 (char 0)
Which of the following statements is true about Interatomic Coulombic Decay (ICD)?

A. ICD is a relaxation process that can only occur in atomic clusters.
B. ICD is a process that occurs when an atom or molecule is in a state energetically higher than the ionization threshold of other atoms or molecules in the neighborhood.
C. ICD is a process that competes with slow radiative decay and autoionization.
D. ICD is a process that can only occur after core-electron excitations.
E. ICD is a process that typically takes place on the picosecond time scale.

Answer: B


../data/wikipedia/i.parquet:  71%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                           | 25/35 [03:01<01:17,  7.73s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/i.parquet:  74%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which of the following is NOT a goal of the International Mammalian Genome Society (IMGS)?

A) Facilitating the creation of databases of genetic information
B) Organizing meetings for mammalian geneticists to share expertise
C) Coordinating the mapping and sequencing of model organisms
D) Promoting and coordinating the genetic and genomic study of mammals
E) Supervising the organization of genetic data into genetic maps and reference genomes

Answer: D) Promoting and coordinating the genetic and genomic study of mammals


../data/wikipedia/i.parquet:  77%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                               | 27/35 [03:16<01:00,  7.59s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/i.parquet:  80%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which organization has been the flagship conference of the APBioNet?

A. Thailand National Center for Genetic Engineering and Biotechnology (BIOTEC)
B. Asia Pacific Bioinformatics Network (APBioNet)
C. International Conference on Bioinformatics (InCoB)
D. Bioinformation journal
E. BMC Bioinformatics

Answer: C. International Conference on Bioinformatics (InCoB)


../data/wikipedia/i.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 35/35 [04:04<00:00,  6.98s/it]
../data/wikipedia/j.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [03:27<00:00,  6.90s/it]
../data/wikipedia/k.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [02:21<00:00,  8.30s/it]
../data/wikipedia/l.parquet:   9%|██████████████████▍                                                                                                                       

This model's maximum context length is 16385 tokens. However, your messages resulted in 33154 tokens. Please reduce the length of the messages.
{
  "prompt": "Which organization is responsible for statistics in the United States?",
  "A": "United Nations Secretariat",
  "B": "United States Census Bureau",
  "C": "Central Bureau of Statistics",
  "D": "National Bureau of Statistics",
  "E": "Statistics Division",
  "answer": "B"
}


../data/wikipedia/l.parquet:  28%|██████████████████████████████████████████████████████████▋                                                                                                                                                      | 16/57 [01:28<03:31,  5.17s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 38, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_29328/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)

This model's maximum context length is 16385 tokens. However, you requested 20352 tokens (16352 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "When was the Lincolnshire Naturalists' Union founded?",
    "A": "1893",
    "B": "1894",
    "C": "1895",
    "D": "1896",
    "E": "1897",
    "answer": "A"
  }
]


../data/wikipedia/l.parquet:  35%|█████████████████████████████████████████████████████████████████████████▎                                                                                                                                       | 20/57 [01:46<03:12,  5.21s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 38, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_29328/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)

This model's maximum context length is 16385 tokens. However, your messages resulted in 33591 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is the largest league victory for Austin FC?",
    "A": "5-0 v FC Cincinnati",
    "B": "0-4 @ San Jose Earthquakes",
    "C": "2-1 @ FC Dallas",
    "D": "0-3 @ LAFC",
    "E": "2-0 v Violette AC",
    "answer": "A"
  }
]


../data/wikipedia/l.parquet:  53%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                                                   | 30/57 [04:17<13:40, 30.40s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/l.parquet:  54%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which of the following statements is true based on the given context?

A) There were a total of 94 AFL debuts in 2017.
B) There were a total of 50 players who changed clubs in 2017.
C) The youngest player to debut in 2017 was 18 years and 255 days old.
D) There were a total of 23 players who were traded in 2016.
E) There were a total of 15 players who were free agents in 2016.

Answer: A) There were a total of 94 AFL debuts in 2017.


../data/wikipedia/l.parquet:  67%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                     | 38/57 [05:22<02:48,  8.89s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 38, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_29328/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)

This model's maximum context length is 16385 tokens. However, you requested 16576 tokens (12576 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "Laura Mersini-Houghton is a proponent of which hypothesis?",
    "A": "Quantum mechanics",
    "B": "Multiverse",
    "C": "Black hole formation",
    "D": "Gravitational dynamics",
    "E": "Hawking radiation",
    "answer": "B"
  }
]


../data/wikipedia/l.parquet:  72%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                          | 41/57 [05:34<01:38,  6.13s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/l.parquet:  74%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which of the following statements about the La Tène culture is true?

A) The La Tène culture developed and flourished during the late Iron Age.
B) The La Tène culture succeeded the Hallstatt culture without any cultural break.
C) The La Tène culture was influenced by Mediterranean cultures.
D) The La Tène culture was present in France, Belgium, Switzerland, Austria, England, and other regions.
E) All of the above.

Answer: E) All of the above.


../data/wikipedia/l.parquet:  89%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                      | 51/57 [06:35<00:36,  6.01s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/l.parquet:  91%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which player holds the current world record for the most goals scored in an international football match?

A) Archie Thompson
B) David Zdrilic
C) Shokhan Nooraldin Salihi
D) Hacène Lalmas
E) Malika-e-Noor

Answer: C) Shokhan Nooraldin Salihi


../data/wikipedia/l.parquet:  93%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎              | 53/57 [06:45<00:22,  5.64s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/l.parquet:  95%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which of the following is NOT a use case for lattice models in finance?

A. Valuing American options
B. Valuing European options
C. Valuing interest rate derivatives
D. Valuing exotic options
E. Valuing equity options

Answer: B. Valuing European options


../data/wikipedia/l.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 57/57 [07:09<00:00,  7.54s/it]
../data/wikipedia/m.parquet:   9%|██████████████████▊                                                                                                                                                                                               | 6/67 [00:39<06:37,  6.52s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 4 column 79 (char 179)
{
  "prompt": "Which of the following statements about matter collineation is true?",
  "choices": [
    "A. A matter collineation is a vector field that satisfies the condition, \(\mathcal{L}_X T_{ab}=0\)",
    "B. A matter collineation is a vector field that preserves the metric",
    "C. A matter collineation is a vector field that preserves the electric and magnetic fields",
    "D. A matter collineation is a vector field that preserves the energy density, pressure, and fluid flow vector field",
    "E. A matter collineation is a vector field that satisfies the Einstein field equations (EFE)"
  ],
  "answer": "A"
}


../data/wikipedia/m.parquet:  25%|█████████████████████████████████████████████████████                                                                                                                                                            | 17/67 [02:19<06:51,  8.22s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/m.parquet:  27%|████████████████████████████████████████████████████████▏                                               

Expecting value: line 1 column 1 (char 0)
Which of the following statements is true about Metamath?

A. Metamath is a programming language used for formalizing mathematical proofs.
B. Metamath is a database of proved theorems in various branches of mathematics.
C. Metamath is a proof checker program written in C.
D. Metamath is a formal language used for archiving, verifying, and studying mathematical proofs.
E. Metamath is a theorem prover program used for interactive browsing of formalized theorems.

Answer: D. Metamath is a formal language used for archiving, verifying, and studying mathematical proofs.


../data/wikipedia/m.parquet:  54%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                                | 36/67 [04:25<03:36,  6.98s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
[
  {
    "prompt": "Mei-Cheng Wang earned a bachelor's degree in mathematics from which university?",
    "choices": {
      "A": "Johns Hopkins University",
      "B": "University of California, Berkeley",
      "C": "National Tsing Hua University",
      "D": "Stanford University",
      "E": "Harvard University"
    },
    "answer": "C"
  }
]


../data/wikipedia/m.parquet:  64%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                          | 43/67 [10:17<06:52, 17.20s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/m.parquet:  66%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which of the following is NOT an advantage of using mixed oxidant solution for water disinfection compared to other methods?

A) Higher disinfecting power
B) Stable residual chlorine in water
C) Improved taste and smell
D) Elimination of biofilm
E) Lower cost of production


../data/wikipedia/m.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 67/67 [12:31<00:00, 11.22s/it]
../data/wikipedia/n.parquet:   6%|█████████████▏                                                                                                                                                                                                    | 2/32 [00:12<03:08,  6.28s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
   

Expecting value: line 1 column 1 (char 0)
Which of the following statements about Nordström's theory of gravitation is true?

A. Nordström's theory is a predecessor of general relativity.
B. Nordström's theory is in agreement with observation and experiment.
C. Nordström's theory is a tensor theory of gravitation.
D. Nordström's theory does not consider the effects of gravitation on the geometry of spacetime.
E. Nordström's theory is a self-consistent relativistic theory of gravitation.


../data/wikipedia/n.parquet:  28%|███████████████████████████████████████████████████████████                                                                                                                                                       | 9/32 [00:50<01:55,  5.04s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 38, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_29328/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)

This model's maximum context length is 16385 tokens. However, you requested 17842 tokens (13842 in the messages, 4000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "Which period does the Nanaimo Formation preserve fossils from?",
    "A": "Jurassic",
    "B": "Cretaceous",
    "C": "Triassic",
    "D": "Permian",
    "E": "Cambrian",
    "answer": "B"
  }
]


../data/wikipedia/n.parquet:  59%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                                     | 19/32 [01:46<01:19,  6.11s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/n.parquet:  62%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Prompt: Which of the following is NOT a team level statistic in network science based basketball analytics?

A) Team entropy
B) Uphill downhill flux
C) Success/Failure Ratio
D) Team clustering coefficient
E) Average path length

Answer: C) Success/Failure Ratio


../data/wikipedia/n.parquet:  72%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                          | 23/32 [02:18<01:07,  7.49s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/n.parquet:  75%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which of the following statements accurately describes the "no free lunch" theorem?

A. The theorem states that there is no universal optimization strategy that outperforms all other strategies on every possible problem.
B. The theorem states that all search algorithms are equivalent when their performance is averaged across all possible problems.
C. The theorem states that the computational cost of finding a solution is the same for all solution methods.
D. The theorem states that there is no advantage to specializing an algorithm to a specific problem.
E. The theorem states that the performance of search algorithms depends on the randomness of the objective function.


../data/wikipedia/n.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [03:06<00:00,  5.83s/it]
../data/wikipedia/number.parquet: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:33<00:00,  6.66s/it]
../data/wikipedia/o.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [01:52<00:00,  6.27s/it]
../data/wikipedia/other.parquet: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Question:
Which of the following is the main goal of the Protein Common Interface Database (ProtCID)?

A) To identify and cluster homodimeric and heterodimeric interfaces observed in crystal structures of homologous proteins.
B) To provide PyMol scripts for each cluster to produce similar images.
C) To compare homodimeric interfaces in all crystals that contain particular domain or chain architectures.
D) To report the number of crystal forms that contain a common interface.
E) To provide an independent check on publicly available annotations of biological interactions for PDB entries.

Answer: A) To identify and cluster homodimeric and heterodimeric interfaces observed in crystal structures of homologous proteins.


../data/wikipedia/p.parquet:  43%|█████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                                       | 27/63 [03:25<03:50,  6.40s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/p.parquet:  44%|████████████████████████████████████████████████████████████████████████████████████████████▉           

Expecting value: line 1 column 1 (char 0)
Which of the following statements about persistent carbenes is true?

A. Persistent carbenes are highly reactive and cannot be isolated as pure substances.
B. The stability of persistent carbenes is solely due to steric hindrance by bulky groups.
C. Triplet state carbenes have longer half-lives compared to singlet state carbenes.
D. Persistent carbenes can be prepared by deprotonation of precursor salts with strong bases.
E. Stable carbenes are not suitable for use as ligands in organometallic chemistry.


../data/wikipedia/p.parquet:  71%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                           | 45/63 [05:32<02:00,  6.71s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/p.parquet:  73%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which of the following statements about polyploidy is true?

A. Polyploidy is a condition in which the cells of an organism have more than one pair of chromosomes.
B. Polyploidy is only common in animals, not in plants.
C. Polyploidy can only occur due to abnormal cell division during mitosis.
D. Polyploidy is a rare occurrence in humans.
E. Polyploidy is a result of fusion of reduced gametes during meiosis.


../data/wikipedia/p.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [07:27<00:00,  7.11s/it]
../data/wikipedia/q.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:32<00:00,  6.41s/it]
../data/wikipedia/r.parquet:  33%|█████████████████████████████████████████████████████████████████████▋                                                                                                                                           | 14/42 [01:29<02:38,  5.66s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/

Expecting value: line 1 column 1 (char 0)
Which of the following statements accurately describes the Rhenohercynian Zone?

A) It is a fold belt formed during the Hercynian orogeny.
B) It consists of folded and thrust sedimentary rocks.
C) It was deposited in a back-arc basin along the southern margin of Laurussia.
D) It extends from Cornwall and Ireland in the west to the Harz mountains in central Germany.
E) All of the above.

Answer: E) All of the above.


../data/wikipedia/r.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 42/42 [04:15<00:00,  6.09s/it]
../data/wikipedia/s.parquet:  43%|█████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                                       | 33/77 [03:26<04:39,  6.35s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
   

Expecting value: line 1 column 1 (char 0)
Which of the following markers is NOT used by the SGM Plus DNA profiling system?

A) D2S1338
B) D3S1358
C) D16S539
D) D19S433
E) D21S11

Answer: A) D2S1338


../data/wikipedia/s.parquet:  58%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                                      | 45/77 [05:02<03:58,  7.45s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/s.parquet:  60%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which of the following statements about the special unitary group is true?

A. The special unitary group is a subgroup of the unitary group.
B. The special unitary group is a complex Lie group.
C. The special unitary group is isomorphic to the group of quaternions of norm 1.
D. The special unitary group is a simply-connected Lie group.
E. The special unitary group is a subgroup of the general linear group.


../data/wikipedia/s.parquet:  68%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                   | 52/77 [05:41<02:18,  5.53s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/s.parquet:  69%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Which of the following fields has Salvatore Torquato made significant contributions to?

A) Physics
B) Chemistry
C) Mathematics
D) Materials Science
E) All of the above

Answer: E) All of the above


../data/wikipedia/s.parquet:  74%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                      | 57/77 [06:08<01:56,  5.83s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 38, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_29328/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)

This model's maximum context length is 16385 tokens. However, your messages resulted in 23368 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What does the SWEAT hypothesis propose?",
    "A": "The Southwestern United States was once connected to East Antarctica.",
    "B": "The Southwestern United States was once connected to Australia.",
    "C": "The Southwestern United States was once connected to India.",
    "D": "The Southwestern United States was once connected to Canada.",
    "E": "The Southwestern United States was never connected to any other landmass.",
    "answer": "A"
  }
]


../data/wikipedia/s.parquet:  81%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                        | 62/77 [06:33<01:29,  5.94s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 38, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_29328/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)

This model's maximum context length is 16385 tokens. However, your messages resulted in 29785 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "What is a stenotherm?",
    "A": "An organism that can function at a wide range of different body temperatures",
    "B": "An organism that can only live or survive within a narrow temperature range",
    "C": "An organism that lives in deep sea environments",
    "D": "An organism that lives in polar regions",
    "E": "An organism that has a stable internal temperature",
    "answer": "B"
  }
]


../data/wikipedia/s.parquet:  96%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊        | 74/77 [07:42<00:17,  5.82s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/s.parquet:  97%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Prompt: Which of the following statements about Sphenacodon is true?

A) Sphenacodon had a tall dorsal sail similar to Dimetrodon.
B) Sphenacodon is known from New Mexico and the Utah–Arizona border region.
C) Sphenacodon ferox is larger in overall size compared to Sphenacodon ferocior.
D) Sphenacodon and Dimetrodon have the same type of neural spines along their back.
E) Sphenacodon is classified as a therapsid.

Answer: B) Sphenacodon is known from New Mexico and the Utah–Arizona border region.


../data/wikipedia/s.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 77/77 [08:01<00:00,  6.25s/it]
../data/wikipedia/t.parquet:   4%|█████████▎                                                                                                                                                                                                        | 2/45 [00:09<03:30,  4.90s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 38, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_29328/1456674105.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/ope

The server is overloaded or not ready yet.
[
  {
    "prompt": "Who introduced the three-domain system of biological classification?",
    "A": "Carl Woese",
    "B": "Otto Kandler",
    "C": "Mark Wheelis",
    "D": "Salvador Luria",
    "E": "Ernst Mayr",
    "answer": "A"
  }
]


../data/wikipedia/t.parquet:  20%|██████████████████████████████████████████                                                                                                                                                                        | 9/45 [02:36<05:51,  9.75s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/t.parquet:  22%|██████████████████████████████████████████████▍                                                         

Expecting value: line 1 column 1 (char 0)
Which of the following statements accurately describes the book "The Genesis Flood: The Biblical Record and its Scientific Implications"?

A) The book argues that the age of the Earth is well over 20 million years.
B) The book defends the theory of evolution and criticizes young Earth creationism.
C) The book was published in the late nineteenth century and had a significant impact on Christian views of the Flood.
D) The book received positive reviews from mainstream scientists and geologists.
E) The book was written by John C. Whitcomb and Henry M. Morris and elevated young Earth creationism to a position of fundamentalist orthodoxy.

Answer: E) The book was written by John C. Whitcomb and Henry M. Morris and elevated young Earth creationism to a position of fundamentalist orthodoxy.


../data/wikipedia/t.parquet:  31%|█████████████████████████████████████████████████████████████████                                                                                                                                                | 14/45 [04:00<05:39, 10.94s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/t.parquet:  33%|█████████████████████████████████████████████████████████████████████▋                                  

Expecting value: line 1 column 1 (char 0)
Which experiment directly observed the transverse Doppler effect and time dilation for the first time?

A) Michelson-Morley experiment
B) Kennedy-Thorndike experiment
C) Ives-Stilwell experiment
D) Hughes-Drever experiment
E) Sagnac effect


../data/wikipedia/t.parquet:  44%|████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                                                    | 20/45 [04:35<02:45,  6.61s/it]

Expecting value: line 1 column 1 (char 0)
Which of the following is NOT one of Tinbergen's four questions?

A) Function (adaptation)
B) Phylogeny (evolution)
C) Mechanism (causation)
D) Ontogeny (development)
E) Ecology (environment)


Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/t.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 45/45 [06:59<00:00,  9.33s/it]
../data/wikipedia/u.parquet:  12%|██████████████████████████▍                                                                            

Expecting value: line 1 column 1 (char 0)
Which player has the most caps for the Uruguay national football team?

A) Diego Godín
B) Luis Suárez
C) Edinson Cavani
D) Fernando Muslera
E) Maxi Pereira

Answer: A) Diego Godín


../data/wikipedia/u.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:46<00:00,  5.81s/it]
../data/wikipedia/v.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [01:31<00:00,  6.12s/it]
../data/wikipedia/w.parquet:  10%|█████████████████████                                                                                                                                                                                             | 2/20 [00:10<01:36,  5.36s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 38, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_29328/1456674105.p

This model's maximum context length is 16385 tokens. However, your messages resulted in 19129 tokens. Please reduce the length of the messages.
[
  {
    "prompt": "Who is William Kruskal?",
    "A": "An American mathematician and statistician",
    "B": "A British physicist",
    "C": "A German chemist",
    "D": "A French biologist",
    "E": "A Russian engineer",
    "answer": "A"
  }
]


../data/wikipedia/w.parquet:  85%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                               | 17/20 [01:29<00:16,  5.59s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
../data/wikipedia/w.parquet:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting value: line 1 column 1 (char 0)
Prompt: Who delivered the Witherby Memorial Lecture in 2019?

A) Arthur Landsborough Thomson
B) David Lack
C) H. N. Southern
D) Bob Furness
E) Claire Spottiswoode

Answer: D) Bob Furness


../data/wikipedia/w.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [01:42<00:00,  5.12s/it]
../data/wikipedia/x.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.74s/it]
../data/wikipedia/y.parquet:  75%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                    | 3/4 [00:15<00:05,  5.25s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_29328/324541624.py", line 39, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/

Expecting value: line 1 column 1 (char 0)
Which of the following statements is true about the Yukawa Institute for Theoretical Physics?

A) The institute was founded in 1944 by Yoshitaka Mimura.
B) The institute was named after the first Japanese citizen to receive the Nobel Prize in Physics.
C) The institute is located in Hiroshima, Japan.
D) The institute's research areas include non-equilibrium statistical physics and non-linear physics.
E) The institute has had 13 academic positions since its inception in 1952.

Answer: B



../data/wikipedia/z.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:18<00:00,  6.21s/it]


In [15]:
 df_texts.to_csv(f"output_gpt3.5_generate/{now_date}.csv")

In [16]:

df_texts["answer"].value_counts()

A                                                                                              435
B                                                                                              167
C                                                                                              144
D                                                                                               93
E                                                                                               84
Arturo Zychlinsky                                                                                3
It must have been published within the past 6 calendar years of the year of its nomination.      2
James Ivory                                                                                      2
Discovery of the autoimmune regulator and the AIRE gene (1997)                                   2
A and B                                                                                          1
Theodore C