In [1]:
import openai
import pandas as pd
import tqdm
import glob

import warnings
warnings.filterwarnings("ignore")

In [2]:
with open("../apikey/apikey.txt", "r") as f:
    openai.api_key = f.readline().replace("\n", "")

In [3]:
def query_prompt(prompt, max_tokens=1000):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens,
        temperature=0,
    )
    return response["choices"][0]["message"]["content"]

In [4]:
import random

In [5]:
def get_df(fname):
    def f(categories):
        for cat in categories:
            for word in [
                "geology",
                "physics",
                "chemistry",
                "mathematical",
                "biology",
                "astronomy",
                "ecology",
                "genetics",
                "statistics",
                "theoretical"
            ]:
                if word.lower() in cat.lower():
                    return True
            
        return False
    
    def text_preprocess(text):
        return text.replace("====", "\n\n").replace("===", "\n\n").replace("==", "\n\n")

    def sep_n(text, n=40):
        try:
            text = text.split(".")
            start_index = random.randint(0, len(text) - n)
            return ".".join(text[start_index:start_index+n])
        except:
            return None
    
    df = pd.read_parquet(fname)
    df_science = df[df["category"].apply(f)]
    df_science["text"] = df_science["text"].apply(text_preprocess)
    df_science["text"] = df_science["text"].apply(sep_n)
    df_science = df_science[df_science["text"].notnull()]
    return df_science.sample(len(df_science)//5)

In [6]:
files = glob.glob("../data/wikipedia_fixed/*.parquet")

In [7]:
import time

In [8]:
import pickle

In [9]:
import json

In [10]:
from datetime import datetime as dt
import os

In [11]:
texts = []

In [25]:
import traceback 
batch_size = 1

def make_prompt(series):
    prompt = f"""
You are a professor at a science university and are creating a test for your students.
Please create one exam based on the provided text, where you choose the most accurately statement from five options for the question.
The output should json format below:
{{"prompt": <the question text>, "answer" <answer(one of A through E)>, "A": <option A>, "B": <option B>, "C": <option C>, "D": <option D>, "E": <option E>}}

Context:
{series['text']}

Attention:
- The five opinions should be LONG sentences.

"""
    return prompt

def f(series):
    try:
        if series["A"] != series["A"]:
            if type(series["answer"]) == dict:
                for key in ["A", "B", "C", "D", "E"]:
                    series[key] = series["choices"][key]
            elif type(series["answer"] == list):
                for i, key in enumerate(["A", "B", "C", "D", "E"]):
                    series[key] = series["choices"][i]
    except:
        return series
    return series

now_date = dt.now().strftime("%Y%m%d%H%M%S")

first = True
for file in files:
    if os.path.basename(file) in ["all.parquet"]:
        print(f"pass: {file}")
        continue
    df_science = get_df(file)
    
    for i in tqdm.tqdm(range(len(df_science)), desc=file):
        try:
            series = df_science.iloc[i]
            prompt = make_prompt(series)
            text = query_prompt(prompt)
            texts_json = json.loads(text)
            if first:
                print(texts_json)
                first = False
            if type(texts_json) == dict:
                texts_json["wiki_id"] = series["id"]
                texts_json["original_text"] = series["text"]
                texts.append(texts_json)
                for col in ["A", "B", "C", "D", "E", "answer", "prompt"]:
                    if col not in texts_json:
                        print(f"{col} not existed: {texts_json}")
            else:
                for text_json in texts_json:
                    text_json["wiki_id"] = series["id"]
                    text_json["original_text"] = series["text"]
                    texts.append(text_json)
        except Exception as e:
            print(e)
            traceback.print_exc()
            print(text)
            time.sleep(10)
        if i % 20 == 0:
            df_texts = pd.DataFrame(texts)
            df_texts = df_texts.apply(f, axis=1)

            df_texts.to_parquet(f"output_gpt3.5_generate/{now_date}.parquet")

../data/wikipedia_fixed/a.parquet:   0%|▏                                                                                 | 1/388 [00:05<35:05,  5.44s/it]

{'prompt': "What is the title of Alan Tutton Johns' PhD thesis?", 'answer': 'C', 'A': 'The mechanism of propionic acid formation in fermentation', 'B': 'Advanced study in U.S.', 'C': 'The mechanism of propionic acid formation in fermentation with special reference to the rumen of the sheep', 'D': 'Honorary doctorate citation, Alan Johns, 1985', 'E': 'No. 47237'}


../data/wikipedia_fixed/a.parquet:   4%|███▌                                                                             | 17/388 [02:34<54:38,  8.84s/it]

Invalid \escape: line 1 column 72 (char 71)
{"prompt": "Which of the following statements is true about the Σ 1 0 {\displaystyle \Sigma _{1}^{0}} sets?", 
"answer": "C", 
"A": "Σ 1 0 {\displaystyle \Sigma _{1}^{0}} sets are closed sets in the usual topology on Baire space or Cantor space.", 
"B": "Σ 1 0 {\displaystyle \Sigma _{1}^{0}} sets are effectively open sets in the usual topology on Baire space or Cantor space.", 
"C": "Σ 1 0 {\displaystyle \Sigma _{1}^{0}} sets can be defined in the language of Peano arithmetic by a Σ 1 0 {\displaystyle \Sigma _{1}^{0}} formula.", 
"D": "Σ 1 0 {\displaystyle \Sigma _{1}^{0}} sets are subsets of Baire space or Cantor space that are computably enumerable.", 
"E": "Σ 1 0 {\displaystyle \Sigma _{1}^{0}} sets are sometimes called effectively closed sets."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 1 column 72 (char 71)
../data/wikipedia_fixed/a.parquet:  10%|████████▎                                                                        | 40/388 [05:59<50:43,  8.75s/it]

Extra data: line 1 column 677 (char 676)
{"prompt": "What is the Shuey equation used for?", "answer": "C", "A": "To determine the amplitudes of reflected and refracted waves at a planar interface for an incident P-wave", "B": "To calculate the reflection coefficient at normal incidence and describe the variation of reflection amplitudes at intermediate offsets", "C": "To describe the behavior of reflection amplitudes at large angles/far offsets and the variation of reflection amplitudes with offset", "D": "To determine the effects of density and P- or S-wave velocity variations on the reflection amplitudes", "E": "To calculate the intersect and gradient for every time sample in every Common Midpoint Gather"},


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 340, in decode
    raise JSONDecodeError("Extra data", s, end)
json.decoder.JSONDecodeError: Extra data: line 1 column 677 (char 676)
../data/wikipedia_fixed/a.parquet:  25%|████████████████████▍                                                            | 98/388 [15:21<37:53,  7.84s/it]

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "Which statement accurately describes the conservation of angular momentum?", "answer": "C", "A": "The conservation of angular momentum is only applicable to planetary systems.", "B": "The conservation of angular momentum is a result of Noether's theorem.", "C": "The conservation of angular momentum is associated with rotational invariance.", "D": "The conservation of angular momentum is a direct consequence of Newton's second law.", "E": "The conservation of angular momentum is not applicable in quantum mechanics due to particle spin."}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "Which statement accurately describes the role of tRNA synthetases?", "answer": "C", "A": "tRNA synthetases are enzymes that catalyze the attachment of amino acids to tRNA molecules.", "B": "tRNA synthetases are responsible for the hydrolysis of pyrophosphate during DNA synthesis.", "C": "Proper balance of tRNA and aminoacyl-tRNA synthetase is crucial for accurate aminoacylation in vivo.", "D": "tRNA synthetases play a role in prebiotic peptide bond formation through amino acid phosphorylation.", "E": "tRNA synthetases are involved in the translation process by facilitating the synthesis of tRNA molecules."}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

This model's maximum context length is 4097 tokens. However, your messages resulted in 5641 tokens. Please reduce the length of the messages.
{"prompt": "What is the typical off-peak service in trains per hour at the station?", "answer": "A", "A": "The typical off-peak service at the station consists of 4 trains per hour to Highbury & Islington via Surrey Quays and 4 trains per hour to West Croydon.", "B": "The typical off-peak service at the station consists of 2 trains per hour to Highbury & Islington via Surrey Quays and 2 trains per hour to West Croydon.", "C": "The typical off-peak service at the station consists of 1 train per hour to Highbury & Islington via Surrey Quays and 1 train per hour to West Croydon.", "D": "The typical off-peak service at the station consists of 3 trains per hour to Highbury & Islington via Surrey Quays and 3 trains per hour to West Croydon.", "E": "The typical off-peak service at the station consists of 5 trains per hour to Highbury & Islington via Sur

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Invalid \escape: line 1 column 302 (char 301)
{"prompt": "Which of the following statements is the most accurate based on the provided text?", "answer": "C", "A": "The Iwasawa Main Conjectures for GL2 is a paper published in Inventiones Mathematicae in 2014.", "B": "The Birch and Swinnerton-Dyer conjecture is true for most elliptic curves over $\mathbb Q$.", "C": "A majority of elliptic curves over $\mathbb Q$ satisfy the Birch and Swinnerton-Dyer conjecture.", "D": "The BSD conjecture is true for most elliptic curves.", "E": "The Iwasawa Main Conjectures for GL2 is a paper published in 2013."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 1 column 302 (char 301)
../data/wikipedia_fixed/c.parquet:   6%|████▊                                                                          | 28/455 [04:49<1:05:28,  9.20s/it]

This model's maximum context length is 4097 tokens. However, your messages resulted in 15626 tokens. Please reduce the length of the messages.
{"prompt": "When were cetaceans first recognized as mammals?", "answer": "B", "A": "Cetaceans were recognized as mammals in the 16th century by Rondelet.", "B": "Cetaceans were recognized as mammals in 1758 by Carl Linnaeus.", "C": "Cetaceans were recognized as mammals in the 20th century by whalers.", "D": "Cetaceans were recognized as mammals in the 1980s by the Tethys Institute of Milan.", "E": "Cetaceans were recognized as mammals in the 1960s by dedicated research institutes."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, you requested 4242 tokens (3242 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "Who served as the editor-in-chief of the journal from 2012 to 2022?", "answer": "D", "A": "Josephine A. Morello served as the founding editor of the journal in 1988.", "B": "Betty Ann Forbes was appointed as the editor-in-chief of the journal in 1997.", "C": "Irving Nachamkin served as the editor-in-chief of the journal from 2002 to 2012.", "D": "Jo-Anne H. Young served as the editor-in-chief of the journal from 2012 to 2022.", "E": "Graeme Forrest is the current editor-in-chief of the journal."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, you requested 4894 tokens (3894 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "Which of the following statements is true about the class numbers of imaginary quadratic number fields?", "answer": "C", "A": "The class numbers of imaginary quadratic number fields are all equal to 1.", "B": "The class numbers of imaginary quadratic number fields are all equal to 2.", "C": "The class numbers of imaginary quadratic number fields can vary and are not limited to 1 or 2.", "D": "The class numbers of imaginary quadratic number fields are all greater than 2.", "E": "The class numbers of imaginary quadratic number fields are all less than or equal to 1."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, you requested 4734 tokens (3734 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "When was Devil's Slide formed?", "answer": "C", "A": "Devil's Slide was formed by a draining sea that poured down the center of the Slide.", "B": "Devil's Slide was formed by the erosion of limestone layers over millions of years.", "C": "Utah Geologist believe the Slide was formed 170 to 180 million years ago by a draining sea that poured down the center of the Slide.", "D": "Devil's Slide was formed during the Great Depression when the mining company shut down operations.", "E": "Devil's Slide was formed when God threw Lucifer out of heaven and he slid down the mountainside to hell along the route of Devil's Slide."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, your messages resulted in 4703 tokens. Please reduce the length of the messages.
{"prompt": "Which statement accurately describes the relationship between DNA supercoiling and chromosome condensation?", "answer": "C", "A": "DNA supercoiling is not involved in chromosome condensation.", "B": "Chromosome condensation is solely dependent on ATP.", "C": "DNA supercoiling plays an active role in chromosome condensation.", "D": "TADs (Topologically Associating Domains) are not affected by DNA supercoiling.", "E": "Chromosome condensation is primarily regulated by the sequence of DNA."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, you requested 5092 tokens (4092 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "What is the purpose of Dalton Discussions?", "answer": "C", "A": "To publish papers associated with oral presentations at scientific meetings", "B": "To provide a forum for the exchange of views and newly acquired results in inorganic chemistry", "C": "To serve as a permanent record of scientific meetings in the form of a special issue of a journal", "D": "To hold annual meetings for the inorganic chemistry community", "E": "To discuss important publications in inorganic chemistry"}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, you requested 4109 tokens (3109 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "Which of the following statements accurately describes the Dirac operator?", "answer": "C", "A": "The Dirac operator is a differential operator that acts on sections of a spinor bundle and is defined locally using a local orthonormal basis for the tangent space of a spin manifold.", "B": "The Dirac operator is a self-adjoint operator that acts on a four-component wave function in the Sobolev space of smooth, square-integrable functions.", "C": "The Dirac operator is a differential operator that describes the propagation of a free fermion in three dimensions and is written in the form D = c α → ⋅ ( − i ℏ ∇ x ) + m c 2 β.", "D": "The Dirac operator is a differential operator that arises in Clifford analysis and is defined as D = ∑ j = 1 n e j ∂ ∂ x j, where {ej: j = 1, ..., n} is an ortho

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Expecting ',' delimiter: line 1 column 700 (char 699)
{"prompt": "Which of the following statements is the most accurate?", "answer": "C", "A": "Dialetheism is a philosophical position that allows for true contradictions and challenges the principle of non-contradiction.", "B": "Logical consequence is the relationship between statements where one statement follows logically from another.", "C": "The Law of Non-Contradiction is a fundamental principle of classical logic that states that a statement cannot be both true and false at the same time.", "D": "Paraconsistent set theory is a mathematical framework that allows for contradictions without leading to inconsistency.", "E": "Nagarjuna was an Indian philosopher who argued for the concept of "emptiness" and the idea that all things are ultimately empty of inherent existence."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 1 column 700 (char 699)
../data/wikipedia_fixed/d.parquet: 100%|████████████████████████████████████████████████████████████████████████████████| 260/260 [30:54<00:00,  7.13s/it]
../data/wikipedia_fixed/e.parquet:  10%|████████▏                                                                        | 29/288 [03:49<30:05,  6.97s/it]

This model's maximum context length is 4097 tokens. However, you requested 4812 tokens (3812 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "What is the reason behind Erasmus Montanus's arguments with everyone he encounters?", "answer": "C", "A": "He wants to prove absurdities and rely on arguments from ignorance.", "B": "He is jealous of other scholars and wants to assert his superiority.", "C": "He insists on being referred to by his Latinised name and enjoys disputing.", "D": "He is trying to impress his fiancée's parents and gain their approval.", "E": "He is trying to avoid military service by enlisting in the clergy."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, your messages resulted in 4747 tokens. Please reduce the length of the messages.
{"prompt": "Which of the following statements accurately describes Elliott Lieb?", "answer": "C", "A": "Elliott Lieb received the ESI Medal in 2022.", "B": "Elliott Lieb was a recipient of the 2022 APS Medal for Exceptional Achievement in Research.", "C": "Elliott Lieb is a renowned mathematician and physicist who has received several prestigious awards, including the ESI Medal, the 2022 APS Medal for Exceptional Achievement in Research, and the Gauss Prize.", "D": "Elliott Lieb was a past president of the International Association of Mathematical Physics.", "E": "Elliott Lieb is a Fellow of the American Mathematical Society and a member of the Royal Society."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, you requested 4656 tokens (3656 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "Which statement accurately describes the history of vaccines?", "answer": "C", "A": "Vaccines were first developed in the 21st century by the Committee on Emerging Microbial Threats to Health.", "B": "The World Economic Forum played a significant role in the development of vaccines.", "C": "Vaccines have a long history and have played a crucial role in changing the world by preventing and controlling epidemics.", "D": "The World Health Organization (WHO) has recently published a report on the history of vaccines.", "E": "The emergence of COVID-19 has led to a decline in the use of vaccines worldwide."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Invalid \escape: line 1 column 339 (char 338)
{"prompt": "Which of the following statements accurately describes the research conducted by Emil J. Straube?", "answer": "C", "A": "Emil J. Straube's research focuses on Sobolev estimates for the complex Green operator on weakly pseudoconvex boundaries.", "B": "Emil J. Straube's research primarily deals with the regularity of the ∂ ¯ {\displaystyle {\overline {\partial }}} -Neumann problem.", "C": "Emil J. Straube has conducted research on various topics including Sobolev estimates, regularity of the ∂ ¯ {\displaystyle {\overline {\partial }}} -Neumann problem, and semi-classical analysis of Schrödinger operators.", "D": "Emil J. Straube's research is mainly centered around Levi foliations in pseudoconvex boundaries and vector fields that commute approximately with ∂ ¯ {\displaystyle {\overline {\partial }}} .", "E": "Emil J. Straube's research primarily focuses on a sufficient condition for global regularity of the ∂ ¯ {\displaystyle {\ov

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 1 column 339 (char 338)
../data/wikipedia_fixed/e.parquet:  68%|██████████████████████████████████████████████████████▋                         | 197/288 [25:25<11:35,  7.64s/it]

This model's maximum context length is 4097 tokens. However, you requested 4511 tokens (3511 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "Which of the following statements is the most accurate based on the provided text?", "answer": "C", "A": "Chess ratings are solely determined by a player's performance in official tournaments.", "B": "The Nunn Plan is a strategy developed by Viswanathan Anand for the World Chess Championship.", "C": "Rating inflation in chess is a recognized issue that has been discussed and analyzed by experts.", "D": "Planeswalker Points is a system introduced to track and reward players' performance in chess tournaments.", "E": "Intrinsic chess ratings are a new concept proposed in a research paper published in the AAAI Conference on Artificial Intelligence."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, your messages resulted in 4703 tokens. Please reduce the length of the messages.
{"prompt": "Which of the following statements accurately describes coevolution?", "answer": "C", "A": "Coevolution is the process by which two or more species reciprocally affect each other's evolution through natural selection.", "B": "Coevolution occurs when two species have a mutualistic relationship and both benefit from each other's presence.", "C": "Coevolution is a dynamic process in which the interactions between species lead to reciprocal evolutionary changes.", "D": "Coevolution is a rare phenomenon that only occurs in highly specialized ecological niches.", "E": "Coevolution is a static process in which species remain unchanged over time."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "Which of the following statements accurately describes the central limit theorem?", "answer": "C", "A": "The central limit theorem states that the sum of a large number of independent and identically distributed random variables will be approximately normally distributed.", "B": "The central limit theorem states that the mean of a large number of independent and identically distributed random variables will be approximately normally distributed.", "C": "The central limit theorem states that the distribution of the sum (or average) of a large number of independent and identically distributed random variables will be approximately normally distributed, regardless of the shape of the original distribution.", "D": "The central limit theorem states that the distribution of the maximum or minimum of a large number of independent and identically distributed

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

This model's maximum context length is 4097 tokens. However, you requested 4247 tokens (3247 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "Which of the following statements accurately describes the use of high performance liquid chromatography (HPLC) in pharmaceutical analyses?", "answer": "C", "A": "High performance liquid chromatography (HPLC) is a widely used technique in pharmaceutical analyses due to its ability to separate and quantify complex mixtures of compounds.", "B": "HPLC is a technique that is primarily used in forensic toxicology for the analysis of drugs and their metabolites.", "C": "HPLC is a powerful analytical technique used in pharmaceutical analyses to separate, identify, and quantify drug compounds in complex mixtures.", "D": "HPLC is a relatively new technique that has limited applications in pharmaceutical analyses.", "E": "HPLC is a low sensitivity technique that is not suitable for the analysis o

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "What was one of the main focuses of the professor's studies?", "answer": "C", "A": "He was primarily focused on the synthesis of cluster compounds with bonds between different transition elements.", "B": "His main area of research was the study of boron hydrides.", "C": "Among the many foci of his studies were complexes of fluorocarbon, isocyanide, polyolefin, alkylidene and alkylidyne ligands.", "D": "He dedicated his research to the discovery of versatile reagents for the synthesis of organometallic compounds.", "E": "His research primarily revolved around the investigation of carbon-metal and metal-metal multiple bonds."}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

This model's maximum context length is 4097 tokens. However, your messages resulted in 4102 tokens. Please reduce the length of the messages.
{"prompt": "What was the purpose of Carl Linnaeus's book Fundamenta Botanica?", "answer": "C", "A": "Fundamenta Botanica was a book dedicated to famous botanists and aimed to honor their contributions to the field.", "B": "Fundamenta Botanica was a book that outlined Linnaeus's ideas for the reformation of botanical taxonomy.", "C": "Fundamenta Botanica laid the foundations for Linnaeus's system of nomenclature, classification, and botanical terminology.", "D": "Fundamenta Botanica was a book that focused on the description and classification of different plant species.", "E": "Fundamenta Botanica was a book that discussed the forces and variations within the plant kingdom."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Expecting ',' delimiter: line 1 column 129 (char 128)
{"prompt": "Which of the following statements accurately reflects the content of the provided text?", "answer": "C", "A": "The "One Health" initiative focuses on the relationship between chronic infection and autoimmune diseases.", "B": "The study discussed in the text examines the link between Mycobacterium avium subspecies paratuberculosis and type-1 diabetes mellitus.", "C": "The text discusses the importance of research on zoonoses in the context of the "One Health" initiative.", "D": "The study mentioned in the text argues against the need for more microbial genomes.", "E": "The text provides an overview of the Functional Molecular Infection Epidemiology research program."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 1 column 129 (char 128)
../data/wikipedia_fixed/f.parquet: 100%|████████████████████████████████████████████████████████████████████████████████| 179/179 [27:57<00:00,  9.37s/it]
../data/wikipedia_fixed/g.parquet:   9%|███████▎                                                                         | 30/330 [04:10<38:40,  7.73s/it]

Expecting ',' delimiter: line 1 column 193 (char 192)
{"prompt": "Which source provides information on station usage statistics for London?", "answer": "C", "A": "Archived from the original on 9 November 2020. Retrieved 9 November 2020.", "B": ""Station Usage Data" (XLSX). Usage Statistics for London Stations, 2020. Transport for London. 16 April 2021. Retrieved 1 January 2022.", "C": ""Station Usage Data" (XLSX). Usage Statistics for London Stations, 2021. Transport for London. 12 July 2022. Retrieved 7 September 2022.", "D": ""Estimates of station usage". Rail statistics. Office of Rail Regulation.", "E": "Chronology of London Railways by H.V.Borley"}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 1 column 193 (char 192)
../data/wikipedia_fixed/g.parquet:   9%|███████▌                                                                         | 31/330 [04:29<54:28, 10.93s/it]

This model's maximum context length is 4097 tokens. However, your messages resulted in 4977 tokens. Please reduce the length of the messages.
{"prompt": "Which source provides information on station usage statistics for London?", "answer": "C", "A": "Archived from the original on 9 November 2020. Retrieved 9 November 2020.", "B": ""Station Usage Data" (XLSX). Usage Statistics for London Stations, 2020. Transport for London. 16 April 2021. Retrieved 1 January 2022.", "C": ""Station Usage Data" (XLSX). Usage Statistics for London Stations, 2021. Transport for London. 12 July 2022. Retrieved 7 September 2022.", "D": ""Estimates of station usage". Rail statistics. Office of Rail Regulation.", "E": "Chronology of London Railways by H.V.Borley"}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, your messages resulted in 4127 tokens. Please reduce the length of the messages.
{"prompt": "Which of the following statements is the most accurate based on the provided text?", 
"answer": "C", 
"A": "The Equivalence Principle was first introduced in a paper published in 1961 by L.", 
"B": "CP violation is a phenomenon that is related to the concept of gravity.", 
"C": "'t Hooft's book, Spookrijders in de wetenschap, discusses the topic of CP violation and its connection to gravity.", 
"D": "The paper by Kowitt in 1996 explores the concept of gravitational repulsion and its relationship to Dirac antimatter.", 
"E": "The article by Chardin and Rax in 1992 presents evidence supporting the idea that CP violation is influenced by gravity."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Expecting ',' delimiter: line 1 column 569 (char 568)
{"prompt": "What did Darwin comment about writing a book?", "answer": "C", "A": "Darwin believed that writing a book was a waste of time and effort.", "B": "Darwin believed that writing a book was the best way to prove one's intelligence.", "C": "Darwin sardonically commented that writing a book is a proof of earnestness and that you do not form your opinions without undergoing labor of some kind.", "D": "Darwin believed that writing a book was a way to gain recognition from other geologists.", "E": "Darwin believed that writing a book was a way to promote the "crater of elevation" theory."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 1 column 569 (char 568)
../data/wikipedia_fixed/g.parquet: 100%|████████████████████████████████████████████████████████████████████████████████| 330/330 [47:42<00:00,  8.67s/it]
../data/wikipedia_fixed/h.parquet:  76%|█████████████████████████████████████████████████████████████                   | 194/254 [26:14<07:45,  7.76s/it]

This model's maximum context length is 4097 tokens. However, your messages resulted in 4965 tokens. Please reduce the length of the messages.
{"prompt": "What is the interpretation of the steep north-dipping Haukå Fault?", "answer": "C", "A": "The Haukå Fault is a post-Devonian brittle structure that formed close to the original basin edge.", "B": "The Haukå Fault is a low-angle extensional fault known as the Hornelen Detachment.", "C": "The Haukå Fault is interpreted to increase in displacement westwards, locally cutting out the alluvial fan deposits.", "D": "The Haukå Fault is a west–southwest trending anticline and syncline parallel to the faulted margin.", "E": "The Haukå Fault is a major extensional structure with tens of km of displacement."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "What is the term coined by Jan Kaiser and Thomas Röckmann in 2008?", "answer": "B", "A": "Isotopocule analysis of biologically produced nitrous oxide in various environments", "B": "Isotopologue", "C": "Correction of mass spectrometric isotope ratio measurements for isobaric isotopologues of O2, CO, CO2, N2O and SO2", "D": "Stable Isotopes in Tree Rings", "E": "Terminology, Definitions and Properties"}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

This model's maximum context length is 4097 tokens. However, you requested 5050 tokens (4050 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "What is the purpose of using an internal standard in analytical techniques?", "answer": "C", "A": "An internal standard is used to determine the concentration of other analytes by calculating response factor.", "B": "An internal standard is used to create calibration curves that ignore the uncertainty between measurements.", "C": "An internal standard is used to mitigate uncertainty in preparatory steps and ensure accurate measurement of the analyte.", "D": "An internal standard is used to select the appropriate ionization method in liquid chromatography-mass spectrometry.", "E": "An internal standard is used to observe how the analyte and internal standard signals change with varying experimental conditions in ICP-OES."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, you requested 4188 tokens (3188 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "What is the purpose of the Impact Field Studies Group (IFSG)?", "answer": "C", "A": "The IFSG is a scientific organization that conducts research on impact craters and impact structures.", "B": "The IFSG is a group of researchers, professionals, and students who study impact sites and maintain the Impact Database.", "C": "The IFSG is a scientific organization that emphasizes geologic field research of suspected and confirmed impact craters and impact structures.", "D": "The IFSG is a group of researchers, professionals, and students who organize field trips to impact-related sites.", "E": "The IFSG is a scientific organization that accepts submissions of proposed new impact sites and requires submitters to do significant homework before submitting."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "Which of the following statements accurately describes the Integrated Biosphere Simulator Model (IBIS)?", "answer": "C", "A": "The IBIS model was developed by Colin, Ramankutty, Levis, Pollard, Sitch, and Haxeltine.", "B": "The IBIS model is a land surface process model that does not consider vegetation dynamics.", "C": "The IBIS model is an integrated biosphere model that simulates land surface processes, terrestrial carbon balance, and vegetation dynamics.", "D": "The IBIS model is only available as Version 2.5 and cannot be accessed online.", "E": "The IBIS model was developed by Foley, Kucharik, and Polzin in 2005."}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "Which of the following statements accurately describes Joel L. Lebowitz?", "answer": "C", "A": "Joel L. Lebowitz is a renowned physicist who has made significant contributions to the field of statistical mechanics, particularly in the study of large deviations in nonequilibrium steady states and the rigorous analysis of Gibbs equilibrium ensembles.", "B": "Joel L. Lebowitz has been the editor-in-chief of the Journal of Statistical Physics since 1975, a prestigious position that he held until September 2018.", "C": "Joel L. Lebowitz has received numerous honors and awards for his contributions to both equilibrium and non-equilibrium statistical mechanics, including the Boltzmann Medal, the Nicholson Medal, the Delmer S. Fahrney Medal, the Henri Poincaré Prize, the Volterra Award, the Heineman Prize for Mathematical Physics, the Max Planck Medal, the G

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "Which of the following journals contains an article co-authored by Jack Thorne?", "answer": "C", "A": "The Journal of the Institute of Mathematics of Jussieu contains an article co-authored by Jack Thorne.", "B": "The Journal of the American Mathematical Society contains an article co-authored by Jack Thorne.", "C": "The Research in the Mathematical Sciences contains an article co-authored by Jack Thorne.", "D": "The American Journal of Mathematics contains an article co-authored by Jack Thorne.", "E": "The Journal of the Institute of Mathematics of Jussieu and the Journal of the American Mathematical Society both contain articles co-authored by Jack Thorne."}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

This model's maximum context length is 4097 tokens. However, you requested 4424 tokens (3424 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "Which of the following statements is true about the approximate limit of a Lebesgue-measurable function?", "answer": "C", "A": "The approximate limit of a Lebesgue-measurable function is always unique.", "B": "A Lebesgue-measurable function has an approximate limit at every point of density of its domain.", "C": "A Lebesgue-measurable function has an approximate limit at a point of density if and only if there exists a measurable subset of its domain where the restriction of the function has a limit equal to the approximate limit.", "D": "The approximate limit of a Lebesgue-measurable function is always equal to the function's value at that point of density.", "E": "A Lebesgue-measurable function is approximately continuous at every point of density of its domain."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, you requested 4614 tokens (3614 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "Who were some of Constantine Posse's students?", "answer": "A", "A": "Veniamin Kagan and D. D. Morduhai-Boltovskoi were among his students.", "B": "Constantine Posse had no students.", "C": "Constantine Posse's students are unknown.", "D": "Constantine Posse's students were not mentioned in the text.", "E": "Constantine Posse's students were not important."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Expecting ',' delimiter: line 1 column 420 (char 419)
{"prompt": "What is one of the key aspects of the Kumeyaay civilization's cosmological belief system?", "answer": "C", "A": "The Kumeyaay civilization developed a complex system for tracking time based on astronomical observations and visual registration.", "B": "The Kumeyaay people created sand paintings and rock art to depict the movement of celestial bodies such as the sun, moon, and constellations.", "C": "The "Men in a square" rupestric painting at El Vallecito aligns with sunlight on the Fall equinox, indicating the Kumeyaay's knowledge of astronomical events.", "D": "Observation areas were constructed by the Kumeyaay to observe and record astronomical events, although many were destroyed before protective measures were implemented.", "E": "The Milky Way constellations, known as Hatotkeur or the Spine of the Sky, held cultural significance in Kumeyaay astronomy."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 1 column 420 (char 419)
../data/wikipedia_fixed/k.parquet:  89%|████████████████████████████████████████████████████████████████████████▏        | 90/101 [11:52<01:35,  8.68s/it]

Expecting ',' delimiter: line 1 column 903 (char 902)
{"prompt": "What was Kurt Otto Friedrichs' greatest contribution to applied mathematics?", "answer": "A", "A": "Kurt Otto Friedrichs' greatest contribution to applied mathematics was his work on partial differential equations, which has had a significant impact on various fields such as physics, fluid dynamics, and elasticity.", "B": "Kurt Otto Friedrichs' greatest contribution to applied mathematics was his research and writings on existence theory, numerical methods, differential operators in Hilbert space, non-linear buckling of plates, flows past wings, solitary waves, shock waves, combustion, magneto-fluid dynamical shock waves, relativistic flows, quantum field theory, perturbation of the continuous spectrum, scattering theory, and symmetric hyperbolic equations.", "C": "Kurt Otto Friedrichs' greatest contribution to applied mathematics was his collaboration with Cartan in giving a "geometrized" formulation of Newtonian gravit

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 1 column 903 (char 902)
../data/wikipedia_fixed/k.parquet: 100%|████████████████████████████████████████████████████████████████████████████████| 101/101 [13:50<00:00,  8.22s/it]
../data/wikipedia_fixed/l.parquet:  29%|███████████████████████▏                                                        | 124/427 [16:06<35:40,  7.06s/it]

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "What is one of the features of LIONsolver?", "answer": "C", "A": "LIONsolver is a software architecture that allows for problem-specific extensions and can be used as a post-processing tool for all optimization schemes.", "B": "LIONsolver won the first prize of the Michael J. Fox Foundation – Kaggle Parkinson's Data Challenge in 2013.", "C": "LIONsolver permits interactive multi-objective optimization and has a user interface for visualizing results and facilitating decision making.", "D": "LIONsolver is a reactive search and intelligent optimization tool that leverages the wisdom of the crowd.", "E": "LIONsolver is a programming by optimization tool that can be used for autonomous search."}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "Which of the following statements is the most accurate based on the provided text?", 
 "answer": "C", 
 "A": "Sherrardspark Wood is a Local Nature Reserve according to Natural England.", 
 "B": "Tewinbury is a Site of Special Scientific Interest according to Natural England.", 
 "C": "Therfield Heath is both a Local Nature Reserve and a Site of Special Scientific Interest according to Natural England.", 
 "D": "Thorley Wash is a Local Nature Reserve according to the Herts and Middlesex Wildlife Trust.", 
 "E": "Tring Reservoirs is a Site of Special Scientific Interest according to Natural England."}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

Unterminated string starting at: line 1 column 5023 (char 5022)
{"prompt": "Who is a professor at a science university?", "answer": "E", "A": "com Ebony O'Dea at AustralianFootball.com Caitlin Gould at AustralianFootball.com Isabella Shannon at AustralianFootball.com Tamara Luke at AustralianFootball.com Brooke Vernon at AustralianFootball.com Emily Bonser at AustralianFootball.com EllaWood at AustralianFootball.com Jacqueline Parry at AustralianFootball.com Mia King at AustralianFootball.com Britney Gutknecht at AustralianFootball.com Hannay Munyard at AustralianFootball.com Poppy Kelly at AustralianFootball.com Britney Gutknecht at AustralianFootball.com Emma O'Driscoll at AustralianFootball.com Montana McKinnon at AustralianFootball.com Sarah Halvorsen at AustralianFootball.com Kate Dempsey at AustralianFootball.com Samantha Johnson at AustralianFootball.com Brenna Tarrant at AustralianFootball.com Lucy Bellinger at AustralianFootball.com Nekaela Butler at AustralianFootball.com Cia

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Unterminated string starting at: line 1 column 5023 (char 5022)
../data/wikipedia_fixed/l.parquet:  63%|██████████████████████████████████████████████████▌                             | 270/427 [44:56<35:00, 13.38s/it]

This model's maximum context length is 4097 tokens. However, your messages resulted in 4853 tokens. Please reduce the length of the messages.
{"prompt": "Which player has the most international appearances?", "answer": "C", "A": "Emmanuel Okwi has played for the national football team.", "B": "Kasun Nadika Jayasuriya is a well-known football player.", "C": "Seydou Keïta has the most international appearances.", "D": "Vedat Muriqi is a talented football player.", "E": "Jason Cunliffe has represented his country in international matches."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, your messages resulted in 4645 tokens. Please reduce the length of the messages.
{"prompt": "Who holds the record for the most games played for Adelaide in the AFL Women's (AFLW)?", "answer": "A", "A": "The player who holds the record for the most games played for Adelaide in the AFL Women's (AFLW) is yet to be updated as of the end of round 3, 2023.", "B": "The player who holds the record for the most games played for Adelaide in the AFL Women's (AFLW) is currently unknown.", "C": "The player who holds the record for the most games played for Adelaide in the AFL Women's (AFLW) is listed on the Australian Football website.", "D": "The player who holds the record for the most games played for Adelaide in the AFL Women's (AFLW) is expected to be announced soon.", "E": "The player who holds the record for the most games played for Adelaide in the AFL Women's (AFLW) is not mentioned in the provided text."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, your messages resulted in 5873 tokens. Please reduce the length of the messages.
{"prompt": "When was the transfer of Darwin Nunez communicated to CMVM?", "answer": "D", "A": "The transfer of Darwin Nunez was communicated to CMVM on 19 August 2020.", "B": "The transfer of Darwin Nunez was communicated to CMVM on 1 September 2016.", "C": "The transfer of Darwin Nunez was communicated to CMVM on 3 July 2019.", "D": "The transfer of Darwin Nunez was communicated to CMVM on 1 February 2023.", "E": "The transfer of Darwin Nunez was communicated to CMVM on 13 June 2022."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, you requested 4444 tokens (3444 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "Which of the following statements accurately describes the Prix Lalande?", "answer": "C", "A": "The Prix Lalande was established in 1714 by the Paris Academy of Sciences.", "B": "The Prix Lalande was awarded for achievements in the field of héliographie.", "C": "The Prix Lalande was awarded by the Paris Academy of Sciences from 1881 to 1915.", "D": "The Prix Lalande was mentioned in the journal La Lumière.", "E": "The Prix Lalande was awarded for achievements in the field of beaux-arts."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "According to the text, what is Maurice Pirenne's stance on the use of curves in perspective?", "answer": "D", "A": "Pirenne believes that a truly 'physiological' perspective should consist of some kind of pseudo-development upon the picture plane of an image curved in shape like the retinal image.", "B": "Pirenne argues that due to the curvature of the retina, the geometrical construction of perspective should also use curves, leading to systems of 'curvilinear perspective'.", "C": "Pirenne suggests that the retinal image is what we see, and therefore a truly 'physiological' perspective should be based on the curved shape of the retinal image.", "D": "Pirenne refutes the idea that a truly 'physiological' perspective should consist of curves, stating that the retinal image is not what we see and that central, 'rectilinear', perspective is the only met

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

This model's maximum context length is 4097 tokens. However, your messages resulted in 6240 tokens. Please reduce the length of the messages.
{"prompt": "Which of the following statements accurately describes Marcela Carena's research focus?", "answer": "C", "A": "Marcela Carena's research primarily focuses on the study of quantum mechanics and its applications in technology.", "B": "Marcela Carena's research is centered around the exploration of the origins and evolution of the universe.", "C": "Carena's research is focused on models of new physics beyond the Standard Model and their manifestations in particle physics experiments.", "D": "Marcela Carena's research primarily focuses on the development of renewable energy sources and sustainable technologies.", "E": "Carena's research primarily focuses on the study of biological systems and their applications in medicine."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "Which of the following statements accurately describes the Modular Chemical Descriptor Language (MCDL)?", "answer": "C", "A": "The Modular Chemical Descriptor Language (MCDL) was first introduced in a paper published in 2001 by Gakh, Burnett, and colleagues.", "B": "MCDL is a Java-based chemical structure editor that supports the creation and manipulation of modular chemical descriptors.", "C": "The Modular Chemical Descriptor Language (MCDL) is a language used to represent stereochemical modules and has been the subject of multiple research papers.", "D": "Open Babel is a chemical toolbox that incorporates the Modular Chemical Descriptor Language (MCDL) as one of its features.", "E": "The availability, reliability, and security of information systems and human-computer interaction are topics discussed in a book that mentions the Modular Chemical Des

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

This model's maximum context length is 4097 tokens. However, your messages resulted in 5282 tokens. Please reduce the length of the messages.
{"prompt": "Which statement accurately describes the research mentioned in the provided text?", "answer": "C", "A": "The research discussed in the text primarily focuses on the physical properties of biological membranes and their role in ion and nonelectrolyte selectivity.", "B": "The research mentioned in the text investigates the regulation of a-type potassium current and its multiple modes of regulation.", "C": "The research mentioned in the text includes studies on specific ion permeation, the physical basis of ion and nonelectrolyte selectivity, and neuronal channels and receptors.", "D": "The research discussed in the text primarily focuses on the physiological effects of ion permeation and the regulation of a-type potassium current.", "E": "The research mentioned in the text primarily focuses on the molecular neurology of neuronal channel

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Invalid \escape: line 1 column 295 (char 294)
{"prompt": "Which method was used by Borexino to detect geo-neutrinos?", "answer": "C", "A": "Borexino used seismic data to detect geo-neutrinos.", "B": "Borexino used neutrino tomography to detect geo-neutrinos.", "C": "Borexino detected geo-neutrinos through the process ν ¯ + p + ⟶ e + + n {\displaystyle {\ce {{\bar {\nu }}+p^{+}\longrightarrow e^{+}{+n}}}} .", "D": "Borexino used IceCube data to detect geo-neutrinos.", "E": "Borexino used KM3NeT data to detect geo-neutrinos."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 1 column 295 (char 294)
../data/wikipedia_fixed/n.parquet:  71%|████████████████████████████████████████████████████████▉                       | 138/194 [16:48<06:27,  6.92s/it]

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "What is the main focus of Nikolaus Rajewsky's research?", "answer": "C", "A": "BIMSB integrates experimental and computational methods to understand disease progressions.", "B": "Nikolaus Rajewsky is the head of BIMSB and has secured permanent funding for the project.", "C": "Nikolaus Rajewsky's research focuses on understanding the role of RNA in gene regulation.", "D": "The BMBF provided additional funding for the new building of BIMSB.", "E": "LifeTime is a pan-European consortium chaired by Nikolaus Rajewsky that aims to revolutionize healthcare."}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "Which statement accurately describes the importance of calculating natality rate?", "answer": "C", "A": "Calculating natality rate is only relevant for animal species, not plants or humans.", "B": "Calculating natality rate is important for wildlife management, but not for making government policies or conducting research on species preservation.", "C": "Calculating natality rate is crucial for understanding the reproductive ability of a population and making informed decisions for species preservation and population growth policies.", "D": "Calculating natality rate is primarily used to determine the effects of environmental chemicals/toxins on women of childbearing age.", "E": "Calculating natality rate is only relevant for studying the polar bear population in Svalbard, Norway."}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "Which article discusses the challenges and strategies of antisense oligonucleotide drug delivery?", "answer": "B", "A": "The article 'Enhanced Antisense Oligonucleotide Delivery Using Cationic Liposomes Grafted with Trastuzumab: A Proof-of-Concept Study in Prostate Cancer' explores the use of cationic liposomes and trastuzumab to enhance the delivery of antisense oligonucleotides in prostate cancer.", "B": "The article 'The Challenges and Strategies of Antisense Oligonucleotide Drug Delivery' specifically focuses on the challenges and strategies associated with delivering antisense oligonucleotide drugs.", "C": "The article 'A morphological distinction between neurones of the male and female, and the behaviour of the nucleolar satellite during accelerated nucleoprotein synthesis' discusses the morphological differences between male and female neurons

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

This model's maximum context length is 4097 tokens. However, your messages resulted in 4157 tokens. Please reduce the length of the messages.
{"prompt": "Which of the following statements accurately describes the role of the protein Prp24 in U6 snRNA?", "answer": "C", "A": "Prp24 is responsible for the structural rearrangements of U6 RNA in Saccharomyces cerevisiae.", "B": "Prp24 is a binding site in U6 snRNA and plays a role in the annealing of U6 and U4 snRNAs.", "C": "Prp24 is involved in multiple functions in U6 RNA structural rearrangements in Saccharomyces cerevisiae.", "D": "Prp24 is a component of the LSm complex in yeast U6 snRNPs.", "E": "Prp24 is a protein that interacts with U6 snRNA and plays a role in its characterization."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "What is the significance of the Puente Hills Fault in the Los Angeles area?", "answer": "C", "A": "The Puente Hills Fault is a transform fault that has a high frequency of major ruptures, posing a significant risk to the Los Angeles area.", "B": "The Puente Hills Fault is responsible for the 1987 Whittier Narrows earthquake and the 2010 light event, both of which caused damage and deaths in the Los Angeles area.", "C": "The Puente Hills Fault is a blind thrust fault that runs through the Los Angeles Basin and has the potential to cause a major earthquake with substantial impact in the Los Angeles area.", "D": "The Puente Hills Fault is responsible for the shortening of the northern Los Angeles Basin, with geodetic studies showing a rate of 4.5-5 millimeters per year.", "E": "The Puente Hills Fault was discovered in 1999 and is visually distinct with 

Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "Which book is being reviewed in the text?", "answer": "C", "A": "The Religious Worlds of Isaac Newton by Rob Iliffe is a comprehensive exploration of the religious beliefs and practices of the famous scientist.", "B": "Priest of Nature: The Religious Worlds of Isaac Newton is a groundbreaking book that delves into the religious aspects of Isaac Newton's life and work.", "C": "The book being reviewed in the text is Priest of Nature: The Religious Worlds of Isaac Newton by Rob Iliffe.", "D": "Isaac Newton's religious beliefs and their influence on his scientific work are examined in the book Priest of Nature: The Religious Worlds of Isaac Newton.", "E": "Rob Iliffe's book, Priest of Nature: The Religious Worlds of Isaac Newton, provides a detailed analysis of the religious dimensions of Isaac Newton's life and scientific contributions."}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "When was the initial formation of the Facho-Pico Alto Volcanic Complex?", "answer": "C", "A": "The initial formation of the Facho-Pico Alto Volcanic Complex occurred between 2 and 5 million years ago.", "B": "The initial formation of the Facho-Pico Alto Volcanic Complex occurred between 3 and 5 million years ago.", "C": "The initial formation of the Facho-Pico Alto Volcanic Complex was a phase of intense volcanism, resulting from submarine eruptions as early as 5 million years ago.", "D": "The initial formation of the Facho-Pico Alto Volcanic Complex occurred between 4 and 6 million years ago.", "E": "The initial formation of the Facho-Pico Alto Volcanic Complex occurred between 1 and 3 million years ago."}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "Who is the author of the book 'Other Minds: The Octopus, the Sea, and the Deep Origins of Consciousness'?", "answer": "A", "A": "Peter Godfrey-Smith", "B": "Robert T. Pennock", "C": "Ken Gewertz", "D": "The University of Sydney", "E": "The American Philosophical Society"}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

This model's maximum context length is 4097 tokens. However, you requested 4796 tokens (3796 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "Which of the following statements accurately describes the cyclooxygenase reaction mechanism?", "answer": "C", "A": "The cyclooxygenase reaction mechanism involves the fragmentation of prostaglandin endoperoxides in aqueous solution.", "B": "The cyclooxygenase reaction mechanism is mediated by the enzyme cyclooxygenase-2.", "C": "The cyclooxygenase reaction mechanism is described in the article 'The cyclooxygenase reaction mechanism' by van der Donk et al.", "D": "The cyclooxygenase reaction mechanism results in the formation of aldehyde products from PGH2.", "E": "The cyclooxygenase reaction mechanism is classified by the International Union of Basic and Clinical Pharmacology as a prostanoid receptor."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Invalid \escape: line 1 column 194 (char 193)
{"prompt": "What is the definition of a K-quasiregular map?", "answer": "C", "A": "A K-quasiregular map is a differentiable map that satisfies the inequality ‖ D f ( x ) ‖ n ≤ K | J f ( x ) | {\displaystyle \|Df(x)\|^{n}\leq K|J_{f}(x)|} at all points in the region D in Rn to Rn.", "B": "A K-quasiregular map is a continuous map in the Sobolev space W1,n loc whose partial derivatives in the sense of distributions have locally summable n-th power, and satisfies the inequality ‖ D f ( x ) ‖ n ≤ K | J f ( x ) | {\displaystyle \|Df(x)\|^{n}\leq K|J_{f}(x)|} almost everywhere.", "C": "A K-quasiregular map is a differentiable map that satisfies the inequality ‖ D f ( x ) ‖ n ≤ K | J f ( x ) | {\displaystyle \|Df(x)\|^{n}\leq K|J_{f}(x)|} at all points in the region D in Rn to Rn.", "D": "A K-quasiregular map is a map that is K-quasiregular with some K, and excludes constant maps from the class of quasiregular maps.", "E": "A K-quasiregular map is 

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 1 column 194 (char 193)
../data/wikipedia_fixed/q.parquet: 100%|██████████████████████████████████████████████████████████████████████████████████| 36/36 [04:45<00:00,  7.94s/it]
../data/wikipedia_fixed/r.parquet:  23%|██████████████████▌                                                              | 60/262 [06:29<24:25,  7.25s/it]

This model's maximum context length is 4097 tokens. However, you requested 4954 tokens (3954 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "What was Robert A. Frosch's role at the United Nations Environmental Program?", "answer": "C", "A": "Robert A. Frosch served as the assistant executive director of the United Nations Environmental Program from January 1973 to July 1975.", "B": "Robert A. Frosch was responsible for overseeing the continuation of the development effort on the Space Shuttle program at the United Nations Environmental Program.", "C": "Robert A. Frosch, with the rank of assistant secretary general of the United Nations, was responsible for substantive global program activities of the United Nations system and other international activities related to environment matters at the United Nations Environmental Program.", "D": "Robert A. Frosch worked as a research scientist and director of research programs for H

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, your messages resulted in 4314 tokens. Please reduce the length of the messages.
{"prompt": "Who wrote the book 'Descartes: a biography'?", "answer": "C", "A": "The Cambridge Companion to Descartes was edited by John Cottingham.", "B": "The book 'Math and Mathematicians: The History of Math Discoveries Around the World; Vol. 1' was written by Leonard C. Bruno.", "C": "Desmond M. Clarke wrote the book 'Descartes: a biography'.", "D": "The book 'Descartes' Life and the Development of His Philosophy' was written by Geneviève Rodis-Lewis.", "E": "The book 'Descartes, Rene | Internet Encyclopedia of Philosophy' was written by an unknown author."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, your messages resulted in 4199 tokens. Please reduce the length of the messages.
{"prompt": "Which website provides information on the most individual penalty goals in a match?", "answer": "D", "A": "Rugbyworldcup.com provides information on the most individual penalty goals in a tournament/season.", "B": "Espnscrum.com provides information on the most individual drop goals.", "C": "Espnscrum.com provides information on the most individual penalty goals.", "D": "Espnscrum.com provides information on the most individual penalty goals in a match.", "E": "Rugbyworldcup.com provides information on the most drop goals."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, you requested 4125 tokens (3125 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "Which of the following statements is the most accurate about Professor Ruth King?", "answer": "C", "A": "She graduated with a BSc in Mathematics with Statistics in 1998 from the University of Bristol.", "B": "She has been the Thomas Bayes' Chair of Statistics at the University of Edinburgh since 2010.", "C": "She has 69 publications on 'Google Scholar' since 2001, with a total of 1370 citations since 2014.", "D": "She co-organised the ICMS workshop on 'Addressing Statistical Challenges of Modern Technological Advances' in 2019.", "E": "She worked as a research associate at the University of Cambridge from 2001 to 2003."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, your messages resulted in 5519 tokens. Please reduce the length of the messages.
{"prompt": "Who is Sylvain Garel?", "answer": "C", "A": "Sylvain Garel is a specialist in Canadian cinema.", "B": "Sylvain Garel is a professor at a science university.", "C": "Sylvain Garel is a specialist in Canadian cinema and has been featured in various publications.", "D": "Sylvain Garel is a filmmaker known for his work in French cinema.", "E": "Sylvain Garel is a renowned scientist in the field of cinema studies."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, you requested 4533 tokens (3533 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "What is the general rule for the shear velocity in relation to the mean flow velocity?", "answer": "B", "A": "The shear velocity is always equal to the mean flow velocity.", "B": "The shear velocity is typically between 5% and 10% of the mean flow velocity.", "C": "The shear velocity is always greater than the mean flow velocity.", "D": "The shear velocity is always less than the mean flow velocity.", "E": "The shear velocity is unrelated to the mean flow velocity."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

Expecting ',' delimiter: line 1 column 1264 (char 1263)
{"prompt": "What is the significance of the Republic upland lacustrine fossil beds?", "answer": "C. The Republic upland lacustrine fossil beds are significant as they represent the earliest known records of the Rosaceae (rose family) and Aceraceae (maple family), and have yielded over 200 species in fossilized form.", "A": "A. The Republic upland lacustrine fossil beds are significant because they are located in the city of Republic, which has a rich history of fossil discoveries.", "B": "B. The Republic upland lacustrine fossil beds are significant because they are part of a series of Eocene lake beds with abundant fossil plants, insects, fish, and other ancient life.", "C": "C. The Republic upland lacustrine fossil beds are significant as they represent the earliest known records of the Rosaceae (rose family) and Aceraceae (maple family), and have yielded over 200 species in fossilized form.", "D": "D. The Republic upland lacust

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 1 column 1264 (char 1263)
../data/wikipedia_fixed/s.parquet:  85%|███████████████████████████████████████████████████████████████████▉            | 426/502 [48:12<08:03,  6.36s/it]

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{"prompt": "Which statement accurately describes the distribution of Sporothrix globosa?", "answer": "C", "A": "Sporothrix globosa is primarily found in Europe and North America.", "B": "Sporothrix globosa is a common pathogen in animals but rarely affects humans.", "C": "Sporothrix globosa is known to cause sapronoses in Asia.", "D": "Sporothrix globosa is exclusively found in soil and does not infect living organisms.", "E": "Sporothrix globosa is a newly discovered species and its distribution is still unknown."}


Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/conda/lib/python3.10/http/client.py", line 287, in _read_status
    raise RemoteDisconnected("Remote end closed connection without"
http.client.RemoteDisconnected: Remote end closed connection without response

During handling of the above exception, another exception occurred

HTTP code 502 from API (<html>
<head><title>502 Bad Gateway</title></head>
<body>
<center><h1>502 Bad Gateway</h1></center>
<hr><center>cloudflare</center>
</body>
</html>
)
{"prompt": "Which of the following books is not mentioned in the provided text?", "answer": "E", "A": "Fundamental processes in ecology: an earth systems approach by David M. Wilkinson", "B": "Systems ecology: an introduction to ecological modelling by R. L. Kitching", "C": "Steps to an Ecology of Mind by Gregory Bateson", "D": "Systems Analysis in Ecology by Kenneth Edmund Ferguson", "E": "Modeling Biological Systems: Principles and Applications by J. W. Haefner"}


../data/wikipedia_fixed/s.parquet: 100%|██████████████████████████████████████████████████████████████████████████████| 502/502 [1:01:11<00:00,  7.31s/it]
../data/wikipedia_fixed/t.parquet:  20%|████████████████▏                                                                | 57/286 [05:59<22:43,  5.96s/it]

Unterminated string starting at: line 1 column 4844 (char 4843)
{"prompt": "What is the purpose of the tonograph device?", "answer": "Cagnazzi's tonograph device is designed to help store and preserve the tones and inflections of the human voice, allowing for precise measurement and transcription of vocal intonation. It provides a way to accurately represent the tone and intensity of the human voice, which the diatonic and chromatic scales of music cannot capture. The device consists of a hollow brass cylindrical section with a piston and a graduated scale, allowing for adjustments in the length of the cylinder and generating different sounds. By matching one's voice with the sound produced by the device, the tonograph can measure the intonation and inflection of the human voice. Additionally, the tonograph serves as a means to store and preserve vocal information by transcribing the measured values above or below a text. It is a valuable tool for declamation schools and acting schools

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Unterminated string starting at: line 1 column 4844 (char 4843)
../data/wikipedia_fixed/t.parquet:  35%|███████████████████████████▉                                                    | 100/286 [11:30<19:16,  6.22s/it]

This model's maximum context length is 4097 tokens. However, you requested 4720 tokens (3720 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{"prompt": "Which of the following articles discusses the association between regular exercise and symptoms of anxiety and depression?", "answer": "C", "A": "The article by Boker et al. (2011) introduces OpenMx, an open-source extended structural equation modeling framework.", "B": "The article by De Moor et al. (2008) tests causality in the association between regular exercise and symptoms of anxiety and depression.", "C": "The article by De Moor et al. (2008) tests causality in the association between regular exercise and symptoms of anxiety and depression.", "D": "The article by Burt et al. (2009) discusses nonshared environmental mediation of the association between deviant peer affiliation and adolescent externalizing behaviors over time.", "E": "The article by Boker et al. (2011) introduces O

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, your messages resulted in 4647 tokens. Please reduce the length of the messages.
{"prompt": "Which statement accurately reflects the content of the provided text?", "answer": "C", "A": "Chemists from the Czech Republic are among the world's leading experts in the development of a drug against the coronavirus.", "B": "The United States reported the first case of the novel coronavirus in a publication from The New England Journal of Medicine.", "C": "The Czech government has ordered a drug for the coronavirus from a Czech specialist.", "D": "Gilead Sciences has offered an experimental drug for the treatment of coronavirus.", "E": "The FDA will authorize the use of remdesivir for Covid-19 after a trial showed a positive effect on recovery time."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, your messages resulted in 4163 tokens. Please reduce the length of the messages.
{"prompt": "Which statement accurately describes the relationship between volatility and the tendency to condense?", "answer": "C", "A": "Substances with low volatility will condense more readily than highly volatile ones.", "B": "Solids are generally more volatile than liquids, except for some exceptions like dry ice and iodine.", "C": "Differences in volatility can be observed by comparing the rate of evaporation or sublimation of substances when exposed to the atmosphere.", "D": "Vapor pressures and boiling points are numerical values used to describe volatility.", "E": "Volatility is solely determined by the strength of the interactions between molecules."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

This model's maximum context length is 4097 tokens. However, your messages resulted in 4445 tokens. Please reduce the length of the messages.
{"prompt": "Which of the following statements accurately describes the leaf flush regime?", "answer": "C", "A": "The leaf flush regime is determined by the climatic regime and refers to the timing and duration of leaf growth and shedding in plant communities.", "B": "The leaf flush regime is influenced by the elevation range and refers to the types of leaves found in different vegetation types.", "C": "The leaf flush regime can be categorized as evergreen, semideciduous, deciduous, alternate, or ephemeral based on the timing and duration of leaf growth and shedding.", "D": "The leaf flush regime is determined by the thermal realm and refers to the temperature requirements for leaf growth and shedding in plant communities.", "E": "The leaf flush regime is influenced by the substrate and refers to the types of leaves found in different soil types."

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

HTTP code 502 from API (<html>
<head><title>502 Bad Gateway</title></head>
<body>
<center><h1>502 Bad Gateway</h1></center>
<hr><center>cloudflare</center>
</body>
</html>
)
{"prompt": "Which of the following statements is the most accurate based on the provided text?", "answer": "C", "A": "Alcohol and benzene can be mixed together without any adverse effects on human health.", "B": "The untargeted in vivo microbial metabolite profiling via the SPME/GC × GC-QTOFMS approach is a novel method for recognizing pathogens in food matrixes.", "C": "Direct-injection mass spectrometry adds the time dimension to (B)VOC analysis.", "D": "Proton Transfer Reaction Mass Spectrometry (PTR-MS) is a high-speed mass spectrometry technique.", "E": "The PTR-QiTOF mass spectrometer is known for its extreme sensitivity and high speed."}


../data/wikipedia_fixed/v.parquet:  78%|███████████████████████████████████████████████████████████████▊                  | 74/95 [07:50<02:41,  7.70s/it]

This model's maximum context length is 4097 tokens. However, your messages resulted in 4721 tokens. Please reduce the length of the messages.
{"prompt": "Which of the following statements is the most accurate based on the provided text?", "answer": "C", "A": "Alcohol and benzene can be mixed together without any adverse effects on human health.", "B": "The untargeted in vivo microbial metabolite profiling via the SPME/GC × GC-QTOFMS approach is a novel method for recognizing pathogens in food matrixes.", "C": "Direct-injection mass spectrometry adds the time dimension to (B)VOC analysis.", "D": "Proton Transfer Reaction Mass Spectrometry (PTR-MS) is a high-speed mass spectrometry technique.", "E": "The PTR-QiTOF mass spectrometer is known for its extreme sensitivity and high speed."}


Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_15213/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 700, in _interpret_response
    self._interpret_response_line(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 765, in _interpret_response_line
    raise self.handle

HTTP code 502 from API (<html>
<head><title>502 Bad Gateway</title></head>
<body>
<center><h1>502 Bad Gateway</h1></center>
<hr><center>cloudflare</center>
</body>
</html>
)
{"prompt": "Which statement accurately describes the use of oxygen and carbon isotopes of foraminifera in paleoceanography?", "answer": "C", "A": "Oxygen and carbon isotopes of foraminifera are used to determine the age of sedimentary rocks.", "B": "Oxygen and carbon isotopes of foraminifera are used to study the effects of climate change on marine ecosystems.", "C": "Oxygen and carbon isotopes of foraminifera are used to reconstruct past ocean temperatures and carbon dioxide levels.", "D": "Oxygen and carbon isotopes of foraminifera are used to analyze the composition of seawater.", "E": "Oxygen and carbon isotopes of foraminifera are used to study the migration patterns of marine species."}


../data/wikipedia_fixed/v.parquet:  86%|██████████████████████████████████████████████████████████████████████▊           | 82/95 [09:00<01:51,  8.57s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 755, in _interpret_response_line
    data = json.loads(rbody)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/

HTTP code 502 from API (<html>
<head><title>502 Bad Gateway</title></head>
<body>
<center><h1>502 Bad Gateway</h1></center>
<hr><center>cloudflare</center>
</body>
</html>
)
{"prompt": "Which statement accurately describes the use of oxygen and carbon isotopes of foraminifera in paleoceanography?", "answer": "C", "A": "Oxygen and carbon isotopes of foraminifera are used to determine the age of sedimentary rocks.", "B": "Oxygen and carbon isotopes of foraminifera are used to study the effects of climate change on marine ecosystems.", "C": "Oxygen and carbon isotopes of foraminifera are used to reconstruct past ocean temperatures and carbon dioxide levels.", "D": "Oxygen and carbon isotopes of foraminifera are used to analyze the composition of seawater.", "E": "Oxygen and carbon isotopes of foraminifera are used to study the migration patterns of marine species."}


../data/wikipedia_fixed/v.parquet: 100%|██████████████████████████████████████████████████████████████████████████████████| 95/95 [10:25<00:00,  6.58s/it]
../data/wikipedia_fixed/w.parquet:  12%|█████████▎                                                                       | 14/121 [01:30<13:23,  7.51s/it]

Invalid \escape: line 1 column 159 (char 158)
{"prompt": "What is the basis of Tajima's D?", "answer": "C", "A": "Watterson's estimator is commonly used for its simplicity.", "B": "The estimate of θ ^ w {\displaystyle {\widehat {\theta \,}}_{w}} , often denoted as θ ^ w {\displaystyle {\widehat {\theta \,}}_{w}} , is θ ^ w = K a n , {\displaystyle {\widehat {\theta \,}}_{w}={K \over a_{n}},} where K {\displaystyle K} is the number of segregating sites (an example of a segregating site would be a single-nucleotide polymorphism) in the sample and a n = ∑ i = 1 n − 1 1 i {\displaystyle a_{n}=\sum _{i=1}^{n-1}{1 \over i}} is the ( n − 1 ) {\displaystyle (n-1)} th harmonic number.", "C": "Comparing the value of the Watterson's estimator, to nucleotide diversity is the basis of Tajima's D which allows inference of the evolutionary regime of a given locus.", "D": "The assumptions made are that there is a sample of n {\displaystyle n} haploid individuals from the population of interest, that t

Traceback (most recent call last):
  File "/tmp/ipykernel_15213/3124827155.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 1 column 159 (char 158)
../data/wikipedia_fixed/w.parquet:  74%|████████████████████████████████████████████████████████████▏                    | 90/121 [09:29<03:14,  6.27s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 755, in _interpret_response_line
    data = json.loads(rbody)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)

HTTP code 502 from API (<html>
<head><title>502 Bad Gateway</title></head>
<body>
<center><h1>502 Bad Gateway</h1></center>
<hr><center>cloudflare</center>
</body>
</html>
)
{"prompt": "Which of the following statements is the most accurate based on the provided text?", "answer": "C", "A": "The Pittsburgh Post-Gazette published an article titled 'Prince of Puzzles' on October 25, 1999.", "B": "The Philadelphia Inquirer reported on the 2009 Inquirer Sudoku National Championship.", "C": "The MIT Mystery Hunt 2019 Wrapup video can be found on YouTube.", "D": "BoardGameGeek features information about the game 'Roll for the Galaxy'.", "E": "The Washington Post published an article on March 9, 1993, about the top 10 finalists in Westinghouse's competition."}


../data/wikipedia_fixed/w.parquet: 100%|████████████████████████████████████████████████████████████████████████████████| 121/121 [13:00<00:00,  6.45s/it]
../data/wikipedia_fixed/x.parquet: 100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [01:23<00:00,  6.43s/it]
../data/wikipedia_fixed/y.parquet: 100%|██████████████████████████████████████████████████████████████████████████████████| 28/28 [03:09<00:00,  6.76s/it]
../data/wikipedia_fixed/z.parquet: 100%|██████████████████████████████████████████████████████████████████████████████████| 23/23 [02:32<00:00,  6.62s/it]


In [27]:
df_texts = pd.DataFrame(texts)
df_texts = df_texts.apply(f, axis=1)

df_texts.to_parquet(f"output_gpt3.5_generate/{now_date}.parquet")

In [28]:
len(df_texts)

5839