In [1]:
import openai
import pandas as pd
import tqdm
import glob

import warnings
warnings.filterwarnings("ignore")

In [2]:
with open("../apikey/apikey.txt", "r") as f:
    openai.api_key = f.readline().replace("\n", "")

In [3]:
def query_prompt(prompt, max_tokens=1000):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": prompt}
        ],
        max_tokens=max_tokens,
        temperature=0,
    )
    return response["choices"][0]["message"]["content"]

In [4]:
import random

In [15]:
def get_df(fname):
    def f(categories):
        for cat in categories:
            for word in [
                "geology",
                "physics",
                "chemistry",
                "mathematical",
                "biology",
                "astronomy",
                "ecology",
                "genetics",
                "statistics",
                "theoretical"
            ]:
                if word.lower() in cat.lower():
                    return True
            
        return False
    
    def text_preprocess(text):
        return text.replace("====", "\n\n").replace("===", "\n\n").replace("==", "\n\n")

    def sep_n(text, n=750):
        try:
            text = text.split(" ")
            start_index = random.randint(0, len(text) - n)
            return " ".join(text[start_index:start_index+n])
        except:
            return None
    
    df = pd.read_parquet(fname)
    df_science = df[df["categories"].apply(f)]
    df_science["text"] = df_science["text"].apply(text_preprocess)
    df_science["text"] = df_science["text"].apply(sep_n)
    df_science = df_science[df_science["text"].notnull()]
    return df_science.sample(len(df_science)//5)

In [16]:
files = glob.glob("../data/wikipedia/*.parquet")

In [17]:
import time

In [18]:
import pickle

In [19]:
import json

In [20]:
from datetime import datetime as dt
import os

In [21]:
texts = []

In [22]:
import traceback 
batch_size = 1

def make_prompt(series):
    prompt = f"""
You are a professional machine learning engineer who creates datasets for use in supervised learning of multiple choice questions and are very knowledgeable about science.
Please make a multiple-choice questions about below context.

Context:
{series['text']}

Attention:
- The output should be json format.
- Json format key is "prompt" as the question statement, "A," "B," "C," "D," and "E" as choices, "answer" as the answer choice (one of A through E).
"""
    return prompt

def f(series):
    try:
        if series["A"] != series["A"]:
            if type(series["answer"]) == dict:
                for key in ["A", "B", "C", "D", "E"]:
                    series[key] = series["choices"][key]
            elif type(series["answer"] == list):
                for i, key in enumerate(["A", "B", "C", "D", "E"]):
                    series[key] = series["choices"][i]
    except:
        return series
    return series

now_date = dt.now().strftime("%Y%m%d%H%M%S")

first = True
for file in files:
    if os.path.basename(file) in ["all.parquet"]:
        print(f"pass: {file}")
        continue
    df_science = get_df(file)
    
    for i in tqdm.tqdm(range(len(df_science)), desc=file):
        try:
            series = df_science.iloc[i]
            prompt = make_prompt(series)
            text = query_prompt(prompt)
            texts_json = json.loads(text)
            if first:
                print(texts_json)
                first = False
            if type(texts_json) == dict:
                texts_json["wiki_id"] = series["id"]
                texts_json["original_text"] = series["text"]
                texts.append(texts_json)
            else:
                for text_json in texts_json:
                    text_json["wiki_id"] = series["id"]
                    text_json["original_text"] = series["text"]
                    texts.append(text_json)
        except Exception as e:
            print(e)
            traceback.print_exc()
            print(text)
        if i % 20 == 0:
            df_texts = pd.DataFrame(texts)
            df_texts = df_texts.apply(f, axis=1)

            df_texts.to_csv(f"output_gpt3.5_generate/{now_date}.csv")

../data/wikipedia/a.parquet:   1%|█▏                                                                                                                                                                                                               | 1/181 [00:05<17:49,  5.94s/it]

{'prompt': 'Which work addresses the issue of bestiality?', 'A': 'Animal Geographies: Place, Politics, and Identity in the Nature–Culture Borderlands', 'B': 'Animal Spaces, Beastly Places: New Geographies of Human–Animal Relations', 'C': 'Le pratique sauvage: Race, place, and the human–animal divide', 'D': 'Hybrid Geographies: Natures Cultures Spaces', 'E': 'Placing Animals: An Introduction to the Geography of Human–Animal Relations', 'answer': 'C'}


../data/wikipedia/a.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 181/181 [13:35<00:00,  4.51s/it]


pass: ../data/wikipedia/all.parquet


../data/wikipedia/b.parquet:  19%|███████████████████████████████████████▌                                                                                                                                                                        | 24/126 [01:49<06:57,  4.09s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "Which university did Matkowsky receive his B.S. degree from?",
  "A": "City College of New York",
  "B": "New York University",
  "C": "Rensselaer Polytechnic Institute",
  "D": "Northwestern University",
  "E": "Tel Aviv University",
  "answer": "A"
}


../data/wikipedia/b.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 126/126 [15:14<00:00,  7.26s/it]
../data/wikipedia/c.parquet:  40%|███████████████████████████████████████████████████████████████████████████████████▋                                                                                                                            | 95/236 [07:52<11:26,  4.87s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Unterminated string starting at: line 2 column 13 (char 14)
{
  "prompt": "'aabb'. In other words, they are both homozygous for the same deficiency, which obviously will produce the same phenotype.\n\nComplementation tests in fungi and bacteriophage\n\n Complementation tests can also be carried out with haploid eukaryotes such as fungi, with bacteria and with viruses such as bacteriophage. Research on the fungus Neurospora crassa led to the development of the one-gene-one-enzyme concept that provided the foundation for the subsequent development of molecular genetics. The complementation test was one of the main tools used in the early Neurospora work, because it was easy to do, and allowed the investigator to determine whether any two nutritional mutants were defective in the same, or different genes. The complementation test was also used in the early development of molecular genetics when bacteriophage T4 was one of the main objects of study. In this case the test depends on mixed i

../data/wikipedia/c.parquet:  56%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                          | 133/236 [11:37<08:23,  4.89s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 4 column 44 (char 192)
../data/wikipedia/c.parquet:  57%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌              

Invalid \escape: line 4 column 44 (char 192)
{
  "prompt": "What is the purpose of the algorithm described in the given context?",
  "A": "To determine the complete center of mass of a system",
  "B": "To calculate the average values of \xi_i and \zeta_i",
  "C": "To generate new points (\xi_i, \zeta_i) based on the mass of the particle x_i",
  "D": "To unfold a cluster straddling the periodic boundaries",
  "E": "To guess the center of mass of a system",
  "answer": "A"
}


../data/wikipedia/c.parquet:  71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                            | 167/236 [14:15<04:56,  4.30s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "What is a defensive behavior displayed by some caterpillars?",
  "A": "Feeding in protected environments",
  "B": "Wiggling whip-like organs",
  "C": "Using a silk line to drop off branches",
  "D": "Thrashing about violently",
  "E": "Making high pitched whistles",
  "answer": "D"
}


../data/wikipedia/c.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 236/236 [24:33<00:00,  6.24s/it]
../data/wikipedia/d.parquet:  70%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                              | 89/127 [07:00<03:09,  5.00s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "Which of the following is true about sequencing theory?",
  "A": "Sequencing theory is primarily concerned with algorithmic issues.",
  "B": "Sequencing theory is a highly interdisciplinary subject based on mathematics, biology, and systems engineering.",
  "C": "Sequencing theory is only studied within the context of computational biology.",
  "D": "Sequencing theory is based on direct application of closed-form mathematical solutions.",
  "E": "Sequencing theory does not account for factors like detectable overlap in sequencing fragments and target multiplicity.",
  "answer": "B"
}


../data/wikipedia/d.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 127/127 [14:53<00:00,  7.04s/it]
../data/wikipedia/e.parquet:  32%|███████████████████████████████████████████████████████████████████▏                                                                                                                                            | 52/161 [04:13<09:16,  5.10s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 2 column 17 (char 57)
{"prompt": "What is Euler's identity?", 
 "A": "e^{ix} = \cos x + i\sin x", 
 "B": "e^{i\pi} = -1", 
 "C": "e^{i\pi} +1 = 0", 
 "D": "e^z = \lim_{n\to\infty} \left(1+\frac z n \right)^n", 
 "E": "e^{i \pi} = \cos \pi + i\sin \pi", 
 "answer": "B"}


../data/wikipedia/e.parquet:  55%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                             | 89/161 [07:27<05:27,  4.56s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "What is Edward Belbruno's artistic media?",
  "A": "Paintings",
  "B": "Sculptures",
  "C": "Photography",
  "D": "Music",
  "E": "Dance",
  "answer": "A"
}


../data/wikipedia/e.parquet:  60%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                                    | 96/161 [13:02<16:31, 15.25s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "According to A. Lima-de-Faria's book 'Evolution Without Selection: Form and Function by Autoevolution', what is the central premise of the book?",
  "A": "The laws of physics and chemistry generate the basic forms found in living organisms.",
  "B": "Natural selection is the mechanism of evolution.",
  "C": "Autoevolutionism is a form of orthogenesis.",
  "D": "Evolution occurs through random events.",
  "E": "Selection can be weighed and poured into a vial.",
  "answer": "A"
}


../data/wikipedia/e.parquet:  71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                            | 114/161 [20:25<04:11,  5.36s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 3 column 21 (char 159)
../data/wikipedia/e.parquet:  71%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 3 column 21 (char 159)
{
  "prompt": "According to the free electron model, what is the expression for the specific heat capacity of free electrons in a metal?",
  "A": "C_V = \frac{\pi^2}{3} k_{\rm B}^2TD(E_{\rm F})",
  "B": "C_V = \frac{\pi^2}{2} k_{\rm B}\left( \frac{T}{T_{\rm F}} \right)",
  "C": "C_V = \gamma T +AT^3",
  "D": "C_V = \frac{\pi^2}{2} \frac{k_{\rm B}^2T}{\epsilon_f}",
  "E": "C_V = \tfrac{3}{2}k_{\rm B}",
  "answer": "B"
}


../data/wikipedia/e.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 161/161 [24:19<00:00,  9.06s/it]
../data/wikipedia/f.parquet:   7%|███████████████                                                                                                                                                                                                   | 7/98 [00:29<06:34,  4.34s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 4 column 27 (char 198)
{
  "prompt": "According to the given context, what is the probability of transition from the initial state (ith) to the final state (fth)?",
  "A": "w_{fi} = |a_f(t)|^2",
  "B": "w_{fi} = \frac{1}{\hbar^2} \left|\int_0^t \langle f|H'(t')|i\rangle e^{\mathrm{i}\omega_{fi} t'} dt'\right|^2",
  "C": "w_{fi} = \frac{1}{\hbar^2} \left|\int_0^t \langle f|H'(t')|i\rangle e^{\mathrm{i}\omega_{ki} t'} dt'\right|^2",
  "D": "w_{fi} = \frac{1}{\hbar^2} \left|\int_0^t \langle f|H'(t')|i\rangle e^{\mathrm{i}\omega t'} dt'\right|^2",
  "E": "w_{fi} = \frac{1}{\hbar^2} \left|\int_0^t \langle f|H'(t')|i\rangle e^{\mathrm{i}\omega_{fi} t'} dt'\right|^2",
  "answer": "B"
}


../data/wikipedia/f.parquet:  31%|███████████████████████████████████████████████████████████████▉                                                                                                                                                 | 30/98 [02:30<04:42,  4.16s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "Which of the following is NOT a variation-dependent mechanism of modern coexistence theory?",
  "A": "Covariance",
  "B": "Storage effects",
  "C": "Relative nonlinearities",
  "D": "Fitness-density covariances",
  "E": "Intraspecific competition",
  "answer": "E"
}


../data/wikipedia/f.parquet:  37%|████████████████████████████████████████████████████████████████████████████▊                                                                                                                                    | 36/98 [08:00<20:07, 19.47s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 4 column 49 (char 294)
../data/wikipedia/f.parquet:  38%|██████████████████████████████████████████████████████████████████████████████▉                                                     

Invalid \escape: line 4 column 49 (char 294)
{
  "prompt": "Which of the following statements is true about the estimation process in functional principal component analysis (FPCA)?",
  "A": "The estimate of the mean function μ(tij) is obtained by taking the average at each location tij.",
  "B": "The estimate of the covariance function \hat{G}(s, t) is obtained by smoothing the raw covariances.",
  "C": "The estimation of eigenvalues λk and eigenvectors vk is carried out by numerical linear algebra.",
  "D": "The estimate of σ2 is obtained by integrating the smoothed version of the diagonal elements of the raw covariance matrices.",
  "E": "The estimate of the k-th FPC ξk is obtained by numerical integration if the observations are dense; otherwise, best linear unbiased predictors are used.",
  "answer": "E"
}


../data/wikipedia/f.parquet:  59%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                     | 58/98 [09:45<02:49,  4.25s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "What is the term used for the point of minimum radius of curvature (maximum curvature) of a fold?",
  "A": "Hinge line",
  "B": "Hinge zone",
  "C": "Hinge point",
  "D": "Axial surface",
  "E": "Fold axis",
  "answer": "C"
}


../data/wikipedia/f.parquet:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                    | 66/98 [15:37<06:21, 11.93s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 5 column 32 (char 172)
../data/wikipedia/f.parquet:  68%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 5 column 32 (char 172)
{
  "prompt": "Which formula is used to determine the future value using simple interest?",
  "A": "FV = PV(1+rt)",
  "B": "FV = PV(1+i)^t",
  "C": "FV = {(1+r)^n - 1 / r} \cdot \mathrm{(payment\ amount)}",
  "D": "FV = PV(1+rt) + \mathrm{(payment\ amount)}",
  "E": "FV = PV(1+i)^t + \mathrm{(payment\ amount)}",
  "answer": "A"
}


../data/wikipedia/f.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 98/98 [17:59<00:00, 11.02s/it]
../data/wikipedia/g.parquet:   3%|█████▉                                                                                                                                                                                                           | 4/141 [00:19<10:54,  4.78s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "What is the purpose of the Comparative Toxicogenomics Database (CTD)?",
  "A": "To understand the effects of environmental compounds on human health",
  "B": "To prioritize genes for future study in biomedical research",
  "C": "To catalog all known genetic diseases",
  "D": "To identify associations between genes and diseases",
  "E": "To integrate complex, heterogeneous data sets in gene prioritization",
  "answer": "A"
}


../data/wikipedia/g.parquet:  21%|██████████████████████████████████████████▊                                                                                                                                                                     | 29/141 [07:07<07:36,  4.07s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "Which geological event led to the formation and rapid erosion of mountain ranges in the north of Britain?",
  "A": "The Variscan orogeny",
  "B": "The Caledonian orogeny",
  "C": "The Quaternary period",
  "D": "The Carboniferous period",
  "E": "The Devonian period",
  "answer": "B"
}


../data/wikipedia/g.parquet:  27%|████████████████████████████████████████████████████████                                                                                                                                                        | 38/141 [13:11<17:40, 10.30s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 2 column 106 (char 107)
../data/wikipedia/g.parquet:  28%|█████████████████████████████████████████████████████████▌                                                                         

Invalid \escape: line 2 column 106 (char 107)
{
  "prompt": "What is the relationship between the real and imaginary parts of the retarded propagator G^{\mathrm{R}}(\mathbf{k},\omega)?",
  "A": "The real part is equal to the imaginary part",
  "B": "The real part is the negative of the imaginary part",
  "C": "The real part is the principal value of the integral of the imaginary part",
  "D": "The real part is the Hilbert transform of the imaginary part",
  "E": "The real part is the sum of the imaginary part and the Cauchy principal part of the integral",
  "answer": "D"
}


../data/wikipedia/g.parquet:  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                | 130/141 [19:52<00:50,  4.59s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "What is the topic of Volovik's Russian doctoral thesis?",
  "A": "Dynamics of a particle strongly interacting with a Bose System",
  "B": "Topology of defects in condensed matter",
  "C": "Effects of symmetry in superfluids and superconductors",
  "D": "Low temperature quantum spin liquids",
  "E": "Emergence of gravitation as a collective vacuum excitation",
  "answer": "B"
}


../data/wikipedia/g.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 141/141 [25:40<00:00, 10.92s/it]
../data/wikipedia/h.parquet:  30%|███████████████████████████████████████████████████████████████▍                                                                                                                                                | 39/128 [02:48<06:42,  4.52s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "What is the name of Heston Blumenthal's first restaurant outside Bray?",
  "A": "The Fat Duck",
  "B": "Dinner by Heston Blumenthal",
  "C": "The Perfectionists' Cafe",
  "D": "The Mandarin Oriental Hyde Park",
  "E": "The Little Chef",
  "answer": "B"
}


../data/wikipedia/h.parquet:  61%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                 | 78/128 [11:13<05:16,  6.33s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "Which term is used to denote electronic states of diatomic molecules?",
  "A": "Even and odd",
  "B": "Gerade and ungerade",
  "C": "Atomic and molecular",
  "D": "Orthogonal and polarized",
  "E": "LCAO and psi functions",
  "answer": "B"
}


../data/wikipedia/h.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 128/128 [20:07<00:00,  9.44s/it]
../data/wikipedia/i.parquet:  43%|████████████████████████████████████████████████████████████████████████████████████████▉                                                                                                                       | 50/117 [03:34<04:26,  3.98s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 5 column 86 (char 380)
{
  "prompt": "Which of the following statements about the Heilbronn triangle problem is correct?",
  "A": "The problem has been solved for all possible arrangements of points in the unit square.",
  "B": "The upper bound for the minimal area of a triangle formed by three points is O(1/n^2).",
  "C": "The lower bound for the minimal area of a triangle formed by three points is \Omega((\log n)/n^2).",
  "D": "The upper bound for the minimal area of a triangle formed by three points is \exp(c \sqrt{\log n})/n^{8/7}.",
  "E": "The average case analysis using the incompressibility method shows that the area of the smallest triangle is \Theta(1/n^3).",
  "answer": "E"
}


../data/wikipedia/i.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 117/117 [08:35<00:00,  4.41s/it]
../data/wikipedia/j.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 59/59 [04:19<00:00,  4.39s/it]
../data/wikipedia/k.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 44/44 [03:31<00:00,  4.81s/it]
../data/wikipedia/l.parquet:   3%|██████▊                                                                                                                                   

HTTP code 502 from API (<html>
<head><title>502 Bad Gateway</title></head>
<body>
<center><h1>502 Bad Gateway</h1></center>
<hr><center>cloudflare</center>
</body>
</html>
)
{
  "prompt": "Which process is useful in extracting organic compounds such as organochloride and organophosphorus pesticides from water samples?",
  "A": "Direct organic extraction",
  "B": "Multistage countercurrent continuous processes",
  "C": "Mixer-settlers",
  "D": "Centrifugal extractors",
  "E": "Extraction without chemical change",
  "answer": "A"
}


../data/wikipedia/l.parquet:  12%|█████████████████████████                                                                                                                                                                                       | 26/216 [06:45<11:54,  3.76s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 3 column 16 (char 103)
../data/wikipedia/l.parquet:  12%|██████████████████████████                                                                                                          

Invalid \escape: line 3 column 16 (char 103)
{
  "prompt": "In the long wavelength limit (q→0), what is the expression for ε(0,ω)?",
  "A": "1 - V_{\mathbf q} L^2 / (m \omega_0^2) q^2 n",
  "B": "1 + \frac{V_{\mathbf q} L^2}{m \omega_0^2} \sum_{i,j}{ q_i q_j n \delta_{ij}}",
  "C": "1 - \frac{2 \pi e^2}{\epsilon q L^2} \frac{L^2}{m \omega_0^2} q^2 n",
  "D": "1 - \frac{\omega_{\rm pl}^2(\mathbf q)}{\omega_0^2}",
  "E": "None of the above",
  "answer": "D"
}


../data/wikipedia/l.parquet:  19%|████████████████████████████████████████▍                                                                                                                                                                       | 42/216 [07:53<12:03,  4.16s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 44, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_21932/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
 

This model's maximum context length is 4097 tokens. However, your messages resulted in 4252 tokens. Please reduce the length of the messages.
{
  "prompt": "What is the phenomenon observed by Friedlieb Ferdinand Runge and Raphael E. Liesegang?",
  "A": "Formation of concentric rings of insoluble silver dichromate",
  "B": "Formation of layers or bands of precipitate",
  "C": "Formation of clear regions with no sensible precipitate",
  "D": "Formation of alternating regions of clear gel and precipitate rings",
  "E": "Formation of fine coagulant or flocs",
  "answer": "A"
}


../data/wikipedia/l.parquet:  42%|███████████████████████████████████████████████████████████████████████████████████████▋                                                                                                                        | 91/216 [11:03<07:46,  3.73s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 4 column 13 (char 114)
../data/wikipedia/l.parquet:  43%|████████████████████████████████████████████████████████████████████████████████████████▌                                           

Invalid \escape: line 4 column 13 (char 114)
{
  "prompt": "Which equation represents the gravitational potential energy?",
  "A": "U = - abla U",
  "B": "U = \int_{V_n} \mathrm{d} \mathbf{m} \times \mathbf{g}",
  "C": "E = T + U",
  "D": "U = - \frac{GmM}{2 \left | \mathbf{r} \right |}",
  "E": "U = \int_{V_n} \mathrm{d} \mathbf{m} \cdot \mathbf{g}",
  "answer": "D"
}


../data/wikipedia/l.parquet:  75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                   | 162/216 [16:12<03:26,  3.83s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Unterminated string starting at: line 5 column 8 (char 2208)
../data/wikipedia/l.parquet:  75%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Unterminated string starting at: line 5 column 8 (char 2208)
{
  "prompt": "Which of the following is the correct derivation of the logarithm of a product law?",
  "A": "Let b ∈ ℝ₊, b ≠ 1, and let x, y ∈ ℝ₊. We want to relate the expressions log_b(x) and log_b(y). This can be done more easily by rewriting in terms of exponentials, whose properties we already know. Additionally, since we are going to refer to log_b(x) and log_b(y) quite often, we will give them some variable names to make working with them easier: Let m = log_b(x), and let n = log_b(y). Rewriting these as exponentials, we see that m = log_b(x) ⟺ b^m = x and n = log_b(y) ⟺ b^n = y. From here, we can relate b^m (i.e. x) and b^n (i.e. y) using exponent laws as: xy = (b^m)(b^n) = b^m ⋅ b^n = b^{m + n}. To recover the logarithms, we apply log_b to both sides of the equality. log_b(xy) = log_b(b^{m + n}). The right side may be simplified using one of the logarithm properties from before: we know that log_b(b^{m + n}) = m + n,

../data/wikipedia/l.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 216/216 [20:45<00:00,  5.77s/it]
../data/wikipedia/m.parquet:  38%|███████████████████████████████████████████████████████████████████████████████▎                                                                                                                                | 77/202 [05:53<08:25,  4.04s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 3 column 18 (char 156)
{
  "prompt": "What is the multipole expansion of the electrostatic interaction energy between two non-overlapping charge distributions?",
  "A": "U_{AB} = \sum_{i\in A} \sum_{j\in B} \frac{q_i q_j}{4\pi\varepsilon_0 r_{ij}}",
  "B": "U_{AB} = \sum_{L=0}^\infty \sum_{M=-L}^L \, (-1)^M I_L^{-M}(\mathbf{R}_{AB})\; R^M_L( \mathbf{r}_{Ai} - \mathbf{r}_{Bj})",
  "C": "U_{AB} = \frac{1}{4\pi\varepsilon_{0}}\sum_{\ell=0}^{\infty} \sum_{m=-\ell}^{\ell}(-1)^{m} I^{-m}_{\ell}(\mathbf{R}_{AB}) Q^{m}_{\ell}",
  "D": "U_{AB} = \frac{1}{4\pi\varepsilon_{0}}\sum_{\ell=0}^{\infty}\left[\frac{4\pi}{2\ell + 1}\right]^{1/2}\;\frac{1}{R^{\ell + 1}} \sum_{m=-\ell}^{\ell}(-1)^{m} Y^{-m}_{\ell}(\hat{R}_{AB}) Q^{m}_{\ell}",
  "E": "U_{AB} = \frac{q_\mathrm{tot}}{4\pi \varepsilon_0 R_{AB}}",
  "answer": "B"
}


../data/wikipedia/m.parquet:  74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                      | 149/202 [11:19<03:22,  3.82s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 3 column 19 (char 111)
../data/wikipedia/m.parquet:  74%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 3 column 19 (char 111)
{
  "prompt": "What is the result of Mehler's formula when the variables x and y coincide?",
  "A": "K(x,y;0)= \delta(x-y)",
  "B": "K(x,y;0)= \exp\left(-\coth(2t)~(x^2+y^2)/2 + \operatorname{csch}(2t)~xy\right)",
  "C": "K(x,y;t+t') = \int dy K(x,y;t) K(y,z;t')",
  "D": "K(x,y;0)= \frac{1}{\sqrt{(1-\rho^2)}} \exp\left({4xy\rho - (1+\rho^2)(x^2+y^2)\over 2(1-\rho^2)}\right)",
  "E": "K(x,y;0)= \sum_{n\ge 0} \frac {(\rho/2)^n}{n!} H_n(x) H_n(y) \exp(-(x^2+y^2)/2)",
  "answer": "A"
}


../data/wikipedia/m.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 202/202 [15:09<00:00,  4.50s/it]
../data/wikipedia/n.parquet:  58%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                                      | 62/106 [04:21<03:14,  4.41s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 44, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_21932/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/opena

This model's maximum context length is 4097 tokens. However, you requested 4707 tokens (3707 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{
  "prompt": "According to the semantic account of computation, what additional constraint must a system meet in order to be considered a computation?",
  "A": "The system must manipulate representations with semantic content.",
  "B": "The system must have an indefinite number of computational descriptions.",
  "C": "The system must be observer-related.",
  "D": "The system must be able to compute an indefinite number of tasks.",
  "E": "The system must be able to appeal to the evolutionary history of the system.",
  "answer": "A"
}


../data/wikipedia/n.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 106/106 [07:26<00:00,  4.22s/it]
../data/wikipedia/number.parquet: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [01:18<00:00,  3.73s/it]
../data/wikipedia/o.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [04:42<00:00,  4.48s/it]
../data/wikipedia/other.parquet: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "Which of the following is a limitation of perturbation theory?",
  "A": "Large perturbations",
  "B": "Non-adiabatic states",
  "C": "Difficult computations",
  "D": "Time-independent perturbation theory",
  "E": "First order corrections",
  "answer": "B"
}


../data/wikipedia/p.parquet:  80%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                        | 163/203 [17:42<13:38, 20.47s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 7 column 40 (char 499)
../data/wikipedia/p.parquet:  81%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 7 column 40 (char 499)
{
  "prompt": "Which of the following statements about parity transformation is true?",
  "A": "Parity transformation is a test for chirality of a physical phenomenon.",
  "B": "All fundamental interactions of elementary particles are symmetric under parity.",
  "C": "A matrix representation of parity has a determinant equal to 1.",
  "D": "In quantum mechanics, wave functions that are unchanged by a parity transformation are described as odd functions.",
  "E": "Parity forms the abelian group \mathbb{Z}_2.",
  "answer": "C"
}


../data/wikipedia/p.parquet:  83%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                  | 169/203 [18:10<03:41,  6.51s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 3 column 20 (char 157)
../data/wikipedia/p.parquet:  84%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 3 column 20 (char 157)
{
  "prompt": "What is the equation for the lower stability line of water in the Pourbaix diagram at standard temperature and pressure?",
  "A": "E_h = - V_T\lambda\,\ce{pH}",
  "B": "E_h = {E^\circ} - \frac{V_T \lambda}{2} \left ( \log\ \ce{[Fe^{2+}]^2} + 6 \ \ce{pH}\right)",
  "C": "E_h = {1.0826} - {0.1775} \ {pH} \quad (\text{in volts})",
  "D": "E_h = 1.229 V - 0.05916\,\ce{pH}",
  "E": "E_h = {E^\circ} - \frac{V_T \lambda}{2} \left ( \log\left ( \frac\ce{[Fe^{2+}]^2[H2O]^3}\ce{[Fe2O3]} \right ) + 6 \ \ce{pH}\right)",
  "answer": "A"
}


../data/wikipedia/p.parquet:  94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋             | 190/203 [19:52<00:59,  4.56s/it]Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/opt/conda/lib/python3.10/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/opt/conda/lib/python3.10/http/client.py", line 1375, in getresponse
    response.begin()
  File "/opt/conda/lib/python3.10/http/client.py", line 318, in begin
    version, status, reason = self._read_status()
  File "/opt/co

Error communicating with OpenAI: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
{
  "prompt": "Which class of pseudoenzyme acts as a scaffold for assembly of signalling complexes?",
  "A": "Pseudokinase",
  "B": "Pseudophosphatase",
  "C": "Pseudoprotease",
  "D": "Pseudoligase (pseudo-Ubiquitin E3)",
  "E": "Pseudonuclease",
  "answer": "A"
}


../data/wikipedia/p.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 203/203 [26:12<00:00,  7.75s/it]
../data/wikipedia/q.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [01:33<00:00,  4.90s/it]
../data/wikipedia/r.parquet:  60%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                    | 75/126 [05:25<03:09,  3.72s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 44, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_21932/72703211.py"

This model's maximum context length is 4097 tokens. However, you requested 4669 tokens (3669 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{
  "prompt": "What is McKay's area of expertise?",
  "A": "Physics",
  "B": "Mechanical Engineering",
  "C": "Astrophysics",
  "D": "Naqahdah generator technology",
  "E": "Ancient technology",
  "answer": "C"
}


../data/wikipedia/r.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 126/126 [09:07<00:00,  4.34s/it]
../data/wikipedia/s.parquet:   4%|████████▎                                                                                                                                                                                                       | 10/250 [00:45<20:07,  5.03s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 4 column 27 (char 162)
{
  "prompt": "What is the stopping rule in sequential probability ratio testing (SPRT)?",
  "A": "Continue monitoring if a < S_i < b",
  "B": "Accept H_1 if S_i \geq b",
  "C": "Accept H_0 if S_i \leq a",
  "D": "Both B and C",
  "E": "None of the above",
  "answer": "D"
}


../data/wikipedia/s.parquet:   5%|█████████▉                                                                                                                                                                                                      | 12/250 [00:55<20:13,  5.10s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 44, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_21932/72703211.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)
 

This model's maximum context length is 4097 tokens. However, you requested 4685 tokens (3685 in the messages, 1000 in the completion). Please reduce the length of the messages or completion.
{
  "prompt": "Which of the following is true about energy density?",
  "A": "Energy density is commonly measured in kilojoules (kJ) or megajoules (MJ).",
  "B": "Proteins have higher energy densities than fats.",
  "C": "Water content is not a factor in computing energy density.",
  "D": "Foods with high energy density have less than three calories per gram.",
  "E": "Fiber or sugar alcohols increase the energy density of foods.",
  "answer": "A"
}


../data/wikipedia/s.parquet:  51%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                                                     | 127/250 [09:31<08:36,  4.20s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 8 column 3 (char 776)
../data/wikipedia/s.parquet:  51%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▉                   

Expecting ',' delimiter: line 8 column 3 (char 776)
{
  "prompt": "In classical genetics, a mating experiment called a reciprocal cross is performed to test if an animal's trait is sex-linked. Which of the following is true about X-linked dominant inheritance?",
  "A": "All offspring of a carrier female have a 50% chance of inheriting the mutation if the father does not carry the recessive allele.",
  "B": "100% of the daughters will be affected if only the father is affected.",
  "C": "Males have only a single X chromosome and therefore have only one copy of X-linked genes.",
  "D": "Females possessing one X-linked recessive mutation are considered carriers and will generally not manifest clinical symptoms of the disorder.",
  "E": "There are fewer X-linked dominant conditions than X-linked recessive conditions."
  "answer": "B"
}


../data/wikipedia/s.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 250/250 [18:40<00:00,  4.48s/it]
../data/wikipedia/t.parquet:  29%|███████████████████████████████████████████████████████████▍                                                                                                                                                    | 40/140 [02:42<06:58,  4.19s/it]

Invalid \escape: line 4 column 89 (char 292)
{
  "prompt": "Which of the following statements is true about two-step M-estimators?",
  "A": "The asymptotic variance of a two-step M-estimator is generally the same as that of the usual M-estimator.",
  "B": "The identification condition for two-step M-estimators is that E[m(W_{1},\theta,\gamma^*)] has multiple maximizers over \Theta.",
  "C": "Two-step M-estimators have asymptotic normality under regularity conditions.",
  "D": "The asymptotic variance of a two-step M-estimator is always smaller than that of the usual M-estimator.",
  "E": "Two-step M-estimators are not consistent for large sample sizes.",
  "answer": "C"
}


Traceback (most recent call last):
  File "/tmp/ipykernel_21932/598235146.py", line 45, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 4 column 89 (char 292)
../data/wikipedia/t.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 140/140 [10:16<00:00,  4.40s/it]
../data/wikipedia/u.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 3 column 12 (char 176)
{
  "prompt": "Which of the following components of the linearized Vasiliev equations describes the self-dual and anti self-dual components of the Maxwell tensor?",
  "A": "C_{\alpha\beta}",
  "B": "C_{\dot{\alpha}\dot{\beta}}",
  "C": "C_{\alpha\beta\gamma\delta}",
  "D": "C_{\dot{\alpha}\dot{\beta}\dot{\gamma}\dot{\delta}}",
  "E": "C_{\alpha_1...\alpha_{2s}}",
  "answer": "A"
}


../data/wikipedia/v.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 46/46 [03:35<00:00,  4.68s/it]
../data/wikipedia/w.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [04:25<00:00,  4.42s/it]
../data/wikipedia/x.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:21<00:00,  5.33s/it]
../data/wikipedia/y.parquet: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

In [24]:
df_texts = pd.DataFrame(texts)
df_texts = df_texts.apply(f, axis=1)

df_texts.to_csv(f"output_gpt3.5_generate/{now_date}.csv")