In [1]:
import openai
import pandas as pd
import tqdm
import glob

import warnings
warnings.filterwarnings("ignore")

In [2]:
with open("../apikey/apikey.txt", "r") as f:
    openai.api_key = f.readline().replace("\n", "")

In [3]:
def query_prompt(prompt, max_tokens=5000):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo-16k",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
        temperature=0,
    )
    return response["choices"][0]["message"]["content"]

In [4]:
def get_df(fname):
    def f(categories):
        for cat in categories:
            for word in [
                "geology",
                "science",
                "physics",
                "chemistry",
                "mathematical",
                "biology",
                "astronomy",
                "ecology",
                "genetics",
            ]:
                if word.lower() in cat.lower():
                    return True
            
        return False
    
    def text_preprocess(text):
        return text.replace("===", "\n").replace("==", "\n")

    df = pd.read_parquet(fname)
    df_science = df[df["categories"].apply(f)]
    df_science["text"] = "title: " + df_science["title"] + "\n" + df_science["text"].apply(text_preprocess)
    return df_science.sample(len(df_science)//100)

In [5]:
files = glob.glob("../data/wikipedia/*.parquet")

In [6]:
import time

In [7]:
import pickle

In [8]:
import json

In [9]:
from datetime import datetime as dt
import os

In [10]:
texts = []

In [11]:
import traceback 
batch_size = 1

def make_prompt(series):
    prompt = f"""
# 依頼
You are a professor at a science university and are creating a test for your students.
Using the given text, create a question in which you select the most appropriate statement from the five options in the question text. Also, extract the evidence for your answer.
The output should be an array in json format, with "prompt" as the problem statement, "A," "B," "C," "D," and "E" as choices, "answer" as the answer choice (one of A through E), and "basis" as the rationale. Please make sure that the answer choices are not all the same, e.g., all five answers are A.

# text
## text 1 
title: {series['title']}

{series['text']}


# attention
Please create 7 question.
"""
    return prompt

def f(series):
    if series["A"] != series["A"]:
        if type(series["choices"]) == dict:
            for key in ["A", "B", "C", "D", "E"]:
                series[key] = series["choices"][key]
        elif type(series["choices"] == list):
            for i, key in enumerate(["A", "B", "C", "D", "E"]):
                series[key] = series["choices"][i]
    return series

now_date = dt.now().strftime("%Y%m%d%H%M%S")

first = True
for file in files:
    if os.path.basename(file) in ["all.parquet"]:
        print(f"pass: {file}")
        continue
    df_science = get_df(file)
    
    for i in tqdm.tqdm(range(len(df_science)), desc=file):
        try:
            series = df_science.iloc[i]
            prompt = make_prompt(series)
            text = query_prompt(prompt)
            texts_json = json.loads(text)
            for text_json in texts_json:
                text_json["wiki_id"] = series["id"]
                text_json["original_text"] = series["text"]
                texts.append(text_json)
            if first:
                print(texts_json)
                first = False
        except Exception as e:
            print(e)
            traceback.print_exc()
            print(text)
        if i % 20 == 0:
            df_texts = pd.DataFrame(texts)
            df_texts = df_texts.apply(f, axis=1)

            df_texts.to_csv(f"output_gpt3.5_generate/{now_date}.csv")

../data/wikipedia/a.parquet:   1%|██▌                                                                                                                                                                                                               | 1/83 [00:29<40:18, 29.49s/it]

[{'prompt': "What is Allison Arieff's current position?", 'A': 'Editorial Director of Print for the MIT Technology Review', 'B': 'Senior Editor of City Monitor - The New Statesman', 'C': 'Editorial Director for SPUR', 'D': 'Editor-in-chief of Dwell magazine', 'E': 'Senior content lead for IDEO', 'answer': 'A', 'basis': 'The text states that Allison Arieff is the Editorial Director of Print for the MIT Technology Review.', 'wiki_id': '4378675', 'original_text': 'title: Allison Arieff\nAllison Arieff (born October 29, 1966Adam Bluestein, So What Do You Do, Allison Arieff? (Interview), mediabistro, July 29, 2003. Retrieved 2012-03-05.) is the Editorial Director of Print for the MIT Technology Review. An American writer and editor, she was, most recently, Senior Editor of City Monitor - The New Statesman. She was previously editorial director for the urban planning and policy think tank, SPUR (the San Francisco Bay Area Planning and Urban Research Association). From 2007-2020, she wrote a 

../data/wikipedia/a.parquet:  16%|████████████████████████████████▋                                                                                                                                                                                | 13/83 [06:12<34:35, 29.65s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 23 column 42 (char 1089)
../data/wikipedia/a.parquet:  17%|███████████████████████████████████▎                                                                                      

Expecting ',' delimiter: line 23 column 42 (char 1089)
[
  {
    "prompt": "What is an antibody microarray?",
    "A": "A technology used for detecting protein expression from various biofluids",
    "B": "A technology used for detecting DNA sequences",
    "C": "A technology used for detecting chemical compounds",
    "D": "A technology used for detecting tissue samples",
    "E": "A technology used for detecting protein phosphorylation",
    "answer": "A",
    "basis": "In the given text, it is mentioned that an antibody microarray is a specific form of protein microarray used for detecting protein expression from various biofluids including serum, plasma, and cell or tissue lysates."
  },
  {
    "prompt": "Who introduced the concept and methodology of antibody microarrays?",
    "A": "Roger Ekins",
    "B": "Tse Wen Chang",
    "C": "Nancy T. Chang",
    "D": "Ruo-Pan Huang",
    "E": "RayBiotech, Inc.",
    "answer": "Tse Wen Chang",
    "basis": "In the given text, it is mentione

../data/wikipedia/a.parquet:  51%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                                       | 42/83 [21:02<20:26, 29.92s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 340, in decode
    raise JSONDecodeError("Extra data", s, end)
json.decoder.JSONDecodeError: Extra data: line 2 column 1 (char 444)
../data/wikipedia/a.parquet:  52%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                                    | 43/83 [21:34<20:31, 30.78s/it]

Extra data: line 2 column 1 (char 444)
{"prompt": "Which series by Andre Norton features a former Confederate soldier as the protagonist?", "A": "Astra, or Pax", "B": "Beast Master (Hosteen Storm)", "C": "Cycle of Oak, Yew, Ash, and Rowan", "D": "Drew Rennie", "E": "Elvenbane, or the Halfblood Chronicles", "answer": "D", "basis": "The Drew Rennie series features a former Confederate soldier as the protagonist. The other series mentioned in the text do not have this character."}
{"prompt": "Which series by Andre Norton is a collaboration with Mercedes Lackey?", "A": "Astra, or Pax", "B": "Beast Master (Hosteen Storm)", "C": "Cycle of Oak, Yew, Ash, and Rowan", "D": "Drew Rennie", "E": "Elvenbane, or the Halfblood Chronicles", "answer": "E", "basis": "The Elvenbane series is a collaboration between Andre Norton and Mercedes Lackey. The other series mentioned in the text are not collaborations."}
{"prompt": "Which series by Andre Norton is set in a world with three moons?", "A": "Astra, o

../data/wikipedia/a.parquet:  90%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                    | 75/83 [38:18<04:21, 32.66s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 53 column 78 (char 4181)
../data/wikipedia/a.parquet:  92%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 53 column 78 (char 4181)
[
    {
        "prompt": "What is adiabatic accessibility?",
        "A": "The ability to transform a system from one equilibrium state to another without the transfer of energy as heat or transfer of matter",
        "B": "The ability to transform a system from one equilibrium state to another by doing work on the system",
        "C": "The ability to transform a system from one equilibrium state to another by transferring energy as heat",
        "D": "The ability to transform a system from one equilibrium state to another by transferring matter",
        "E": "The ability to transform a system from one equilibrium state to another by transferring both energy as heat and matter",
        "answer": "A",
        "basis": "According to the given text, adiabatic accessibility refers to the ability to transform a system from one equilibrium state to another without the transfer of energy as heat or transfer of matter."
    },
    {
        "

../data/wikipedia/a.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 83/83 [42:38<00:00, 30.83s/it]


pass: ../data/wikipedia/all.parquet


../data/wikipedia/b.parquet:  49%|██████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                                          | 23/47 [11:37<12:51, 32.13s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 40 column 116 (char 1875)
../data/wikipedia/b.parquet:  51%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▋              

Expecting ',' delimiter: line 40 column 116 (char 1875)
[
  {
    "prompt": "What was Bruce H. McCormick's field of study at Cambridge University?",
    "A": "Quantum field theory",
    "B": "Computer science",
    "C": "Physics",
    "D": "Bioengineering",
    "E": "Information visualization",
    "answer": "A",
    "basis": "In the text, it is mentioned that Bruce H. McCormick studied quantum field theory with Professor Paul Dirac at Cambridge University."
  },
  {
    "prompt": "Where did Bruce H. McCormick serve as the head of the electrical engineering and computer science department?",
    "A": "Texas A&M; University",
    "B": "University of Illinois at Urbana-Champaign",
    "C": "MIT",
    "D": "Harvard University",
    "E": "Lawrence Berkeley Laboratory",
    "answer": "B",
    "basis": "According to the text, Bruce H. McCormick served as the head of the electrical engineering and computer science department at the University of Illinois at Urbana-Champaign."
  },
  {
    "pr

../data/wikipedia/b.parquet:  68%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                  | 32/47 [16:26<07:47, 31.13s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 50 column 93 (char 2293)
../data/wikipedia/b.parquet:  70%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting ',' delimiter: line 50 column 93 (char 2293)
[
  {
    "prompt": "Barbara Caine is known for her work in which field?",
    "A": "Nineteenth-century studies",
    "B": "Gender studies",
    "C": "Philosophy",
    "D": "Social sciences",
    "E": "Historical inquiry",
    "answer": "B",
    "basis": "The text states that Barbara Caine has written extensively on British and Australian women's history and has established the first Women's Studies Centre in Australia. This indicates her work in the field of gender studies."
  },
  {
    "prompt": "Which of the following organizations is Barbara Caine a Fellow of?",
    "A": "Australian Academy of the Humanities",
    "B": "Academy of the Social Sciences in Australia",
    "C": "British Royal Historical Society",
    "D": "All of the above",
    "E": "None of the above",
    "answer": "D",
    "basis": "The text mentions that Barbara Caine is an elected Fellow of the Australian Academy of the Humanities, the Academy of the Social 

../data/wikipedia/b.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 47/47 [23:57<00:00, 30.59s/it]
../data/wikipedia/c.parquet:  36%|██████████████████████████████████████████████████████████████████████████▍                                                                                                                                      | 26/73 [13:12<22:25, 28.62s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Expecting ',' delimiter: line 60 column 65 (char 4373)
[
  {
    "prompt": "What is craniosacral therapy?",
    "A": "A form of alternative medicine that uses gentle touch to adjust the immovable joints of the skull",
    "B": "A form of alternative medicine that uses gentle touch to feel non-existent rhythmic movements of the skull's bones",
    "C": "A form of alternative medicine that uses gentle touch to manipulate the bones of the skull",
    "D": "A form of alternative medicine that uses gentle touch to detect the rhythmic movement of the cranial bones",
    "E": "A form of alternative medicine that uses gentle touch to relieve stress or tension",
    "answer": "B",
    "basis": "The text states that craniosacral therapy uses gentle touch to feel non-existent rhythmic movements of the skull's bones."
  },
  {
    "prompt": "What is the basis of craniosacral therapy?",
    "A": "The belief that the bones of the skull can be moved by manipulations",
    "B": "The belief that all il

../data/wikipedia/c.parquet:  51%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                                       | 37/73 [19:05<19:46, 32.97s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 10 column 113 (char 440)
../data/wikipedia/c.parquet:  52%|████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                     

Invalid \escape: line 10 column 113 (char 440)
[
    {
        "prompt": "Which of the following characterizations of the exponential function involves the limit of a series?",
        "A": "Characterization 1",
        "B": "Characterization 2",
        "C": "Characterization 3",
        "D": "Characterization 4",
        "E": "Characterization 5",
        "answer": "B",
        "basis": "Characterization 2 defines the exponential function as the value of the infinite series e^x = \sum_{n=0}^\infty {x^n \over n!} = 1 + x + \frac{x^2}{2!} + \frac{x^3}{3!} + \frac{x^4}{4!} + \cdots"
    },
    {
        "prompt": "Which of the following characterizations of the exponential function involves the integral of the function 1/t?",
        "A": "Characterization 1",
        "B": "Characterization 2",
        "C": "Characterization 3",
        "D": "Characterization 4",
        "E": "Characterization 5",
        "answer": "C",
        "basis": "Characterization 3 defines the exponential functi

../data/wikipedia/c.parquet:  60%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                                   | 44/73 [22:29<13:57, 28.88s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 60 column 48 (char 1660)
../data/wikipedia/c.parquet:  62%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting ',' delimiter: line 60 column 48 (char 1660)
[
  {
    "prompt": "What is the genre of the film Chameleon?",
    "A": "Romantic comedy",
    "B": "Horror",
    "C": "Science fiction action",
    "D": "Drama",
    "E": "Documentary",
    "answer": "C",
    "basis": "The text states that Chameleon is a science fiction action film."
  },
  {
    "prompt": "Who is the main character in Chameleon?",
    "A": "Kam",
    "B": "Ghen",
    "C": "Quinn",
    "D": "Maddy",
    "E": "Mozser",
    "answer": "A",
    "basis": "The text mentions that Bobbie Phillips plays the character Kam."
  },
  {
    "prompt": "When was Chameleon originally aired?",
    "A": "October 22, 1998",
    "B": "November 15, 1998",
    "C": "December 5, 1998",
    "D": "January 10, 1999",
    "E": "February 3, 1999",
    "answer": "A",
    "basis": "The text states that Chameleon was originally aired on October 22, 1998."
  },
  {
    "prompt": "Who wrote the film Chameleon?",
    "A": "Bennett Cohen",
    "B":

../data/wikipedia/c.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 73/73 [36:52<00:00, 30.31s/it]
../data/wikipedia/d.parquet:  63%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                             | 32/51 [15:52<08:51, 28.00s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/d.parquet:  65%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                         | 33/51 [

'str' object does not support item assignment
{"questions": [{"prompt": "Who directed the film Dinosaur?", "A": "Ralph Zondag and Eric Leighton", "B": "Phil Tippett and Paul Verhoeven", "C": "David Womersley", "D": "Thomas G. Smith", "E": "George Scribner", "answer": "A", "basis": "The film was directed by Ralph Zondag and Eric Leighton."}, {"prompt": "What is the main protagonist's name in Dinosaur?", "A": "Aladar", "B": "Kron", "C": "Zini", "D": "Bruton", "E": "Baylene", "answer": "A", "basis": "The main protagonist's name is Aladar."}, {"prompt": "What is the genre of Dinosaur?", "A": "Adventure", "B": "Romance", "C": "Horror", "D": "Comedy", "E": "Documentary", "answer": "A", "basis": "Dinosaur is an adventure film."}, {"prompt": "What is the budget of Dinosaur?", "A": "$127.5 million", "B": "$45 million", "C": "$25 million", "D": "$198 million", "E": "$350 million", "answer": "A", "basis": "Dinosaur had a budget of $127.5 million."}, {"prompt": "When was Dinosaur released?", "A": 

../data/wikipedia/d.parquet:  71%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                             | 36/51 [17:40<06:56, 27.74s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/d.parquet:  73%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                         | 37/51 [18:07<06:25, 27.53s/it]

'str' object does not support item assignment
{"questions": [{"prompt": "What is the term used to describe highly ordered, branched polymeric molecules?", "A": "Arborols", "B": "Cascade molecules", "C": "Dendrimers", "D": "Dendrons", "E": "Polymeric compounds", "answer": "C", "basis": "The text states that dendrimers are highly ordered, branched polymeric molecules."}, {"prompt": "Who made the first dendrimers?", "A": "Fritz Vögtle", "B": "R.G. Denkewalter", "C": "Donald Tomalia", "D": "George R. Newkome", "E": "Craig Hawker", "answer": "A", "basis": "The text states that the first dendrimers were made by Fritz Vögtle."}, {"prompt": "What are the two methods of dendrimer synthesis?", "A": "Divergent synthesis and convergent synthesis", "B": "Click chemistry and convergent synthesis", "C": "Divergent synthesis and click chemistry", "D": "Polymerization and convergent synthesis", "E": "Divergent synthesis and polymerization", "answer": "A", "basis": "The text states that there are two de

../data/wikipedia/d.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 51/51 [25:27<00:00, 29.95s/it]
../data/wikipedia/e.parquet:  27%|████████████████████████████████████████████████████████▌                                                                                                                                                        | 13/48 [06:40<19:14, 32.99s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/e.parquet:  29%|████████████████████████████████████████████████████████████▉                                                                                                                                                    | 14/48 [

'str' object does not support item assignment
{"questions": [{"prompt": "What is the definition of engineering?", "A": "The use of scientific principles to design and build machines, structures, and other items", "B": "The study of the human body and its functions", "C": "The application of physics, chemistry, biology, and engineering principles to carry out chemical processes", "D": "The design and construction of public and private works, such as infrastructure and buildings", "E": "The design, study, and manufacture of various electrical and electronic systems", "answer": "A", "basis": "According to the text, engineering is defined as the use of scientific principles to design and build machines, structures, and other items."}, {"prompt": "What is the origin of the term 'engineering'?", "A": "Derived from the Latin ingenium, meaning 'cleverness' and ingeniare, meaning 'to contrive, devise'", "B": "Derived from the word engineer, which dates back to the 14th century", "C": "Derived f

../data/wikipedia/e.parquet:  56%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                           | 27/48 [13:18<10:06, 28.86s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 34 column 50 (char 1578)
../data/wikipedia/e.parquet:  58%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉        

Invalid \escape: line 34 column 50 (char 1578)
[
    {
        "prompt": "What are Eisenstein series?",
        "A": "Particular modular forms with infinite series expansions",
        "B": "Polynomials in and ",
        "C": "Theta functions",
        "D": "Modular invariants of an elliptic curve",
        "E": "Holomorphic modular forms for the modular group",
        "answer": "A",
        "basis": "The text states that Eisenstein series are particular modular forms with infinite series expansions."
    },
    {
        "prompt": "What is the key property of Eisenstein series?",
        "A": "Their -invariance",
        "B": "Their convergence",
        "C": "Their Fourier expansion",
        "D": "Their relation to modular invariants",
        "E": "Their relation to theta functions",
        "answer": "A",
        "basis": "The text states that the key property of Eisenstein series is their -invariance."
    },
    {
        "prompt": "What is the relation between the modular inva

../data/wikipedia/e.parquet:  79%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                           | 38/48 [19:51<05:36, 33.65s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 14 column 15 (char 1266)
../data/wikipedia/e.parquet:  81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 14 column 15 (char 1266)
[
    {
        "prompt": "What is electron cyclotron resonance?",
        "A": "The phenomenon observed when the frequency of incident radiation coincides with the natural frequency of rotation of electrons in magnetic fields.",
        "B": "The phenomenon observed when the frequency of incident radiation coincides with the natural frequency of rotation of ions in magnetic fields.",
        "C": "The phenomenon observed when the frequency of incident radiation coincides with the natural frequency of rotation of protons in magnetic fields.",
        "D": "The phenomenon observed when the frequency of incident radiation coincides with the natural frequency of rotation of neutrons in magnetic fields.",
        "E": "The phenomenon observed when the frequency of incident radiation coincides with the natural frequency of rotation of electrons in electric fields.",
        "answer": "A",
        "basis": "According to the text, electron cyclot

../data/wikipedia/e.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 48/48 [25:10<00:00, 31.48s/it]
../data/wikipedia/f.parquet:  56%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                            | 19/34 [09:17<07:19, 29.32s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 340, in decode
    raise JSONDecodeError("Extra data", s, end)
json.decoder.JSONDecodeError: Extra data: line 1 column 2 (char 1)
../data/wikipedia/

Extra data: line 1 column 2 (char 1)
1. prompt: When was the Faculty of Earth Sciences established?
- A. In the beginning as the Applied Geological Center
- B. In 1970
- C. When it became one of the King Abdulaziz University schools
- D. When it was founded by the Ministry of Petroleum and Mineral Resources
- E. When it became a specialized geology college in the Middle East

answer: B
basis: The text states that the Faculty of Earth Sciences was founded in 1970.

2. prompt: How many geoscience departments does the Faculty of Earth Sciences currently have?
- A. 5
- B. 7
- C. 10
- D. 23
- E. 65

answer: B
basis: The text mentions that the faculty currently has seven geoscience departments.

3. prompt: What degrees does the Faculty of Earth Sciences offer?
- A. Bachelor of Science (B.S.) only
- B. Master of Science (M.S.) only
- C. Doctor of Philosophy (Ph.D.) only
- D. B.S., M.S., and Ph.D. in all departments
- E. B.S., M.S., and Ph.D. in most departments

answer: E
basis: The text stat

../data/wikipedia/f.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [16:23<00:00, 28.94s/it]
../data/wikipedia/g.parquet:  40%|███████████████████████████████████████████████████████████████████████████████████▌                                                                                                                             | 18/45 [08:35<12:12, 27.14s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 14 column 35 (char 784)
[
    {
        "prompt": "What is the gyration tensor?",
        "A": "A tensor that describes the second moments of position of a collection of particles",
        "B": "A tensor that describes the moment of inertia of a collection of particles",
        "C": "A tensor that describes the distribution of particles in a collection",
        "D": "A tensor that describes the shape of a collection of particles",
        "E": "A tensor that describes the density of particles in a collection",
        "answer": "A",
        "basis": "The gyration tensor is defined as a tensor that describes the second moments of position of a collection of particles."
    },
    {
        "prompt": "What is the mathematical definition of the gyration tensor?",
        "A": "S_{mn} = \frac{1}{N}\sum_{i=1}^{N} r_{m}^{(i)} r_{n}^{(i)}",
        "B": "S_{mn} = \frac{1}{2N^{2}}\sum_{i=1}^{N}\sum_{j=1}^{N} (r_{m}^{(i)} - r_{m}^{(j)}) (r_{n}^{(i)} - r_{n}^{(j)})",
   

../data/wikipedia/g.parquet:  49%|██████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                                                          | 22/45 [10:55<11:43, 30.60s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/g.parquet:  51%|██████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                                      | 23/45 [11:24<10:56, 29.85s/it]

'str' object does not support item assignment
{"questions": [{"prompt": "What is the estimated age range of the sedimentary rock layers in the Grand Canyon?", "A": "200 million to 2 billion years old", "B": "75 million to 6 million years old", "C": "2 million to 500,000 years old", "D": "1.8 billion to 1.6 billion years old", "E": "1.4 billion years ago", "answer": "A", "basis": "The text states that the sedimentary rock layers in the Grand Canyon range in age from about 200 million to nearly 2 billion years old."}, {"prompt": "What event is largely responsible for creating the Rocky Mountains to the east of the Grand Canyon?", "A": "The Laramide orogeny", "B": "The opening of the Gulf of California", "C": "Volcanic activity", "D": "Ice ages", "E": "Human activity", "answer": "A", "basis": "The text states that the uplift of the region that created the Rocky Mountains started about 75 million years ago during the Laramide orogeny."}, {"prompt": "What caused the formation of the ancestr

../data/wikipedia/g.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 45/45 [21:20<00:00, 28.46s/it]
../data/wikipedia/h.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 43/43 [19:44<00:00, 27.55s/it]
../data/wikipedia/i.parquet:  46%|███████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                                 | 16/35 [07:52<08:51, 27.97s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not 

'str' object does not support item assignment
{"questions": [{"prompt": "What international literary prize did Ismail Kadare win in 2005?", "A": "Prix mondial Cino Del Duca", "B": "Herder Prize", "C": "Man Booker International Prize", "D": "Prince of Asturias Award of Arts", "E": "Jerusalem Prize", "answer": "C", "basis": "In 2005, Kadare was awarded the inaugural Man Booker International Prize."}, {"prompt": "In which country did Ismail Kadare seek and receive political asylum in 1990?", "A": "France", "B": "Italy", "C": "Spain", "D": "United Kingdom", "E": "Germany", "answer": "A", "basis": "In October 1990, Kadare sought and received political asylum in France."}, {"prompt": "Which of the following statements about Ismail Kadare's literary works is true?", "A": "Most of his works have been translated into English", "B": "He has written over 100 novels", "C": "His works have been published in 100 languages", "D": "He primarily writes poetry", "E": "His works are only available in Alb

Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/i.parquet:  51%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                                     | 18/35 [08:54<08:30, 30.00s/it]

'str' object does not support item assignment
{"questions": [{"prompt": "What is the premise of the game 'I Have No Mouth, and I Must Scream'?", "A": "The game takes place in a dystopian world where a mastermind artificial intelligence named 'AM' has destroyed all of humanity except for five people.", "B": "The game is based on a short story by Harlan Ellison and takes place in a post-apocalyptic world.", "C": "The game follows the adventures of five characters who must overcome their fatal flaws.", "D": "The game is set in a concentration camp during World War II.", "E": "The game is about a group of scientists who create a supercomputer that gains sentience and turns against humanity.", "answer": "A", "basis": "The text states that the game takes place in a dystopian world where a mastermind artificial intelligence named 'AM' has destroyed all of humanity except for five people."}, {"prompt": "What is the gameplay like in 'I Have No Mouth, and I Must Scream'?", "A": "The game is a fi

../data/wikipedia/i.parquet:  94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████            | 33/35 [16:31<00:58, 29.18s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/i.parquet:  97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████      | 34/35 [16:55<00:27, 27.45s/it]

'str' object does not support item assignment
{"questions": [{"prompt": "Who directed Iron Man 2?", "A": "Jon Favreau", "B": "Justin Theroux", "C": "Robert Downey Jr.", "D": "Gwyneth Paltrow", "E": "Don Cheadle", "answer": "A", "basis": "The film was directed by Jon Favreau."}, {"prompt": "Who plays the role of Tony Stark / Iron Man in Iron Man 2?", "A": "Jon Favreau", "B": "Justin Theroux", "C": "Robert Downey Jr.", "D": "Gwyneth Paltrow", "E": "Don Cheadle", "answer": "C", "basis": "Robert Downey Jr. plays the role of Tony Stark / Iron Man in Iron Man 2."}, {"prompt": "Who plays the role of Pepper Potts in Iron Man 2?", "A": "Jon Favreau", "B": "Justin Theroux", "C": "Robert Downey Jr.", "D": "Gwyneth Paltrow", "E": "Don Cheadle", "answer": "D", "basis": "Gwyneth Paltrow plays the role of Pepper Potts in Iron Man 2."}, {"prompt": "Who plays the role of James Rhodes / War Machine in Iron Man 2?", "A": "Jon Favreau", "B": "Justin Theroux", "C": "Robert Downey Jr.", "D": "Gwyneth Paltro

../data/wikipedia/i.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 35/35 [17:24<00:00, 29.84s/it]
../data/wikipedia/j.parquet:  17%|███████████████████████████████████▉                                                                                                                                                                             | 11/64 [04:46<22:50, 25.86s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/j.parquet:  19%|███████████████████████████████████████▏                                                                                                                                                                         | 12/64 [

'str' object does not support item assignment
{"questions": [{"prompt": "What is the main focus of James Tully's approach to the study and teaching of politics?", "A": "Renewing and transforming public philosophy", "B": "The politics of cultural recognition", "C": "Practices of civic freedom and global citizenship", "D": "The transformative power of nonviolence", "E": "Sustainability and Gaia citizenship", "answer": "C", "basis": "In the text, it is mentioned that Tully's approach to the study and teaching of politics is a form of historical and critical reflection on problems of political practice in the present. It is an attempt to renew and transform the tradition of public philosophy so it can effectively address the pressing political problems of our age in a genuinely democratic way. This approach is described as practices of civic freedom and global citizenship."}, {"prompt": "What is the main argument of Tully's book 'Public Philosophy in a New Key'?", "A": "The importance of r

../data/wikipedia/j.parquet:  36%|███████████████████████████████████████████████████████████████████████████                                                                                                                                      | 23/64 [10:16<17:54, 26.21s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_2063/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)


This model's maximum context length is 16385 tokens. However, you requested 20267 tokens (15267 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
  {
    "prompt": "What was Jerzy Maj's profession?",
    "A": "Pharmacologist",
    "B": "Professor of medical sciences",
    "C": "Member of the Polish Academy of Sciences",
    "D": "Director of the Institute of Pharmacology",
    "E": "All of the above",
    "answer": "E",
    "basis": "The text states that Jerzy Maj was a Polish pharmacologist, professor of medical sciences, member of the Polish Academy of Sciences, and director of the Institute of Pharmacology."
  },
  {
    "prompt": "Where did Jerzy Maj graduate from?",
    "A": "Jagiellonian University",
    "B": "Medical Academy of Lublin",
    "C": "Polish Academy of Sciences",
    "D": "Polish Academy of Arts and Sciences",
    "E": "None of the above",
    "answer": "A",
    "basis": "The text mentions that Jerzy Maj graduated in 

../data/wikipedia/j.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [26:42<00:00, 25.03s/it]
../data/wikipedia/k.parquet:  54%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                | 14/26 [05:49<04:49, 24.12s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 24 column 17 (char 1260)
[
    {
        "prompt": "What is the Kepler–Bouwkamp constant?",
        "A": "The radius of the limiting circle obtained by inscribing polygons and circles in a sequence",
        "B": "The decimal expansion of the constant is 0.1149420448",
        "C": "The natural logarithm of the constant is given by a series involving the Riemann zeta function",
        "D": "The constant is obtained by taking the product of cosines of certain angles",
        "E": "The constant is the inverse of the polygon circumscribing constant",
        "answer": "A",
        "basis": "The text states that the Kepler–Bouwkamp constant is obtained as the radius of the limiting circle obtained by inscribing polygons and circles in a sequence."
    },
    {
        "prompt": "How is the decimal expansion of the Kepler–Bouwkamp constant represented?",
        "A": "0.1149420448",
        "B": "0.312832",
        "C": "0.1149420448...",
        "D": "0.312832...",


../data/wikipedia/k.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [11:02<00:00, 25.50s/it]
../data/wikipedia/l.parquet:  84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                 | 42/50 [18:17<03:19, 24.98s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/l.parquet:  86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                             | 43/50 [

'str' object does not support item assignment
{"questions": [{"prompt": "When did the Advanced LIGO Project begin?", "A": "2002", "B": "2008", "C": "2015", "D": "2016", "E": "2019", "answer": "B", "basis": "The Advanced LIGO Project began in 2008."}, {"prompt": "Which observatory houses one laser interferometer in the primary configuration?", "A": "LIGO Livingston Observatory", "B": "LIGO Hanford Observatory", "C": "GEO600", "D": "Virgo interferometer", "E": "Holometer", "answer": "A", "basis": "The LIGO Livingston Observatory houses one laser interferometer in the primary configuration."}, {"prompt": "What was the first detection of gravitational waves by LIGO called?", "A": "GW150914", "B": "GW151226", "C": "GW170104", "D": "GW170608", "E": "GW170817", "answer": "A", "basis": "The first detection of gravitational waves by LIGO was called GW150914."}, {"prompt": "Which run of LIGO made the first detection of a merger of a neutron star with a black hole?", "A": "O1", "B": "O2", "C": "O

../data/wikipedia/l.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [21:22<00:00, 25.65s/it]
../data/wikipedia/m.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [33:24<00:00, 25.06s/it]
../data/wikipedia/n.parquet:  25%|████████████████████████████████████████████████████▌                                                                                                                                                             | 9/36 [03:57<12:43, 28.29s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not 

'str' object does not support item assignment
{"questions": [{"prompt": "What was Nikolai Bukharin's political stance within the Bolshevik Party?", "A": "Left communist", "B": "Right communist", "C": "Centrist", "D": "Social democrat", "E": "Anarchist", "answer": "B", "basis": "Within the Bolshevik Party, Bukharin was initially a left communist, but gradually moved to the right from 1921."}, {"prompt": "What theory did Bukharin formulate that argued socialism could be developed in a single country?", "A": "Permanent Revolution", "B": "World Revolution", "C": "Socialism in One Country", "D": "Proletarian Dictatorship", "E": "Class Struggle", "answer": "C", "basis": "Bukharin formulated the theory of 'Socialism in One Country' put forth by Stalin in 1924."}, {"prompt": "What was Bukharin's primary contribution to economics?", "A": "Critique of marginal utility theory", "B": "Analysis of imperialism", "C": "Writings on the transition to communism", "D": "All of the above", "E": "None of t

../data/wikipedia/n.parquet:  50%|████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                        | 18/36 [07:40<08:04, 26.92s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/n.parquet:  53%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                                  | 19/36 [08:01<07:04, 24.98s/it]

'str' object does not support item assignment
{"questions": [{"prompt": "What is a negative-index metamaterial?", "A": "A metamaterial with a negative refractive index", "B": "A material with negative values for permittivity and permeability", "C": "A material that exhibits optical properties opposite to those of glass", "D": "A material made from wires and dielectrics", "E": "A material that can manipulate electromagnetic radiation"}, {"prompt": "What are unit cells in negative-index metamaterials?", "A": "Periodic basic parts that are significantly smaller than the wavelength of the radiation", "B": "Stacked or planar cells made from wires and dielectrics", "C": "Cells that are individually tuned to respond in a certain way", "D": "Cells that determine the aggregate response of the material", "E": "Cells that are made from circuit board material"}, {"prompt": "What determines the response of a negative-index metamaterial?", "A": "The geometry of each unit cell", "B": "The refractive 

../data/wikipedia/n.parquet:  78%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                              | 28/36 [11:41<03:13, 24.14s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 34 column 18 (char 1468)
../data/wikipedia/n.parquet:  81%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 34 column 18 (char 1468)
[
  {
    "prompt": "What is the NK model?",
    "A": "A mathematical model used in evolutionary biology",
    "B": "A mathematical model used in physics",
    "C": "A mathematical model used in computer science",
    "D": "A mathematical model used in chemistry",
    "E": "A mathematical model used in sociology",
    "answer": "A",
    "basis": "The text states that the NK model has found application in a wide variety of fields, including the theoretical study of evolutionary biology."
  },
  {
    "prompt": "What are the two parameters that can be adjusted in the NK model?",
    "A": "N and K",
    "B": "N and M",
    "C": "K and M",
    "D": "K and L",
    "E": "N and L",
    "answer": "A",
    "basis": "The text mentions that the overall size of the landscape and the number of its local 'hills and valleys' can be adjusted via changes to its two parameters, N and K."
  },
  {
    "prompt": "What does K represent in the NK model?",
    "

../data/wikipedia/n.parquet:  97%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏     | 35/36 [14:40<00:24, 24.21s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 64 column 27 (char 2399)
../data/wikipedia/n.parquet: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Expecting ',' delimiter: line 64 column 27 (char 2399)
[
  {
    "prompt": "Where did Nathan Isgur receive his Ph.D. degree?",
    "A": "University of California, Berkeley",
    "B": "University of Toronto",
    "C": "Caltech",
    "D": "University of Guelph",
    "E": "University of Houston",
    "answer": "B",
    "basis": "According to the text, Nathan Isgur received his Ph.D. degree from the University of Toronto."
  },
  {
    "prompt": "Why did Nathan Isgur become a Canadian citizen?",
    "A": "To pursue his graduate studies",
    "B": "To avoid serving in a war",
    "C": "To travel to the United States",
    "D": "To receive a scholarship",
    "E": "To work as a professor",
    "answer": "B",
    "basis": "The text states that Nathan Isgur became a Canadian citizen due to his inability to travel and his position as a war resister."
  },
  {
    "prompt": "In which field did Nathan Isgur work?",
    "A": "Biology",
    "B": "Chemistry",
    "C": "Physics",
    "D": "Mathematic


../data/wikipedia/number.parquet:  82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                     | 9/11 [03:45<00:52, 26.20s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 46, in <module>
    text = query_prompt(prompt)
  File "/tmp/ipykernel_2063/4006552839.py", line 2, in query_prompt
    response = openai.ChatCompletion.create(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/chat_completion.py", line 25, in create
    return super().create(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/openai/api_resources/abstract/engine_api_resource.py", line 153, in create
    response, _, api_key = requestor.request(
  File "/opt/conda/lib/python3.10/site-packages/openai/api_requestor.py", line 298, in request
    resp, got_stream = self._interpret_response(result, stream)

This model's maximum context length is 16385 tokens. However, you requested 19416 tokens (14416 in the messages, 5000 in the completion). Please reduce the length of the messages or completion.
[
    {
        "prompt": "In 1660, a group of twelve men, including Christopher Wren and Robert Boyle, resolved to found a college for the promoting of physico-mathematical experimental learning. What is the name of this college?",
        "A": "Gresham College",
        "B": "Cambridge University",
        "C": "Royal Society",
        "D": "London College",
        "E": "Oxford University",
        "answer": "C",
        "basis": "The text states that the group of twelve men resolved to found a college for the promoting of physico-mathematical experimental learning, which later became the Royal Society."
    },
    {
        "prompt": "Who published the first flora of an English county in 1660?",
        "A": "John Ray",
        "B": "Christopher Wren",
        "C": "Robert Boyle",
        "D

../data/wikipedia/number.parquet: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11/11 [04:13<00:00, 23.02s/it]
../data/wikipedia/o.parquet:  89%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                       | 16/18 [07:04<00:49, 24.96s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Invalid \escape: line 30 column 153 (char 1684)
[
  {
    "prompt": "What is the main difference between optical molasses and a MOT?",
    "A": "Optical molasses uses circularly polarized laser beams, while a MOT uses linearly polarized laser beams.",
    "B": "Optical molasses uses circularly polarized laser beams, while a MOT uses circularly polarized magnetic fields.",
    "C": "Optical molasses provides cooling and trapping, while a MOT provides only cooling.",
    "D": "Optical molasses provides only cooling, while a MOT provides cooling and trapping.",
    "E": "Optical molasses uses linearly polarized laser beams, while a MOT uses circularly polarized laser beams.",
    "answer": "D",
    "basis": "The text states that the main difference between optical molasses and a MOT is the absence of a magnetic field in optical molasses. Therefore, optical molasses provides only cooling and no trapping, while a MOT provides both cooling and trapping."
  },
  {
    "prompt": "What is the t

../data/wikipedia/o.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [07:56<00:00, 26.45s/it]
../data/wikipedia/other.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:40<00:00, 20.40s/it]
../data/wikipedia/p.parquet:   3%|███████                                                                                                                                                                                                           | 2/59 [00:47<23:12, 24.43s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not 

'str' object does not support item assignment
{"prompt": "What is the definition of a parity transformation in physics?", "A": "The flip in the sign of one spatial coordinate", "B": "The simultaneous flip in the sign of all three spatial coordinates", "C": "The rotation of a physical phenomenon into its mirror image", "D": "The transformation of a phenomenon into an even function", "E": "The transformation of a phenomenon into an odd function", "answer": "B", "basis": "In the given text, it is stated that a parity transformation in three dimensions refers to the simultaneous flip in the sign of all three spatial coordinates, which is option B."}


../data/wikipedia/p.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 59/59 [22:57<00:00, 23.35s/it]
../data/wikipedia/q.parquet:   0%|                                                                                                                                                                                                                           | 0/3 [00:00<?, ?it/s]

'str' object does not support item assignment
{"questions": [{"prompt": "When was Queen's University at Kingston established?", "A": "1841", "B": "1869", "C": "1883", "D": "1912", "E": "1957", "answer": "A", "basis": "Queen's University at Kingston was established in 1841 via a royal charter from Queen Victoria."}, {"prompt": "Which university served as a model for Queen's University at Kingston?", "A": "University of Edinburgh", "B": "University of Glasgow", "C": "University of Toronto", "D": "McGill University", "E": "University of Western Ontario", "answer": "A", "basis": "Queen's University at Kingston was modelled after the University of Edinburgh and the University of Glasgow."}, {"prompt": "When did Queen's University at Kingston admit women?", "A": "1869", "B": "1880", "C": "1883", "D": "1912", "E": "1957", "answer": "A", "basis": "Queen's University at Kingston admitted women in 1869, making it the first Canadian university west of the Maritime provinces to do so."}, {"prompt"

Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 49, in <module>
    text_json["wiki_id"] = series["id"]
TypeError: 'str' object does not support item assignment
../data/wikipedia/q.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [01:12<00:00, 24.14s/it]
../data/wikipedia/r.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 53/53 [20:53<00:00, 23.66s/it]
../data/wikipedia/s.parquet:  18%|█████████████████████████████████████▊                                                                                                                                                                           | 17/94 

Expecting ',' delimiter: line 70 column 39 (char 3060)
[
  {
    "prompt": "What is the Simulated Fluorescence Process (SFP) used for?",
    "A": "Scientific visualization of 3D data from fluorescence microscopes",
    "B": "Modeling physical light/matter interaction process",
    "C": "Computing excitation light and emission light",
    "D": "Creating virtual light sources",
    "E": "Rendering computer graphics",
    "answer": "A",
    "basis": "The text states that the SFP is a computing algorithm used for scientific visualization of 3D data from fluorescence microscopes."
  },
  {
    "prompt": "What does the SFP algorithm consider in its computation?",
    "A": "Virtual light sources",
    "B": "Excitation light and emission light",
    "C": "Physical light/matter interaction process",
    "D": "Shadows on objects",
    "E": "Eye of the viewer",
    "answer": "B",
    "basis": "The text mentions that the algorithm considers a virtual light source producing excitation light that il

../data/wikipedia/s.parquet:  54%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                               | 51/94 [20:26<15:37, 21.80s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 34 column 17 (char 1313)
../data/wikipedia/s.parquet:  55%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌              

Invalid \escape: line 34 column 17 (char 1313)
[
  {
    "prompt": "What is the Szeged index?",
    "A": "A topological index of a molecule used in biochemistry",
    "B": "A measure of network structure in information theory",
    "C": "A concept introduced by Harry Wiener",
    "D": "A mathematical formula for calculating the Szeged index",
    "E": "A correlation with biological and physicochemical properties",
    "answer": "A",
    "basis": "The text states that the Szeged index is a topological index of a molecule used in biochemistry."
  },
  {
    "prompt": "Who introduced the Szeged index?",
    "A": "Iván Gutman",
    "B": "Harry Wiener",
    "C": "Dendrimer Nanostar",
    "D": "Chemical graph theory",
    "E": "Information theory",
    "answer": "A",
    "basis": "The text states that the Szeged index was introduced by Iván Gutman."
  },
  {
    "prompt": "What does the Szeged index generalize?",
    "A": "The concept of the Wiener index",
    "B": "The concept of a connecte

../data/wikipedia/s.parquet:  66%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                       | 62/94 [24:52<12:22, 23.22s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 5 column 35 (char 191)
../data/wikipedia/s.parquet:  67%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 5 column 35 (char 191)
[
    {
        "prompt": "What is the stopping rule in the sequential probability ratio test (SPRT)?",
        "A": "Continue monitoring when a < S_i < b",
        "B": "Accept H_1 when S_i \geq b",
        "C": "Accept H_0 when S_i \leq a",
        "D": "Accept H_1 when S_i \leq a",
        "E": "Accept H_0 when S_i \geq b",
        "answer": "A",
        "basis": "The stopping rule in the sequential probability ratio test (SPRT) is to continue monitoring when a < S_i < b."
    },
    {
        "prompt": "What are the thresholds a and b in the SPRT?",
        "A": "a \approx \log \frac{ \beta }{1-\alpha} and b \approx \log \frac{1-\beta}{\alpha}",
        "B": "a \approx \log \frac{1-\beta}{\alpha} and b \approx \log \frac{ \beta }{1-\alpha}",
        "C": "a \approx \log \frac{ \alpha }{1-\beta} and b \approx \log \frac{1-\alpha}{\beta}",
        "D": "a \approx \log \frac{1-\alpha}{\beta} and b \approx \log \frac{ \alpha }{1-\beta}",
   

../data/wikipedia/s.parquet:  98%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌    | 92/94 [37:05<00:51, 25.81s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Invalid \escape: line 24 column 11 (char 1250)
../data/wikipedia/s.parquet:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████

Invalid \escape: line 24 column 11 (char 1250)
[
  {
    "prompt": "What is the softmax function?",
    "A": "A function that converts a vector of real numbers into a probability distribution",
    "B": "A function that calculates the maximum value in a vector",
    "C": "A function that normalizes the output of a neural network",
    "D": "A function that calculates the sum of a vector",
    "E": "A function that calculates the average of a vector",
    "answer": "A",
    "basis": "The text states that the softmax function converts a vector of real numbers into a probability distribution of possible outcomes."
  },
  {
    "prompt": "What is the purpose of the softmax function in a neural network?",
    "A": "To calculate the maximum value in the output",
    "B": "To normalize the output to a probability distribution",
    "C": "To calculate the sum of the output",
    "D": "To calculate the average of the output",
    "E": "To calculate the minimum value in the output",
    "answer"

../data/wikipedia/s.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 94/94 [37:55<00:00, 24.21s/it]
../data/wikipedia/t.parquet:   0%|                                                                                                                                                                                                                          | 0/78 [00:00<?, ?it/s]

Expecting ',' delimiter: line 40 column 218 (char 2625)
[
    {
        "prompt": "What is the aim of tree alignment?",
        "A": "To find an assigned sequence that can obtain a maximum score",
        "B": "To transform multiple sequence alignment into pair sequence alignment",
        "C": "To establish a failure link in the keyword tree",
        "D": "To find the locations of all P_i in a given long string",
        "E": "To align trees into a graph and synthesize them",
        "answer": "B",
        "basis": "The aim of tree alignment is to transform multiple sequence alignment into pair sequence alignment, as mentioned in the text: 'The idea of combinatorial optimization strategy is to transform the multiple sequence alignment into pair sequence alignment to solve this problem.'"
    },
    {
        "prompt": "What is the time complexity of the Aho-Corasick search algorithm?",
        "A": "O(m+n+k)",
        "B": "O(|u|×|v|)",
        "C": "O(m+k)",
        "D": "O(|u|+|v|)

Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 40 column 218 (char 2625)
../data/wikipedia/t.parquet:  67%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                     | 52/78 [18:39<08:45, 20.20s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/li

Expecting ',' delimiter: line 10 column 134 (char 464)
[
  {
    "prompt": "What is a Tipler cylinder?",
    "A": "A hypothetical object that allows time travel",
    "B": "A rotating cylinder that warps spacetime",
    "C": "A visual novel that mentions time travel",
    "D": "A device used in Star Trek Online",
    "E": "A plot device in Chuck Grossart's story",
    "answer": "B",
    "basis": "The text states that a Tipler cylinder is a rotating cylinder that warps spacetime: 'Tipler showed in his 1974 paper, "Rotating Cylinders and the Possibility of Global Causality Violation" that in a spacetime containing a "...sufficiently large rotating cylinder..." which was spinning along its longitudinal axis, the cylinder should create a frame-dragging effect. This frame-dragging effect warps spacetime in such a way that the light cones of objects in the cylinder's proximity become tilted, so that part of the light cone then points backwards along the time axis on a spacetime diagram.'"
  

../data/wikipedia/t.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 78/78 [28:33<00:00, 21.97s/it]
../data/wikipedia/u.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [03:39<00:00, 24.43s/it]
../data/wikipedia/v.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [06:48<00:00, 22.70s/it]
../data/wikipedia/w.parquet:  54%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                          

Expecting ',' delimiter: line 63 column 49 (char 3248)
[
  {
    "prompt": "What is the main theme of the novel Wild Jack?",
    "A": "The struggle for survival in a post-apocalyptic world",
    "B": "The importance of friendship and loyalty",
    "C": "The corrupting influence of power",
    "D": "The clash between civilization and the wilderness",
    "E": "The consequences of questioning societal norms",
    "answer": "D",
    "basis": "The text mentions that the novel explores the dichotomy between the city and the wild, with the protagonist experiencing true freedom in the wilderness. This suggests that the main theme of the novel is the clash between civilization and the wilderness."
  },
  {
    "prompt": "What is the reason for Clive's imprisonment on the prison island?",
    "A": "He is falsely accused of questioning the status quo",
    "B": "He is caught listening in on a conversation",
    "C": "He is accused of organizing a rebellion against the authorities",
    "D": "He 

../data/wikipedia/w.parquet: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [10:02<00:00, 21.52s/it]
../data/wikipedia/x.parquet:  50%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                         | 1/2 [00:19<00:19, 19.39s/it]Traceback (most recent call last):
  File "/tmp/ipykernel_2063/1584694044.py", line 47, in <module>
    texts_json = json.loads(text)
  File "/opt/conda/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/opt/conda/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/opt/conda/lib/python3.10/json/decoder.py", line 353, in raw_decode
   

Expecting ',' delimiter: line 53 column 29 (char 2351)
[
  {
    "prompt": "What is the title of the third book in the Ender's Game series?",
    "A": "Xenocide",
    "B": "Speaker for the Dead",
    "C": "Children of the Mind",
    "D": "Ender's Game",
    "E": "Gloriously Bright",
    "answer": "A",
    "basis": "The text states that Xenocide is the third book in the Ender's Game series."
  },
  {
    "prompt": "What is the meaning of the title 'Xenocide'?",
    "A": "The act of killing populations of aliens",
    "B": "The act of killing humans infected with the descolada virus",
    "C": "The act of destroying the entire planet of Lusitania",
    "D": "The act of killing the pequeninos",
    "E": "The act of killing the Hive Queen",
    "answer": "A",
    "basis": "The text explains that the title 'Xenocide' is a combination of 'xeno-', meaning alien, and '-cide', referring to the act of killing, together meaning the act of killing populations of aliens."
  },
  {
    "prompt": "Wh


../data/wikipedia/y.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [02:30<00:00, 21.45s/it]
../data/wikipedia/z.parquet: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [01:29<00:00, 17.86s/it]


In [14]:
df_texts.to_csv(f"output_gpt3.5_generate/{now_date}.csv")

In [23]:
for i in range(10):
    series = df_texts.sample(1).iloc[0]
    print(f"{series['prompt']} \n text: \n {series['original_text'].split('==')[0]}")
    print("----------------")

What is the maximum sustainable yield (MSY) usually higher than? 
 text: 
 title: Maximum sustainable yield
In population ecology and economics, maximum sustainable yield (MSY) is theoretically, the largest yield (or catch) that can be taken from a species' stock over an indefinite period. Fundamental to the notion of sustainable harvest, the concept of MSY aims to maintain the population size at the point of maximum growth rate by harvesting the individuals that would normally be added to the population, allowing the population to continue to be productive indefinitely. Under the assumption of logistic growth, resource limitation does not constrain individuals' reproductive rates when populations are small, but because there are few individuals, the overall yield is small. At intermediate population densities, also represented by half the carrying capacity, individuals are able to breed to their maximum rate. At this point, called the maximum sustainable yield, there is a surplus of i

In [40]:
df_texts.to_csv(f"output_gpt3.5_generate/{now_date}.csv")

In [20]:
pd.DataFrame(texts)["A"].isnull().sum()

69

In [41]:
df_science = get_df(files[0])
df_science[df_science["id"] == '29637793']

Unnamed: 0,id,title,text,categories


In [42]:
df_science

Unnamed: 0,id,title,text,categories
368626,1908395,Artificial brain,title: Artificial brain\nAn artificial brain (...,"[Computational neuroscience, Robotics, Emergin..."
34268,10410698,Abdul Amir al-Jamri,title: Abdul Amir al-Jamri\nSheikh Abdul Amir ...,"[1938 births, 2006 deaths, Deaths from kidney ..."
233077,1958222,Amyloid beta,title: Amyloid beta\nAmyloid beta (Aβ or Abeta...,"[Peptides, Molecular neuroscience, Alzheimer's..."
9870,3621668,A Woman of the Iron People,title: A Woman of the Iron People\nA Woman of ...,"[1991 American novels, 1991 science fiction no..."
139719,38366604,Albert Spaier,title: Albert Spaier\nAlbert Spaier (9 July 18...,"[1883 births, 1934 deaths, Writers from Iași, ..."
...,...,...,...,...
62685,4474244,Actuarial reserves,"title: Actuarial reserves\nIn insurance, an ac...","[Actuarial science, Capital requirement de:Dec..."
357456,4260564,Arrival II,title: Arrival II\nArrival ll (alternatively t...,"[1998 films, 1998 science fiction films, Ameri..."
391514,32894329,Astrobiophysics,title: Astrobiophysics\nAstrobiophysics is a f...,"[Astrophysics, Biophysics .]"
10000,2824171,A World of Difference (novel),title: A World of Difference (novel)\nA World ...,"[1990 American novels, Novels set during the C..."
