In [1]:
def read_text_file(file_path):
    """Return the full text of a UTF-8 file, or None if it cannot be read.

    This is a best-effort reader: problems are reported on stdout rather
    than raised, so callers must be prepared for a None result.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The file's contents as a str, or None when the file is missing or
        any other error occurs while opening or reading it.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            return handle.read()
    except FileNotFoundError:
        message = "The file does not exist at the specified path."
    except Exception as err:
        message = f"An error occurred: {err}"

    # Only reached when one of the handlers above ran.
    print(message)
    return None

# Example usage: load section 1.1 of the textbook and print it for a visual check.
# `file_contents` stays in the kernel namespace and is sent as the user message
# by the chat-completion cell further down. It is None if the read failed,
# because read_text_file only prints on error instead of raising.
file_contents = read_text_file("unit1/chapter1/1-1-the-science-of-biology.txt")
print(file_contents)


Learning Objectives
By the end of this section, you will be able to do the following:

Identify the shared characteristics of the natural sciences
Summarize the steps of the scientific method
Compare inductive reasoning with deductive reasoning
Describe the goals of basic science and applied science
Photo A depicts round colonies of blue-green algae. Each algae cell is about 5 microns across. Photo B depicts round fossil structures called stromatalites along a watery shoreline.
Figure 1.2 Formerly called blue-green algae, these (a) cyanobacteria, magnified 300x under a light microscope, are some of Earth’s oldest life forms. These (b) stromatolites along the shores of Lake Thetis in Western Australia are ancient structures formed by layering cyanobacteria in shallow waters. (credit a: modification of work by NASA; credit b: modification of work by Ruth Ellison; scale-bar data from Matt Russell)
What is biology? In simple terms, biology is the study of life. This is a very broad definit

In [3]:
from openai import OpenAI
from dotenv import load_dotenv
# Runs before the client is constructed.
# NOTE(review): presumably this loads the OpenAI API key from a local .env file
# into the environment so that OpenAI() can pick it up — confirm.
load_dotenv()

# Shared client; later cells reuse this `client` name.
client = OpenAI()

# Ask the model for question/answer pairs over the section text held in
# `file_contents` (set by the earlier read cell). The request asks for a JSON
# object via response_format, and the system prompt also says "Output JSON".
response = client.chat.completions.create(
  model="gpt-4-turbo-preview",
  response_format={ "type": "json_object" },
  messages=[
    {"role": "system", "content": "Your objective is to generate question and answer pairs from the content of a textbook provided. Output JSON."},
    {"role": "user", "content": file_contents}
  ]
)
# Show the raw text of the first choice's message.
print(response.choices[0].message.content)

{
  "questions": [
    {
      "question": "What is the broad definition of biology?",
      "answer": "Biology is the study of life."
    },
    {
      "question": "What are the shared characteristics of the natural sciences?",
      "answer": "Natural sciences relate to the physical world and its phenomena and processes."
    },
    {
      "question": "What does the scientific method entail?",
      "answer": "The scientific method is a method of research with defined steps that include experiments and careful observation."
    },
    {
      "question": "How do inductive reasoning and deductive reasoning differ?",
      "answer": "Inductive reasoning uses related observations to arrive at a general conclusion, whereas deductive reasoning uses a general principle to predict specific results."
    },
    {
      "question": "What distinguishes basic science from applied science?",
      "answer": "Basic science seeks to expand knowledge regardless of the short-term application, whil

In [6]:
# Save the section 1.1 Q&A output into a dated sample file (2024-04-16).
import json

# Raw message text of the first choice of whatever `response` currently holds
# in the kernel (a str).
json_output = response.choices[0].message.content #str

# Parse the text into Python objects. json.loads raises if the text is not
# valid JSON, so nothing is written for a malformed reply.
data = json.loads(json_output)

# Write the parsed data back out, pretty-printed with a 4-space indent.
with open('unit1/chapter1/qa-generation-u1-ch1.1-2024-04-16-output.json', 'w') as file:
    json.dump(data, file, indent=4)

In [7]:
# now with 1.2
# Same read -> request -> parse -> save steps as for section 1.1, repeated inline
# for section 1.2. This rebinds `file_contents`, `response`, `json_output` and
# `data` in the kernel namespace.
# read_text_file returns None (after printing) when the read fails; that None
# would be passed through unchanged as the user message content below.
file_contents = read_text_file("unit1/chapter1/1-2-themes-and-concepts-of-biology.txt")

# Same model, JSON response format and system prompt as the section 1.1 request.
response = client.chat.completions.create(
  model="gpt-4-turbo-preview",
  response_format={ "type": "json_object" },
  messages=[
    {"role": "system", "content": "Your objective is to generate question and answer pairs from the content of a textbook provided. Output JSON."},
    {"role": "user", "content": file_contents}
  ]
)

# Raw message text of the first choice (a str).
json_output = response.choices[0].message.content #str

# Parse the text into Python objects; raises if the reply is not valid JSON.
data = json.loads(json_output)

# Write the parsed data to the section 1.2 output file, indented by 4 spaces.
with open('unit1/chapter1/qa-generation-u1-ch1.2-2024-04-16-output.json', 'w') as file:
    json.dump(data, file, indent=4)

In [28]:
import json
from datetime import date

def generate_qa_pairs(file_name, prompt, mc=False):
    """Generate Q&A pairs for one textbook section and save them as JSON.

    The section text is read with ``read_text_file`` and sent to the chat
    model as the user message, with ``prompt`` as the system message. The
    JSON reply is parsed and written into the same directory as the source
    file, named
    ``<qa|mcqa>-generation-output-<source name without .txt>-<today>.json``.

    Args:
        file_name: Path to the section's text file.
        prompt: System prompt describing the pairs to generate. The request
            uses the ``json_object`` response format.
        mc: When True the output file name starts with ``mcqa`` instead of
            ``qa``. It only affects the file name, not the request.

    Returns:
        None. The result is the file written to disk.

    Raises:
        ValueError: If the source file could not be read.
        json.JSONDecodeError: If the model's reply is not valid JSON.
    """
    # Local import so the function does not depend on another cell importing os.
    import os

    # Read the content of the file
    file_contents = read_text_file(file_name)
    if file_contents is None:
        # read_text_file prints the reason and returns None on failure. Stop
        # here rather than sending a request with no content to the API.
        raise ValueError(f"Could not read source text from {file_name!r}")

    # Create a message with the prompt and call the completion method of the client
    response = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": prompt},
            {"role": "user", "content": file_contents}
        ]
    )

    # Extract the reply text and parse it; parsing doubles as a validity check
    # so a malformed reply never reaches the output file.
    json_output = response.choices[0].message.content
    data = json.loads(json_output)

    # Build the output path next to the source file, whatever its depth
    # (previously the directory was assumed to be exactly two levels deep).
    directory, source_name = os.path.split(file_name)
    # Strip only a trailing ".txt"; a ".txt" elsewhere in the name is kept.
    if source_name.endswith(".txt"):
        base_filename = source_name[:-len(".txt")]
    else:
        base_filename = source_name
    qa_type = "mcqa" if mc else "qa"
    output_filename = os.path.join(
        directory,
        f"{qa_type}-generation-output-{base_filename}-{date.today()}.json",
    )

    # Write JSON data to a file
    with open(output_filename, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)



In [35]:
# chapters
# Source text files for sections 1.1 and 1.2 of unit 1, chapter 1.
path_1_1 = "unit1/chapter1/1-1-the-science-of-biology.txt"
path_1_2 = "unit1/chapter1/1-2-themes-and-concepts-of-biology.txt"

# generate qa pairs
# System prompt for free-form question/answer pairs.
prompt_qa_gen = "Your objective is to generate question and answer pairs from the content of a textbook provided. Output JSON."
# System prompt for multiple-choice pairs; the embedded example shows the
# expected shape of a single item (question, options, answer).
prompt_mc_qa_gen = """
Your objective is to generate ten multiple choice question and answer pairs from the content of a textbook provided.
Each multiple choice question should instruct user to pick a, b, c or d. Answer is one of those choices.
e.g. {
    "question": "What is the capital of Colombia?",
    "options": ["Bogota", "Paris", "Lisbon", "Lima"],
    "answer": "Bogota"
    }
Make sure to generate ten (10) question and answer pairs.
Output JSON.
"""

# Each active call below sends one request and writes one output file.
# The free-form ("qa") runs are commented out; only the multiple-choice runs execute.

# 1.1 qa
# generate_qa_pairs(path_1_1, prompt_qa_gen)

# 1.1 mc
generate_qa_pairs(path_1_1, prompt_mc_qa_gen, mc=True)

# 1.2 qa
# generate_qa_pairs(path_1_2, prompt_qa_gen)

# 1.2 mc
generate_qa_pairs(path_1_2, prompt_mc_qa_gen, mc=True)
