## 0.00 Setup

In [2]:
import torch

if torch.cuda.is_available():
    print(f"GPU is available: {torch.cuda.get_device_name(0)}")
else:
    print("GPU is NOT available.")


GPU is NOT available.


In [1]:
import torch
torch.cuda.empty_cache()


In [None]:
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
%pip install PyMuPDF
%pip install unstructured
%pip install "unstructured[pdf]"
%pip install faiss-cpu
%pip install langchain_experimental
%pip install huggingface_hub
%pip install sentence_transformers
%pip install transformers langchain accelerate sentencepiece
%pip install langchain-huggingface

### Step 0.01 Load libraries and API keys

In [5]:
%pip install langchain_experimental

Collecting langchain_experimental
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Downloading langchain_experimental-0.3.4-py3-none-any.whl (209 kB)
Installing collected packages: langchain_experimental
Successfully installed langchain_experimental-0.3.4
Note: you may need to restart the kernel to use updated packages.


In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from dotenv import load_dotenv

# Load API keys from .env file 
load_dotenv(override=True)

True

In [None]:
# Step 0.02: Check API KEY
import os
print(f"[API KEY]\n{os.environ['LANGSMITH_API_KEY']}")

### Combined Inital Working OpenAi

In [9]:
## Step 0: Load Environment Variables
import os
from dotenv import load_dotenv
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_experimental.text_splitter import SemanticChunker

# Load API keys from .env file 
load_dotenv(override=True)

document_path = "documents"

# Define the root folder that contains your PDFs
pdf_folder = document_path  # adjust to your folder path

# List to store all loaded documents
all_docs = []

# Walk through the folder and its subfolders
for root, dirs, files in os.walk(pdf_folder):
    for file in files:
        if file.lower().endswith(".pdf"):
            file_path = os.path.join(root, file)
            print(f"Loading file: {file_path}")
            loader = PyMuPDFLoader(file_path)
            docs = loader.load()  # load returns a list of Document objects (e.g., one per page)
            all_docs.extend(docs)

print(f"Loaded {len(all_docs)} documents from {pdf_folder}.")


chunk_size = 500
chunk_overlap = 50
number_of_chunks = 3

question = "What are important design principles to think about when adapting my science unit?"
model_name ="gpt-4o" 
temperature = 0

Loading file: documents\2025_NARST_conference_program_book.pdf
Loading file: documents\HowtoWriteNGSSLessonPlans.pdf
Loaded 223 documents from documents.


In [12]:
print(len(all_docs))  # should be > 0
print(docs[0].page_content[:300])  # preview first page


223
How to Write NGSS Lesson Plans 
• 
The Albert Team 
• 
Last Updated On: March 1, 2022 
The Next Generation Science Standards, or NGSS standards, are likely to start rolling out 
across the United States over the next few years. The first comprehensive overhaul in 
almost fifteen years, these standar


In [15]:
# Step 1: Load Documents
#loader = PyMuPDFLoader(document_path)
#docs = loader.load()

# Step 2: Split Documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, length_function=len, separators=["\n\n", "\n", " ", ""])
split_documents = text_splitter.split_documents(all_docs)

# Step 3: Generate Embeddings
embeddings = OpenAIEmbeddings()

# Step 4: Create and Save the Database
# Create a vector store.
vectorstore = FAISS.from_documents(documents=split_documents, embedding=embeddings)

# Step 5: Create Retriever
# Search and retrieve information contained in the documents.
retriever = vectorstore.as_retriever()

# Step 6: Create Prompt
prompt = PromptTemplate.from_template(
    # """You are an assistant for question-answering tasks. 
# Use the following pieces of retrieved context to answer the question. 
# If you don't know the answer, just say that you don't know. 
    
    """You are a thinking partner for a teacher working to adapt their curricular material. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know.
    


#Context: 
{context}

#Question:
{question}

#Answer:"""
)

# Step 7: Load LLM
llm = ChatOpenAI(model_name=model_name, temperature=0, max_tokens=2000, streaming=True, verbose=True)

# Step 8: Create Chain
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Step 9: Run Chain
# Input a query about the document and print the response.
question = question
response = chain.invoke(question)
print(response)


When adapting your science unit, consider the following important design principles based on the context provided:

1. **Phenomena-Based Learning**: Identify interesting phenomena that are relevant to students' daily lives. This approach aligns with the Next Generation Science Standards (NGSS) and helps students see how science and engineering are present in the world around them.

2. **Flexibility in Lesson Planning**: The NGSS provides detailed guidelines but allows for flexibility in lesson preparation. This means you can tailor the lessons to fit your state, school district, and specific classroom needs.

3. **Building on Prior Knowledge**: Consider what students already know about the topic and how instruction can build on that knowledge. Identify prior concepts that are necessary for understanding the core ideas of the unit.

4. **Level of Abstraction**: Determine the appropriate level of abstraction expected from students and ensure that the instruction is aligned with that leve

In [None]:
# Step 9: Run Chain
# Input a query about the document and print the response.
question = question
response = chain.invoke(question)
print(response)

Based on the retrieved context, here are some important design principles to consider when adapting your science unit:

1. **Promoting Critical Thinking**: Consider incorporating a newly designed instructional framework that emphasizes critical thinking. This can help students engage more deeply with the material and develop essential analytical skills.

2. **Addressing Instructional Shifts**: Be aware of the instructional shifts in science education and support sensemaking. This involves helping students make sense of scientific concepts and processes, which can be facilitated by using educative features in curricula like OpenSciEd.

3. **Incorporating Play and Joyful Methodologies**: Think about integrating play and joyful methodologies into science learning. This can make learning more engaging and enjoyable for students, potentially increasing their interest and retention.

4. **Enacting Care and Inclusivity**: Consider how you can enact care alongside students, teachers, and commu

## 1.00 Setup RAG pipeline

### 1.01 Load the documents

In [5]:
### Load PDF Document
loader = PyMuPDFLoader("documents/2025_NARST_conference_program_book.pdf")
docs = loader.load()
print(f"Number of pages in the document: {len(docs)}")

Number of pages in the document: 212


In [6]:
### Check document content
# Print the first page content
print(docs[0].page_content)

98th NARST International Conference | Digital Program
Chicago, Illinois, Hilton Downtown Chicago
Washington, DC
March 23 - 26, 2025


In [7]:
### Check metadata of loaded document
docs[3].__dict__

{'id': None,
 'metadata': {'producer': 'Adobe PDF Library 17.0',
  'creator': 'Adobe InDesign 20.2 (Macintosh)',
  'creationdate': '2025-03-14T13:22:13-04:00',
  'source': 'documents/2025_NARST_conference_program_book.pdf',
  'file_path': 'documents/2025_NARST_conference_program_book.pdf',
  'total_pages': 212,
  'format': 'PDF 1.6',
  'title': '',
  'author': '',
  'subject': '',
  'keywords': '',
  'moddate': '2025-03-17T10:04:33-06:00',
  'trapped': '',
  'modDate': "D:20250317100433-06'00'",
  'creationDate': "D:20250314132213-04'00'",
  'page': 3},
 'page_content': '98th NARST International Conference     March 23–26, 2025     4\n98th NARST International Conference\nGeneral Information\nGeneral Information\nInformation about NARST\nNARST is a global organization for improving science \nteaching and learning through research. Since its \ninception in 1928, NARST has promoted research in \nscience education and the communication of \nknowledge generated by the research. The ultimate

### 1.02 Document Chunking

In [None]:
## Define Chunking Size and Split Documents
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50, separators=["\n\n", "\n", " ", ""])
split_documents = text_splitter.split_documents(docs)
print(f"Number of split chunks: {len(split_documents)}")

Number of split chunks: 1099


### 1.03 Generate Chunk Embeddings 

In [9]:
# Generate Embeddings
embeddings = OpenAIEmbeddings()

### 1.04 Store Embedded Chunks in Vector Database

In [10]:
# Create and Save the Database
# Create a vector store.
vectorstore = FAISS.from_documents(documents=split_documents, embedding=embeddings)

In [11]:
searchTerm = "What is the conference theme?"
number_of_results = 3

In [12]:
for doc in vectorstore.similarity_search(searchTerm, k=number_of_results):
    # Print the document content
    print(doc.page_content)

from a place of trust and relationship-building? Given 
that NARST's ultimate goal is to help all learners 
achieve science literacy, how might we reimagine 
science literacy with social, environmental, and 
epistemological justice at its core? 
This conference theme invites us to share the ways 
that we can transgress canonical boundaries in 
science education and expand dialogues on strategies 
for disrupting structures that sustain inequities, and
2025 NARST Annual International Conference, Washington DC 
 
 
44 
 
Strand 14: Environmental Education and 
Sustainability 
Stand-Alone Paper 
Bridging Roles: Educators and High 
School Graduates’ Sense of Climate 
Change. 
Shaima Alokbe*, Ben-Gurion University of 
the Negev, Israel 
Areej Nbari*, Ben-Gurion University of the 
Negev, Israel 
Wisam Sedawi*, University of Michigan, 
USA 
Orit Ben Zvi Assaraf, Ben-Gurion 
University of the Negev, Israel 
 
 
 
Closing remarks
2025 NARST Annual International Conference, Washington DC 
 
 
58 

### 1.05 Define Retriever to perform similarity search

In [32]:
# Create Retriever
# Search and retrieve information contained in the documents.
retriever = vectorstore.as_retriever()

In [33]:
initial_query = "What is the conference theme?"

In [34]:
retriever.invoke(initial_query)

[Document(id='67d5f200-1da7-4d93-b9e7-d9e0486e5d42', metadata={'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.2 (Macintosh)', 'creationdate': '2025-03-14T13:22:13-04:00', 'source': 'documents/2025_NARST_conference_program_book.pdf', 'file_path': 'documents/2025_NARST_conference_program_book.pdf', 'total_pages': 212, 'format': 'PDF 1.6', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2025-03-17T10:04:33-06:00', 'trapped': '', 'modDate': "D:20250317100433-06'00'", 'creationDate': "D:20250314132213-04'00'", 'page': 33}, page_content="from a place of trust and relationship-building? Given \nthat NARST's ultimate goal is to help all learners \nachieve science literacy, how might we reimagine \nscience literacy with social, environmental, and \nepistemological justice at its core? \nThis conference theme invites us to share the ways \nthat we can transgress canonical boundaries in \nscience education and expand dialogues on strategies \nfor disrupt

### 1.06 Create prompt for performing RAG

In [35]:
# Create Prompt
prompt = PromptTemplate.from_template(
    """You are a thinking partner for a teacher working to adapt their curricular material. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 

#Context: 
{context}

#Question:
{question}

#Answer:"""
)

### 1.07 Define the LLM

In [36]:
# Setup LLM
llm = ChatOpenAI(model_name="gpt-4o", temperature=0)

### 1.08 Produce Output from Defined Chain

In [37]:
# Create Chain
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

### 1.09 Run Chain

In [42]:
# Run Chain
# Input a query about the document and print the response.
question = "What are important design principles to think about when adapting my science unit?"
response = chain.invoke(question)
print(response)

I don't know. The provided context does not include specific information about design principles for adapting a science unit.


## 2.00 Combined RAG Code

In [7]:
## Step 0: Load Environment Variables
import os
from dotenv import load_dotenv
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_experimental.text_splitter import SemanticChunker

document_path = "documents"

# Define the root folder that contains your PDFs
pdf_folder = document_path  # adjust to your folder path

# List to store all loaded documents
all_docs = []

# Walk through the folder and its subfolders
for root, dirs, files in os.walk(pdf_folder):
    for file in files:
        if file.lower().endswith(".pdf"):
            file_path = os.path.join(root, file)
            print(f"Loading file: {file_path}")
            loader = PyMuPDFLoader(file_path)
            docs = loader.load()  # load returns a list of Document objects (e.g., one per page)
            all_docs.extend(docs)

print(f"Loaded {len(all_docs)} documents from {pdf_folder}.")


chunk_size = 500
chunk_overlap = 50
number_of_chunks = 3

question = "What are important design principles to think about when adapting my science unit?"
model_name ="gpt-4o" 
temperature = 0

Loading file: documents\2025_NARST_conference_program_book.pdf
Loaded 212 documents from documents.


In [8]:
from langchain_experimental.text_splitter import SemanticChunker
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEmbeddings

# Step 1: Load Documents

docs = docs

# Combine the page content into one text string.
full_text = "\n\n".join(doc.page_content for doc in docs)

# Step 2: Split Documents
#text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=["\n\n", "\n", " ", ""])
#split_documents = text_splitter.split_documents(docs)

# Step 2: Create a SemanticChunker using an OpenAI embedding model.
semantic_text_splitter = SemanticChunker(OpenAIEmbeddings(model="text-embedding-3-small"), add_start_index=True)

# Split the text into semantically coherent chunks
chunks = semantic_text_splitter.split_text(full_text)

# Wrap the chunks into Document objects
docs = [Document(page_content=chunk) for chunk in chunks]

# Step 3: Generate Embeddings
embeddings = HuggingFaceEmbeddings()     #(Default model: sentence-transformers/all-mpnet-base-v2)   #OpenAIEmbeddings()

# Step 4: Create and Save the Database
# Create a vector store.
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)

# Save the vector store locally for later use.
# You can specify the directory where you want to save the index.
vectorstore.save_local("faiss_index/my_curriculum_db")

# Step 5: Create Retriever
# Search and retrieve information contained in the documents.
retriever = vectorstore.as_retriever(search_type="similarity")
#retriever = vectorstore.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.5})
#retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 5, "lambda_mult": 0.5} ) # λ controls the diversity-relevance balance Closer to 1.0 → more relevance Closer to 0.0 → more diversity

#search_result = retriever.invoke(question)
#print(search_result)
# Load stored FAISS index
#vectorstore = FAISS.load_local("faiss_index/my_curriculum_db", embedding_model)

# Step 6: Create Prompt
prompt = PromptTemplate.from_template(
    """You are a thinking partner for a teacher working to adapt their curricular material. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know.  

#Context: 
{context}

#Question:
{question}

#Answer:"""
)

# Step 7: Load LLM
#llm = ChatOpenAI(model_name=model_name, temperature=temperature)
# Load the model and tokenizer
hf_pipeline = pipeline(
    model="tiiuae/falcon-7b-instruct",       # You can change this to any other model
    task="text-generation",       # For models like FLAN-T5
    max_length=512,
    temperature=0.1,
    top_p=0.9,
    do_sample=True
)
llm = HuggingFacePipeline(pipeline=hf_pipeline)

# Step 8: Create Chain
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


OutOfMemoryError: CUDA out of memory. Tried to allocate 80.00 MiB. GPU 0 has a total capacity of 24.00 GiB of which 0 bytes is free. Of the allocated memory 37.91 GiB is allocated by PyTorch, and 386.92 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [4]:
# Run Chain
# Input a query about the document and print the response.
question = question
response = chain.invoke(question)
print(response)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


ValueError: Input length of input_ids is 512, but `max_length` is set to 512. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.

In [None]:
# output the results.

print(f"Number of documents: {len(docs)}")
print("===" * 20)
print(f"[HUMAN]\n{question}\n")
print(f"[AI]\n{response}")



Number of documents: 36
[HUMAN]
What are important design principles to think about when adapting my science unit?

[AI]
A teacher needs to consider how to incorporate a science unit into their unit.


In [5]:
from langchain.chains import RetrievalQA

rag_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,  # your retriever from vectorstore
    return_source_documents=True
)

# Ask a question
query = "How can I adapt a science lesson for multilingual learners?"
result = rag_chain(query)

# Show results
print("Answer:\n", result["result"])
print("\nSources:\n", [doc.metadata for doc in result["source_documents"]])


  result = rag_chain(query)
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


ValueError: Input length of input_ids is 512, but `max_length` is set to 512. This can lead to unexpected behavior. You should consider increasing `max_length` or, better yet, setting `max_new_tokens`.

In [None]:
query = "emergent bilinguals?"
docs = vectorstore.similarity_search(query, k=5)

for i, doc in enumerate(docs):
    print(f"\n--- Result {i+1} ---\n{doc.page_content}")


### Loading different document types

##  PDF File

In [1]:
# load all pdf files in the directory
from langchain_community.document_loaders import DirectoryLoader

loader = DirectoryLoader(".", glob="documents/*.pdf")
docs = loader.load()

print(f"page_content: {len(docs)}\n")
print("[metadata]\n")
print(docs[0].metadata)
print("\n========= [Preview] Front Section =========\n")
print(docs[0].page_content[2500:3000])

page_content: 1

[metadata]

{'source': 'documents\\2025_NARST_conference_program_book.pdf'}


s to help all learners achieve science literacy. NARST promotes this goal by: 1) encouraging and supporting the application of diverse research methods and theoretical perspectives from multiple disciplines to the investigation of teaching and learning in science; 2) communicating science education research findings to researchers, practitioners, and policy makers; and 3) cooperating with other educational and scientific societies to influence educational policies.

Member Benefits

Ten issues p


In [24]:
from langchain_community.document_loaders import PyMuPDFLoader

# Load PDF file. Enter the file path.
loader = PyMuPDFLoader("documents/2025_NARST_conference_program_book.pdf")



docs = loader.load()
print(f"Number of documents: {len(docs)}")

# Output the content of the 10th page.
print(f"\n[page_content]\n{docs[9].page_content[:500]}")
print(f"\n[metadata]\n{docs[9].metadata}\n")

Number of documents: 212

[page_content]
98th NARST International Conference     March 23–26, 2025     10
NARST Leadership Team
International Coordinator
Mercy Ogunsola-Bandele (2025)
National Open University of Nigeria 
Graduate Student Coordinator
Jennifer Bateman (2025)
Clemson University
NARST Liaison to NSTA
Carla Zembal-Saul (2027)	
Penn State University
JRST Editors 
Felicia Mensah (2025)	

Teachers College, Columbia University
Troy Sadler (2025)	 
UNC Chapel Hill 
Matthew Kloser (2030)	

University of Notre Dame
Edna Tan (2030)

[metadata]
{'producer': 'Adobe PDF Library 17.0', 'creator': 'Adobe InDesign 20.2 (Macintosh)', 'creationdate': '2025-03-14T13:22:13-04:00', 'source': 'documents/2025_NARST_conference_program_book.pdf', 'file_path': 'documents/2025_NARST_conference_program_book.pdf', 'total_pages': 212, 'format': 'PDF 1.6', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2025-03-17T10:04:33-06:00', 'trapped': '', 'modDate': "D:20250317100433-06'00'

## Web Article

In [None]:
# Load the contents of a web article, split it into chunks, and index it.
loader = WebBaseLoader(
    web_paths=("https://www.bbc.com/news/business-68092814",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            "main",
            attrs={"id": ["main-content"]},
        )
    ),
)
docs = loader.load()
print(f"Number of documents: {len(docs)}")
docs[0].page_content[:500]

## CSV File

In [None]:
from langchain_community.document_loaders.csv_loader import CSVLoader

# Load CSV file
loader = CSVLoader(file_path="data/titanic.csv")
docs = loader.load()
print(f"Number of documents: {len(docs)}")

# Output the content of the 10th row.
print(f"\n[row_content]\n{docs[9].page_content[:500]}")
print(f"\n[metadata]\n{docs[9].metadata}\n")

## All Text Files in Folder Loader

In [None]:
from langchain_community.document_loaders import DirectoryLoader

loader = DirectoryLoader(".", glob="data/*.txt", show_progress=True)
docs = loader.load()

print(f"Number of documents: {len(docs)}")

# Output the content of the 10th page.
print(f"\n[page_content]\n{docs[0].page_content[:500]}")
print(f"\n[metadata]\n{docs[0].metadata}\n")
print(f"\n[metadata]\n{docs[1].metadata}\n")

### Split text based on semantic similarity

In [7]:
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader

# Create a SemanticChunker.
semantic_text_splitter = SemanticChunker(OpenAIEmbeddings(model="text-embedding-3-small"), add_start_index=True)

# Load PDF file. Enter the file path.
loader = PyMuPDFLoader("documents/2025_NARST_conference_program_book.pdf")
docs = loader.load()

# Combine the page content into one text string.
text = "\n\n".join(doc.page_content for doc in docs)

for sent in semantic_text_splitter.split_text(text):
    print(sent)
    print("===" * 20)

98th NARST International Conference | Digital Program
Chicago, Illinois, Hilton Downtown Chicago
Washington, DC
March 23 - 26, 2025

THANK YOU TO OUR EXHIBITORS 
THANK YOU TO OUR SPONSOR

98th NARST International Conference     March 23–26, 2025     3
98th NARST International Conference
Table of Contents
2	
Sponsors 
3	
Table of Contents 
4	
NARST General Information
5	
Programs and Events Code of Conduct Policy
6	
Code of Ethical Conduct
8	
Research Interest Groups (RIGs) Information
10	 NARST Leadership Team
11	 Strand Key
11	 Strand Coordinators
12	 Program Proposal Reviewers
17	 NARST Presidents and Executive Directors
18 	 JRST Editors and NARST Emeritus Members
19	 NARST Award Recipients
19 	Distinguished Contributions to Science Education 
through Research
20	Outstanding Doctoral Research Award
21	 Early Career Research Award
21	 NARST Fellows Award
21	 Excellence in Mentoring Award
22 JRST Award
23	NARST Outstanding Paper Award
24	Outstanding Master’s Thesis Award
24	Classroom 

In [None]:
# Load a portion of the "Chain of Density" paper.
with open("data/chain-of-density.txt", "r", encoding="utf-8") as f:
    text = f.read()

for sent in semantic_text_splitter.split_text(text):
    print(sent)
    print("===" * 20)