# RAG PIPELINE FOR IEP

In [3]:
pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "E:\Anaconda\Lib\site-packages\pip\__main__.py", line 24, in <module>
    sys.exit(_main())
             ^^^^^^^
  File "E:\Anaconda\Lib\site-packages\pip\_internal\cli\main.py", line 65, in main
    cmd_name, cmd_args = parse_command(args)
                         ^^^^^^^^^^^^^^^^^^^
  File "E:\Anaconda\Lib\site-packages\pip\_internal\cli\main_parser.py", line 79, in parse_command
    general_options, args_else = parser.parse_args(args)
                                 ^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\Anaconda\Lib\optparse.py", line 1371, in parse_args
    values = self.get_default_values()
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "E:\Anaconda\Lib\site-packages\pip\_internal\cli\parser.py", line 279, in get_default_values
    self.config.load()
  File "E:\Anaconda\Lib\site-packages\pip\_internal\configuration.py", line 124, in load
 

In [4]:
import os
import aiohttp
import asyncio
import argparse
import pdfplumber
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor

from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

## 1. Data Collection and Preprocessing 
Collect and preprocess relevant documents including:
Occupational Outlook Handbook from the Bureau of Labor Statistics (https://www.bls.gov/ooh/)
State educational standards for employment skills (e.g., 21st Century Skills)
Sample IEP goals and transition planning documentation
Implement effective text extraction and chunking strategies
Create and store appropriate embeddings for efficient retrieval

In [5]:
import subprocess

# Run the script without forcing rebuild
subprocess.run(["python", "load_and_embed.py"])

CompletedProcess(args=['python', 'load_and_embed.py'], returncode=1)

## 2. RAG Pipeline Implementation and Prompt Engineering
Develop a retrieval component that can effectively find relevant information from:
* Career requirements and standards from the Occupational Outlook Handbook
* Educational standards that align with a student's career interests
* Best practices for IEP goal development <br>

Implement a generation component that:
* Creates well-formed, measurable postsecondary goals
* Develops aligned short-term objectives/benchmarks
* Ensures compliance with IDEA 2004 requirements for transition planning

Create effective prompts that:
* Extract relevant information from student profiles
* Guide the model to generate appropriately structured goals
* Ensure alignment between postsecondary goals, industry standards, and educational standards
* Include appropriate instructions for handling missing information or ambiguous cases

---
### Building an effective prompt

To build an effective prompt, we start with a clear role assigment to help set the model's tone and perspective. This helps position the LLM as a domain specific expert in special education. Next, we include contextual inputs for grounding.  The student prile ensures the output is personalized to that student and the retrieved context adds external grounding retrieved from the RAG pipeline to anchor themodel's responses in the occupational Outlook Handbook and IDEA Compliance.  Then we layer actionalble insturctions - directing the LLM step by step and also andle ambigouous situations directing it to explain any assumptions. Most importantly, we give it a consistent output format.  This ensures that consistent formatting and alignment to standards in IEP documentation.  

In [6]:
def build_prompt(student_profile, retrieved_context):
    return f"""
You are a special education expert creating IEP transition goals for high school students with disabilities.

Student Profile:
{student_profile}

Reference Materials:
{retrieved_context}

Instructions:
- Create a clear, measurable postsecondary goal based on the student’s career interest.
- List 2–3 short-term objectives aligned with the goal.
- Suggest specific transition services to support those objectives.
- Include a final note on compliance with IDEA 2004.
- If needed, explain any assumptions you made.

Respond in this format:

Measurable Postsecondary Goals:
[Write two detailed goals here that explain what this student should focus on. One goal should relate to employment. One goal should relate to education and training.]

Short-Term Objectives:
1.
2.
3.

Transition Services:
1.
2.

Measurable annual goal aligned with standards
[Relate the postsecondary goals to soft skills (i.e. listening) that the student should focus on in order to achieve success in their desired career path while aligning it with standards.]

Compliance Note:
[Explain any assumptions or IDEA alignment.]

Response:
"""


In [7]:
# Importing relevent chunks
from retriever import retrieve_relevant_chunks

In [11]:
# Specifying hugging face model used and hugging face token
from huggingface_hub import InferenceClient

HF_TOKEN = "insert your token"

client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta",
                        token = HF_TOKEN)

def generate_iep_plan(prompt):
    return client.text_generation(prompt, max_new_tokens=700)

## Example Testing

In [12]:
# Example 1 
student_profile = """
Clarence

Student Information:
15-year-old sophomore with a behavior disorder
Completed the O*Net Interest Profiler assessment
Shows strong interest in the "Enterprising" category
Career interests: retail sales, driver/sales worker
Prefers hands-on learning over academic instruction
Assessment Results:

O*Net Interest Profiler indicates strength in Enterprising activities
Career suggestions include retail salesperson or driver/sales worker
Student interview ("Vision for the Future") indicates interest in working at Walmart

"""

In [13]:
# Step 1: Retrieve contextual information from vector DB
context = retrieve_relevant_chunks(student_profile)

# Step 2: Build the prompt with student info + context
prompt = build_prompt(student_profile, context)

# Step 3: Generate IEP goal plan
response = generate_iep_plan(prompt)

In [14]:
print(response)


Measurable Postsecondary Goals:
1. By the end of senior year, Clarence will demonstrate the skills necessary to secure and maintain employment as a retail salesperson at Walmart by participating in a job shadowing program, completing a work-based learning experience, and practicing customer service skills during school-based simulations.
2. By the end of senior year, Clarence will complete a high school diploma or equivalent and enroll in a postsecondary education or training program related to retail sales or customer service.

Short-Term Objectives:
1. By the end of junior year, Clarence will identify at least three local Walmart stores and contact the managers to inquire about job shadowing opportunities.
2. By the end of senior year, Clarence will complete a minimum of 40 hours of work-based learning at a Walmart store, documenting his experiences in a journal and reflecting on what he learned.
3. By the end of senior year, Clarence will participate in at least five school-based s

In [15]:
# Example 2 
student_profile = """
Mallory

Student Information:
17-year-old sophomore with ADHD
Completed the O*Net Interest Profiler assessment
Shows strong interest in the "Conventional" category
Career interests: accounting, scientific research
Gets caught up in details and needs specific instructions, doesn't always work well with others
Assessment Results:

O*Net Interest Profiler indicates strength in Conventional activities
Career suggestions include accounting and scientific research
Student interview ("Vision for the Future") indicates interest in shadowing at a university science department

"""

In [16]:
# Step 1: Retrieve contextual information from vector DB
context = retrieve_relevant_chunks(student_profile)

# Step 2: Build the prompt with student info + context
prompt = build_prompt(student_profile, context)

# Step 3: Generate IEP goal plan
response = generate_iep_plan(prompt)

In [17]:
print(response)


Measurable Postsecondary Goals:
1. By the end of senior year, Mallory will demonstrate the ability to accurately complete basic accounting tasks, such as balancing a checkbook and creating a simple budget, with 90% accuracy and in a timely manner.
2. By the end of senior year, Mallory will participate in a university science department shadowing program for at least 20 hours, demonstrating an understanding of basic scientific concepts and procedures, and will be able to communicate her findings to a small group of peers.

Short-Term Objectives:
1. By the end of junior year, Mallory will be able to accurately complete a basic accounting worksheet, with 80% accuracy and in a timely manner.
2. By the end of junior year, Mallory will be able to accurately label and identify basic scientific equipment, with 80% accuracy and in a timely manner.
3. By the end of junior year, Mallory will be able to accurately follow a basic scientific procedure, with 80% accuracy and in a timely manner.

Tra

## Interface

* Build a user-friendly interface that collects student information
* Display generated goals, objectives, and alignments
* Include explanations of how the goals connect to standards

In [18]:
## Run app
!streamlit run app.py

Traceback (most recent call last):
  File "E:\Anaconda\Scripts\streamlit-script.py", line 6, in <module>
    from streamlit.web.cli import main
  File "E:\Anaconda\Lib\site-packages\streamlit\web\cli.py", line 24, in <module>
    import click
  File "E:\Anaconda\Lib\site-packages\click\__init__.py", line 7, in <module>
    from .core import Argument as Argument
  File "E:\Anaconda\Lib\site-packages\click\core.py", line 16, in <module>
    from . import types
  File "E:\Anaconda\Lib\site-packages\click\types.py", line 9, in <module>
    from ._compat import _get_argv_encoding
  File "E:\Anaconda\Lib\site-packages\click\_compat.py", line 510, in <module>
    from ._winconsole import _get_windows_console_stream
  File "E:\Anaconda\Lib\site-packages\click\_winconsole.py", line 13, in <module>
    from ctypes import byref
  File "E:\Anaconda\Lib\ctypes\__init__.py", line 8, in <module>
    from _ctypes import Union, Structure, Array
ImportError: DLL load failed while importing _ctypes: The 