In [1]:
import os
from dotenv import load_dotenv

# Merge variables from a local .env file (if any) into the process environment.
load_dotenv()

# Look the key up once; the value is None when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [3]:
import os
from pypdf import PdfReader

# --- CONFIGURATION AREA ---
# PDFs to convert. This entry is the source document for Objective 3 of Lab 11.
# NOTE(review): absolute, machine-specific path; adjust it when running elsewhere.
PDF_FILEPATHS = [
    r"C:\Users\kaila\Downloads\Engineering_Strategic-Plan_2018.pdf",
]
# ------------------------------------------

# --- Output Configuration ---
# Marker inserted between the texts of consecutive PDFs in the combined corpus.
PDF_SEPARATOR = "\n\n--- DOCUMENT END ---\n\n"
# File that receives the combined extracted text.
OUTPUT_TEXT_FILE = "context-objective3.txt"

# --- Functions ---
def extract_text_from_pdf(filepath: str) -> str:
    """Extract the text of every page of a PDF and return it as one string.

    Page texts are joined with a newline so that the last word of one page
    cannot fuse with the first word of the next. A page whose
    ``extract_text()`` returns ``None`` or ``""`` contributes an empty string.

    Args:
        filepath: Path of the PDF file on the local filesystem.

    Returns:
        The newline-joined page texts; ``""`` when the PDF has no pages.

    Raises:
        OSError: If ``filepath`` cannot be opened for reading.
        Exception: Anything ``PdfReader`` or ``extract_text()`` raises while
            parsing is propagated unchanged.
    """
    with open(filepath, 'rb') as file:
        pdf_reader = PdfReader(file)
        # Consume the pages while the file is still open; build the result in
        # a single join instead of repeated string concatenation.
        return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

# --- Execution ---
print("Starting PDF conversion...")

# Keep only non-blank entries, with surrounding whitespace removed.
valid_paths = [path for path in map(str.strip, PDF_FILEPATHS) if path]

if valid_paths:
    all_text = []
    total = len(valid_paths)

    for position, filepath in enumerate(valid_paths, start=1):
        filename = os.path.basename(filepath)
        print(f"[{position}/{total}] Processing: {filename}...")

        if os.path.exists(filepath):
            # A failure on one PDF is reported and the loop moves on to the next.
            try:
                all_text.append(extract_text_from_pdf(filepath))
            except Exception as e:
                print(f"Critical Error processing {filename}: {e}")
        else:
            print(f"⚠️ Error: File not found at path: {filepath}")

    if not all_text:
        print("\nFailed to extract text from any provided PDF.")
    else:
        # One corpus file: the per-PDF texts joined by the separator marker.
        final_corpus = PDF_SEPARATOR.join(all_text)

        with open(OUTPUT_TEXT_FILE, 'w', encoding='utf-8') as f:
            f.write(final_corpus)

        print("\nConversion complete!")
        print(f"Text from {len(all_text)} PDF(s) saved to **{OUTPUT_TEXT_FILE}**.")
else:
    print("No valid file paths entered in the PDF_FILEPATHS list.")

Starting PDF conversion...
[1/1] Processing: Engineering_Strategic-Plan_2018.pdf...

Conversion complete!
Text from 1 PDF(s) saved to **context-objective3.txt**.


In [4]:
from langchain_openai.chat_models import ChatOpenAI

# Chat model used to generate answers; the key comes from the environment lookup above.
model = ChatOpenAI(
    model="gpt-4o-mini",
    openai_api_key=OPENAI_API_KEY,
)

In [5]:
from langchain_core.output_parsers import StrOutputParser

# Converts the chat model's message output into a plain string.
parser = StrOutputParser()

# Minimal pipeline: model output fed straight into the string parser.
chain = model.pipe(parser)

In [16]:
from langchain.prompts import ChatPromptTemplate

# Prompt text with two placeholders: {context} and {question}.
# Written line by line so the exact whitespace (including the trailing space
# after "can't") is visible.
template = (
    "\n"
    "Answer the question based on the context below. If you can't \n"
    "answer the question, tell us why.\n"
    "\n"
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
)

# Build the chat prompt object from the template string.
prompt = ChatPromptTemplate.from_template(template=template)

In [17]:
from langchain_community.document_loaders import TextLoader

# Read the extracted corpus back in. The encoding is given explicitly as UTF-8,
# matching the encoding used when the file was written.
loader = TextLoader(file_path="context-objective3.txt", encoding="utf-8")
text_documents = loader.load()

text_documents

[Document(metadata={'source': 'context-objective3.txt'}, page_content="College of Engineering Strategic Plan May 16, 2018 \n1 \nVision \n2018 Strategic Plan\n \n \nThe Ohio State University  \nMission/Vision Statement \n \nVISION \nThe Ohio State University is the model 21st-century public, land grant, research, urban, community engaged \ninstitution. \n \nMISSION \nThe University is dedicated to: \n• Creating and discovering knowledge to improve the well-being of our state, regional, national and global \ncommunities; \n• Educating students through a comprehensive array of distinguished academic programs; \n• Preparing a diverse student body to be leaders and engaged citizens; \n• Fostering a culture of engagement and service. \nWe understand that diversity and inclusion are essential components of our excellence. \n \nVALUES \nShared values are the commitments made by the University community in how we conduct our work. At \nThe Ohio State University we value: \n• Excellence \n• Dive

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded corpus into chunks of at most 1000 characters, with a
# 20-character overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
documents = text_splitter.split_documents(text_documents)

# The input is the text extracted from the strategic-plan PDF, not a blog post.
print(f"Split strategic plan text into {len(documents)} sub-documents.")

Split blog post into 71 sub-documents.


In [19]:
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai.embeddings import OpenAIEmbeddings

# Embed every chunk and keep the vectors in an in-memory store.
embeddings = OpenAIEmbeddings()
vectorstore1 = DocArrayInMemorySearch.from_documents(documents, embeddings)

# RAG pipeline: the input string is used both as the retriever query
# ("context") and, passed through unchanged, as the "question".
chain = (
    RunnableParallel(
        context=vectorstore1.as_retriever(),
        question=RunnablePassthrough(),
    )
    | prompt
    | model
    | parser
)

In [20]:
import hashlib
import os
from dotenv import load_dotenv
load_dotenv()

from langchain_pinecone import PineconeVectorStore

index_name = "5234-rag-index"

# Deterministic ids derived from each chunk's position and text. When no ids are
# supplied the store generates new ones on every call, so re-running this cell
# would add another full copy of the chunks to the persistent index. With stable
# ids a re-run overwrites the same records instead.
chunk_ids = [
    hashlib.sha256(f"{position}:{doc.page_content}".encode("utf-8")).hexdigest()
    for position, doc in enumerate(documents)
]

# Embed the chunks and upsert them into the named Pinecone index.
pinecone = PineconeVectorStore.from_documents(
    documents, embeddings, index_name=index_name, ids=chunk_ids
)

In [22]:
# Request exactly the three nearest chunks instead of fetching the default
# number of hits and slicing the list afterwards.
pinecone.similarity_search(
    "What is sustainable about the Engineering Strategic Plan 2018?",
    k=3,
)

[Document(metadata={'source': 'context-objective3.txt'}, page_content='information technology, communications, transportation, supply chains, and many other \nengineering and architecture-related activities. \nThe following additional factors are high-level drivers that have shaped the current strategic \nplan.  \nMeeting the demand for engineers. Sustainable growth and increased standards of living for \nour global society requires high quality, work place- and market place-ready engineering and \narchitecture talent be produced in significant quantity. As a result, the national and global \ndemand for engineering and architecture talent continues to increase. At the same time, the \ncollege-age population in the U.S. is decreasing. The higher education market place is becoming \nincreasingly competitive and engineering programs must deepen the quality of the academic \nprogramming, faculty, facilities and infrastructure and open access to new pools of incoming \nstudents.  \nFaculty 

In [23]:
def format_docs(docs):
    """Join the text of the retrieved chunks into a single context string.

    Without this step the prompt's {context} placeholder receives the
    retriever's raw list of Document objects, which is interpolated as a
    Python repr (metadata and escaped newlines included) rather than as
    readable text.

    Args:
        docs: Iterable of retrieved documents exposing ``page_content``.

    Returns:
        The chunks' ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline backed by the Pinecone index: retrieve chunks for the input
# string, flatten them to text, fill the prompt, call the model, parse to str.
chain = (
    {"context": pinecone.as_retriever() | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | parser
)

chain.invoke("According to the 'Engineering_Strategic_Plan_2018.pdf', what specific measures, metrics, or key performance indicators (KPIs) did the College of Engineering plan to use to assess the success and effectiveness of the Master of Global Engineering Leadership (MGEL) degree program? List them clearly.")

'The provided context does not specify any particular measures, metrics, or key performance indicators (KPIs) that the College of Engineering planned to use to assess the success and effectiveness of the Master of Global Engineering Leadership (MGEL) degree program. The content focuses on initiatives for promoting enrollment growth and establishing advisory groups but lacks detailed information about specific assessment criteria or metrics. Therefore, I cannot answer the question based on the given context.'