In [1]:
# Import necessary libraries
import os
import json
import PyPDF2
from typing import List, Dict
import pandas as pd
from dotenv import load_dotenv

# LlamaIndex imports
from llama_index.core import Document, Settings, VectorStoreIndex, ServiceContext
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Pull variables defined in a local .env file into the process environment
load_dotenv()

# Fail fast if the OpenAI credential is missing or empty, before any API call is attempted
openai_api_key = os.environ.get("OPENAI_API_KEY")
if openai_api_key is None or openai_api_key == "":
    raise ValueError("Please set the OPENAI_API_KEY environment variable")

print("Environment setup complete!")

Environment setup complete!


In [3]:
# Set up LlamaIndex settings with OpenAI LLM and Embeddings
def initialize_settings():
    """Configure the global LlamaIndex ``Settings`` object and return it.

    Builds an OpenAI embedding model (text-embedding-3-small) and an OpenAI
    LLM (gpt-4o-mini), both using the ``OPENAI_API_KEY`` environment variable,
    then stores them on ``Settings`` together with the chunking parameters.

    Returns:
        The ``Settings`` object itself (not a ServiceContext).
    """
    embedding_options = {
        "model": "text-embedding-3-small",
        "api_key": os.getenv("OPENAI_API_KEY"),
        "dimensions": 1536,
    }
    embedder = OpenAIEmbedding(**embedding_options)

    llm_options = {
        "model": "gpt-4o-mini",
        "temperature": 0.1,
        "api_key": os.getenv("OPENAI_API_KEY"),
    }
    chat_model = OpenAI(**llm_options)

    # Both models are constructed before Settings is touched, so a failed
    # constructor leaves the global configuration unchanged.
    Settings.embed_model = embedder
    Settings.llm = chat_model
    Settings.chunk_size = 1000
    Settings.chunk_overlap = 200

    return Settings

# Apply the configuration; keep a handle on the returned Settings object
settings = initialize_settings()

# Smoke test: send one completion request through the configured LLM
llm = Settings.llm
response = llm.complete("Hello, can you tell me what model you are?")
print(f"LLM Test Response: {response}")

LLM Test Response: I am based on OpenAI's GPT-3 model. How can I assist you today?


In [4]:
# Step 6: Loading and Processing PDF Documents
def extract_text_from_pdf(pdf_path):
    """
    Extract text from a PDF file using PyPDF2.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of every page, each followed by a newline, concatenated in
        page order. Returns None if the file cannot be opened or parsed; the
        error is printed instead of being raised.
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # Iterate the pages directly and join once. A page that yields no
            # text contributes an empty line instead of aborting the document.
            return "".join(
                f"{page.extract_text() or ''}\n" for page in reader.pages
            )
    except Exception as e:
        # Best-effort by design: callers treat None as "extraction failed".
        print(f"Error extracting text from PDF: {e}")
        return None

def load_documents_from_pdfs(pdf_paths):
    """
    Load and process PDFs into LlamaIndex Document objects.

    Args:
        pdf_paths: Mapping of document type (e.g. "requirements") to the path
            of the PDF to load.

    Returns:
        Dict mapping each document type whose text was extracted to a
        Document carrying that text plus "source" and "type" metadata.
        Documents that fail to load are reported via print and omitted.

    Side effects:
        Writes the extracted text of each loaded document to
        'extracted_<doc_type>.txt' in the current directory for verification.
    """
    documents = {}
    for doc_type, pdf_path in pdf_paths.items():
        try:
            print(f"Loading {doc_type} from {pdf_path}...")
            text = extract_text_from_pdf(pdf_path)
            if not text:
                print(f"Failed to extract text from {pdf_path}")
                continue

            # Create a LlamaIndex Document
            documents[doc_type] = Document(text=text, metadata={"source": pdf_path, "type": doc_type})

            # Save extracted text to a file for verification. Explicit UTF-8 so
            # non-ASCII characters in the PDF text do not fail on platforms
            # whose default encoding cannot represent them.
            with open(f"extracted_{doc_type}.txt", "w", encoding="utf-8") as f:
                f.write(text)
            print(f"Saved extracted text to 'extracted_{doc_type}.txt'")
        except Exception as e:
            # Keep going so one bad document does not block the others.
            print(f"Error processing {pdf_path}: {e}")

    return documents

# Source PDFs, keyed by the document type used throughout the notebook
pdf_paths = dict(
    requirements="Requirements_Specification.pdf",
    task_estimates="SampleProjectTasksEstimates.pdf",
)

# Extract the text of each PDF into a LlamaIndex Document
documents = load_documents_from_pdfs(pdf_paths)

# Report size and a short preview of each extracted document
for doc_type, doc in documents.items():
    print(
        f"\n{doc_type.capitalize()} document:",
        f"Length: {len(doc.text)} characters",
        f"Preview: {doc.text[:300]}...",
        sep="\n",
    )

# Keep the complete text of every document for prompts that embed it verbatim
document_store = dict(
    (f"{doc_type}_full", doc.text) for doc_type, doc in documents.items()
)

Loading requirements from Requirements_Specification.pdf...
Saved extracted text to 'extracted_requirements.txt'
Loading task_estimates from SampleProjectTasksEstimates.pdf...
Saved extracted text to 'extracted_task_estimates.txt'

Requirements document:
Length: 4994 characters
Preview:  
Chicago WideCast  
Smart -Home Services  
 
 
 
 
 
Author: Atef Bader, PhD  
Last Edit: 7/5/2024  
Image/Model: dall-e-3 
 
 
Project Overview Statement:  
 
Chicago WideCast Smart -Home Services  is a startup company that is 
interested in automating all of its business process workflows utilizi...

Task_estimates document:
Length: 2305 characters
Preview:  
Task  Amount of Work  Productivity  Rate  
Project Plan      
Write Plan  56 pages  5 page s/Hour  
Review Plan      
Preparation for review    4 pages/Hour  
Review Meeting   8 pages/Hour  
Rework  39 defects  5 defects/Hour  
   
Risk Mitigation and Contingency Plan      
Write Plan  78 pages  5...


In [7]:
# Create vector indices for each document type
def create_vector_indices(documents):
    """Build and persist one vector index per document.

    Args:
        documents: Mapping of document type to a LlamaIndex Document.

    Returns:
        Dict mapping each document type to its VectorStoreIndex.

    Side effects:
        Persists each index to the directory '<doc_type>_index'.
    """
    # Build the parser once (it does not depend on the loop variable) and pass
    # it the chunking values configured on Settings. A bare from_defaults()
    # call would fall back to the parser's own defaults instead.
    parser = SimpleNodeParser.from_defaults(
        chunk_size=Settings.chunk_size,
        chunk_overlap=Settings.chunk_overlap,
    )

    indices = {}
    for doc_type, doc in documents.items():
        print(f"Creating vector index for {doc_type}...")
        # Parse the document into nodes
        nodes = parser.get_nodes_from_documents([doc])

        # Create a vector index; the embedding model comes from the global
        # Settings rather than from a service_context argument.
        index = VectorStoreIndex(nodes)
        indices[doc_type] = index

        # Save the index for future use
        index.storage_context.persist(f"{doc_type}_index")

    return indices

# Build (and persist) one vector index per loaded document
indices = create_vector_indices(documents)

# Wrap every index in a default query engine, keyed by document type
query_engines = dict(
    (doc_type, index.as_query_engine()) for doc_type, index in indices.items()
)

# One sanity-check question per document type
test_queries = dict(
    requirements="What TV plans does WideCast offer?",
    task_estimates="What are the productivity rates for writing plans?",
)

# Run each question against the engine for its document type
for doc_type, query in test_queries.items():
    print(f"\n{doc_type.capitalize()} query: '{query}'")
    response = query_engines[doc_type].query(query)
    print("Response:", response)

Creating vector index for requirements...
Creating vector index for task_estimates...

Requirements query: 'What TV plans does WideCast offer?'
Response: WideCast offers the following TV plans:

1. Basic - 50 channels
2. BasicPlus – 100 channels
3. Ultimate - 200 channels

Task_estimates query: 'What are the productivity rates for writing plans?'
Response: The productivity rates for writing plans are as follows:

- Project Plan: 5 pages per hour
- Risk Mitigation and Contingency Plan: 5 pages per hour
- Analysis Document: 5 pages per hour
- Design Document (DD): 4 pages per hour
- Test Plan (TP): 6 pages per day


In [8]:
# System prompt for each project role, keyed by the role identifier.
# The insertion order of this dict is the order in which roles are processed.
role_prompts = {
    "project_manager": """You are an experienced Project Manager. Your task is to create a detailed project plan 
    for the Chicago WideCast Smart-Home Services system based on the requirements provided. 
    Tag and number each requirement/use-case with unique identifiers. 
    Use the SampleProjectTasksEstimates.pdf format when creating tasks and estimates.""",

    "requirements_engineer": """You are a skilled Requirements Engineer. Your task is to analyze the provided 
    requirements for the Chicago WideCast Smart-Home Services system and create detailed, tagged, and numbered 
    requirements and use cases. Follow a standard format like REQ-001, REQ-002, etc., for requirements 
    and UC-001, UC-002, etc., for use cases.""",

    "system_engineer": """You are an experienced System Engineer. Review the requirements for the Chicago WideCast 
    Smart-Home Services system and create system architecture tasks with time estimates. 
    Consider integration points, system components, and technical constraints.""",

    "software_engineer": """You are a Software Engineer responsible for implementing the Chicago WideCast 
    Smart-Home Services system. Create coding tasks and estimates based on the requirements. 
    Consider frontend, backend, database, and API development tasks.""",

    "test_engineer": """You are a Test Engineer responsible for ensuring the quality of the Chicago WideCast 
    Smart-Home Services system. Create testing tasks and estimates covering unit tests, integration tests, 
    system tests, and user acceptance tests.""",

    "documentation_engineer": """You are a Documentation Engineer responsible for creating all documentation 
    for the Chicago WideCast Smart-Home Services system. Create documentation tasks and estimates covering 
    user manuals, system documentation, API documentation, and training materials."""
}

# The roles are exactly the prompt keys, in declaration order
roles = list(role_prompts)

In [None]:
# Function to generate tagged requirements using the Requirements Engineer prompt
def generate_tagged_requirements():
    """Ask the configured LLM to tag and number the requirements document.

    Reads the full requirements text from ``document_store`` and the
    requirements-engineer prompt from ``role_prompts``, embeds both in a single
    prompt and sends it to ``Settings.llm``.

    Returns:
        Whatever ``Settings.llm.complete`` returns for the prompt.

    Raises:
        KeyError: if "requirements_full" is missing from ``document_store`` or
            "requirements_engineer" is missing from ``role_prompts``.
    """
    requirements_text = document_store["requirements_full"]
    persona = role_prompts["requirements_engineer"]

    # Persona first, then the instructions, then the verbatim document
    prompt = f"""
    {persona}
    
    Based on the following Chicago WideCast Smart-Home Services requirements document, 
    create a comprehensive list of tagged and numbered requirements and use cases. 
    Format each requirement as REQ-XXX with a descriptive title and detailed description,
    and each use case as UC-XXX with a descriptive title and detailed description.
    
    Requirements Document:
    {requirements_text}
    
    Focus on extracting the actual requirements from the document rather than inventing new ones.
    Ensure each requirement is specific, measurable, achievable, relevant, and time-bound (SMART).
    """

    return Settings.llm.complete(prompt)

# Run the requirements-engineer prompt over the full requirements document
print("Generating tagged requirements...")
tagged_requirements = generate_tagged_requirements()

# Show the model's answer under a heading
print("\nTagged Requirements Preview:", tagged_requirements, sep="\n")



Generating tagged requirements...

Tagged Requirements Preview:
Based on the provided requirements document for the Chicago WideCast Smart-Home Services system, here is a comprehensive list of tagged and numbered requirements and use cases.

### Requirements

#### REQ-001: Service Offerings
**Description:** The system shall provide the following service offerings to customers:
- Online TV plans: Basic (50 channels), BasicPlus (100 channels), Ultimate (200 channels).
- Data plans: WiFi SpeedLane (100/5 Mbps), WiFi LightLane (250/30 Mbps).
- On-Demand Movie Streaming: Premium (100 movies/month), Ultimate (500 movies/month).
- Pay-Per-View (PPV) services: Live Sports Events and PPV Movies.
- Online Video Games: Premium (100 games/month), Ultimate (200 games/month).
- Home Security services: Security Cameras and Alarms, Remote Lock/Unlock for delivery personnel.
- Utilities: Remote control for lighting and thermostat.

#### REQ-002: User Roles
**Description:** The system shall support four

In [13]:
# Function to generate task estimates for each role
def generate_tasks_and_estimates(role):
    """Ask the configured LLM for a task and estimate table for one role.

    Args:
        role: Key into ``role_prompts`` (e.g. "project_manager"). Underscores
            are replaced by spaces and the result title-cased when the role is
            named inside the prompt.

    Returns:
        Whatever ``Settings.llm.complete`` returns for the prompt.

    Raises:
        KeyError: if either full document is missing from ``document_store``
            or ``role`` is not a key of ``role_prompts``.
    """
    requirements_text = document_store["requirements_full"]
    estimates_text = document_store["task_estimates_full"]
    persona = role_prompts[role]

    # Persona, task statement, both source documents verbatim, then the output format
    prompt = f"""
    {persona}
    
    Based on the following Chicago WideCast Smart-Home Services requirements document and 
    the SampleProjectTasksEstimates format, generate a detailed list of tasks 
    and time estimates for your role as a {role.replace('_', ' ').title()}.
    
    Requirements Document:
    {requirements_text}
    
    Sample Task Estimates Format:
    {estimates_text}
    
    Include:
    1. Task descriptions
    2. Estimated work amounts
    3. Productivity rates
    
    Format your response as a structured table similar to the sample provided.
    Be specific about the tasks relevant to your role and provide realistic estimates.
    """

    return Settings.llm.complete(prompt)

# Generate tasks and estimates for each role
tasks_and_estimates = {}
for role in roles:
    print(f"\nGenerating tasks for: {role}")
    try:
        tasks = generate_tasks_and_estimates(role)
    except Exception as e:
        # Record the failure in place of the result so the remaining roles
        # are still processed and the final plan shows what went wrong.
        print(f"Error processing role {role}: {e}")
        tasks_and_estimates[role] = f"Error generating tasks: {str(e)}"
        continue

    tasks_and_estimates[role] = tasks

    # Print preview of results
    print(f"\n=== {role.replace('_', ' ').title()} Tasks and Estimates Preview ===")
    print(tasks)

    # Save each role's tasks to a file, so the "saved" message below is true.
    # str() is needed because the LLM result may be a response object rather
    # than a plain string; UTF-8 keeps non-ASCII output writable everywhere.
    output_path = f"{role}_tasks_exp6.txt"
    try:
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(str(tasks))
    except OSError as e:
        # A failed write must not discard a successfully generated result.
        print(f"Could not save tasks for {role} to '{output_path}': {e}")
    else:
        print(f"Tasks for {role} saved to '{output_path}'")


Generating tasks for: project_manager

=== Project Manager Tasks and Estimates Preview ===
Here is a detailed project plan for the Chicago WideCast Smart-Home Services system, structured in a table format similar to the SampleProjectTasksEstimates.pdf format. Each task is tagged and numbered with unique identifiers based on the requirements provided.

| Task ID | Task Description                                      | Amount of Work         | Productivity Rate         |
|---------|------------------------------------------------------|------------------------|---------------------------|
| PM-01   | Project Plan                                          | 60 pages               | 5 pages/Hour              |
| PM-02   | Write Project Overview Statement                      | 5 pages                | 4 pages/Hour              |
| PM-03   | Review Project Plan                                   |                        |                           |
| PM-03.1 | Preparation for review       

In [None]:
# Create a comprehensive project plan
def create_project_plan(tasks_and_estimates, tagged_requirements):
    """Assemble the Markdown project plan from the generated pieces.

    Every line starts at column 0. Lines indented by four spaces are rendered
    by Markdown as a code block, which would stop the headings from rendering
    as headings.

    Args:
        tasks_and_estimates: Mapping of role key (e.g. "project_manager") to
            that role's generated tasks. Values are converted with ``str``.
            Roles missing from the mapping are rendered as "Not available".
        tagged_requirements: The tagged requirements; converted with ``str``.

    Returns:
        The complete plan as a single Markdown string ending in a newline.
    """
    # (section heading, key in tasks_and_estimates), in output order
    role_sections = (
        ("Project Manager", "project_manager"),
        ("Requirements Engineer", "requirements_engineer"),
        ("System Engineer", "system_engineer"),
        ("Software Engineer", "software_engineer"),
        ("Test Engineer", "test_engineer"),
        ("Documentation Engineer", "documentation_engineer"),
    )

    parts = [
        "# Chicago WideCast Smart-Home Services Project Plan (Experiment 6)",
        "",
        "## Tagged Requirements",
        str(tagged_requirements),
        "",
        "## Project Tasks and Estimates",
    ]
    for heading, key in role_sections:
        parts.extend([
            "",
            f"### {heading}",
            str(tasks_and_estimates.get(key, "Not available")),
        ])

    return "\n".join(parts) + "\n"

# Create the project plan
project_plan = create_project_plan(tasks_and_estimates, tagged_requirements)

# Save the project plan to a file. Explicit UTF-8 so non-ASCII characters in
# the LLM output cannot fail to encode under a different platform default.
with open("Chicago_WideCast_Project_Plan_Exp6.md", "w", encoding="utf-8") as f:
    f.write(project_plan)

print("\nProject plan has been saved to 'Chicago_WideCast_Project_Plan_Exp6.md'")