In [None]:
!pip3 install pandas openai langchain langchain_community rapidocr-onnxruntime pypdf faiss-cpu langchain_openai

In [None]:
import os
import openai
import pandas as pd

from datetime import datetime

from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.llms import LlamaCpp
from langchain.prompts.prompt import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
# from questions import questions
# from answers import responses
# from helper import (
#     text_to_csv,
#     get_time_difference,
#     filter_csv,
#     read_csv,
#     read_counter,
#     increment_counter,
#     text_to_file,
# )
from secret import OPENAI_API_KEY

# Publish the key from the local `secret` module through the environment and
# hand the same value to the `openai` module.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
openai.api_key = os.environ["OPENAI_API_KEY"]

# Directory the FAISS index is saved to and loaded from.
vector_store_path = "./vector_store"

# Embedding client used both when building and when loading the index.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])

In [None]:
def create_vector_store(file_path):
    """Split a PDF into chunks, index the chunks in FAISS and save the index.

    The PDF is read with ``PyPDFLoader`` (``extract_images=True``), split by a
    ``RecursiveCharacterTextSplitter``, embedded with the module-level
    ``embeddings`` object, and the index is written to ``vector_store_path``.

    Args:
        file_path: Path of the PDF file to index.

    Returns:
        The split documents produced by ``load_and_split``.
    """
    pdf_loader = PyPDFLoader(file_path, extract_images=True)

    # Chunks of up to 1000 characters with a 20 character overlap; separators
    # are tried in the listed order and are not kept in the chunk text.
    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ""],
        keep_separator=False,
        chunk_size=1000,
        chunk_overlap=20,
    )

    chunks = pdf_loader.load_and_split(text_splitter=splitter)

    # Build the index and persist it in one go.
    FAISS.from_documents(chunks, embedding=embeddings).save_local(vector_store_path)

    return chunks

def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string; empty when ``docs`` yields nothing.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)


def get_retriever(k=20):
    """Load the persisted FAISS index and expose it as a retriever.

    Args:
        k: Number of documents to request per query; forwarded as
            ``search_kwargs={"k": k}``. Defaults to 20, the value that used
            to be hard-coded.

    Returns:
        The retriever returned by ``vectorstore.as_retriever``.
    """
    # NOTE(security): allow_dangerous_deserialization=True opts in to
    # pickle-based loading of the stored index. Only point vector_store_path
    # at directories written by this notebook, never at untrusted files.
    vectorstore = FAISS.load_local(
        vector_store_path, embeddings, allow_dangerous_deserialization=True
    )

    return vectorstore.as_retriever(search_kwargs={"k": k})

In [None]:
# Base name (without extension) of the PDF to process.
doc_name = "Doc-5"
# Location of that PDF inside the ./docs folder.
file_path = "./docs/{}.pdf".format(doc_name)

In [None]:
# Index the selected PDF; the split documents that were indexed are kept in `docs`.
docs = create_vector_store(file_path)

In [None]:
# Chat model for the first generation pass.
# Alternative model id kept for reference:
#   "ft:gpt-3.5-turbo-1106:mobilelive-inc:irap-model:8mTN1w61"
llm = ChatOpenAI(
    temperature=0.5,
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model="gpt-4-0125-preview",
)

# Retriever backed by the FAISS index saved under vector_store_path.
retriever = get_retriever()

In [None]:
from answers import responses

In [None]:
def get_prompt(template=None):
    """Wrap a template string in a ``PromptTemplate``.

    Args:
        template: Template text containing the ``{question}`` and ``{context}``
            placeholders. When ``None``, a generic "answer from the given
            context" template is used.

    Returns:
        A ``PromptTemplate`` with the input variables ``context`` and ``question``.
    """
    if template is None:
        # Default question-answering instructions.
        template = """You are an AI assistant to answer question as per the given context. 
        Use the following pieces of retrieved context to answer the question. 
        Please provide answer as per the given context.
      Question: {question}
      Context: {context}
      Answer:
      """

    return PromptTemplate(
        template=template,
        input_variables=["context", "question"],
    )

In [None]:
import os

# Output folder for the current document.
folder_path = f"./output/{doc_name}"

# exist_ok=True makes the cell safe to re-run and avoids the gap between a
# separate os.path.exists() check and the directory creation.
os.makedirs(folder_path, exist_ok=True)

In [None]:
def get_prompt_with_qa():
    """Build the WBS-generation prompt, seeded with the known question/answer pairs.

    Each entry of the module-level ``responses`` mapping is appended to the
    instructions as a ``QUES:`` / ``ANS:`` pair, followed by the ``{question}``
    and ``{context}`` placeholders.

    Curly braces inside the questions and answers are doubled before being
    added, so ``PromptTemplate`` renders them as literal text instead of
    interpreting them as template variables.

    Returns:
        A ``PromptTemplate`` with the input variables ``context`` and ``question``.
    """

    def _escape_braces(text):
        # "{{" and "}}" render as literal "{" and "}" in the formatted prompt.
        return text.replace("{", "{{").replace("}", "}}")

    template = """You are an AI assistant which is acting as a Technical Project Manager. 
    Provide the required information to the best of your knowledge.

    Please generate CSV-formatted data containing the following fields:
     "Level,Category,Subcategory,User Story,Effort (Days)"
    Ensure that the CSV format adheres to standard conventions, with comma-separated values and proper line breaks.

    CSV_FORMAT:
        Level,Category,Subcategory,User Story,Effort (Days)

    OBJECTIVE:
    The objective is to develop a structured and informative WBS that will serve as a clear roadmap for project execution, encompassing both technical and non-technical aspects

    CONTEXT:
    Consider the following sets of Questions and Answers for context:\n"""
    for question, response in responses.items():
        qa_question = _escape_braces(question.strip())
        qa_answer = _escape_braces(response.strip())
        template += f"QUES: {qa_question}\nANS: {qa_answer}\n\n"

    # Placeholders filled in by the chain at invocation time.
    template +="""
      Question: {question}
      Context: {context}
      Answer:
      """

    prompt_template = PromptTemplate(
        template=template,
        input_variables=[
            "context",
            "question",
        ],
    )

    return prompt_template

In [None]:
def get_prompt_for_dev_tasks(contents):
    """Build the refinement prompt that embeds previously generated requirements.

    Args:
        contents: Text to place under the requirements heading of the prompt.
            Curly braces in it are doubled first, so ``PromptTemplate`` renders
            them as literal text instead of interpreting them as template
            variables.

    Returns:
        A ``PromptTemplate`` with the input variables ``context`` and ``question``.
    """
    # str() keeps non-string inputs working the way plain f-string
    # interpolation did before the escaping step was added.
    escaped_contents = str(contents).replace("{", "{{").replace("}", "}}")

    template = f"""You are an AI assistant which is acting as a Technical Project Manager. 
    Provide the required information to the best of your knowledge.

    Please generate CSV-formatted data containing the following fields:
     "Level,Category,Subcategory,User Story,Effort (Days)"
    Ensure that the CSV format adheres to standard conventions, with comma-separated values and proper line breaks.

    REQUIREMEMTS:
    \n{escaped_contents}\n
    """

    # Placeholders filled in by the chain at invocation time (plain string, so
    # the braces here are template variables, not f-string fields).
    template +="""
      Question: {question}
      Context: {context}
      Answer:
      """

    prompt_template = PromptTemplate(
        template=template,
        input_variables=[
            "context",
            "question",
        ],
    )

    return prompt_template

In [None]:
# First-pass prompt: WBS instructions plus the known question/answer pairs.
prompt = get_prompt_with_qa()

# The invoked input is passed through unchanged as "question"; the same input
# is sent to the retriever and the hits are joined into "context".
rag_chain = (
    {"question": RunnablePassthrough(), "context": retriever | format_docs}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Task description for the first pass. It is handed to `rag_chain.invoke`, the
# returned text is stored in `first_pass`, and that text is printed.
query = """TASK:
    Your task is to create a comprehensive list of user stories covering all major aspects of the software project, 
    including Analysis, Design, Development, Testing, and Deployment. 
    Please provide detailed user stories for DESIGN and DEVELOPMENT (at least 10-12 user stories for each subcategory). 
    Ensure that the user stories are structured hierarchically and organized logically to facilitate project management. You need to include as much technical details as possible.
    Additionally, provide the estimate the effort (in days) with some buffer required for each user story and provide a total of EFFORTS column. 
   """

first_pass = rag_chain.invoke(query)
print(first_pass)

In [None]:
# Second pass uses the same model at a higher temperature (0.7).
llm = ChatOpenAI(
    temperature=0.7,
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model="gpt-4-0125-preview",
)

retriever = get_retriever()

# The first-pass output becomes the requirements section of the new prompt.
prompt = get_prompt_for_dev_tasks(first_pass)

rag_chain = (
    {"question": RunnablePassthrough(), "context": retriever | format_docs}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Task description for the second pass: asks for the given requirements to be
# broken down further. It is handed to `rag_chain.invoke`, the returned text
# is stored in `second_pass`, and that text is printed.
query = """ TASK:
    You have been given an high-level list of requirements covering all major aspects of the software project. 
    Your task is to further divide the given requirements, also include technical detailes in them and keep them as detailed as possible. 
    Please provide atleast 10-12 detailed requirements for each subcategory. Keep your major focus on DESIGN and DEVELOPMENT tasks and minor focus on others.
    Additionally, provide the estimate the effort (in days) to the best of your knowledge, 
    with some buffer required for each requirement.

    TOTAL EFFORTS:
    """

second_pass = rag_chain.invoke(query)
print(second_pass)

In [None]:
# Stand-alone CSV prompt: only the role and output-format instructions, with
# no question/answer pairs and no earlier requirements embedded.
template = """You are an AI assistant which is acting as a Technical Project Manager. 
Provide the required information to the best of your knowledge.

Please generate CSV-formatted data containing the following fields:
 "Level,Category,Subcategory,User Story,Effort (Days)"
Ensure that the CSV format adheres to standard conventions, with comma-separated values and proper line breaks.

  Question: {question}
  Context: {context}
  Answer:
  """

prompt_template = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)
prompt = prompt_template

llm = ChatOpenAI(
    temperature=0.7,
    openai_api_key=os.environ["OPENAI_API_KEY"],
    model="gpt-4-0125-preview",
)

retriever = get_retriever()

# The invoked input is passed through unchanged as "question"; the same input
# is sent to the retriever and the hits are joined into "context".
rag_chain = (
    {"question": RunnablePassthrough(), "context": retriever | format_docs}
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Task description for the third pass. It is handed to `rag_chain.invoke`, the
# returned text is stored in `third_pass`, and that text is printed.
query = """TASK:
    Your task is to create a comprehensive list of user stories covering all major aspects of the software project, 
    including Analysis, Design, Development, Testing, and Deployment. 
    Please provide at least 10-12 user stories for each category and subcategory. 
    Ensure that the user stories are structured hierarchically and organized logically to facilitate project management. You need to include as much technical details as possible.
    Additionally, provide the estimate the effort (in days) with some buffer required for each user story and provide a total of EFFORTS column. 
   """

third_pass = rag_chain.invoke(query)
print(third_pass)