In [None]:
import os
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.indexes import VectorstoreIndexCreator
from langchain_experimental.agents.agent_toolkits.csv.base import create_csv_agent
from langchain.agents.agent_types import AgentType
from langchain.memory import ConversationBufferMemory
import tiktoken
from langchain.text_splitter import CharacterTextSplitter
import PyPDF2
import warnings
warnings.filterwarnings('ignore')

In [None]:
class Document:
    """Minimal text container exposing ``page_content`` and ``metadata``.

    Args:
        text: The text stored in ``page_content``.
        metadata: Optional mapping of extra information about the text
            (for example its source file). It is copied, so later changes
            to the caller's mapping do not affect this document. Defaults
            to an empty dict.
    """

    def __init__(self, text, metadata=None):
        self.page_content = text
        # Build a fresh dict per instance so documents never share metadata.
        self.metadata = dict(metadata) if metadata is not None else {}

class PDFTextLoader:
    """Load the text of PDF files as one ``Document`` per page.

    Args:
        file_paths: Iterable of PDF paths. A single path (``str`` or
            ``os.PathLike``) is also accepted and treated as a one-item list.
    """

    def __init__(self, file_paths):
        # A bare string is itself iterable: without this guard ``load`` would
        # loop over its characters and try to open each one as a file.
        if isinstance(file_paths, (str, os.PathLike)):
            file_paths = [file_paths]
        self.file_paths = file_paths

    def load(self):
        """Read every configured PDF and return its pages as documents.

        Pages whose extracted text is empty or whitespace-only are skipped.
        Each returned document records where it came from in ``metadata``:
        ``source`` (the file path as a string) and ``page`` (0-based index
        of the page within that file).

        Returns:
            list: ``Document`` objects in file order, then page order.

        Raises:
            OSError: If a file cannot be opened (propagated from ``open``).
        """
        documents = []
        for file_path in self.file_paths:
            with open(file_path, "rb") as file:
                reader = PyPDF2.PdfReader(file)
                for page_index, page in enumerate(reader.pages):
                    text = page.extract_text()
                    # Nothing useful to index on blank pages.
                    if not text or not text.strip():
                        continue
                    document = Document(text)
                    # Keep provenance so an answer can be traced to a page.
                    document.metadata.update(
                        source=str(file_path), page=page_index
                    )
                    documents.append(document)
        return documents
# PDF files that form the knowledge base (paths relative to the working directory).
txt_file_paths = [
    'Nodal_2.pdf', 'Nodal_3.pdf', 'Nodal_4.pdf', 'Nodal_5.pdf',
    'Nodal_6.pdf', 'Nodal_7.pdf', 'Nodal_8.pdf', 'Nodal_9.pdf',
]

# Read every page of every PDF into a document object.
loader = PDFTextLoader(file_paths=txt_file_paths)
documents = loader.load()

# Split on single newlines. CharacterTextSplitter only cuts at its separator,
# and the default ("\n\n") seldom appears in text extracted from PDFs, so with
# the default a whole page can end up as one chunk far larger than chunk_size.
# NOTE(review): assumes the extracted text is newline-delimited - confirm on a sample page.
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200)
data = text_splitter.split_documents(documents)

In [None]:
# Use the key already present in the environment; prompt for it (without
# echoing) only when it is missing. This keeps the secret out of the notebook
# and avoids overwriting a valid key with a placeholder string.
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [None]:
# Chat model that writes the answers, and the conversation memory kept under
# the 'chat_history' key.
llm = ChatOpenAI(model_name="gpt-4", temperature=0.3)
memory = ConversationBufferMemory(return_messages=True, memory_key='chat_history')

# Embed the text chunks and index them in a FAISS vector store.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(data, embedding=embeddings)

# Combine retriever, model and memory into one conversational QA chain.
conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    memory=memory,
)

In [None]:
# Ask which details must accompany an Ancillary Service Trade.
query = (
    "what information should I provide "
    "for an ancillary service trade?"
)
result = conversation_chain({"question": query})
answer = result["answer"]
answer  # last expression: shown as the cell output

'For an Ancillary Service Trade, you should provide the following information:\n\n(a) The buying Qualified Scheduling Entity (QSE);\n(b) The selling QSE;\n(c) The type of Ancillary Service;\n(d) The quantity in MW; and\n(e) The first and last hours of the trade. \n\nIf the trade is for Responsive Reserve Service (RRS), the QSE should also indicate the quantity of the service that is provided from:\n\n(i) Resources providing Primary Frequency Response;\n(ii) Fast Frequency Response (FFR) Resources; and\n(iii) Load Resources controlled by high-set under-frequency relays. \n\nFor Emergency Responsive Service (ECRS), the QSE should indicate the quantity of the service that is provided from Resources that are manually dispatched and those that are System-Wide Economic Dispatch (SCED)-dispatchable.'

In [None]:
# Ask about the submittal timeline for Transmission Facility Outage requests.
# The leading space is part of the original prompt and is kept verbatim.
query = (
    " What are the requirements from ERCOT on the Submittal Timeline "
    "for Transmission Facility Outage Requests?"
)
result = conversation_chain({"question": query})
answer = result["answer"]
answer  # last expression: shown as the cell output

'ERCOT requires Transmission Service Providers (TSPs) to submit all requests for Planned Outages and Maintenance Outages or changes to existing approved Outages of Transmission Elements in the Network Operations Model no later than the minimum amount of time between the submittal of a request to ERCOT for approval of a proposed Outage and the scheduled start date of the proposed Outage. The minimum time varies depending on the type of outage:\n\n- For a Forced Outage or Maintenance Outage Level I, the request must be submitted immediately.\n- For a Maintenance Outage Level II, the request must be submitted at least two days in advance, but ERCOT may reduce this to one day on a case-by-case basis for reliability purposes.\n- For a Maintenance Outage Level III or a Planned Outage, the request must be submitted at least three days in advance.\n- For a Simple Transmission Outage, the request must be submitted at least one day in advance.'

In [None]:
# Definition lookup: what an "Opportunity Outage" is.
query = "What is the Opportunity Outage?"
result = conversation_chain(
    {"question": query}
)
answer = result["answer"]
answer  # last expression: shown as the cell output

'An "Opportunity Outage" is a special category of Planned Outages that may be approved by ERCOT when a specific Resource has been forced Off-Line due to a Forced Outage and the Resource has been previously approved for a Planned Outage during the next two days. When a Forced Outage occurs on a Resource that has an approved Outage scheduled within the following two days, the Resource may remain Off-Line and start the approved Outage earlier than scheduled. Opportunity Outages of Transmission Facilities may also be approved by ERCOT when a specific Resource is Off-Line due to a Forced, Planned or Maintenance Outage.'

In [None]:
# Ask how an Energy Storage Resource (ESR) is to be registered.
query = (
    "Should Energy Storage Resource (ESR) registered with ERCOT "
    "as a Generation Resource, or, a Controllable Load Resource?"
)
result = conversation_chain({"question": query})
answer = result["answer"]
answer  # last expression: shown as the cell output

'An Energy Storage Resource (ESR) should be understood to apply to both Generation Resources and Controllable Load Resources, as per the requirements of all ERCOT Protocols and Other Binding Documents. This is unless the Protocols explicitly provide otherwise.'

In [None]:
# Ask for the priority order used when the Ancillary Service Plan is reduced.
query = (
    "When ERCOT must reduce the Ancillary Service Plan for purposes of the DAM "
    "due to insufficient Ancillary Service Offers, what is the order of priority "
    "that ERCOT shall follow to preserve the Ancillary Service Plan?"
)
result = conversation_chain({"question": query})
answer = result["answer"]
answer  # last expression: shown as the cell output

'The Electric Reliability Council of Texas (ERCOT) preserves the Ancillary Service Plan in the Day-Ahead Market (DAM) in the following order of priority when it must reduce it due to insufficient Ancillary Service Offers: \n1. Regulation Up (Reg-Up)\n2. Regulation Down (Reg-Down)\n3. Responsive Reserve (RRS)\n4. ERCOT Contingency Reserve Service (ECRS)\n5. Non-Spin.'

In [None]:
## More complex questions!

In [None]:
# Ask how physical responsive ancillary service capability is calculated.
# The trailing space is part of the original prompt and is kept verbatim.
query = (
    "How do you calculate physical responsive ancillary service "
    "Capability across ERCOT? "
)
result = conversation_chain({"question": query})
answer = result["answer"]
answer  # last expression: shown as the cell output

'The method for calculating physical responsive ancillary service Capability across the Electric Reliability Council of Texas (ERCOT) is not explicitly mentioned in the provided context. However, it does mention that ERCOT continuously monitors the capacity of each Resource to provide services like Reg-Up, Reg-Down, and ECRS, considering factors like the Resource Status, actual generation or Load, the Ancillary Service award for the respective service, the High Sustained Limit (HSL), the Low Sustained Limit (LSL), ramp rates, and the Resource’s qualification to provide the service. For Load Resources, the amount of ECRS capacity provided must be measured as the Load Resource’s average Load level in the last five minutes. A Resource that is capable of providing ECRS and that has a Resource Status code of ONSC and an ECRS award is considered to be providing capability to the extent that it is not using that capacity to provide energy or other Ancillary Services.'

In [None]:
# Ask who may provide Must-Run Alternative services.
query = "Who is eligible to provide must-run alternative services?"
result = conversation_chain(
    {"question": query}
)
answer = result["answer"]
answer  # last expression: shown as the cell output

'According to ERCOT, a Qualified Scheduling Entity (QSE) may provide Must-Run Alternative (MRA) Service if it meets all registration and qualification criteria in Section 16.2, Registration and Qualification of Qualified Scheduling Entities.'

In [None]:
# Ask which criteria apply when must-run alternatives are procured.
query = "What criteria are used by ERCOT for procuring must-run alternatives?"
result = conversation_chain(
    {"question": query}
)
answer = result["answer"]
answer  # last expression: shown as the cell output

'ERCOT uses several criteria for procuring Must-Run Alternatives (MRAs). These include:\n\n1. Any transmission upgrades that can be implemented prior to the time period for which the performance deficiency has been identified.\n\n2. ERCOT staff will select the option or combination of options that most cost-effectively address the performance deficiency, as long as the cost of the selected options is justified given the possible impact to Customers due to the performance deficiency.\n\n3. In selecting the most cost-effective option, ERCOT will consider factors such as:\n   - The degree to which the option addresses the identified performance deficiency;\n   - The total expected cost of each option;\n   - Expected unit performance of the Generation Resource proposed for suspension of operations, including start-up time, minimum run-time, minimum down-time, and historical unit outage data;\n   - Operational limitations of proposed MRAs, including start-up times, minimum run-times, ramp p

In [None]:
# Ask whether confidential market information may be shared without a notice.
query = (
    "Can ERCOT share confidential market information "
    "without issuing market notices?"
)
result = conversation_chain({"question": query})
answer = result["answer"]
answer  # last expression: shown as the cell output

"No, the Electric Reliability Council of Texas (ERCOT) is required to maintain the confidentiality of market information. The protocols state that ERCOT will not post information if it would reveal any individual Market Participant’s Protected Information. This suggests that any sharing of confidential market information would be against ERCOT's protocols."

In [None]:
# Ask how a hub's real-time settlement point price is determined.
query = (
    "How do you determine the real-time settle price point of a hub "
    "for a given settlement interval?"
)
result = conversation_chain({"question": query})
answer = result["answer"]
answer  # last expression: shown as the cell output

'The Real-Time Settlement Point Price of the Hub for a given 15-minute Settlement Interval is calculated as follows: RTSPP ERCOT345Bus = Max [-$251, (RTRDP + (HUBLMPERCOT345Bus,y * RNWF y))] where RTRDP = (RNWF y * RTRDPA y) and RNWF y = TLMP y / TLMP y. \n\nHere, RTSPP ERCOT345Bus is the Real-Time Settlement Point Price at the Hub, for the 15-minute Settlement Interval. RTRDP is the Real-Time Reliability Deployment Price for Energy, reflecting the impact of reliability deployments on energy prices that are calculated from the Real-Time Reliability Deployment Price Adder for Energy. HUBLMP ERCOT345Bus,y is the Hub Locational Marginal Price for the ERCOT345Bus, for the SCED Interval y. RNWF y is the Resource Node Weighting Factor per interval, used in the Resource Node Settlement Point Price calculation for the portion of the SCED interval y within the Settlement Interval. TLMP y is the duration of the portion of the SCED interval y within the 15-minute Settlement Interval.'