In [2]:
import time
import threading
import schedule
import requests
from datetime import datetime

from langchain_community.llms import Ollama
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
import gradio as gr

# Initialize components
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
llm = Ollama(
    model="llama2",
    base_url="http://localhost:11434",
    temperature=0.3
)

# Load ESG base documents and initialize the base vector store
def load_esg_base():
    loader = PyPDFLoader("C:/Users/Nidhi.Chakravarthy/Documents/Financial-Document-parser-using-RAG/esg_regulations.pdf")
    pages = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return splitter.split_documents(pages)

vector_store = FAISS.from_documents(load_esg_base(), embeddings)

# Analysis function for ESG compliance queries
def analyze_content(query, file):
    try:
        if file:
            loader = PyPDFLoader(file.name)
            user_docs = loader.load()
            splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            user_chunks = splitter.split_documents(user_docs)
            user_store = FAISS.from_documents(user_chunks, embeddings)
            # Merge user document vectors with base vector store
            user_store.merge_from(vector_store)
            retriever = user_store.as_retriever()
        else:
            retriever = vector_store.as_retriever()

        prompt = ChatPromptTemplate.from_template(
            """Analyze this ESG document for compliance risks:
{context}

Query: {input}

Format findings as:
- [RISK LEVEL] [SECTION]: [DESCRIPTION]"""
        )

        chain = (
            {"context": retriever, "input": RunnablePassthrough()}
            | prompt
            | llm
        )

        return chain.invoke(query)
    except Exception as e:
        return f"Error: {str(e)}"

# Global variable for tracking the last updated report timestamp
last_updated_report = None

# Function to fetch and update the regulatory report from an API
def fetch_and_update_regulatory_report():
    global last_updated_report, vector_store
    # Replace with your actual regulatory reports endpoint and API key
    api_url = "https://api.regulations.gov/v4/documents?sort=-postedDate&api_key=DEMO_KEY"
    
    try:
        response = requests.get(api_url)
        response.raise_for_status()
        data = response.json()
        documents = data.get("data", [])
        if not documents:
            print("No regulatory reports found.")
            return

        # Use the first (latest) report sorted by postedDate
        latest_doc = documents[0]
        updated_str = latest_doc.get("postedDate")
        if not updated_str:
            print("No posted date found in latest report.")
            return

        # Convert the posted date string to a datetime object (assumes ISO 8601 format)
        updated_datetime = datetime.fromisoformat(updated_str.rstrip("Z"))
        
        if last_updated_report is None or updated_datetime > last_updated_report:
            last_updated_report = updated_datetime
            print(f"New regulatory report detected: {last_updated_report}")

            # Retrieve the PDF download URL from the metadata (adjust key as required)
            doc_url = latest_doc.get("downloadUrl")
            if not doc_url:
                print("Download URL not found for the latest report.")
                return

            pdf_response = requests.get(doc_url)
            if pdf_response.status_code != 200:
                print("Failed to download the latest regulatory report.")
                return

            # Save the PDF locally for processing
            pdf_filename = "latest_regulatory_report.pdf"
            with open(pdf_filename, "wb") as f:
                f.write(pdf_response.content)

            # Process the newly fetched report
            loader = PyPDFLoader(pdf_filename)
            pages = loader.load()
            splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            new_documents = splitter.split_documents(pages)

            # Merge the new documents into the existing vector store
            new_vector_store = FAISS.from_documents(new_documents, embeddings)
            vector_store.merge_from(new_vector_store)
            print("Vector store updated with the latest regulatory report.")
        else:
            print("No new regulatory reports found.")
    except Exception as e:
        print(f"Error while fetching regulatory report: {e}")

# Polling function that schedules regular API checks for regulatory updates
def poll_regulatory_reports():
    fetch_and_update_regulatory_report()  # Run once immediately on startup
    schedule.every(1).hours.do(fetch_and_update_regulatory_report)
    while True:
        schedule.run_pending()
        time.sleep(60)

# Gradio interface for the ESG analyzer
with gr.Blocks(title="ESG Compliance Analyzer") as app:
    gr.Markdown("## ESG Document Analyzer with Local LLM")
    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="Upload Document (PDF)")
            query_input = gr.Textbox(label="Your Compliance Question")
            submit_btn = gr.Button("Analyze")
        with gr.Column():
            output = gr.Textbox(label="Analysis Results", interactive=False)

    submit_btn.click(
        fn=analyze_content,
        inputs=[query_input, file_input],
        outputs=output
    )

# Main: start the regulatory polling thread and launch the Gradio UI
if __name__ == "__main__":
    # Start polling for regulatory updates in a background thread
    polling_thread = threading.Thread(target=poll_regulatory_reports, daemon=True)
    polling_thread.start()
    
    # Launch the Gradio app (this will block and open the UI in your browser)
    app.launch(server_name="0.0.0.0", share=False)


  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  llm = Ollama(


ValueError: File path C:/Users/Nidhi.Chakravarthy/Documents/Financial-Document-parser-using-RAG/esg_regulations.pdf is not a valid file or url