In [6]:
import streamlit as st
import pinecone
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import Ollama
from langchain.chains import RetrievalQA
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import plotly.graph_objects as go
import pandas as pd
import re
from typing import List, Dict
import os

# Initialize session state
if 'processed_files' not in st.session_state:
    st.session_state.processed_files = []

def initialize_pinecone():
    """Initialize Pinecone with your API key"""
    pinecone.init(
        api_key=st.secrets["PINECONE_API_KEY"],
        environment=st.secrets["PINECONE_ENV"]
    )
    index_name = "financial-reports"
    
    # Create index if it doesn't exist
    if index_name not in pinecone.list_indexes():
        pinecone.create_index(
            name=index_name,
            metric='cosine',
            dimension=384  # dimension for all-MiniLM-L6-v2
        )
    return pinecone.Index(index_name)

def process_pdf(file) -> List[str]:
    """Process PDF and return chunks"""
    # Save uploaded file temporarily
    with open("temp.pdf", "wb") as f:
        f.write(file.getvalue())
    
    # Load and split the PDF
    loader = PyPDFLoader("temp.pdf")
    documents = loader.load()
    
    # Split documents into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100
    )
    chunks = text_splitter.split_documents(documents)
    
    # Clean up
    os.remove("temp.pdf")
    return chunks

def extract_financial_metrics(text: str) -> Dict:
    """Extract key financial metrics from text"""
    metrics = {
        'revenue': None,
        'profit': None,
        'eps': None,
    }
    
    # Simple regex patterns (you might want to make these more robust)
    revenue_pattern = r'\$?([\d,]+(?:\.\d+)?)\s*(?:million|billion)?\s*(?:in\s+)?revenue'
    profit_pattern = r'\$?([\d,]+(?:\.\d+)?)\s*(?:million|billion)?\s*(?:in\s+)?(?:net\s+)?profit'
    eps_pattern = r'EPS of \$?([\d,]+(?:\.\d+)?)'
    
    # Extract metrics
    revenue_match = re.search(revenue_pattern, text, re.IGNORECASE)
    if revenue_match:
        metrics['revenue'] = float(revenue_match.group(1).replace(',', ''))
    
    profit_match = re.search(profit_pattern, text, re.IGNORECASE)
    if profit_match:
        metrics['profit'] = float(profit_match.group(1).replace(',', ''))
    
    eps_match = re.search(eps_pattern, text, re.IGNORECASE)
    if eps_match:
        metrics['eps'] = float(eps_match.group(1).replace(',', ''))
    
    return metrics

def create_financial_charts(metrics: Dict):
    """Create financial visualization charts"""
    # Create a bar chart
    if any(metrics.values()):
        fig = go.Figure()
        for metric, value in metrics.items():
            if value:
                fig.add_trace(go.Bar(
                    name=metric.capitalize(),
                    x=[metric],
                    y=[value]
                ))
        
        fig.update_layout(
            title="Key Financial Metrics",
            yaxis_title="Value ($)",
            showlegend=False
        )
        return fig
    return None

def main():
    st.title("Financial Report Analysis System")
    
    # Initialize components
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    index = initialize_pinecone()
    vectorstore = Pinecone(index, embeddings, "text")
    
    # Initialize Ollama
    llm = Ollama(
        base_url="http://localhost:11434",
        model="llama2",
        callback_manager=CallbackManager([StreamingStdOutCallbackHandler()])
    )
    
    # File upload
    uploaded_file = st.file_uploader("Upload Financial Report (PDF)", type="pdf")
    
    if uploaded_file and uploaded_file.name not in st.session_state.processed_files:
        with st.spinner("Processing document..."):
            # Process the PDF
            chunks = process_pdf(uploaded_file)
            
            # Store in Pinecone
            vectorstore.add_documents(chunks)
            
            # Mark file as processed
            st.session_state.processed_files.append(uploaded_file.name)
            st.success(f"Processed {uploaded_file.name}")
            
            # Extract and display metrics
            all_text = " ".join([chunk.page_content for chunk in chunks])
            metrics = extract_financial_metrics(all_text)
            
            # Create and display charts
            fig = create_financial_charts(metrics)
            if fig:
                st.plotly_chart(fig)
    
    # Query interface
    st.subheader("Ask Questions About the Reports")
    query = st.text_input("Enter your question:")
    
    if query:
        with st.spinner("Generating response..."):
            qa_chain = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=vectorstore.as_retriever()
            )
            response = qa_chain.run(query)
            st.write(response)
    
    # Generate executive summary
    if st.button("Generate Executive Summary"):
        with st.spinner("Generating executive summary..."):
            summary_prompt = """
            Generate an executive summary of the financial report covering:
            1. Key financial highlights
            2. Major developments
            3. Business outlook
            Be concise and focus on the most important information.
            """
            qa_chain = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=vectorstore.as_retriever()
            )
            summary = qa_chain.run(summary_prompt)
            st.subheader("Executive Summary")
            st.write(summary)

if __name__ == "__main__":
    main()

FileNotFoundError: No secrets files found. Valid paths for a secrets.toml file are: C:\Users\eKasi_SWT_COM00862\.streamlit\secrets.toml, C:\Users\eKasi_SWT_COM00862\Desktop\Financial_report_analysis_RAG_system\.streamlit\secrets.toml