# In [None]:
import json
import re
from typing import Dict, List, Optional

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
import torch
from scipy.stats import zscore
from sklearn.decomposition import PCA
from sklearn.ensemble import IsolationForest
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from transformers import AutoModelForSequenceClassification, AutoTokenizer

class AdvancedFinancialAnalyzer:
    """
    Text and tabular analysis helpers for financial reports.

    Bundles FinBERT sentiment scoring, regex-based phrase extraction,
    Isolation Forest anomaly flagging, TF-IDF document similarity and a
    one-step linear extrapolation of a few financial metrics.
    """

    # Columns written by detect_financial_anomalies. They are excluded from
    # its feature set so that running it on an already-flagged frame does not
    # feed earlier results back into the model.
    _ANOMALY_COLUMNS = ('is_anomaly', 'anomaly_score')

    def __init__(self):
        """
        Load the FinBERT tokenizer/model and define the phrase patterns.
        """
        # Load pre-trained financial sentiment model
        self.sentiment_tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")
        self.sentiment_model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

        # Advanced feature extraction patterns.
        # Every group is non-capturing, so re.findall yields the full matched
        # phrase rather than a sub-group.
        self.advanced_patterns = {
            'risk_indicators': [
                r'(?:high\s*risk|significant\s*risk|potential\s*liability)',
                r'(?:market\s*volatility|economic\s*uncertainty)',
                r'(?:regulatory\s*challenge|compliance\s*issue)'
            ],
            'growth_signals': [
                r'(?:strategic\s*expansion|market\s*opportunity|innovation)',
                r'(?:new\s*market\s*entry|product\s*development)',
                r'(?:merger\s*acquisition|strategic\s*partnership)'
            ]
        }

    def extract_advanced_insights(self, text: str) -> Dict:
        """
        Collect pattern matches and a sentiment score for one document.

        Args:
            text: Document text to analyse.

        Returns:
            A dict with one list of matched phrases (case-insensitive search)
            per category in ``self.advanced_patterns`` -- always including
            ``'risk_indicators'`` and ``'growth_signals'`` -- plus
            ``'sentiment_score'`` from ``analyze_financial_sentiment``.
        """
        insights = {
            'risk_indicators': [],
            'growth_signals': [],
            'sentiment_score': self.analyze_financial_sentiment(text)
        }

        # setdefault keeps this working when a category is added to
        # advanced_patterns without a matching key above.
        for category, patterns in self.advanced_patterns.items():
            found = insights.setdefault(category, [])
            for pattern in patterns:
                found.extend(re.findall(pattern, text, re.IGNORECASE))

        return insights

    def analyze_financial_sentiment(self, text: str) -> float:
        """
        Score the sentiment of ``text`` with FinBERT.

        Only the first 512 tokens are scored (the input is truncated).

        Args:
            text: Document text to score.

        Returns:
            ``(P(positive) - P(negative)) * 100``, i.e. a value between -100
            and 100. Returns ``0.0`` for blank text, and also when scoring
            fails (in which case a Streamlit warning is shown).
        """
        # Nothing to score; skip the model call entirely.
        if not text or not text.strip():
            return 0.0

        try:
            inputs = self.sentiment_tokenizer(text, return_tensors="pt",
                                              truncation=True,
                                              max_length=512)

            with torch.no_grad():
                outputs = self.sentiment_model(**inputs)

            # Probabilities for the single input sequence
            probabilities = torch.softmax(outputs.logits, dim=1)[0]

            # Take the class order from the model config instead of assuming
            # index 0/1/2 == negative/neutral/positive: a hard-coded order
            # silently scores the wrong classes when the checkpoint orders
            # its labels differently.
            id2label = self.sentiment_model.config.id2label
            sentiment_scores = {
                str(id2label[index]).lower(): probability.item()
                for index, probability in enumerate(probabilities)
            }

            # Normalized sentiment score
            return (sentiment_scores['positive'] - sentiment_scores['negative']) * 100
        except Exception as e:
            # Deliberate best-effort: a failed score must not abort the page.
            st.warning(f"Sentiment analysis error: {e}")
            return 0.0

    def detect_financial_anomalies(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Flag anomalous rows with an Isolation Forest over the numeric columns.

        The input frame is left untouched; the flags are added to a copy.

        Args:
            df: Input data. Non-numeric columns are ignored by the model.

        Returns:
            A copy of ``df`` with two extra columns: ``'is_anomaly'``
            (``-1`` for flagged rows, ``1`` otherwise) and
            ``'anomaly_score'`` (the forest's decision function value).
            When there are no rows or no numeric columns, every row is
            marked ``1`` with a score of ``0.0``.
        """
        result = df.copy()

        # Select numeric columns for anomaly detection, leaving out flags
        # from a previous run.
        numeric_columns = [
            column
            for column in result.select_dtypes(include=[np.number]).columns
            if column not in self._ANOMALY_COLUMNS
        ]

        # Nothing to fit on: report "no anomalies" instead of crashing.
        if result.empty or not numeric_columns:
            result['is_anomaly'] = 1
            result['anomaly_score'] = 0.0
            return result

        # Prepare data
        X_scaled = StandardScaler().fit_transform(result[numeric_columns])

        # Isolation Forest for anomaly detection
        clf = IsolationForest(contamination=0.1, random_state=42)
        y_pred = clf.fit_predict(X_scaled)

        # Add anomaly flag to dataframe
        result['is_anomaly'] = y_pred
        result['anomaly_score'] = clf.decision_function(X_scaled)

        return result

    def document_similarity_analysis(self, documents: List[str]) -> np.ndarray:
        """
        Compute pairwise cosine similarity between TF-IDF document vectors.

        Args:
            documents: Document texts to compare.

        Returns:
            A square matrix with one row and column per document; an empty
            ``(0, 0)`` array when ``documents`` is empty.
        """
        if len(documents) == 0:
            return np.empty((0, 0))

        # TF-IDF Vectorization
        vectorizer = TfidfVectorizer(stop_words='english')
        tfidf_matrix = vectorizer.fit_transform(documents)

        # Cosine similarity matrix
        return cosine_similarity(tfidf_matrix)

    def predictive_financial_modeling(self, df: pd.DataFrame) -> Optional[Dict[str, float]]:
        """
        Extrapolate each known metric one period ahead with a straight line.

        A degree-1 least-squares fit is made against the row position
        (0, 1, 2, ...) and evaluated at position ``len(df)``.

        Args:
            df: One row per period; may contain any of the columns
                ``'revenue'``, ``'net_income'`` and ``'eps'``.

        Returns:
            ``None`` when ``df`` has fewer than two rows. Otherwise a dict
            mapping each available metric to its predicted next value.
            Metrics that are absent, or that have fewer than two non-missing
            values, are left out.
        """
        # Prepare features and target
        features = ['revenue', 'net_income', 'eps']

        # Check if we have enough data for prediction
        if len(df) < 2:
            return None

        periods = np.arange(len(df))
        next_period = len(df)

        # Simple linear extrapolation
        predictions = {}
        for feature in features:
            if feature not in df.columns:
                continue

            values = df[feature].astype(float).to_numpy()

            # Fit only on periods that actually have a value; a NaN would
            # otherwise poison the whole fit.
            observed = ~np.isnan(values)
            if observed.sum() < 2:
                continue

            slope, intercept = np.polyfit(periods[observed], values[observed], 1)
            # Plain float keeps the result JSON-friendly.
            predictions[feature] = float(slope * next_period + intercept)

        return predictions

def main():
    """
    Render the Streamlit page.

    Shows a PDF uploader and, once at least one file is uploaded, four
    sections: per-document textual insights, anomaly detection and predictive
    modeling (both on a built-in sample DataFrame), and a document similarity
    heatmap.
    """
    st.title("Advanced Financial Intelligence System")

    # Initialize analyzer.
    # Streamlit re-executes this script on every interaction, so keep one
    # analyzer per session instead of reloading the FinBERT weights each time.
    if "_financial_analyzer" not in st.session_state:
        st.session_state["_financial_analyzer"] = AdvancedFinancialAnalyzer()
    analyzer = st.session_state["_financial_analyzer"]

    # File upload with multiple file support
    uploaded_files = st.file_uploader(
        "Upload Financial Reports (PDFs)",
        type="pdf",
        accept_multiple_files=True
    )

    if not uploaded_files:
        return

    # Containers for different analysis sections (created up front so the
    # sections keep this order on the page).
    insights_container = st.container()
    anomaly_container = st.container()
    similarity_container = st.container()
    predictive_container = st.container()

    # One header for the whole section rather than one per uploaded file.
    with insights_container:
        st.subheader("Advanced Textual Insights")

    # Collect and process documents
    document_texts = []

    for uploaded_file in uploaded_files:
        # Extract text (simplified for this example: the raw upload bytes are
        # decoded as UTF-8; replace with real PDF text extraction).
        # Decoding in memory avoids a fixed "temp.pdf" in the working
        # directory, which concurrent sessions would overwrite and which was
        # never cleaned up.
        text = uploaded_file.getvalue().decode('utf-8', errors='ignore')

        document_texts.append(text)

        # Advanced insights extraction
        advanced_insights = analyzer.extract_advanced_insights(text)

        with insights_container:
            # Say which upload the numbers below belong to.
            st.text(uploaded_file.name)
            col1, col2, col3 = st.columns(3)

            with col1:
                st.metric("Sentiment Score",
                          f"{advanced_insights['sentiment_score']:.2f}",
                          delta_color="inverse")

            with col2:
                st.write("Risk Indicators:")
                st.json(advanced_insights['risk_indicators'])

            with col3:
                st.write("Growth Signals:")
                st.json(advanced_insights['growth_signals'])

    # Anomaly Detection
    # (Assuming you have a DataFrame from previous processing)
    df = pd.DataFrame({
        'revenue': [100, 120, 110, 130, 140],
        'net_income': [20, 25, 22, 30, 35],
        'eps': [0.5, 0.6, 0.55, 0.7, 0.8]
    })

    # Pass a copy so `df`, reused for the predictions below, keeps only the
    # metric columns regardless of what the detector does to its argument.
    anomaly_df = analyzer.detect_financial_anomalies(df.copy())

    with anomaly_container:
        st.subheader("Anomaly Detection")

        # Visualize anomalies: flagged rows (-1) in red, the rest in blue
        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=anomaly_df.index,
            y=anomaly_df['revenue'],
            mode='markers',
            marker=dict(
                color=anomaly_df['is_anomaly'].map({1: 'blue', -1: 'red'}),
                size=10
            ),
            name='Revenue'
        ))

        st.plotly_chart(fig)

        # Anomaly details
        st.dataframe(anomaly_df[anomaly_df['is_anomaly'] == -1])

    # Document Similarity
    with similarity_container:
        st.subheader("Document Similarity Analysis")

        try:
            similarity_matrix = analyzer.document_similarity_analysis(document_texts)
        except ValueError as e:
            # e.g. no usable terms in the uploads; keep the rest of the page alive.
            st.warning(f"Document similarity could not be computed: {e}")
        else:
            # Heatmap of document similarities
            fig = px.imshow(
                similarity_matrix,
                labels=dict(x="Document", y="Document", color="Similarity"),
                title="Document Similarity Heatmap"
            )
            st.plotly_chart(fig)

    # Predictive Modeling
    predictions = analyzer.predictive_financial_modeling(df)

    with predictive_container:
        st.subheader("Predictive Financial Modeling")

        if predictions:
            # Visualize predictions: history followed by the one-step forecast
            fig = go.Figure()
            for metric, prediction in predictions.items():
                fig.add_trace(go.Scatter(
                    x=list(df.index) + [len(df)],
                    y=list(df[metric]) + [prediction],
                    mode='lines+markers',
                    name=f'{metric} Prediction'
                ))

            st.plotly_chart(fig)

            # Display predictions
            st.json(predictions)

# Run the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()