# 10-K Filings: Comprehensive Visualizations

This notebook demonstrates how to create comprehensive visualizations and dashboards that combine the results of text analysis, sentiment analysis, and financial analysis performed in previous notebooks.

In [None]:
# Import libraries
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from matplotlib.ticker import FuncFormatter, MaxNLocator
import matplotlib.dates as mdates

# Add project root to path for importing local modules
sys.path.append('..')

# Import project modules
from src.visualization.basic_plots import (
    plot_time_series, plot_metric_distribution, create_wordcloud,
    plot_sentiment_analysis, plot_comparative_metrics, plot_correlation_heatmap,
    plot_stacked_bar, plot_year_over_year_comparison
)
from src.visualization.advanced_plots import (
    create_interactive_time_series, create_heatmap_over_time, create_interactive_scatter,
    create_interactive_bar_chart, create_interactive_pie_chart, create_interactive_heatmap,
    create_radar_chart, plot_embeddings_2d, create_waterfall_chart, create_sunburst_chart,
    create_treemap_chart, create_bubble_chart, create_interactive_dashboard
)
from src.utils.helpers import format_number, setup_matplotlib_style

# Apply the project's shared matplotlib styling (helper imported from
# src.utils.helpers; its exact settings are defined there)
setup_matplotlib_style()

# Widen pandas' display limits so DataFrames shown in this notebook are
# not truncated before 50 columns / 100 rows
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 100)

## Load Analysis Results

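Let's load the results from our previous analyses: text/sentiment metrics, financial metrics, word frequencies, topic modeling, and the financial comparison. Any result file that is missing is reported with a warning and left unset.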

In [None]:
# Define paths to result files
text_analysis_file = '../data/results/text_analysis.pkl'
financial_analysis_file = '../data/results/financial_analysis.pkl'
word_freq_file = '../data/results/word_frequencies.pkl'
topic_modeling_file = '../data/results/topic_modeling.pkl'
financial_comparison_file = '../data/results/financial_comparison.pkl'

# Create results directory if it doesn't exist. exist_ok=True avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs('../data/results', exist_ok=True)


def _load_result(path, label, describe):
    """Load one pickled analysis result, or return None if its file is missing.

    Args:
        path: Location of the pickle file.
        label: Human-readable name used in the "file not found" warning.
        describe: Callable that receives the loaded object and returns the
            confirmation message to print.

    Returns:
        The unpickled object, or None when ``path`` does not exist.
    """
    if not os.path.exists(path):
        print(f"Warning: {label} file not found: {path}")
        return None

    # NOTE(security): unpickling can execute arbitrary code. Only load
    # result files that were produced by this project's own notebooks.
    data = pd.read_pickle(path)
    print(describe(data))
    return data


# Load text analysis results
text_metrics_df = _load_result(
    text_analysis_file,
    "Text analysis",
    lambda df: f"Loaded text analysis results with {len(df)} rows.",
)

# Load financial analysis results
financial_metrics_df = _load_result(
    financial_analysis_file,
    "Financial analysis",
    lambda df: f"Loaded financial analysis results with {len(df)} rows.",
)

# Load word frequencies
word_frequencies = _load_result(
    word_freq_file,
    "Word frequencies",
    lambda wf: f"Loaded word frequencies for {len(wf)} sections.",
)

# Load topic modeling results (indexed by 'section' and 'n_topics' for the summary)
topic_modeling = _load_result(
    topic_modeling_file,
    "Topic modeling",
    lambda tm: f"Loaded topic modeling results for {tm['section']} section with {tm['n_topics']} topics.",
)

# Load financial comparison results
financial_comparison = _load_result(
    financial_comparison_file,
    "Financial comparison",
    lambda _: "Loaded financial comparison results.",
)

## Check and Prepare Data

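Let's check what data we have available and prepare it for visualization. If the financial or text results could not be loaded, randomly generated sample data is used in their place so the rest of the notebook can still run.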

In [None]:
# Create sample data if needed (when no real data is available)
def create_sample_data(seed=None):
    """Build synthetic financial and text-metric tables for demonstration.

    Five tickers are generated, each with one filing per year start from
    2019-01-01 through 2023-01-01. The financial table has one row per
    ticker and filing; the text table has one row per ticker, filing and
    section ('item_1', 'item_1a', 'item_7', 'item_7a').

    The ratio columns (``profit_margin``, ``roa``, ``debt_to_assets``) are
    derived from the generated ``revenue``, ``net_income``, ``total_assets``
    and ``total_liabilities`` values, so they agree with the columns they
    summarize.

    Args:
        seed: Optional seed for reproducible output. When None (default),
            values are drawn from NumPy's global random state, so an earlier
            ``np.random.seed()`` call is still honoured.

    Returns:
        A ``(financial_df, text_df)`` tuple of pandas DataFrames.
    """
    # Module-level np.random functions and RandomState share the same
    # ``uniform`` API, so either can serve as the generator below.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Sample companies
    tickers = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META']

    # Sample dates
    dates = pd.date_range(start='2019-01-01', end='2023-01-01', freq='YS')

    # Create sample financial metrics DataFrame
    financial_data = []
    for ticker in tickers:
        base_revenue = rng.uniform(50, 200) * 1e9  # Base revenue between $50B and $200B
        growth_rate = rng.uniform(0.05, 0.2)  # Annual growth between 5% and 20%
        profit_margin = rng.uniform(0.1, 0.3)  # Profit margin between 10% and 30%

        for i, date in enumerate(dates):
            # Compound growth plus +/-10% noise on every figure
            revenue = base_revenue * (1 + growth_rate) ** i * rng.uniform(0.9, 1.1)
            net_income = revenue * profit_margin * rng.uniform(0.9, 1.1)
            operating_income = revenue * 0.2 * rng.uniform(0.9, 1.1)
            total_assets = revenue * 1.5 * rng.uniform(0.9, 1.1)
            total_liabilities = revenue * 0.8 * rng.uniform(0.9, 1.1)

            financial_data.append({
                'ticker': ticker,
                'company_name': f"{ticker} Inc.",
                'filing_date': date,
                'filing_year': date.year,
                'revenue': revenue,
                'net_income': net_income,
                'operating_income': operating_income,
                'total_assets': total_assets,
                'total_liabilities': total_liabilities,
                'profit_margin': net_income / revenue,
                # Ratios use the generated balance-sheet values so they
                # match the total_assets / total_liabilities columns
                'roa': net_income / total_assets,
                'debt_to_assets': total_liabilities / total_assets,
            })

    financial_df = pd.DataFrame(financial_data)

    # Create sample text metrics DataFrame
    text_data = []
    sections = ['item_1', 'item_1a', 'item_7', 'item_7a']

    for ticker in tickers:
        sentiment_base = rng.uniform(-0.2, 0.4)  # Base sentiment

        for date in dates:
            for section in sections:
                # Scores vary around the company's base sentiment;
                # positive/negative scores are floored at zero
                polarity = sentiment_base + rng.uniform(-0.2, 0.2)
                positive_score = max(0, sentiment_base + 0.2 + rng.uniform(-0.1, 0.1))
                negative_score = max(0, 0.2 - sentiment_base + rng.uniform(-0.1, 0.1))

                text_data.append({
                    'ticker': ticker,
                    'company_name': f"{ticker} Inc.",
                    'filing_date': date,
                    'filing_year': date.year,
                    'section': section,
                    'textblob_polarity': polarity,
                    'textblob_subjectivity': rng.uniform(0.3, 0.7),
                    'lexicon_positive_score': positive_score,
                    'lexicon_negative_score': negative_score,
                    'lexicon_net_score': positive_score - negative_score,
                    'lexicon_uncertainty_score': rng.uniform(0.05, 0.2),
                    'lexicon_litigious_score': rng.uniform(0.01, 0.15),
                })

    text_df = pd.DataFrame(text_data)

    return financial_df, text_df

# Fall back to synthetic data for whichever table could not be loaded
missing_financial = financial_metrics_df is None
missing_text = text_metrics_df is None

if missing_financial or missing_text:
    print("Creating sample data for demonstration...")
    sample_financial_df, sample_text_df = create_sample_data()

    # Only the missing table is replaced; a table that loaded is kept
    if missing_financial:
        financial_metrics_df = sample_financial_df
        print(f"Created sample financial data with {len(financial_metrics_df)} rows.")

    if missing_text:
        text_metrics_df = sample_text_df
        print(f"Created sample text analysis data with {len(text_metrics_df)} rows.")

In [None]:
# Prepare data for visualization
def _has_column(frame, column):
    """Return True when *frame* is loaded and contains *column*."""
    return frame is not None and column in frame.columns


# Ensure date columns are datetime
for _frame in (financial_metrics_df, text_metrics_df):
    if _has_column(_frame, 'filing_date'):
        _frame['filing_date'] = pd.to_datetime(_frame['filing_date'])

# Get list of companies (the financial table is consulted first, the
# text table only when the financial one has no ticker column)
companies = []
if _has_column(financial_metrics_df, 'ticker'):
    companies = sorted(financial_metrics_df['ticker'].unique())
    print(f"Companies in financial data: {', '.join(companies)}")
elif _has_column(text_metrics_df, 'ticker'):
    companies = sorted(text_metrics_df['ticker'].unique())
    print(f"Companies in text data: {', '.join(companies)}")

# Get available years, with the same precedence as for companies
years = []
if _has_column(financial_metrics_df, 'filing_year'):
    years = sorted(financial_metrics_df['filing_year'].unique())
    print(f"Years in financial data: {', '.join(map(str, years))}")
elif _has_column(text_metrics_df, 'filing_year'):
    years = sorted(text_metrics_df['filing_year'].unique())
    print(f"Years in text data: {', '.join(map(str, years))}")

## Basic Visualizations

Let's start with some basic visualizations to understand our data.

In [None]:
# Time series plot of revenue
def billions_formatter(x, pos):
    """Format an axis tick value given in dollars as billions, e.g. ``$1.5B``.

    Defined at cell level rather than inside the ``if`` below, because later
    cells reuse it even when this revenue plot is skipped.

    Args:
        x: Tick value in dollars.
        pos: Tick position; part of the ``FuncFormatter`` callback signature
            and not used here.

    Returns:
        The formatted tick label.
    """
    return f'${x/1e9:.1f}B'


if financial_metrics_df is not None and 'revenue' in financial_metrics_df.columns:
    fig, ax = plot_time_series(
        financial_metrics_df,
        date_column='filing_date',
        value_column='revenue',
        company_column='ticker',
        title='Revenue Over Time by Company'
    )

    # Format y-axis with B for billions
    ax.yaxis.set_major_formatter(FuncFormatter(billions_formatter))

    plt.show()

In [None]:
# Sentiment analysis plot
# Both columns are required: 'section' selects the Risk Factors rows and
# 'lexicon_net_score' is the value that gets plotted.
if text_metrics_df is not None and {'section', 'lexicon_net_score'}.issubset(text_metrics_df.columns):
    # Filter to Risk Factors section
    risk_factors_sentiment = text_metrics_df[text_metrics_df['section'] == 'item_1a'].copy()

    if not risk_factors_sentiment.empty:
        fig, ax = plot_sentiment_analysis(
            risk_factors_sentiment,
            date_column='filing_date',
            sentiment_column='lexicon_net_score',
            company_column='ticker',
            title='Risk Factors Sentiment Over Time by Company'
        )
        plt.show()

In [None]:
# Comparative metrics plot
if financial_metrics_df is not None:
    # Keep only the candidate metrics that the data actually provides
    available_metrics = [
        metric
        for metric in ('revenue', 'net_income', 'operating_income')
        if metric in financial_metrics_df.columns
    ]

    if available_metrics and 'ticker' in financial_metrics_df.columns:
        # Restrict to the latest filing year when that column exists;
        # otherwise pass the full table through unchanged
        if 'filing_year' in financial_metrics_df.columns:
            most_recent_year = financial_metrics_df['filing_year'].max()
            in_latest_year = financial_metrics_df['filing_year'] == most_recent_year
            recent_data = financial_metrics_df[in_latest_year]
            period_label = most_recent_year
        else:
            recent_data = financial_metrics_df
            period_label = "Most Recent"

        fig, ax = plot_comparative_metrics(
            recent_data,
            metrics=available_metrics,
            company_column='ticker',
            title=f'Financial Metrics Comparison by Company ({period_label})'
        )

        # Show the dollar amounts on the y-axis in billions
        ax.yaxis.set_major_formatter(FuncFormatter(billions_formatter))

        plt.show()

In [None]:
# Year-over-year comparison
if financial_metrics_df is not None and 'revenue' in financial_metrics_df.columns:
    # Two figures: absolute revenue first, then the normalized
    # (percentage change) version
    yoy_variants = (
        ('Year-over-Year Revenue Comparison', False),
        ('Year-over-Year Revenue Growth (%)', True),
    )

    for yoy_title, use_normalized in yoy_variants:
        fig, ax = plot_year_over_year_comparison(
            financial_metrics_df,
            date_column='filing_date',
            value_column='revenue',
            group_column='ticker',
            title=yoy_title,
            normalize=use_normalized
        )

        if not use_normalized:
            # Only the absolute-value figure gets the billions formatter
            ax.yaxis.set_major_formatter(FuncFormatter(billions_formatter))

        plt.show()

In [None]:
# Word cloud visualization
# Section to draw the cloud for: Risk Factors
section_name = 'item_1a'

if word_frequencies is not None and section_name in word_frequencies:
    word_freq = word_frequencies[section_name]

    has_words = word_freq is not None and not word_freq.empty
    if has_words:
        # Map each word to its frequency for the word cloud
        word_freq_dict = {
            word: count
            for word, count in zip(word_freq['word'], word_freq['frequency'])
        }

        fig, ax = create_wordcloud(
            word_freq_dict,
            title='Word Cloud for Risk Factors',
            figsize=(12, 8),
            colormap='Reds',
            max_words=100
        )
        plt.show()

In [None]:
# Correlation heatmap
if financial_metrics_df is not None:
    # Every numeric column except the filing year takes part
    numeric_frame = financial_metrics_df.select_dtypes(include=[np.number])
    num_cols = [col for col in numeric_frame.columns.tolist() if col != 'filing_year']

    if num_cols:
        fig, ax = plot_correlation_heatmap(
            financial_metrics_df,
            columns=num_cols,
            title='Correlation Heatmap of Financial Metrics'
        )
        plt.show()

## Interactive Visualizations

Now let's create some interactive visualizations using Plotly.

In [None]:
# Interactive time series plot
if financial_metrics_df is not None:
    fin_columns = financial_metrics_df.columns

    # Plot whichever of the three headline metrics are present
    available_metrics = [
        metric
        for metric in ('revenue', 'net_income', 'operating_income')
        if metric in fin_columns
    ]

    if available_metrics and 'filing_date' in fin_columns:
        fig = create_interactive_time_series(
            financial_metrics_df,
            date_column='filing_date',
            value_columns=available_metrics,
            company_column='ticker',
            title='Interactive Financial Metrics Over Time'
        )
        fig.show()

In [None]:
# Interactive scatter plot
if financial_metrics_df is not None and text_metrics_df is not None:
    # We need to merge financial and text metrics
    # First, filter text metrics to a specific section (e.g., MD&A)
    if 'section' in text_metrics_df.columns:
        mda_sentiment = text_metrics_df[text_metrics_df['section'] == 'item_7'].copy()

        # Group by company and year and calculate mean sentiment
        if not mda_sentiment.empty and 'lexicon_net_score' in mda_sentiment.columns:
            sentiment_by_company_year = mda_sentiment.groupby(['ticker', 'filing_year'])['lexicon_net_score'].mean().reset_index()

            # Both financial columns are aggregated below (revenue sizes the
            # markers, profit margin is the y-axis), so both must be present
            if {'revenue', 'profit_margin'}.issubset(financial_metrics_df.columns):
                financial_by_company_year = financial_metrics_df.groupby(['ticker', 'filing_year'])[['revenue', 'profit_margin']].mean().reset_index()

                # Merge the datasets; only company/year pairs present in both survive
                merged_df = pd.merge(financial_by_company_year, sentiment_by_company_year, on=['ticker', 'filing_year'])

                if not merged_df.empty:
                    fig = create_interactive_scatter(
                        merged_df,
                        x_column='lexicon_net_score',
                        y_column='profit_margin',
                        color_column='ticker',
                        size_column='revenue',
                        hover_data=['filing_year'],
                        title='MD&A Sentiment vs. Profit Margin'
                    )
                    fig.show()

In [None]:
# Interactive heatmap
if financial_metrics_df is not None and 'revenue' in financial_metrics_df.columns:
    # Revenue per company over time
    heatmap_options = {
        'date_column': 'filing_date',
        'company_column': 'ticker',
        'value_column': 'revenue',
        'title': 'Revenue Heatmap by Company and Year',
    }
    fig = create_heatmap_over_time(financial_metrics_df, **heatmap_options)
    fig.show()

In [None]:
# Interactive bar chart
if financial_metrics_df is not None and 'profit_margin' in financial_metrics_df.columns:
    # Average profit margin per company across all filings
    margin_by_ticker = financial_metrics_df.groupby('ticker')['profit_margin']
    profit_margins = margin_by_ticker.mean().reset_index()

    fig = create_interactive_bar_chart(
        profit_margins,
        x_column='ticker',
        y_column='profit_margin',
        title='Average Profit Margin by Company'
    )

    # Show the y-axis as whole percentages
    fig.update_layout(yaxis={'tickformat': '.0%', 'title': 'Profit Margin'})

    fig.show()

In [None]:
# Bubble chart visualization
# Same data preparation as the interactive scatter plot, rendered with
# the bubble chart helper instead
if financial_metrics_df is not None and text_metrics_df is not None:
    if 'section' in text_metrics_df.columns:
        is_mda = text_metrics_df['section'] == 'item_7'
        mda_sentiment = text_metrics_df[is_mda].copy()

        if not mda_sentiment.empty and 'lexicon_net_score' in mda_sentiment.columns:
            # Mean MD&A sentiment per company and filing year
            sentiment_groups = mda_sentiment.groupby(['ticker', 'filing_year'])
            sentiment_by_company_year = sentiment_groups['lexicon_net_score'].mean().reset_index()

            fin_columns = financial_metrics_df.columns
            if 'profit_margin' in fin_columns and 'revenue' in fin_columns:
                # Mean revenue and profit margin per company and filing year
                financial_groups = financial_metrics_df.groupby(['ticker', 'filing_year'])
                financial_by_company_year = financial_groups[['revenue', 'profit_margin']].mean().reset_index()

                # Join the two summaries on company and year
                merged_df = pd.merge(
                    financial_by_company_year,
                    sentiment_by_company_year,
                    on=['ticker', 'filing_year']
                )

                if not merged_df.empty:
                    fig = create_bubble_chart(
                        merged_df,
                        x_column='lexicon_net_score',
                        y_column='profit_margin',
                        size_column='revenue',
                        color_column='ticker',
                        text_column='filing_year',
                        title='MD&A Sentiment vs. Profit Margin (Size = Revenue)'
                    )
                    fig.show()

## Comprehensive Dashboards

Now let's create comprehensive dashboards that combine multiple visualizations.

In [None]:
# Financial performance dashboard
if financial_metrics_df is not None:
    # Metrics the dashboard can show, limited to those present in the data
    available_metrics = [
        metric
        for metric in ('revenue', 'net_income', 'operating_income', 'profit_margin')
        if metric in financial_metrics_df.columns
    ]

    # The 2x2 grid is only built when at least two metrics are available
    if len(available_metrics) >= 2:
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=[
                'Revenue Over Time',
                'Net Income Over Time',
                'Operating Income by Company',
                'Profit Margin by Company'
            ],
            vertical_spacing=0.1,
            horizontal_spacing=0.1
        )

        # Top row: one line per company. Revenue traces carry the legend
        # entries; net income traces share their legend group but are hidden
        # from the legend.
        line_panels = (
            ('revenue', 1, {}),
            ('net_income', 2, {'showlegend': False}),
        )
        for metric, col_idx, extra_trace_args in line_panels:
            if metric not in available_metrics:
                continue
            for ticker in companies:
                is_company = financial_metrics_df['ticker'] == ticker
                company_data = financial_metrics_df[is_company].sort_values('filing_date')
                fig.add_trace(
                    go.Scatter(
                        x=company_data['filing_date'],
                        y=company_data[metric],
                        mode='lines+markers',
                        name=ticker,
                        legendgroup=ticker,
                        **extra_trace_args
                    ),
                    row=1, col=col_idx
                )

        # Bottom row: per-company averages as bars
        bar_panels = (
            ('operating_income', 1, 'Operating Income', 'rgb(55, 83, 109)'),
            ('profit_margin', 2, 'Profit Margin', 'rgb(26, 118, 255)'),
        )
        for metric, col_idx, trace_name, bar_color in bar_panels:
            if metric not in available_metrics:
                continue
            mean_by_company = financial_metrics_df.groupby('ticker')[metric].mean().reset_index()
            fig.add_trace(
                go.Bar(
                    x=mean_by_company['ticker'],
                    y=mean_by_company[metric],
                    name=trace_name,
                    marker_color=bar_color
                ),
                row=2, col=col_idx
            )

        # Overall figure layout with a horizontal legend above the plots
        fig.update_layout(
            title='Financial Performance Dashboard',
            height=800,
            width=1200,
            showlegend=True,
            legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1)
        )

        # Dollar-valued panels share one tick format; profit margin is a percentage
        dollar_axes = (
            ('Revenue ($)', 1, 1),
            ('Net Income ($)', 1, 2),
            ('Operating Income ($)', 2, 1),
        )
        for axis_title, row_idx, col_idx in dollar_axes:
            fig.update_yaxes(title_text=axis_title, row=row_idx, col=col_idx, tickprefix='$', tickformat=',.0f')
        fig.update_yaxes(title_text='Profit Margin (%)', row=2, col=2, tickformat='.0%')

        fig.show()

In [None]:
# Sentiment analysis dashboard
if text_metrics_df is not None and 'section' in text_metrics_df.columns:
    # Filter to key sections
    sections_to_include = ['item_1a', 'item_7']  # Risk Factors and MD&A
    section_names = {'item_1a': 'Risk Factors', 'item_7': 'MD&A'}

    section_data = text_metrics_df[text_metrics_df['section'].isin(sections_to_include)].copy()

    if not section_data.empty and 'lexicon_net_score' in section_data.columns:
        # Create a figure with subplots
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=[
                f'{section_names["item_1a"]} Sentiment Over Time',
                f'{section_names["item_7"]} Sentiment Over Time',
                'Sentiment Comparison by Section',
                'Positive vs. Negative Score by Company'
            ],
            vertical_spacing=0.1,
            horizontal_spacing=0.1
        )

        # Take the tickers from the text data itself. The global `companies`
        # list is built from the financial table when that has tickers, so it
        # can miss companies that only appear in the text metrics.
        sentiment_tickers = sorted(section_data['ticker'].unique())

        # Top row: sentiment over time, Risk Factors on the left and MD&A on
        # the right. Each ticker gets exactly one legend entry, attached to
        # the first trace drawn for it.
        tickers_in_legend = set()
        for col_idx, section in enumerate(sections_to_include, start=1):
            per_section = section_data[section_data['section'] == section]
            for ticker in sentiment_tickers:
                company_data = per_section[per_section['ticker'] == ticker].sort_values('filing_date')
                if company_data.empty:
                    continue
                fig.add_trace(
                    go.Scatter(
                        x=company_data['filing_date'],
                        y=company_data['lexicon_net_score'],
                        mode='lines+markers',
                        name=ticker,
                        legendgroup=ticker,
                        showlegend=ticker not in tickers_in_legend
                    ),
                    row=1, col=col_idx
                )
                tickers_in_legend.add(ticker)

        # Add sentiment comparison by section
        sentiment_by_section = section_data.groupby(['ticker', 'section'])['lexicon_net_score'].mean().reset_index()
        for section in sections_to_include:
            section_sentiment = sentiment_by_section[sentiment_by_section['section'] == section]
            fig.add_trace(
                go.Bar(
                    x=section_sentiment['ticker'],
                    y=section_sentiment['lexicon_net_score'],
                    name=section_names.get(section, section)
                ),
                row=2, col=1
            )

        # Add positive vs. negative scores by company
        if 'lexicon_positive_score' in section_data.columns and 'lexicon_negative_score' in section_data.columns:
            # Calculate average scores by company
            pos_neg_by_company = section_data.groupby('ticker')[
                ['lexicon_positive_score', 'lexicon_negative_score']
            ].mean().reset_index()

            # Add positive scores
            fig.add_trace(
                go.Bar(
                    x=pos_neg_by_company['ticker'],
                    y=pos_neg_by_company['lexicon_positive_score'],
                    name='Positive Score',
                    marker_color='green'
                ),
                row=2, col=2
            )

            # Add negative scores
            fig.add_trace(
                go.Bar(
                    x=pos_neg_by_company['ticker'],
                    y=pos_neg_by_company['lexicon_negative_score'],
                    name='Negative Score',
                    marker_color='red'
                ),
                row=2, col=2
            )

        # Update layout
        fig.update_layout(
            title='Sentiment Analysis Dashboard',
            height=800,
            width=1200,
            showlegend=True,
            legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1)
        )

        # Add horizontal reference lines at y=0 for the three net-sentiment
        # plots. add_hline with row/col keeps each line inside its own
        # subplot; a paper-referenced shape from x=0 to x=1 would run across
        # the neighbouring subplot as well.
        for row_idx, col_idx in ((1, 1), (1, 2), (2, 1)):
            fig.add_hline(
                y=0,
                line_color="gray", line_width=1, line_dash="dash",
                row=row_idx, col=col_idx
            )

        fig.show()

In [None]:
# Integrated financial and sentiment dashboard
if financial_metrics_df is not None and text_metrics_df is not None:
    text_columns = text_metrics_df.columns

    # The sentiment side needs both the section label and the net score
    if 'section' in text_columns and 'lexicon_net_score' in text_columns:
        # MD&A rows only, averaged per company and filing year
        is_mda = text_metrics_df['section'] == 'item_7'
        mda_sentiment = text_metrics_df[is_mda].copy()
        sentiment_groups = mda_sentiment.groupby(['ticker', 'filing_year'])
        sentiment_by_company_year = sentiment_groups['lexicon_net_score'].mean().reset_index()

        # Financial metrics to carry along, limited to those present
        available_metrics = [
            metric
            for metric in ('revenue', 'net_income', 'profit_margin')
            if metric in financial_metrics_df.columns
        ]

        if available_metrics:
            # Average the financial metrics on the same company/year grain
            financial_groups = financial_metrics_df.groupby(['ticker', 'filing_year'])
            financial_by_company_year = financial_groups[available_metrics].mean().reset_index()

            # Join the two summaries on company and year
            merged_df = pd.merge(
                financial_by_company_year,
                sentiment_by_company_year,
                on=['ticker', 'filing_year']
            )

            if not merged_df.empty:
                # The merged table is passed for both arguments because it
                # already contains the financial and the sentiment columns
                fig = create_interactive_dashboard(
                    financial_df=merged_df,
                    sentiment_df=merged_df
                )
                fig.show()

## Save Visualizations

Let's save some of our visualizations for use in reports or presentations.

In [None]:
# Folder that receives the exported figures; created on first use
output_dir = '../output/visualizations'
output_dir_missing = not os.path.exists(output_dir)
if output_dir_missing:
    os.makedirs(output_dir)
    print(f"Created output directory: {output_dir}")

# Save static visualizations
def save_static_visualizations():
    """Render the static figures and save them as PNG files in ``output_dir``.

    Up to three figures are written, each only when its input data is
    available: the revenue time series, the Risk Factors sentiment plot and
    the correlation heatmap of the numeric financial columns.

    Reads the notebook-level ``financial_metrics_df``, ``text_metrics_df``,
    ``output_dir`` and ``billions_formatter``.

    Returns:
        List of the file paths that were written, in the order saved.
    """
    saved_files = []

    def _save_figure(fig, basename):
        # Save and close through the Figure object returned by the plot
        # helper instead of pyplot's implicit "current figure", so the
        # result does not depend on which figure happens to be active.
        fig.tight_layout()
        filename = os.path.join(output_dir, basename)
        fig.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close(fig)
        saved_files.append(filename)

    # Revenue time series
    if financial_metrics_df is not None and 'revenue' in financial_metrics_df.columns:
        fig, ax = plot_time_series(
            financial_metrics_df,
            date_column='filing_date',
            value_column='revenue',
            company_column='ticker',
            title='Revenue Over Time by Company'
        )
        ax.yaxis.set_major_formatter(FuncFormatter(billions_formatter))
        _save_figure(fig, 'revenue_time_series.png')

    # Sentiment analysis (needs 'section' to select the Risk Factors rows
    # and 'lexicon_net_score' as the plotted value)
    if text_metrics_df is not None and {'section', 'lexicon_net_score'}.issubset(text_metrics_df.columns):
        risk_factors_sentiment = text_metrics_df[text_metrics_df['section'] == 'item_1a'].copy()
        if not risk_factors_sentiment.empty:
            fig, ax = plot_sentiment_analysis(
                risk_factors_sentiment,
                date_column='filing_date',
                sentiment_column='lexicon_net_score',
                company_column='ticker',
                title='Risk Factors Sentiment Over Time by Company'
            )
            _save_figure(fig, 'risk_factors_sentiment.png')

    # Correlation heatmap over every numeric column except the filing year
    if financial_metrics_df is not None:
        num_cols = financial_metrics_df.select_dtypes(include=[np.number]).columns.tolist()
        num_cols = [col for col in num_cols if col != 'filing_year']
        if num_cols:
            fig, ax = plot_correlation_heatmap(
                financial_metrics_df,
                columns=num_cols,
                title='Correlation Heatmap of Financial Metrics'
            )
            _save_figure(fig, 'correlation_heatmap.png')

    return saved_files

# Save interactive visualizations
def save_interactive_visualizations():
    """Export the interactive Plotly figures to ``output_dir`` as HTML files.

    Reads the notebook-level ``financial_metrics_df``, ``companies`` and
    ``output_dir`` variables. Up to two files are written:

    * ``interactive_financial_metrics.html`` - figure returned by
      ``create_interactive_time_series`` for whichever of revenue, net income
      and operating income are present, provided the frame also has a
      ``filing_date`` column.
    * ``financial_dashboard.html`` - a 2x2 dashboard, written when at least
      two of revenue, net income, operating income and profit margin are
      present.

    Returns:
        list: Paths of the files that were written, in the order they were
        created. Empty when ``financial_metrics_df`` is ``None`` or lacks the
        required columns.
    """
    saved_files = []

    # Nothing to export without the financial metrics frame.
    if financial_metrics_df is None:
        return saved_files

    # Every time-based plot below needs this column; check it once.
    has_dates = 'filing_date' in financial_metrics_df.columns

    # Interactive time series
    metrics_to_plot = ['revenue', 'net_income', 'operating_income']
    series_metrics = [m for m in metrics_to_plot if m in financial_metrics_df.columns]
    if series_metrics and has_dates:
        fig = create_interactive_time_series(
            financial_metrics_df,
            date_column='filing_date',
            value_columns=series_metrics,
            company_column='ticker',
            title='Interactive Financial Metrics Over Time'
        )
        filename = os.path.join(output_dir, 'interactive_financial_metrics.html')
        fig.write_html(filename)
        saved_files.append(filename)

    # Financial dashboard
    metrics_to_include = ['revenue', 'net_income', 'operating_income', 'profit_margin']
    dashboard_metrics = [m for m in metrics_to_include if m in financial_metrics_df.columns]
    if len(dashboard_metrics) >= 2:
        # Create the dashboard (using code from earlier cell)
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=[
                'Revenue Over Time',
                'Net Income Over Time',
                'Operating Income by Company',
                'Profit Margin by Company'
            ],
            vertical_spacing=0.1,
            horizontal_spacing=0.1
        )

        # Top row: one line per company for each time-series metric.
        # Only the first panel actually drawn contributes legend entries, so
        # each ticker appears exactly once in the legend even when revenue
        # is missing and net income is the only time-series panel.
        ticker_legend_pending = True
        for metric, col in (('revenue', 1), ('net_income', 2)):
            # Skip instead of raising KeyError when there is no date column
            # to sort and plot against (same guard as the time series above).
            if not has_dates or metric not in dashboard_metrics:
                continue
            for ticker in companies:
                company_data = financial_metrics_df[
                    financial_metrics_df['ticker'] == ticker
                ].sort_values('filing_date')
                fig.add_trace(
                    go.Scatter(
                        x=company_data['filing_date'],
                        y=company_data[metric],
                        mode='lines+markers',
                        name=ticker,
                        legendgroup=ticker,
                        showlegend=ticker_legend_pending
                    ),
                    row=1, col=col
                )
            ticker_legend_pending = False

        # Bottom row: per-company mean of each metric as a bar chart.
        bar_panels = (
            ('operating_income', 'Operating Income', 'rgb(55, 83, 109)', 1),
            ('profit_margin', 'Profit Margin', 'rgb(26, 118, 255)', 2),
        )
        for metric, label, color, col in bar_panels:
            if metric not in dashboard_metrics:
                continue
            mean_by_company = financial_metrics_df.groupby('ticker')[metric].mean().reset_index()
            fig.add_trace(
                go.Bar(
                    x=mean_by_company['ticker'],
                    y=mean_by_company[metric],
                    name=label,
                    marker_color=color
                ),
                row=2, col=col
            )

        # Update layout
        fig.update_layout(
            title='Financial Performance Dashboard',
            height=800,
            width=1200,
            showlegend=True,
            legend=dict(orientation='h', yanchor='bottom', y=1.02, xanchor='right', x=1)
        )

        # Format y-axes
        fig.update_yaxes(title_text='Revenue ($)', row=1, col=1, tickprefix='$', tickformat=',.0f')
        fig.update_yaxes(title_text='Net Income ($)', row=1, col=2, tickprefix='$', tickformat=',.0f')
        fig.update_yaxes(title_text='Operating Income ($)', row=2, col=1, tickprefix='$', tickformat=',.0f')
        fig.update_yaxes(title_text='Profit Margin (%)', row=2, col=2, tickformat='.0%')

        filename = os.path.join(output_dir, 'financial_dashboard.html')
        fig.write_html(filename)
        saved_files.append(filename)

    return saved_files

# Run both exporters, then list what each one wrote (base names only)
static_files = save_static_visualizations()
interactive_files = save_interactive_visualizations()

report_sections = (
    (f"Saved {len(static_files)} static visualizations:", static_files),
    (f"\nSaved {len(interactive_files)} interactive visualizations:", interactive_files),
)
for heading, written_paths in report_sections:
    print(heading)
    for filename in written_paths:
        print(f"- {os.path.basename(filename)}")

## Summary and Conclusion

In this notebook, we've demonstrated a wide range of visualization techniques available in the 10-K Analysis Toolkit, including:

1. **Basic visualizations** using matplotlib and seaborn:
   - Time series plots of financial metrics
   - Sentiment analysis plots
   - Comparative metrics visualizations
   - Word clouds
   - Correlation heatmaps

2. **Interactive visualizations** using Plotly:
   - Interactive time series
   - Interactive scatter plots
   - Heatmaps
   - Bar charts
   - Bubble charts

3. **Comprehensive dashboards** that combine multiple visualizations:
   - Financial performance dashboard
   - Sentiment analysis dashboard
   - Integrated financial and sentiment dashboard

These visualizations provide valuable insights into the financial performance and narrative content of 10-K filings, allowing analysts to:

- Track financial metrics over time
- Compare performance across companies
- Analyze sentiment trends in Risk Factors and MD&A sections
- Explore the relationship between sentiment and financial performance
- Create compelling visualizations for reports and presentations

The 10-K Analysis Toolkit provides a comprehensive set of tools for extracting, analyzing, and visualizing data from SEC 10-K filings, enabling deep insights into corporate financial disclosures.