In [4]:
# This script loads the generated risk scores and creates visualizations
# to evaluate the performance of the text-based risk model.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# --- Main Execution Block ---
if __name__ == "__main__":
    # Define file paths
    risk_scores_path = 'text_based_risk_scores.csv'

    # --- Step 1: Load the Risk Score Data ---
    print(f"--- Loading risk scores from '{risk_scores_path}' ---")
    try:
        df = pd.read_csv(risk_scores_path)
        df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    except FileNotFoundError:
        print(f"Error: Risk scores file not found at '{risk_scores_path}'")
        print("Please run the 'text_based_risk_model.py' script first.")
        exit()

    print("Data loaded successfully.")

    # --- Visual 1: Portfolio Sort Analysis ---
    print("\n--- Generating Visual 1: Portfolio Sort Bar Chart ---")
    
    # Create a 'month' column for grouping
    df['month'] = df['date'].dt.to_period('M')
    
    # For each month, sort stocks into 10 deciles based on the risk_score
    # q=10 creates 10 groups. labels=False gives us numbers 0-9.
    df['portfolio_decile'] = df.groupby('month')['risk_score'].transform(
        lambda x: pd.qcut(x, 10, labels=False, duplicates='drop')
    )
    
    # Calculate the average monthly return for each portfolio decile
    portfolio_returns = df.groupby('portfolio_decile')['stock_ret'].mean().reset_index()
    portfolio_returns['portfolio_decile'] = portfolio_returns['portfolio_decile'] + 1 # Display as 1-10

    # Create the bar chart
    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(12, 7))
    sns.barplot(x='portfolio_decile', y='stock_ret', data=portfolio_returns, ax=ax, palette='viridis')
    
    ax.set_title('Average Monthly Return by Text-Based Risk Score Decile', fontsize=16, fontweight='bold')
    ax.set_xlabel('Portfolio Decile (1 = Lowest Risk Score, 10 = Highest Risk Score)', fontsize=12)
    ax.set_ylabel('Average Monthly Stock Return', fontsize=12)
    ax.axhline(0, color='black', linewidth=0.8)
    
    # Add labels to the bars
    for p in ax.patches:
        ax.annotate(f"{p.get_height():.2%}", 
                    (p.get_x() + p.get_width() / 2., p.get_height()), 
                    ha='center', va='center', 
                    xytext=(0, 9), 
                    textcoords='offset points')

    plt.tight_layout()
    plt.savefig('portfolio_sort_returns.png', dpi=300)
    print("Saved 'portfolio_sort_returns.png'")
    plt.close()

    # --- Visual 2: Long-Short Strategy Cumulative Performance ---
    print("\n--- Generating Visual 2: Long-Short Strategy Equity Curve ---")

    # The original decile labels are 0-9. Portfolio 1 is decile 0.
    long_returns = df[df['portfolio_decile'] == 0].groupby('month')['stock_ret'].mean()
    # Portfolio 10 is decile 9.
    short_returns = df[df['portfolio_decile'] == 9].groupby('month')['stock_ret'].mean()

    # The strategy return is the return of the longs minus the return of the shorts
    strategy_returns = long_returns - short_returns
    
    # Calculate the cumulative performance (equity curve)
    cumulative_returns = (1 + strategy_returns).cumprod()

    # Create the line chart
    fig, ax = plt.subplots(figsize=(12, 7))
    # Convert PeriodIndex to DatetimeIndex for plotting
    cumulative_returns.index = cumulative_returns.index.to_timestamp()
    cumulative_returns.plot(ax=ax, linewidth=2.5)
    
    ax.set_title('Cumulative Performance of Long-Short Strategy', fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Cumulative Growth (Log Scale)', fontsize=12)
    ax.set_yscale('log') # Log scale is standard for long-term performance charts
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    plt.savefig('long_short_performance.png', dpi=300)
    print("Saved 'long_short_performance.png'")
    plt.close()
    
    # --- NEW: Visual 4: Long-Short Strategy Drawdown Plot ---
    print("\n--- Generating Visual 4: Strategy Drawdown Plot ---")

    # Calculate the running maximum of the cumulative returns (the high water mark)
    running_max = cumulative_returns.cummax()
    # Calculate the drawdown as the percentage drop from the peak
    drawdown = (cumulative_returns - running_max) / running_max

    # Create the line chart for drawdowns
    fig, ax = plt.subplots(figsize=(12, 7))
    drawdown.plot(ax=ax, kind='area', color='red', alpha=0.3)
    ax.set_title('Long-Short Strategy Drawdown', fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Percentage Drawdown from Peak', fontsize=12)
    ax.yaxis.set_major_formatter(plt.FuncFormatter('{:.0%}'.format))
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    plt.savefig('long_short_drawdown.png', dpi=300)
    print("Saved 'long_short_drawdown.png'")
    plt.close()


    # --- Visual 3: Risk Score Distribution ---
    print("\n--- Generating Visual 3: Risk Score Distribution Histogram ---")

    fig, ax = plt.subplots(figsize=(12, 7))
    sns.histplot(df['risk_score'], kde=True, ax=ax, bins=50)
    
    ax.set_title('Distribution of Text-Based Risk Scores', fontsize=16, fontweight='bold')
    ax.set_xlabel('Predicted Risk Score', fontsize=12)
    ax.set_ylabel('Frequency', fontsize=12)

    plt.tight_layout()
    plt.savefig('risk_score_distribution.png', dpi=300)
    print("Saved 'risk_score_distribution.png'")
    plt.close()
    
    print("\nAll visualizations have been generated and saved.")



--- Loading risk scores from 'text_based_risk_scores.csv' ---
Data loaded successfully.

--- Generating Visual 1: Portfolio Sort Bar Chart ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='portfolio_decile', y='stock_ret', data=portfolio_returns, ax=ax, palette='viridis')


Saved 'portfolio_sort_returns.png'

--- Generating Visual 2: Long-Short Strategy Equity Curve ---
Saved 'long_short_performance.png'

--- Generating Visual 4: Strategy Drawdown Plot ---
Saved 'long_short_drawdown.png'

--- Generating Visual 3: Risk Score Distribution Histogram ---
Saved 'risk_score_distribution.png'

All visualizations have been generated and saved.


In [7]:
# This script loads the generated risk scores and creates visualizations
# to evaluate the performance of the text-based risk model.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# --- Main Execution Block ---
if __name__ == "__main__":
    # Define file paths
    risk_scores_path = 'text_based_risk_scores.csv'
    market_data_path = r'C:\_Files\Personal\Projects\FIAM\FIAM2025\data\mkt_ind.csv'

    # --- Step 1: Load the Risk Score and Market Data ---
    print(f"--- Loading risk scores from '{risk_scores_path}' ---")
    try:
        df = pd.read_csv(risk_scores_path)
        df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    except FileNotFoundError:
        print(f"Error: Risk scores file not found at '{risk_scores_path}'")
        exit()

    print("Data loaded successfully.")
    
    # --- Load Market Data for S&P 500 Benchmark ---
    try:
        df_market = pd.read_csv(market_data_path)
        
        # --- FIX: Construct the date from 'year' and 'month' columns ---
        if 'year' in df_market.columns and 'month' in df_market.columns:
            df_market['date'] = pd.to_datetime(df_market['year'].astype(str) + '-' + df_market['month'].astype(str) + '-01')
        else:
            raise KeyError("Market data CSV must contain 'year' and 'month' columns.")
            
        # --- FIX: Handle different possible names for the market return column ---
        if 'sprtrn' not in df_market.columns and 'ret' in df_market.columns:
            df_market.rename(columns={'ret': 'sprtrn'}, inplace=True)

    except FileNotFoundError:
        print(f"Error: Market data file not found at '{market_data_path}'")
        df_market = None
    except KeyError as e:
        print(f"Error processing market data file: {e}")
        df_market = None


    # --- Visual 1: Portfolio Sort Analysis ---
    print("\n--- Generating Visual 1: Portfolio Sort Bar Chart ---")
    
    df['month'] = df['date'].dt.to_period('M')
    df['portfolio_decile'] = df.groupby('month')['risk_score'].transform(
        lambda x: pd.qcut(x, 10, labels=False, duplicates='drop')
    )
    
    portfolio_returns = df.groupby('portfolio_decile')['stock_ret'].mean().reset_index()
    portfolio_returns['portfolio_decile'] = portfolio_returns['portfolio_decile'] + 1

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(12, 7))
    sns.barplot(x='portfolio_decile', y='stock_ret', data=portfolio_returns, ax=ax, palette='viridis')
    
    ax.set_title('Average Monthly Return by Text-Based Risk Score Decile', fontsize=16, fontweight='bold')
    ax.set_xlabel('Portfolio Decile (1 = Lowest Risk Score, 10 = Highest Risk Score)', fontsize=12)
    ax.set_ylabel('Average Monthly Stock Return', fontsize=12)
    ax.axhline(0, color='black', linewidth=0.8)
    
    for p in ax.patches:
        ax.annotate(f"{p.get_height():.2%}", 
                    (p.get_x() + p.get_width() / 2., p.get_height()), 
                    ha='center', va='center', xytext=(0, 9), textcoords='offset points')

    plt.tight_layout()
    plt.savefig('portfolio_sort_returns.png', dpi=300)
    print("Saved 'portfolio_sort_returns.png'")
    plt.close()

    # --- Visual 2: Long-Short Strategy Cumulative Performance ---
    print("\n--- Generating Visual 2: Long-Short Strategy Equity Curve ---")

    long_returns = df[df['portfolio_decile'] == 0].groupby('month')['stock_ret'].mean()
    short_returns = df[df['portfolio_decile'] == 9].groupby('month')['stock_ret'].mean()
    strategy_returns = long_returns - short_returns
    cumulative_returns = (1 + strategy_returns).cumprod()

    fig, ax = plt.subplots(figsize=(12, 7))
    cumulative_returns.index = cumulative_returns.index.to_timestamp()
    cumulative_returns.plot(ax=ax, linewidth=2.5, label='Long-Short Strategy')
    
    # --- Add S&P 500 Benchmark to the Plot ---
    if df_market is not None:
        df_market['month'] = df_market['date'].dt.to_period('M')
        sp500_returns = df_market.set_index('month')['sprtrn']
        # Align the benchmark with our strategy's time period
        sp500_returns = sp500_returns.reindex(strategy_returns.index)
        sp500_cumulative = (1 + sp500_returns).cumprod()
        sp500_cumulative.index = sp500_cumulative.index.to_timestamp()
        sp500_cumulative.plot(ax=ax, linewidth=2, linestyle='--', color='gray', label='S&P 500 (Long Only)')
        ax.legend()


    ax.set_title('Cumulative Performance vs. S&P 500 Benchmark', fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Cumulative Growth (Log Scale)', fontsize=12)
    ax.set_yscale('log')
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    plt.savefig('long_short_performance_vs_benchmark.png', dpi=300)
    print("Saved 'long_short_performance_vs_benchmark.png'")
    plt.close()
    
    # --- Visual 3: Long-Short Strategy Drawdown Plot ---
    print("\n--- Generating Visual 3: Strategy Drawdown Plot ---")

    running_max = cumulative_returns.cummax()
    drawdown = (cumulative_returns - running_max) / running_max

    fig, ax = plt.subplots(figsize=(12, 7))
    drawdown.plot(ax=ax, kind='area', color='red', alpha=0.3)
    ax.set_title('Long-Short Strategy Drawdown', fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Percentage Drawdown from Peak', fontsize=12)
    ax.yaxis.set_major_formatter(plt.FuncFormatter('{:.0%}'.format))
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    plt.savefig('long_short_drawdown.png', dpi=300)
    print("Saved 'long_short_drawdown.png'")
    plt.close()


    # --- Visual 4: Risk Score Distribution ---
    print("\n--- Generating Visual 4: Risk Score Distribution Histogram ---")

    fig, ax = plt.subplots(figsize=(12, 7))
    sns.histplot(df['risk_score'], kde=True, ax=ax, bins=50)
    
    ax.set_title('Distribution of Text-Based Risk Scores', fontsize=16, fontweight='bold')
    ax.set_xlabel('Predicted Risk Score', fontsize=12)
    ax.set_ylabel('Frequency', fontsize=12)

    plt.tight_layout()
    plt.savefig('risk_score_distribution.png', dpi=300)
    print("Saved 'risk_score_distribution.png'")
    plt.close()
    
    print("\nAll visualizations have been generated and saved.")



--- Loading risk scores from 'text_based_risk_scores.csv' ---
Data loaded successfully.

--- Generating Visual 1: Portfolio Sort Bar Chart ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='portfolio_decile', y='stock_ret', data=portfolio_returns, ax=ax, palette='viridis')


Saved 'portfolio_sort_returns.png'

--- Generating Visual 2: Long-Short Strategy Equity Curve ---
Saved 'long_short_performance_vs_benchmark.png'

--- Generating Visual 3: Strategy Drawdown Plot ---
Saved 'long_short_drawdown.png'

--- Generating Visual 4: Risk Score Distribution Histogram ---
Saved 'risk_score_distribution.png'

All visualizations have been generated and saved.


In [9]:
# This script loads the generated risk scores and creates visualizations
# to evaluate the performance of the text-based risk model.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# --- Main Execution Block ---
if __name__ == "__main__":
    # Define file paths
    risk_scores_path = 'text_based_risk_scores.csv'
    market_data_path = r'C:\_Files\Personal\Projects\FIAM\FIAM2025\data\mkt_ind.csv'

    # --- Step 1: Load the Risk Score and Market Data ---
    print(f"--- Loading risk scores from '{risk_scores_path}' ---")
    try:
        df = pd.read_csv(risk_scores_path)
        df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    except FileNotFoundError:
        print(f"Error: Risk scores file not found at '{risk_scores_path}'")
        exit()

    print("Data loaded successfully.")
    
    # --- Load Market Data for S&P 500 Benchmark ---
    try:
        df_market = pd.read_csv(market_data_path)
        
        # --- FIX: Construct the date from 'year' and 'month' columns ---
        if 'year' in df_market.columns and 'month' in df_market.columns:
            df_market['date'] = pd.to_datetime(df_market['year'].astype(str) + '-' + df_market['month'].astype(str) + '-01')
        else:
            raise KeyError("Market data CSV must contain 'year' and 'month' columns.")
            
        # --- FIX: Handle different possible names for the market return column ---
        if 'sprtrn' not in df_market.columns and 'ret' in df_market.columns:
            df_market.rename(columns={'ret': 'sprtrn'}, inplace=True)

    except FileNotFoundError:
        print(f"Error: Market data file not found at '{market_data_path}'")
        df_market = None
    except KeyError as e:
        print(f"Error processing market data file: {e}")
        df_market = None


    # --- Visual 1: Portfolio Sort Analysis ---
    print("\n--- Generating Visual 1: Portfolio Sort Bar Chart ---")
    
    df['month'] = df['date'].dt.to_period('M')
    df['portfolio_decile'] = df.groupby('month')['risk_score'].transform(
        lambda x: pd.qcut(x, 10, labels=False, duplicates='drop')
    )
    
    portfolio_returns = df.groupby('portfolio_decile')['stock_ret'].mean().reset_index()
    portfolio_returns['portfolio_decile'] = portfolio_returns['portfolio_decile'] + 1

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(12, 7))
    sns.barplot(x='portfolio_decile', y='stock_ret', data=portfolio_returns, ax=ax, palette='viridis')
    
    ax.set_title('Average Monthly Return by Text-Based Risk Score Decile', fontsize=16, fontweight='bold')
    ax.set_xlabel('Portfolio Decile (1 = Lowest Risk Score, 10 = Highest Risk Score)', fontsize=12)
    ax.set_ylabel('Average Monthly Stock Return', fontsize=12)
    ax.axhline(0, color='black', linewidth=0.8)
    
    for p in ax.patches:
        ax.annotate(f"{p.get_height():.2%}", 
                    (p.get_x() + p.get_width() / 2., p.get_height()), 
                    ha='center', va='center', xytext=(0, 9), textcoords='offset points')

    plt.tight_layout()
    plt.savefig('portfolio_sort_returns.png', dpi=300)
    print("Saved 'portfolio_sort_returns.png'")
    plt.close()

    # --- Visual 2: Net-Long Strategy Cumulative Performance ---
    print("\n--- Generating Visual 2: Net-Long Strategy Equity Curve ---")

    long_returns = df[df['portfolio_decile'] == 0].groupby('month')['stock_ret'].mean()
    short_returns = df[df['portfolio_decile'] == 9].groupby('month')['stock_ret'].mean()

    # --- UPDATED: Calculate returns for a 100/50 Net-Long strategy ---
    # We are 100% long the low-risk portfolio and 50% short the high-risk portfolio.
    # This gives the strategy a persistent 50% net-long market exposure.
    strategy_returns = long_returns - (0.5 * short_returns)
    
    cumulative_returns = (1 + strategy_returns).cumprod()

    fig, ax = plt.subplots(figsize=(12, 7))
    cumulative_returns.index = cumulative_returns.index.to_timestamp()
    cumulative_returns.plot(ax=ax, linewidth=2.5, label='Net-Long (100/50) Strategy')
    
    # --- Add S&P 500 Benchmark to the Plot ---
    if df_market is not None:
        df_market['month'] = df_market['date'].dt.to_period('M')
        sp500_returns = df_market.set_index('month')['sprtrn']
        # Align the benchmark with our strategy's time period
        sp500_returns = sp500_returns.reindex(strategy_returns.index)
        sp500_cumulative = (1 + sp500_returns).cumprod()
        sp500_cumulative.index = sp500_cumulative.index.to_timestamp()
        sp500_cumulative.plot(ax=ax, linewidth=2, linestyle='--', color='gray', label='S&P 500 (Long Only)')
        ax.legend()

    ax.set_title('Cumulative Performance of Net-Long (100/50) Strategy vs. Benchmark', fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Cumulative Growth (Log Scale)', fontsize=12)
    ax.set_yscale('log')
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    plt.savefig('net_long_performance_vs_benchmark.png', dpi=300)
    print("Saved 'net_long_performance_vs_benchmark.png'")
    plt.close()
    
    # --- Visual 3: Net-Long Strategy Drawdown Plot ---
    print("\n--- Generating Visual 3: Strategy Drawdown Plot ---")

    running_max = cumulative_returns.cummax()
    drawdown = (cumulative_returns - running_max) / running_max

    fig, ax = plt.subplots(figsize=(12, 7))
    drawdown.plot(ax=ax, kind='area', color='red', alpha=0.3)
    ax.set_title('Net-Long Strategy Drawdown', fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Percentage Drawdown from Peak', fontsize=12)
    ax.yaxis.set_major_formatter(plt.FuncFormatter('{:.0%}'.format))
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    plt.savefig('net_long_drawdown.png', dpi=300)
    print("Saved 'net_long_drawdown.png'")
    plt.close()


    # --- Visual 4: Risk Score Distribution ---
    print("\n--- Generating Visual 4: Risk Score Distribution Histogram ---")

    fig, ax = plt.subplots(figsize=(12, 7))
    sns.histplot(df['risk_score'], kde=True, ax=ax, bins=50)
    
    ax.set_title('Distribution of Text-Based Risk Scores', fontsize=16, fontweight='bold')
    ax.set_xlabel('Predicted Risk Score', fontsize=12)
    ax.set_ylabel('Frequency', fontsize=12)

    plt.tight_layout()
    plt.savefig('risk_score_distribution.png', dpi=300)
    print("Saved 'risk_score_distribution.png'")
    plt.close()
    
    print("\nAll visualizations have been generated and saved.")



--- Loading risk scores from 'text_based_risk_scores.csv' ---
Data loaded successfully.

--- Generating Visual 1: Portfolio Sort Bar Chart ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='portfolio_decile', y='stock_ret', data=portfolio_returns, ax=ax, palette='viridis')


Saved 'portfolio_sort_returns.png'

--- Generating Visual 2: Net-Long Strategy Equity Curve ---
Saved 'net_long_performance_vs_benchmark.png'

--- Generating Visual 3: Strategy Drawdown Plot ---
Saved 'net_long_drawdown.png'

--- Generating Visual 4: Risk Score Distribution Histogram ---
Saved 'risk_score_distribution.png'

All visualizations have been generated and saved.


In [10]:
# This script loads the generated risk scores and creates visualizations
# to evaluate the performance of the text-based risk model.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# --- NEW: Import for linear regression to calculate Alpha and Beta ---
from sklearn.linear_model import LinearRegression

# --- Main Execution Block ---
if __name__ == "__main__":
    # Define file paths
    risk_scores_path = 'text_based_risk_scores.csv'
    market_data_path = r'C:\_Files\Personal\Projects\FIAM\FIAM2025\data\mkt_ind.csv'

    # --- Step 1: Load the Risk Score and Market Data ---
    print(f"--- Loading risk scores from '{risk_scores_path}' ---")
    try:
        df = pd.read_csv(risk_scores_path)
        df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    except FileNotFoundError:
        print(f"Error: Risk scores file not found at '{risk_scores_path}'")
        exit()

    print("Data loaded successfully.")
    
    # --- Load Market Data for S&P 500 Benchmark ---
    try:
        df_market = pd.read_csv(market_data_path)
        
        if 'year' in df_market.columns and 'month' in df_market.columns:
            df_market['date'] = pd.to_datetime(df_market['year'].astype(str) + '-' + df_market['month'].astype(str) + '-01')
        else:
            raise KeyError("Market data CSV must contain 'year' and 'month' columns.")
            
        if 'sprtrn' not in df_market.columns and 'ret' in df_market.columns:
            df_market.rename(columns={'ret': 'sprtrn'}, inplace=True)

    except FileNotFoundError:
        print(f"Error: Market data file not found at '{market_data_path}'")
        df_market = None
    except KeyError as e:
        print(f"Error processing market data file: {e}")
        df_market = None


    # --- Visual 1: Portfolio Sort Analysis ---
    print("\n--- Generating Visual 1: Portfolio Sort Bar Chart ---")
    
    df['month'] = df['date'].dt.to_period('M')
    df['portfolio_decile'] = df.groupby('month')['risk_score'].transform(
        lambda x: pd.qcut(x, 10, labels=False, duplicates='drop')
    )
    
    portfolio_returns = df.groupby('portfolio_decile')['stock_ret'].mean().reset_index()
    portfolio_returns['portfolio_decile'] = portfolio_returns['portfolio_decile'] + 1

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(12, 7))
    sns.barplot(x='portfolio_decile', y='stock_ret', data=portfolio_returns, ax=ax, palette='viridis')
    
    ax.set_title('Average Monthly Return by Text-Based Risk Score Decile', fontsize=16, fontweight='bold')
    ax.set_xlabel('Portfolio Decile (1 = Lowest Risk Score, 10 = Highest Risk Score)', fontsize=12)
    ax.set_ylabel('Average Monthly Stock Return', fontsize=12)
    ax.axhline(0, color='black', linewidth=0.8)
    
    for p in ax.patches:
        ax.annotate(f"{p.get_height():.2%}", 
                    (p.get_x() + p.get_width() / 2., p.get_height()), 
                    ha='center', va='center', xytext=(0, 9), textcoords='offset points')

    plt.tight_layout()
    plt.savefig('portfolio_sort_returns.png', dpi=300)
    print("Saved 'portfolio_sort_returns.png'")
    plt.close()

    # --- Strategy Performance Calculation ---
    long_returns = df[df['portfolio_decile'] == 0].groupby('month')['stock_ret'].mean()
    short_returns = df[df['portfolio_decile'] == 9].groupby('month')['stock_ret'].mean()
    strategy_returns = long_returns - (0.5 * short_returns)
    cumulative_returns = (1 + strategy_returns).cumprod()

    # --- Visual 2: Net-Long Strategy Cumulative Performance ---
    print("\n--- Generating Visual 2: Net-Long Strategy Equity Curve ---")
    fig, ax = plt.subplots(figsize=(12, 7))
    cumulative_returns.index = cumulative_returns.index.to_timestamp()
    cumulative_returns.plot(ax=ax, linewidth=2.5, label='Net-Long (100/50) Strategy')
    
    if df_market is not None:
        df_market['month'] = df_market['date'].dt.to_period('M')
        sp500_returns = df_market.set_index('month')['sprtrn']
        sp500_returns = sp500_returns.reindex(strategy_returns.index)
        sp500_cumulative = (1 + sp500_returns).cumprod()
        sp500_cumulative.index = sp500_cumulative.index.to_timestamp()
        sp500_cumulative.plot(ax=ax, linewidth=2, linestyle='--', color='gray', label='S&P 500 (Long Only)')
        ax.legend()

    ax.set_title('Cumulative Performance of Net-Long (100/50) Strategy vs. Benchmark', fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Cumulative Growth (Log Scale)', fontsize=12)
    ax.set_yscale('log')
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)
    plt.tight_layout()
    plt.savefig('net_long_performance_vs_benchmark.png', dpi=300)
    print("Saved 'net_long_performance_vs_benchmark.png'")
    plt.close()
    
    # --- Visual 3: Net-Long Strategy Drawdown Plot ---
    print("\n--- Generating Visual 3: Strategy Drawdown Plot ---")
    running_max = cumulative_returns.cummax()
    drawdown = (cumulative_returns - running_max) / running_max
    fig, ax = plt.subplots(figsize=(12, 7))
    drawdown.plot(ax=ax, kind='area', color='red', alpha=0.3)
    ax.set_title('Net-Long Strategy Drawdown', fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Percentage Drawdown from Peak', fontsize=12)
    ax.yaxis.set_major_formatter(plt.FuncFormatter('{:.0%}'.format))
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)
    plt.tight_layout()
    plt.savefig('net_long_drawdown.png', dpi=300)
    print("Saved 'net_long_drawdown.png'")
    plt.close()

    # --- Visual 4: Risk Score Distribution ---
    print("\n--- Generating Visual 4: Risk Score Distribution Histogram ---")
    fig, ax = plt.subplots(figsize=(12, 7))
    sns.histplot(df['risk_score'], kde=True, ax=ax, bins=50)
    ax.set_title('Distribution of Text-Based Risk Scores', fontsize=16, fontweight='bold')
    ax.set_xlabel('Predicted Risk Score', fontsize=12)
    ax.set_ylabel('Frequency', fontsize=12)
    plt.tight_layout()
    plt.savefig('risk_score_distribution.png', dpi=300)
    print("Saved 'risk_score_distribution.png'")
    plt.close()
    
    # --- NEW: Final Performance Statistics Calculation ---
    print("\n--- Calculating Final Performance Metrics ---")
    
    # Prepare data for calculations
    rf_rate = df_market.set_index('month')['rf'].reindex(strategy_returns.index)
    
    # Annualized Returns
    annualized_return = strategy_returns.mean() * 12
    
    # Annualized Volatility (Standard Deviation)
    annualized_volatility = strategy_returns.std() * np.sqrt(12)
    
    # Sharpe Ratio
    sharpe_ratio = (annualized_return - rf_rate.mean() * 12) / annualized_volatility
    
    # Max Drawdown
    max_drawdown = drawdown.min()
    
    # Alpha and Beta
    excess_strategy_returns = strategy_returns - rf_rate
    excess_market_returns = sp500_returns - rf_rate
    # Drop NaNs that may have been introduced by reindexing
    regression_data = pd.DataFrame({'strategy': excess_strategy_returns, 'market': excess_market_returns}).dropna()
    
    X = regression_data['market'].values.reshape(-1, 1)
    y = regression_data['strategy'].values
    
    reg = LinearRegression().fit(X, y)
    beta = reg.coef_[0]
    alpha_monthly = reg.intercept_
    alpha_annualized = alpha_monthly * 12

    # Create summary string
    summary = f"""
======================================================================
           Strategy Performance Summary (Out-of-Sample)
======================================================================
Annualized Return:              {annualized_return:.2%}
Annualized Volatility:          {annualized_volatility:.2%}
Sharpe Ratio:                   {sharpe_ratio:.2f}
Alpha (Annualized):             {alpha_annualized:.2%}
Beta:                           {beta:.2f}
Maximum Drawdown:               {max_drawdown:.2%}
======================================================================
    """
    
    print(summary)
    
    # Save summary to a text file
    with open('performance_summary.txt', 'w') as f:
        f.write(summary)
        
    print("Saved 'performance_summary.txt'")
    print("\nAll visualizations and performance summary have been generated.")



--- Loading risk scores from 'text_based_risk_scores.csv' ---
Data loaded successfully.

--- Generating Visual 1: Portfolio Sort Bar Chart ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='portfolio_decile', y='stock_ret', data=portfolio_returns, ax=ax, palette='viridis')


Saved 'portfolio_sort_returns.png'

--- Generating Visual 2: Net-Long Strategy Equity Curve ---
Saved 'net_long_performance_vs_benchmark.png'

--- Generating Visual 3: Strategy Drawdown Plot ---
Saved 'net_long_drawdown.png'

--- Generating Visual 4: Risk Score Distribution Histogram ---
Saved 'risk_score_distribution.png'

--- Calculating Final Performance Metrics ---

           Strategy Performance Summary (Out-of-Sample)
Annualized Return:              12.80%
Annualized Volatility:          18.76%
Sharpe Ratio:                   0.59
Alpha (Annualized):             5.73%
Beta:                           0.53
Maximum Drawdown:               -34.97%
    
Saved 'performance_summary.txt'

All visualizations and performance summary have been generated.


In [4]:
# This script loads the generated risk scores and creates visualizations
# to evaluate the performance of the text-based risk model.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# --- Main Execution Block ---
if __name__ == "__main__":
    # Define file paths
    risk_scores_path = 'text_based_risk_scores.csv'
    market_data_path = r'C:\_Files\Personal\Projects\FIAM\FIAM2025\data\mkt_ind.csv'

    # --- Step 1: Load the Risk Score and Market Data ---
    print(f"--- Loading risk scores from '{risk_scores_path}' ---")
    try:
        df = pd.read_csv(risk_scores_path)
        df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    except FileNotFoundError:
        print(f"Error: Risk scores file not found at '{risk_scores_path}'")
        exit()

    print("Data loaded successfully.")
    
    # --- Load Market Data for S&P 500 Benchmark ---
    try:
        df_market = pd.read_csv(market_data_path)
        
        # --- FIX: Construct the date from 'year' and 'month' columns ---
        if 'year' in df_market.columns and 'month' in df_market.columns:
            df_market['date'] = pd.to_datetime(df_market['year'].astype(str) + '-' + df_market['month'].astype(str) + '-01')
        else:
            raise KeyError("Market data CSV must contain 'year' and 'month' columns.")
            
        # --- FIX: Handle different possible names for the market return column ---
        if 'sprtrn' not in df_market.columns and 'ret' in df_market.columns:
            df_market.rename(columns={'ret': 'sprtrn'}, inplace=True)

    except FileNotFoundError:
        print(f"Error: Market data file not found at '{market_data_path}'")
        df_market = None
    except KeyError as e:
        print(f"Error processing market data file: {e}")
        df_market = None


    # --- Visual 1: Portfolio Sort Analysis ---
    print("\n--- Generating Visual 1: Portfolio Sort Bar Chart ---")
    
    df['month'] = df['date'].dt.to_period('M')
    df['portfolio_decile'] = df.groupby('month')['risk_score'].transform(
        lambda x: pd.qcut(x, 10, labels=False, duplicates='drop')
    )
    
    portfolio_returns = df.groupby('portfolio_decile')['stock_ret'].mean().reset_index()
    portfolio_returns['portfolio_decile'] = portfolio_returns['portfolio_decile'] + 1

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(12, 7))
    sns.barplot(x='portfolio_decile', y='stock_ret', data=portfolio_returns, ax=ax, palette='viridis')
    
    ax.set_title('Average Monthly Return by Text-Based Risk Score Decile', fontsize=16, fontweight='bold')
    ax.set_xlabel('Portfolio Decile (1 = Lowest Risk Score, 10 = Highest Risk Score)', fontsize=12)
    ax.set_ylabel('Average Monthly Stock Return', fontsize=12)
    ax.axhline(0, color='black', linewidth=0.8)
    
    for p in ax.patches:
        ax.annotate(f"{p.get_height():.2%}", 
                    (p.get_x() + p.get_width() / 2., p.get_height()), 
                    ha='center', va='center', xytext=(0, 9), textcoords='offset points')

    plt.tight_layout()
    plt.savefig('portfolio_sort_returns.png', dpi=300)
    print("Saved 'portfolio_sort_returns.png'")
    plt.close()

    # --- Strategy Calculation ---
    long_returns = df[df['portfolio_decile'] == 0].groupby('month')['stock_ret'].mean()
    short_returns = df[df['portfolio_decile'] == 9].groupby('month')['stock_ret'].mean()

    # --- UPDATED: Calculate returns for a 100/25 Net-Long strategy ---
    # We are 100% long the low-risk portfolio and 25% short the high-risk portfolio.
    # This gives the strategy a persistent 75% net-long market exposure.
    strategy_returns = long_returns - (0.25 * short_returns)
    cumulative_returns = (1 + strategy_returns).cumprod()
    
    # --- Align Benchmark Data ---
    sp500_returns = None
    if df_market is not None:
        df_market['month'] = df_market['date'].dt.to_period('M')
        sp500_returns = df_market.set_index('month')['sprtrn']
        sp500_returns = sp500_returns.reindex(strategy_returns.index).dropna()
        strategy_returns = strategy_returns.reindex(sp500_returns.index)


    # --- Performance Summary Calculation ---
    print("\n--- Calculating Performance Summary ---")
    
    # Calculate Drawdown
    running_max = cumulative_returns.cummax()
    drawdown = (cumulative_returns - running_max) / running_max
    max_drawdown = drawdown.min()
    
    # Calculate Key Metrics
    total_return = cumulative_returns.iloc[-1] - 1
    annualized_return = (np.mean(1 + strategy_returns) ** 12) - 1
    annualized_volatility = np.std(strategy_returns) * np.sqrt(12)
    sharpe_ratio = annualized_return / annualized_volatility if annualized_volatility != 0 else 0
    
    # Calculate Alpha and Beta if benchmark is available
    annualized_alpha = None
    beta = None
    if sp500_returns is not None:
        # Alpha
        alpha_series = strategy_returns - sp500_returns
        annualized_alpha = (np.mean(1 + alpha_series) ** 12) - 1
        
        # Beta
        covariance = np.cov(strategy_returns, sp500_returns)[0, 1]
        variance = np.var(sp500_returns)
        beta = covariance / variance if variance != 0 else None

    
    # --- Print Performance Summary ---
    print("--- Net-Long (100/25) Strategy Performance Summary ---")
    print("-" * 50)
    print(f"Total Cumulative Return: {total_return:>18.2%}")
    print(f"Annualized Return: {annualized_return:>24.2%}")
    if annualized_alpha is not None:
        print(f"Annualized Alpha vs S&P 500: {annualized_alpha:>13.2%}")
    if beta is not None:
        print(f"Beta vs S&P 500: {beta:>26.2f}")
    print(f"Annualized Volatility: {annualized_volatility:>20.2%}")
    print(f"Sharpe Ratio: {sharpe_ratio:>29.2f}")
    print(f"Maximum Drawdown: {max_drawdown:>25.2%}")
    print("-" * 50)


    # --- Visual 2: Net-Long Strategy Cumulative Performance ---
    print("\n--- Generating Visual 2: Net-Long Strategy Equity Curve ---")
    
    fig, ax = plt.subplots(figsize=(12, 7))
    cumulative_returns.index = cumulative_returns.index.to_timestamp()
    cumulative_returns.plot(ax=ax, linewidth=2.5, label='Net-Long (100/25) Strategy')
    
    # --- Add S&P 500 Benchmark to the Plot ---
    if sp500_returns is not None:
        sp500_cumulative = (1 + sp500_returns).cumprod()
        sp500_cumulative.index = sp500_cumulative.index.to_timestamp()
        sp500_cumulative.plot(ax=ax, linewidth=2, linestyle='--', color='gray', label='S&P 500 (Long Only)')
        ax.legend()

    ax.set_title('Cumulative Performance of Net-Long (100/25) Strategy vs. Benchmark', fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Cumulative Growth (Log Scale)', fontsize=12)
    ax.set_yscale('log')
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    plt.savefig('net_long_performance_vs_benchmark.png', dpi=300)
    print("Saved 'net_long_performance_vs_benchmark.png'")
    plt.close()
    
    # --- Visual 3: Net-Long Strategy Drawdown Plot ---
    print("\n--- Generating Visual 3: Strategy Drawdown Plot ---")

    fig, ax = plt.subplots(figsize=(12, 7))
    drawdown.plot(ax=ax, kind='area', color='red', alpha=0.3)
    ax.set_title('Net-Long Strategy Drawdown', fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Percentage Drawdown from Peak', fontsize=12)
    ax.yaxis.set_major_formatter(plt.FuncFormatter('{:.0%}'.format))
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    plt.savefig('net_long_drawdown.png', dpi=300)
    print("Saved 'net_long_drawdown.png'")
    plt.close()


    # --- Visual 4: Risk Score Distribution ---
    print("\n--- Generating Visual 4: Risk Score Distribution Histogram ---")

    fig, ax = plt.subplots(figsize=(12, 7))
    sns.histplot(df['risk_score'], kde=True, ax=ax, bins=50)
    
    ax.set_title('Distribution of Text-Based Risk Scores', fontsize=16, fontweight='bold')
    ax.set_xlabel('Predicted Risk Score', fontsize=12)
    ax.set_ylabel('Frequency', fontsize=12)

    plt.tight_layout()
    plt.savefig('risk_score_distribution.png', dpi=300)
    print("Saved 'risk_score_distribution.png'")
    plt.close()
    
    print("\nAll visualizations have been generated and saved.")



--- Loading risk scores from 'text_based_risk_scores.csv' ---
Data loaded successfully.

--- Generating Visual 1: Portfolio Sort Bar Chart ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='portfolio_decile', y='stock_ret', data=portfolio_returns, ax=ax, palette='viridis')


Saved 'portfolio_sort_returns.png'

--- Calculating Performance Summary ---
--- Net-Long (100/25) Strategy Performance Summary ---
--------------------------------------------------
Total Cumulative Return:            323.54%
Annualized Return:                   16.66%
Annualized Alpha vs S&P 500:         3.86%
Beta vs S&P 500:                       0.82
Annualized Volatility:               18.40%
Sharpe Ratio:                          0.91
Maximum Drawdown:                   -23.85%
--------------------------------------------------

--- Generating Visual 2: Net-Long Strategy Equity Curve ---
Saved 'net_long_performance_vs_benchmark.png'

--- Generating Visual 3: Strategy Drawdown Plot ---
Saved 'net_long_drawdown.png'

--- Generating Visual 4: Risk Score Distribution Histogram ---
Saved 'risk_score_distribution.png'

All visualizations have been generated and saved.


In [5]:
# This script loads the generated risk scores and creates visualizations
# to evaluate the performance of the text-based risk model.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# --- Main Execution Block ---
if __name__ == "__main__":
    # Define file paths
    risk_scores_path = 'text_based_risk_scores.csv'
    market_data_path = r'C:\_Files\Personal\Projects\FIAM\FIAM2025\data\mkt_ind.csv'

    # --- Step 1: Load the Risk Score and Market Data ---
    print(f"--- Loading risk scores from '{risk_scores_path}' ---")
    try:
        df = pd.read_csv(risk_scores_path)
        df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    except FileNotFoundError:
        print(f"Error: Risk scores file not found at '{risk_scores_path}'")
        exit()

    print("Data loaded successfully.")
    
    # --- Load Market Data for S&P 500 Benchmark ---
    try:
        df_market = pd.read_csv(market_data_path)
        
        # --- FIX: Construct the date from 'year' and 'month' columns ---
        if 'year' in df_market.columns and 'month' in df_market.columns:
            df_market['date'] = pd.to_datetime(df_market['year'].astype(str) + '-' + df_market['month'].astype(str) + '-01')
        else:
            raise KeyError("Market data CSV must contain 'year' and 'month' columns.")
            
        # --- FIX: Handle different possible names for the market return column ---
        if 'sprtrn' not in df_market.columns and 'ret' in df_market.columns:
            df_market.rename(columns={'ret': 'sprtrn'}, inplace=True)

    except FileNotFoundError:
        print(f"Error: Market data file not found at '{market_data_path}'")
        df_market = None
    except KeyError as e:
        print(f"Error processing market data file: {e}")
        df_market = None


    # --- Visual 1: Portfolio Sort Analysis ---
    print("\n--- Generating Visual 1: Portfolio Sort Bar Chart ---")
    
    df['month'] = df['date'].dt.to_period('M')
    df['portfolio_decile'] = df.groupby('month')['risk_score'].transform(
        lambda x: pd.qcut(x, 10, labels=False, duplicates='drop')
    )
    
    portfolio_returns = df.groupby('portfolio_decile')['stock_ret'].mean().reset_index()
    portfolio_returns['portfolio_decile'] = portfolio_returns['portfolio_decile'] + 1

    plt.style.use('seaborn-v0_8-whitegrid')
    fig, ax = plt.subplots(figsize=(12, 7))
    sns.barplot(x='portfolio_decile', y='stock_ret', data=portfolio_returns, ax=ax, palette='viridis')
    
    ax.set_title('Average Monthly Return by Text-Based Risk Score Decile', fontsize=16, fontweight='bold')
    ax.set_xlabel('Portfolio Decile (1 = Lowest Risk Score, 10 = Highest Risk Score)', fontsize=12)
    ax.set_ylabel('Average Monthly Stock Return', fontsize=12)
    ax.axhline(0, color='black', linewidth=0.8)
    
    for p in ax.patches:
        ax.annotate(f"{p.get_height():.2%}", 
                    (p.get_x() + p.get_width() / 2., p.get_height()), 
                    ha='center', va='center', xytext=(0, 9), textcoords='offset points')

    plt.tight_layout()
    plt.savefig('portfolio_sort_returns.png', dpi=300)
    print("Saved 'portfolio_sort_returns.png'")
    plt.close()

    # --- Strategy Calculation ---
    long_returns = df[df['portfolio_decile'] == 0].groupby('month')['stock_ret'].mean()

    # --- UPDATED: Calculate returns for a Long-Only strategy ---
    # We are 100% long the low-risk portfolio (decile 1).
    strategy_returns = long_returns
    cumulative_returns = (1 + strategy_returns).cumprod()
    
    # --- Align Benchmark Data ---
    sp500_returns = None
    if df_market is not None:
        df_market['month'] = df_market['date'].dt.to_period('M')
        sp500_returns = df_market.set_index('month')['sprtrn']
        sp500_returns = sp500_returns.reindex(strategy_returns.index).dropna()
        strategy_returns = strategy_returns.reindex(sp500_returns.index)


    # --- Performance Summary Calculation ---
    print("\n--- Calculating Performance Summary ---")
    
    # Calculate Drawdown
    running_max = cumulative_returns.cummax()
    drawdown = (cumulative_returns - running_max) / running_max
    max_drawdown = drawdown.min()
    
    # Calculate Key Metrics
    total_return = cumulative_returns.iloc[-1] - 1
    annualized_return = (np.mean(1 + strategy_returns) ** 12) - 1
    annualized_volatility = np.std(strategy_returns) * np.sqrt(12)
    sharpe_ratio = annualized_return / annualized_volatility if annualized_volatility != 0 else 0
    
    # Calculate Alpha and Beta if benchmark is available
    annualized_alpha = None
    beta = None
    if sp500_returns is not None:
        # Alpha
        alpha_series = strategy_returns - sp500_returns
        annualized_alpha = (np.mean(1 + alpha_series) ** 12) - 1
        
        # Beta
        covariance = np.cov(strategy_returns, sp500_returns)[0, 1]
        variance = np.var(sp500_returns)
        beta = covariance / variance if variance != 0 else None

    
    # --- Print Performance Summary ---
    print("--- Long-Only Strategy Performance Summary ---")
    print("-" * 50)
    print(f"Total Cumulative Return: {total_return:>18.2%}")
    print(f"Annualized Return: {annualized_return:>24.2%}")
    if annualized_alpha is not None:
        print(f"Annualized Alpha vs S&P 500: {annualized_alpha:>13.2%}")
    if beta is not None:
        print(f"Beta vs S&P 500: {beta:>26.2f}")
    print(f"Annualized Volatility: {annualized_volatility:>20.2%}")
    print(f"Sharpe Ratio: {sharpe_ratio:>29.2f}")
    print(f"Maximum Drawdown: {max_drawdown:>25.2%}")
    print("-" * 50)


    # --- Visual 2: Net-Long Strategy Cumulative Performance ---
    print("\n--- Generating Visual 2: Long-Only Strategy Equity Curve ---")
    
    fig, ax = plt.subplots(figsize=(12, 7))
    cumulative_returns.index = cumulative_returns.index.to_timestamp()
    cumulative_returns.plot(ax=ax, linewidth=2.5, label='Long-Only Strategy')
    
    # --- Add S&P 500 Benchmark to the Plot ---
    if sp500_returns is not None:
        sp500_cumulative = (1 + sp500_returns).cumprod()
        sp500_cumulative.index = sp500_cumulative.index.to_timestamp()
        sp500_cumulative.plot(ax=ax, linewidth=2, linestyle='--', color='gray', label='S&P 500 (Long Only)')
        ax.legend()

    ax.set_title('Cumulative Performance of Long-Only Strategy vs. Benchmark', fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Cumulative Growth (Log Scale)', fontsize=12)
    ax.set_yscale('log')
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    plt.savefig('long_only_performance_vs_benchmark.png', dpi=300)
    print("Saved 'long_only_performance_vs_benchmark.png'")
    plt.close()
    
    # --- Visual 3: Net-Long Strategy Drawdown Plot ---
    print("\n--- Generating Visual 3: Strategy Drawdown Plot ---")

    fig, ax = plt.subplots(figsize=(12, 7))
    drawdown.plot(ax=ax, kind='area', color='red', alpha=0.3)
    ax.set_title('Long-Only Strategy Drawdown', fontsize=16, fontweight='bold')
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('Percentage Drawdown from Peak', fontsize=12)
    ax.yaxis.set_major_formatter(plt.FuncFormatter('{:.0%}'.format))
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    plt.tight_layout()
    plt.savefig('long_only_drawdown.png', dpi=300)
    print("Saved 'long_only_drawdown.png'")
    plt.close()


    # --- Visual 4: Risk Score Distribution ---
    print("\n--- Generating Visual 4: Risk Score Distribution Histogram ---")

    fig, ax = plt.subplots(figsize=(12, 7))
    sns.histplot(df['risk_score'], kde=True, ax=ax, bins=50)
    
    ax.set_title('Distribution of Text-Based Risk Scores', fontsize=16, fontweight='bold')
    ax.set_xlabel('Predicted Risk Score', fontsize=12)
    ax.set_ylabel('Frequency', fontsize=12)

    plt.tight_layout()
    plt.savefig('risk_score_distribution.png', dpi=300)
    print("Saved 'risk_score_distribution.png'")
    plt.close()
    
    print("\nAll visualizations have been generated and saved.")



--- Loading risk scores from 'text_based_risk_scores.csv' ---
Data loaded successfully.

--- Generating Visual 1: Portfolio Sort Bar Chart ---



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='portfolio_decile', y='stock_ret', data=portfolio_returns, ax=ax, palette='viridis')


Saved 'portfolio_sort_returns.png'

--- Calculating Performance Summary ---
--- Long-Only Strategy Performance Summary ---
--------------------------------------------------
Total Cumulative Return:            425.83%
Annualized Return:                   19.81%
Annualized Alpha vs S&P 500:         6.70%
Beta vs S&P 500:                       1.11
Annualized Volatility:               21.57%
Sharpe Ratio:                          0.92
Maximum Drawdown:                   -30.05%
--------------------------------------------------

--- Generating Visual 2: Long-Only Strategy Equity Curve ---
Saved 'long_only_performance_vs_benchmark.png'

--- Generating Visual 3: Strategy Drawdown Plot ---
Saved 'long_only_drawdown.png'

--- Generating Visual 4: Risk Score Distribution Histogram ---
Saved 'risk_score_distribution.png'

All visualizations have been generated and saved.
