In [1]:
#!/usr/bin/env python3
"""
HRP Portfolio Optimizer
Author: John E. Putman II
Date: 12/13/2024
"""

# Standard library imports
import sys
import warnings
from pathlib import Path

# Configure path
notebook_dir = Path.cwd()
project_root = notebook_dir.parent
sys.path.append(str(project_root))

# Third-party imports
import matplotlib.pyplot as plt
import pandas as pd
import pandas as pd
from typing import Optional

# Suppress warnings
warnings.filterwarnings('ignore')

# Local application imports
from src.proc.data_handler import DataHandler
from src.hrp.advanced import HRPPortfolio
from src.viz.plots import plot_portfolio_analysis


In [2]:
# Environment Information
print(f"Python version: {sys.version}")
print("\nKey package versions:")
for package in ['numpy', 'pandas', 'matplotlib', 'yfinance']:
    print(f"{package}: {__import__(package).__version__}")

Python version: 3.11.2 (tags/v3.11.2:878ead1, Feb  7 2023, 16:38:35) [MSC v.1934 64 bit (AMD64)]

Key package versions:
numpy: 1.23.5
pandas: 1.5.3
matplotlib: 3.7.1
yfinance: 0.2.36


In [3]:
class PortfolioManager:
    def __init__(self, risk_free_rate: float = 0.02):
        self.data_handler = DataHandler()
        self.portfolio = HRPPortfolio(risk_free_rate=risk_free_rate)
        
    def run_portfolio_optimization(
        self,
        data_path: str,
        min_market_cap: int,
        start_date: str,
        end_date: str,
        num_clusters: int = 3,
        risk_measure: str = "variance"
    ):
        # Load and prepare data
        df = self.data_handler.load_market_data(data_path, min_market_cap)
        df = self.data_handler.calculate_quality_metrics(df)
        hrp_df = self.data_handler.prepare_hrp_data(df)
        
        # Download data using yfinance directly instead of async
        data = yf.download(
            hrp_df['ticker'].tolist(),
            start=start_date,
            end=end_date
        )['Adj Close']
        
        # Optimize portfolio
        portfolio_metrics = self.portfolio.optimize_portfolio(
            data=data,
            market_caps=hrp_df['market_cap'].values,
            num_clusters=num_clusters,
            risk_measure=risk_measure
        )
        
        results = pd.DataFrame({
            'Weight': portfolio_metrics.weights,
            'Cluster': portfolio_metrics.clusters
        })
        results['Market_Cap'] = hrp_df.set_index('ticker')['market_cap']
        
        fig = plot_portfolio_analysis(
            portfolio_metrics.returns,
            portfolio_metrics.weights,
            portfolio_metrics.linkage_matrix,
            portfolio_metrics.sharpe_ratio,
            portfolio_metrics.volatility
        )
        plt.show()
        
        return results

In [4]:
if __name__ == "__main__":
    manager = PortfolioManager(risk_free_rate=0.02)
    results = manager.run_portfolio_optimization(
        data_path='../data/sp500_data.csv',
        min_market_cap=2000,
        start_date="2019-05-01",
        end_date="2024-12-10"
    )
    
    if results is not None:
        print("\nPortfolio Summary:")
        print(results.sort_values('Weight', ascending=False).head(10))

NameError: name 'yf' is not defined