In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt

In [2]:
# Date windows, as 'YYYY-MM-DD' strings, for the three stages of the notebook.
train_start, train_end = "2021-01-01", "2023-12-31"    # training window
test_start, test_end = "2024-01-01", "2024-12-31"      # testing window
forecast_start = "2025-01-01"                          # forecast window opens here
# The forecast window runs up to the local date on which the notebook is executed.
current_date = f"{datetime.today():%Y-%m-%d}"

In [3]:
# Symbols processed by every per-ticker loop in this notebook.
tickers = [
    'AAPL',
    'MSFT',
    'AMZN',
    'GOOGL',
    'NVDA',
]
print(f"Selected tickers for demo: {tickers}")

Selected tickers for demo: ['AAPL', 'MSFT', 'AMZN', 'GOOGL', 'NVDA']


# Define function to fetch stock data

In [4]:
def fetch_stock_data(ticker, start, end):
    """
    Fetch historical stock data from Yahoo Finance.

    Prices are requested with ``auto_adjust=True``. Any failure (network
    error, missing column, ...) is reported with ``print`` and turned into a
    ``None`` return so that a loop over several tickers keeps going.

    Args:
        ticker (str): Stock symbol
        start (str): Start date, passed to ``yf.download`` unchanged
        end (str): End date, passed to ``yf.download`` unchanged
    Returns:
        pd.DataFrame or None: Data with Date, Ticker, Open, High, Low, Close,
        Volume as flat (single-level) columns, ``Date`` formatted as a
        'YYYY-MM-DD' string; ``None`` when nothing was downloaded or an
        error occurred.
    """
    try:
        df = yf.download(ticker, start=start, end=end, progress=False, auto_adjust=True)
        if df.empty:
            print(f"No data found for {ticker} in {start} to {end}")
            return None
        # yfinance can hand back (Price, Ticker) MultiIndex columns even for a
        # single symbol. Left as-is, the column selection below would return a
        # MultiIndex frame, which breaks concatenation across tickers, the
        # 'Ticker' filter and the CSV header. Keep only the price level.
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = df.columns.get_level_values(0)
            df.columns.name = None
        df = df.reset_index()
        df['Ticker'] = ticker
        df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')
        return df[['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Volume']]
    except Exception as e:
        # Deliberate best-effort: one failing ticker must not abort the batch.
        print(f"Error for {ticker}: {e}")
        return None

# Fetch data for all periods

In [None]:
def _day_after(date_str):
    """Return the calendar day following `date_str` as a 'YYYY-MM-DD' string."""
    return (pd.Timestamp(date_str) + pd.Timedelta(days=1)).strftime('%Y-%m-%d')


train_data = []
test_data = []
forecast_data = []

for ticker in tickers:
    # yfinance treats `end` as exclusive. train_end / test_end name the last
    # day that belongs to the window, so request up to the following day;
    # otherwise a trading day that falls exactly on the end date is dropped.

    # Training data (2021-2023)
    df_train = fetch_stock_data(ticker, train_start, _day_after(train_end))
    if df_train is not None:
        train_data.append(df_train)

    # Testing data (2024)
    df_test = fetch_stock_data(ticker, test_start, _day_after(test_end))
    if df_test is not None:
        test_data.append(df_test)

    # Forecast data (2025 up to today). `current_date` stays the exclusive
    # end, so the session of the run date itself is not requested.
    df_forecast = fetch_stock_data(ticker, forecast_start, current_date)
    if df_forecast is not None:
        forecast_data.append(df_forecast)

# Concatenate all data (an empty frame stands in when every download failed)
df_train_all = pd.concat(train_data, ignore_index=True) if train_data else pd.DataFrame()
df_test_all = pd.concat(test_data, ignore_index=True) if test_data else pd.DataFrame()
df_forecast_all = pd.concat(forecast_data, ignore_index=True) if forecast_data else pd.DataFrame()

# Save to CSV files
df_train_all.to_csv("stock_data_train.csv", index=False)
df_test_all.to_csv("stock_data_test.csv", index=False)
df_forecast_all.to_csv("stock_data_forecast.csv", index=False)
print("Data saved to stock_data_train.csv, stock_data_test.csv, stock_data_forecast.csv")

# Calculate financial metrics

In [None]:
def calculate_metrics(df, risk_free_rate=0.02):  # 2% annual risk-free rate
    """
    Calculate volatility, Sharpe Ratio, VaR, and Expected Return.

    Side effect: a 'Returns' column (daily percentage change of 'Close') is
    written into ``df``; pass a copy if the caller's frame must stay untouched.

    Args:
        df (pd.DataFrame): DataFrame with a 'Close' column. Rows are assumed
            to be ordered oldest-to-newest, since the first and last rows are
            used as the start and end prices.
        risk_free_rate (float): Annual risk-free rate as a fraction (0.02 = 2%)
    Returns:
        dict: Metrics for the stock with keys 'Volatility (%)', 'Sharpe Ratio',
        'VaR (95%) ($)' and 'Expected Return (%)'. Every value is NaN when
        ``df`` has fewer than two usable closes (no return can be computed).
    """
    # Daily returns; the first row has no predecessor and is NaN.
    df['Returns'] = df['Close'].pct_change()
    returns = df['Returns'].dropna()

    if returns.empty:
        nan = float('nan')
        return {
            'Volatility (%)': nan,
            'Sharpe Ratio': nan,
            'VaR (95%) ($)': nan,
            'Expected Return (%)': nan
        }

    # Volatility (annualized standard deviation of daily returns)
    annual_vol = returns.std() * np.sqrt(252)  # as a fraction
    volatility = annual_vol * 100  # Annualized in percentage

    # Positional access: the frame is usually a filtered slice whose integer
    # index labels do not start at 0, so label lookups like [0] / [-1] fail.
    first_close = df['Close'].iloc[0]
    last_close = df['Close'].iloc[-1]

    # Sharpe Ratio = (annualized return - risk-free rate) / annualized volatility.
    # N prices span N-1 return periods; numerator and denominator are both
    # fractions so the ratio is unit-consistent.
    n_periods = len(df) - 1
    annual_return = (last_close / first_close) ** (252 / n_periods) - 1
    sharpe_ratio = (annual_return - risk_free_rate) / annual_vol if annual_vol != 0 else 0

    # Value at Risk (VaR) at 95% confidence: empirical 5th percentile of daily
    # returns, scaled to a month with the square-root-of-time rule.
    var_95 = np.percentile(returns, 5) * np.sqrt(21)  # 21 trading days in a month
    var_95_dollar = var_95 * last_close  # In dollar terms (negative = loss)

    # Expected monthly return (simple historical average daily return x 21)
    monthly_return = returns.mean() * 21  # Approximate monthly return
    expected_return = monthly_return * 100  # In percentage

    return {
        'Volatility (%)': volatility,
        'Sharpe Ratio': sharpe_ratio,
        'VaR (95%) ($)': var_95_dollar,
        'Expected Return (%)': expected_return
    }

# Build a {ticker: metrics dict} mapping from the training window
metrics_results = {}
for ticker in tickers:
    ticker_rows = df_train_all['Ticker'] == ticker
    df_ticker = df_train_all[ticker_rows].copy()  # copy: calculate_metrics adds a column
    if df_ticker.empty:
        print(f"No data for {ticker} to calculate metrics")
        continue
    metrics = calculate_metrics(df_ticker)
    metrics_results[ticker] = metrics

# Summary table: AAPL's figures serve as the example, zeros when AAPL is absent
zero_metrics = {
    'Volatility (%)': 0,
    'Sharpe Ratio': 0,
    'VaR (95%) ($)': 0,
    'Expected Return (%)': 0,
}
example_metrics = metrics_results.get('AAPL', zero_metrics)
table_rows = [
    ("Volatility\tRisk level\t\t{:.2f}%", 'Volatility (%)'),
    ("Sharpe Ratio\tRisk-adjusted return\t{:.2f}", 'Sharpe Ratio'),
    ("VaR (95%)\tMax loss\t\t${:.2f}", 'VaR (95%) ($)'),
    ("Expected Return\tForecasted gain\t\t{:.2f}%", 'Expected Return (%)'),
]

print("\nTable of Calculations:")
print("Metric\t\tPurpose\t\t\tExample Result")
for row_template, metric_key in table_rows:
    print(row_template.format(example_metrics[metric_key]))

# Prepare data for LSTM prediction