In [32]:
# import libs
import yfinance as yf
import pandas as pd
import numpy as np
import os
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta

In [33]:
# download the data

def download_stock_data(ticker_symbol):
    """
    Fetch the maximum available price history for one ticker via yfinance.

    Parameters
    ----------
    ticker_symbol : str
        Symbol handed to ``yf.Ticker``.

    Returns
    -------
    DataFrame or None
        The history frame when it is non-empty and carries the 'Open',
        'Close' and 'Volume' columns. On any failure (empty result, missing
        column, or an exception raised while fetching) the error is printed
        and None is returned instead of raising.
    """
    expected_columns = ('Open', 'Close', 'Volume')

    try:
        history = yf.Ticker(ticker_symbol).history(period='max')

        # An empty frame means nothing usable came back for this symbol.
        if history.empty:
            raise ValueError(f"No data downloaded for {ticker_symbol}")

        row_count = len(history)
        print(f"Downloaded {row_count} days of {ticker_symbol} data")

        # Collect the absent columns in declaration order for the message.
        absent = []
        for column in expected_columns:
            if column not in history.columns:
                absent.append(column)
        if absent:
            raise ValueError(f"Missing required columns: {absent}")

    except Exception as err:
        # Best-effort contract: report the problem and signal it with None.
        print(f"Error downloading {ticker_symbol}: {err!s}")
        return None

    else:
        return history

# Download QQQ data
ticker = "QQQ"
df = download_stock_data(ticker)

# download_stock_data returns None when the download fails, so only preview
# the frame when one actually came back. Keeping this as a bare conditional
# expression (rather than an if-block) preserves the notebook's rich table
# display of the last expression on success and shows nothing on failure.
df.tail() if df is not None else None

Downloaded 6499 days of QQQ data


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2024-12-30 00:00:00-05:00,515.51001,519.359985,511.829987,515.609985,34584000,0.0,0.0,0.0
2024-12-31 00:00:00-05:00,516.900024,517.659973,510.26001,511.230011,29117000,0.0,0.0,0.0
2025-01-02 00:00:00-05:00,514.299988,516.640015,505.709991,510.230011,36389800,0.0,0.0,0.0
2025-01-03 00:00:00-05:00,513.349976,519.650024,512.530029,518.580017,29059500,0.0,0.0,0.0
2025-01-06 00:00:00-05:00,524.02002,527.919983,522.030029,524.539978,36022900,0.0,0.0,0.0


In [35]:
# transform the data

# Derive calendar, growth, volatility and moving-average features on `df`.
# Skipped entirely when the download failed (df is None).
if df is not None:
    # Add Date and Ticker columns
    df['Date'] = df.index
    df['Ticker'] = ticker
    
    # Generate calendar features from the datetime index
    df['Year'] = df.index.year
    df['Month'] = df.index.month
    df['Weekday'] = df.index.weekday
    
    # Historical growth ratios: Close today / Close i rows earlier.
    # NOTE: shift() counts rows, and the index holds one row per trading
    # session (weekends/holidays are absent), so e.g. growth_365d spans 365
    # sessions (roughly 1.4 years), not 365 calendar days.
    for i in [1, 3, 7, 30, 90, 365]:
        df[f'growth_{i}d'] = df['Close'] / df['Close'].shift(i)
    
    # Future growth ratio (Close 3 rows ahead / Close today); NaN for the
    # last 3 rows because the future price is not known yet.
    df['future_growth_3d'] = df['Close'].shift(-3) / df['Close']
    
    # 30-day rolling volatility (annualized).
    # Volatility is the standard deviation of daily *returns*, scaled by
    # sqrt(252 trading days); taking the std of raw price levels would yield
    # a dollar-scale number that grows with the price instead of a rate.
    daily_returns = df['growth_1d'] - 1
    df['volatility'] = daily_returns.rolling(30).std() * np.sqrt(252)
    
    # Calculate moving averages (NaN until the window is full)
    df['MA20'] = df['Close'].rolling(window=20).mean()
    df['MA50'] = df['Close'].rolling(window=50).mean()
    
    # YoY growth as a fraction (0.25 == +25%), using 252 rows as one
    # trading year
    df['YoY_growth'] = (df['Close'] / df['Close'].shift(252)) - 1
    
    # # Save data to file
    # os.makedirs('data', exist_ok=True)
    # df.to_csv(f'data/{ticker}_data.csv')
    # print(f"Data saved to data/{ticker}_data.csv")

In [41]:
# visualize
# with template='presentation' for larger fonts

# Charts need the feature columns built from a successful download, so the
# whole cell is skipped (with a message) when df is None instead of failing
# with a TypeError on the first df[...] access.
if df is not None:
    # Create price chart with moving averages
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df.index, y=df['Close'], name='Close', line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=df.index, y=df['MA20'], name='MA20', line=dict(color='orange')))
    fig.add_trace(go.Scatter(x=df.index, y=df['MA50'], name='MA50', line=dict(color='red')))
    fig.update_layout(
        title=f'{ticker} Price with Moving Averages',
        yaxis_title='Price',
        template='presentation'
    )
    fig.show()

    # Create volume chart
    fig_volume = px.bar(
        df,
        x=df.index,
        y='Volume',
        title=f'{ticker} Trading Volume'
    )
    fig_volume.update_layout(template='presentation')
    fig_volume.show()

    # Create YoY growth chart
    fig_yoy = px.bar(
        df,
        x=df.index,
        y='YoY_growth',
        title=f'{ticker} Year-over-Year Growth',
        color='YoY_growth',
        color_continuous_scale=['red', 'green']
    )
    fig_yoy.update_layout(
        yaxis_title='YoY Growth %',
        # YoY_growth is stored as a fraction (0.25 == 25%); format the axis
        # and colorbar ticks as percentages so they agree with the "%" title.
        yaxis_tickformat='.0%',
        coloraxis_colorbar_tickformat='.0%',
        template='presentation'
    )
    fig_yoy.show()
else:
    print(f"No data available for {ticker}; skipping charts")
