In [1]:
# import libs
import yfinance as yf
import pandas as pd
import numpy as np
import os
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta

In [2]:
# download the data

def download_stock_data(ticker_symbol):
    """
    Download stock data with error handling and validation
    """
    try:
        # Create ticker object and get history
        yticker = yf.Ticker(ticker_symbol)
        df = yticker.history(period='max')
        
        if df.empty:
            raise ValueError(f"No data downloaded for {ticker_symbol}")
            
        print(f"Downloaded {len(df)} days of {ticker_symbol} data")
        
        # Basic validation
        required_columns = ['Open', 'Close', 'Volume']
        missing_cols = [col for col in required_columns if col not in df.columns]
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")
            
        return df
    
    except Exception as e:
        print(f"Error downloading {ticker_symbol}: {str(e)}")
        return None

# Download QQQ data
ticker = "QQQ"
df = download_stock_data(ticker)

df.tail()

Downloaded 6511 days of QQQ data


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-01-17 00:00:00-05:00,522.849976,524.070007,513.109985,521.73999,51837900,0.0,0.0,0.0
2025-01-21 00:00:00-05:00,524.47998,525.969971,520.059998,524.799988,23628000,0.0,0.0,0.0
2025-01-22 00:00:00-05:00,529.570007,533.820007,529.26001,531.51001,33593000,0.0,0.0,0.0
2025-01-23 00:00:00-05:00,529.039978,532.76001,528.450012,532.640015,23096600,0.0,0.0,0.0
2025-01-24 00:00:00-05:00,533.02002,533.789978,528.150024,529.630005,19817900,0.0,0.0,0.0


In [3]:
# transform the data

if df is not None:
    # Add Date and Ticker columns
    df['Date'] = df.index
    df['Ticker'] = ticker
    
    # Generate features for historical prices
    df['Year'] = df.index.year
    df['Month'] = df.index.month
    df['Weekday'] = df.index.weekday
    
    # Calculate historical returns for different periods
    for i in [1, 3, 7, 30, 90, 365]:
        df[f'growth_{i}d'] = df['Close'] / df['Close'].shift(i)
    
    # Calculate future growth (3 days ahead)
    df['future_growth_3d'] = df['Close'].shift(-3) / df['Close']
    
    # Calculate 30-day rolling volatility (annualized)
    df['volatility'] = df['Close'].rolling(30).std() * np.sqrt(252)
    
    # Calculate moving averages
    df['MA20'] = df['Close'].rolling(window=20).mean()
    df['MA50'] = df['Close'].rolling(window=50).mean()
    
    # Calculate YoY growth
    df['YoY_growth'] = (df['Close'] / df['Close'].shift(252)) - 1
    
    # # Save data to file
    # os.makedirs('data', exist_ok=True)
    # df.to_csv(f'data/{ticker}_data.csv')
    # print(f"Data saved to data/{ticker}_data.csv")

In [6]:
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains,Date,Ticker,...,growth_3d,growth_7d,growth_30d,growth_90d,growth_365d,future_growth_3d,volatility,MA20,MA50,YoY_growth
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-01-17 00:00:00-05:00,522.849976,524.070007,513.109985,521.73999,51837900,0.0,0.0,0.0,2025-01-17 00:00:00-05:00,QQQ,...,1.032985,1.012733,1.01105,1.151511,1.416318,1.020892,130.582992,516.589284,514.653027,0.28887
2025-01-21 00:00:00-05:00,524.47998,525.969971,520.059998,524.799988,23628000,0.0,0.0,0.0,2025-01-21 00:00:00-05:00,QQQ,...,1.015676,1.018495,1.00456,1.147658,1.412664,1.009204,131.154217,517.047359,515.320676,0.278284
2025-01-22 00:00:00-05:00,529.570007,533.820007,529.26001,531.51001,33593000,0.0,0.0,0.0,2025-01-22 00:00:00-05:00,QQQ,...,1.03592,1.04795,1.020232,1.137628,1.442995,,135.297505,517.955748,515.855555,0.269454
2025-01-23 00:00:00-05:00,529.039978,532.76001,528.450012,532.640015,23096600,0.0,0.0,0.0,2025-01-23 00:00:00-05:00,QQQ,...,1.020892,1.053564,1.013332,1.128965,1.462139,,139.127686,518.696501,516.254091,0.270494
2025-01-24 00:00:00-05:00,533.02002,533.789978,528.150024,529.630005,19817900,0.0,0.0,0.0,2025-01-24 00:00:00-05:00,QQQ,...,1.009204,1.048606,1.015514,1.117578,1.451194,,141.546103,519.034502,516.580445,0.258093


In [4]:
# visualize
# with template=presentations for larger fonts 

# Create price chart with moving averages
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df['Close'], name='Close', line=dict(color='blue')))
fig.add_trace(go.Scatter(x=df.index, y=df['MA20'], name='MA20', line=dict(color='orange')))
fig.add_trace(go.Scatter(x=df.index, y=df['MA50'], name='MA50', line=dict(color='red')))
fig.update_layout(
        title=f'{ticker} Price with Moving Averages',
        yaxis_title='Price',
        template='presentation'
    )
fig.show()
    
# Create volume chart
fig_volume = px.bar(
        df,
        x=df.index,
        y='Volume',
        title=f'{ticker} Trading Volume'
)
fig_volume.update_layout(template='presentation')
fig_volume.show()
    
# Create YoY growth chart
fig_yoy = px.bar(
        df,
        x=df.index,
        y='YoY_growth',
        title=f'{ticker} Year-over-Year Growth',
        color='YoY_growth',
        color_continuous_scale=['red', 'green']
)
fig_yoy.update_layout(
    yaxis_title='YoY Growth %',
    template='presentation'
)
fig_yoy.show()
