# Momentum-Based Signal Validation



In [130]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
import yfinance as yf

In [131]:
# Statistical libraries
from scipy import stats
from scipy.stats import jarque_bera, shapiro, normaltest
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
np.random.seed(42)

In [132]:
data = yf.download("AMZN AAPL GOOG",start="2020-01-01", end="2023-01-01").head()
missing_values = data.isnull().sum().sum()
print(missing_values)


  data = yf.download("AMZN AAPL GOOG",start="2020-01-01", end="2023-01-01").head()
[*********************100%***********************]  3 of 3 completed

0





*** Data Collection ***

In [133]:

def get_stock_data(tickers, start_date = '2020-01-01', end_date = '2024-07-01'):
    stock_data = {}
    for ticker in tickers:
        data = yf.download(ticker, start= start_date, end = end_date)

        if len(data) > 0:
            stock_data[ticker] = data
        else:
            print("No data found")

    return stock_data

def quality_check(data, ticker):
    # Check quality of data for one stock
    # data = data.xs(ticker, axis=1, level=1)
    total_days = len(data)
    missing_values = data.isnull().sum().sum()

    zero_volume_days = (data['Volume'] == 0).sum()

    daily_returns = data['Close'].pct_change()
    extreme_moves = (abs(daily_returns) > 0.2).sum()

    print(f"{ticker}:")
    print(f"Total observations: {total_days}")
    print(f"Missing values: {missing_values}")
    print(f"Zero volume days: {zero_volume_days}")
    print(f"Extreme moves (>20%): {extreme_moves}")

    return {
        'total_days': total_days,
        'missing_values': missing_values,
        'zero_volume': zero_volume_days,
        'extreme_moves': extreme_moves
    }


def analyze_returns_stats(data, ticker):
    print("Return Statistics for " + ticker)
    returns = data['Close'].pct_change().dropna()

    # basic stats

    mean_return = float(returns.mean())
    volatility = float(returns.std())
    annual_return = mean_return * 252
    annual_vol = volatility * np.sqrt(252)


    print(f"Daily return: {mean_return:.4f} ({annual_return:.2%} annualized)")
    print(f"Daily volatility: {volatility:.4f} ({annual_vol:.2%} annualized)")
    print(f"Skewness: {float(returns.skew()):.3f}")
    print(f"Kurtosis: {float(returns.kurtosis()):.3f}")

    
    #Test for normality
    jb_stat, jb_pvalue = jarque_bera(returns)

    print(f"*** Normality Test ***")
    print(f"Jarque Bera p-value: {jb_pvalue:.6f}")
    if jb_pvalue >= 0.05:
        print("Returns are normal")
    else:
        print('Returns are NOT normal')

    return returns



    




*** Feature Engineering, Creating Momentum Signals ***
- Main Idea: stocks that have been going up, might continue going up

In [148]:
def create_momentum_features(data):
    df = data.copy()
    #one day returns
    df['returns_1d'] = df['Close'].pct_change()

    #momentum periods of one week, 2 weeks, 1 month, 3 months
    momentum_periods = [5, 10, 20, 60]

    for period in momentum_periods:
        df[f'momentum_{period}d'] = df['Close'].pct_change(period)

    # moving averages
    ma_periods = [10, 20, 50]
    for period in ma_periods:
        df[f'sma_{period}d'] = df['Close'].pct_change(period).mean()
        df[f'above_sma_{period}'] =  (df['Close'] > df[f'sma_{period}']).astype(int)
        print(period + "--day moving average signal")

    # volatility

    df['vol_10d'] = df['returns_1d'].rolling(10).std()
    df['vol_20d'] = df['returns_1d'].rolling(20).std()
    print("***Volatility features***")

    # RSI - Relative Strength Index - momentum oscillator
    def calculate_rsi(prices, window = 14):
        delta = prices.diff()
        gain = (delta.where(delta > 0, 0)).rolling(window = window).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window = window).mean()
        rs = gain/loss
        rsi = 100 - (100 / (1 + rs))
        return rsi

    df['rsi'] = calculate_rsi(df['Close'])
    df['rsi_oversold'] = calculate_rsi(df['Close'] < 30).astype(int)
    df['rsi_undersold'] = calculate_rsi(df['close'] > 70).astype(int)

    print("*** RSI momentum indicator ***")

    df['vol_sma_20'] = df['Volume'].rolling(20).mean()
    df['volume_ratio'] = df['Volume'] / df['volume_sma_20']
    df['high_volume'] = (df['volume_ratio'] > 1.5).astype(int)
    print("*** Volume Features ***")
    
    print(f"\n There are {len([col for col in df.columns if col not in data.columns])} new features created\n")

    return df


    


In [149]:
def create_target_variable(data, forward_days = 5):
    print("Creating target: {forward_days}-day forward return")
    target = data['Close'].pct_change(forward_days).shift(-forward_days)

    print(f"Target mean: {target.mean():.4f}")
    print(f"Target std: {target.std():.4f}")
    print(f"Valid predictions: {target.count()}\n")

In [147]:
tickers = ['AAPL', 'TSLA', 'GOOG', 'NVDA', 'MSFT']

stock_data = get_stock_data(tickers)


main_ticker = list(stock_data.keys())[3]

data = stock_data[main_ticker]
print(data)

quality_report = quality_check(data, main_ticker)
    
returns = analyze_returns_stats(data, main_ticker)


  data = yf.download(ticker, start= start_date, end = end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start= start_date, end = end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start= start_date, end = end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start= start_date, end = end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start= start_date, end = end_date)
[*********************100%***********************]  1 of 1 completed

Price            Close        High         Low        Open     Volume
Ticker            NVDA        NVDA        NVDA        NVDA       NVDA
Date                                                                 
2020-01-02    5.971747    5.971747    5.892343    5.942873  237536000
2020-01-03    5.876164    5.919973    5.827127    5.852019  205384000
2020-01-06    5.900805    5.906032    5.756682    5.782819  262636000
2020-01-07    5.972246    6.018046    5.884129    5.929183  314856000
2020-01-08    5.983446    6.024766    5.927937    5.968013  277108000
...                ...         ...         ...         ...        ...
2024-06-24  118.072693  124.420686  118.002715  123.201070  476060900
2024-06-25  126.050171  126.460045  119.282313  121.161716  414192600
2024-06-26  126.360077  128.079527  122.561274  126.090158  362975900
2024-06-27  123.950844  126.370085  122.881182  124.060810  252571700
2024-06-28  123.500984  127.669665  122.711233  124.540657  315516700

[1130 rows x 5 colu


  mean_return = float(returns.mean())
  volatility = float(returns.std())
  print(f"Skewness: {float(returns.skew()):.3f}")
  print(f"Kurtosis: {float(returns.kurtosis()):.3f}")


Rolling [window=2,center=False,axis=0,method=single]
