# Momentum-Based Signal Validation



In [130]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
import yfinance as yf

In [131]:
# Statistical libraries
from scipy import stats
from scipy.stats import jarque_bera, shapiro, normaltest
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
np.random.seed(42)

In [132]:
data = yf.download("AMZN AAPL GOOG",start="2020-01-01", end="2023-01-01").head()
missing_values = data.isnull().sum().sum()
print(missing_values)


  data = yf.download("AMZN AAPL GOOG",start="2020-01-01", end="2023-01-01").head()
[*********************100%***********************]  3 of 3 completed

0





*** Data Collection ***

In [133]:

def get_stock_data(tickers, start_date = '2020-01-01', end_date = '2024-07-01'):
    stock_data = {}
    for ticker in tickers:
        data = yf.download(ticker, start= start_date, end = end_date)

        if len(data) > 0:
            stock_data[ticker] = data
        else:
            print("No data found")

    return stock_data

def quality_check(data, ticker):
    # Check quality of data for one stock
    # data = data.xs(ticker, axis=1, level=1)
    total_days = len(data)
    missing_values = data.isnull().sum().sum()

    zero_volume_days = (data['Volume'] == 0).sum()

    daily_returns = data['Close'].pct_change()
    extreme_moves = (abs(daily_returns) > 0.2).sum()

    print(f"{ticker}:")
    print(f"Total observations: {total_days}")
    print(f"Missing values: {missing_values}")
    print(f"Zero volume days: {zero_volume_days}")
    print(f"Extreme moves (>20%): {extreme_moves}")

    return {
        'total_days': total_days,
        'missing_values': missing_values,
        'zero_volume': zero_volume_days,
        'extreme_moves': extreme_moves
    }


def analyze_returns_stats(data, ticker):
    print("Return Statistics for " + ticker)
    returns = data['Close'].pct_change().dropna()

    # basic stats

    mean_return = float(returns.mean())
    volatility = float(returns.std())
    annual_return = mean_return * 252
    annual_vol = volatility * np.sqrt(252)


    print(f"Daily return: {mean_return:.4f} ({annual_return:.2%} annualized)")
    print(f"Daily volatility: {volatility:.4f} ({annual_vol:.2%} annualized)")
    print(f"Skewness: {float(returns.skew()):.3f}")
    print(f"Kurtosis: {float(returns.kurtosis()):.3f}")

    
    #Test for normality
    jb_stat, jb_pvalue = jarque_bera(returns)

    print(f"*** Normality Test ***")
    print(f"Jarque Bera p-value: {jb_pvalue:.6f}")
    if jb_pvalue >= 0.05:
        print("Returns are normal")
    else:
        print('Returns are NOT normal')

    return returns



    




*** Feature Engineering, Creating Momentum Signals ***
- Main Idea: stocks that have been going up, might continue going up

In [None]:
def create_momentum_features(data):
    df = data.copy()
    #one day returns
    df['returns_1d'] = df['Close'].pct_change()

    #momentum periods of one week, 2 weeks, 1 month, 3 months
    momentum_periods = [5, 10, 20, 60]

    for period in momentum_periods:
        df[f'momentum_{period}d'] = df['Close'].pct_change(period)

    # moving averages
    ma_periods = [10, 20, 50]
    for period in ma_periods:
        df[f'sma_{period}d'] = df['Close'].pct_change(period).mean()
        df[f'above_sma_{period}'] =  (df['Close'] > df[f'sma_{period}']).astype(int)
        print(period + "--day moving average signal")


In [134]:
tickers = ['AAPL', 'TSLA', 'GOOG', 'NVDA', 'MSFT']

stock_data = get_stock_data(tickers)

main_ticker = list(stock_data.keys())[3]

data = stock_data[main_ticker]

quality_report = quality_check(data, main_ticker)
    
returns = analyze_returns_stats(data, main_ticker)
    

  data = yf.download(ticker, start= start_date, end = end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start= start_date, end = end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start= start_date, end = end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start= start_date, end = end_date)
[*********************100%***********************]  1 of 1 completed
  data = yf.download(ticker, start= start_date, end = end_date)
[*********************100%***********************]  1 of 1 completed

NVDA:
Total observations: 1130
Missing values: 0
Zero volume days: Ticker
NVDA    0
dtype: int64
Extreme moves (>20%): Ticker
NVDA    1
dtype: int64
Return Statistics for NVDA
Daily return: 0.0033 (82.17% annualized)
Daily volatility: 0.0341 (54.07% annualized)
Skewness: 0.446
Kurtosis: 4.267
*** Normality Test ***
Jarque Bera p-value: 0.000000
Returns are NOT normal



  mean_return = float(returns.mean())
  volatility = float(returns.std())
  print(f"Skewness: {float(returns.skew()):.3f}")
  print(f"Kurtosis: {float(returns.kurtosis()):.3f}")
