In [1]:
import pandas as pd
import yfinance as yf
import statsmodels.api as sm
from statsmodels.tsa.stattools import grangercausalitytests, adfuller
from datetime import datetime, timedelta
from statsmodels.tsa.api import VAR 
import warnings
warnings.filterwarnings("ignore")

def get_max_diff(timeseries, alpha=0.05, max_diff=None):
    """Count how many differencing passes make `timeseries` pass the ADF test.

    The series is tested with `adfuller` (NaNs dropped first). While the
    reported p-value is above `alpha`, the series is replaced by its first
    difference (NaNs dropped) and tested again. Progress is printed on every
    attempt.

    Args:
        timeseries: Series-like object exposing `dropna()` and `diff()`.
        alpha: Significance threshold; a p-value at or below it is treated as
            stationary. Defaults to 0.05, the previously hard-coded value.
        max_diff: Optional upper bound on the number of differencing passes.
            `None` (default) keeps the original unbounded behaviour.

    Returns:
        The number of differencing passes applied when the test first passed,
        or `max_diff` if the bound was reached without passing.

    Raises:
        ValueError: If `alpha` is not strictly between 0 and 1, or `max_diff`
            is negative. Errors raised by `adfuller` itself are propagated
            unchanged.
    """
    # Validate up front so a bad argument fails before any model is fitted.
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha!r}")
    if max_diff is not None and max_diff < 0:
        raise ValueError(f"max_diff must be non-negative, got {max_diff!r}")

    diff_count = 0
    while True:
        result = adfuller(timeseries.dropna())
        p_value = result[1]
        print(f"Attempt {diff_count}: ADF Statistic = {result[0]}, p-value = {p_value}")
        if p_value <= alpha:
            print("Series is now stationary.")
            return diff_count
        if max_diff is not None and diff_count >= max_diff:
            # Bound reached: report it instead of differencing indefinitely.
            print(f"Stopping after {diff_count} differencing passes without reaching stationarity.")
            return diff_count
        timeseries = timeseries.diff().dropna()
        diff_count += 1

def diff_n_times(series_a, series_b, n):
    """Apply first differencing `n` times to each of the two series.

    After every differencing pass the NaN entries are dropped, so each pass
    shortens a NaN-free series by one element. With `n == 0` the inputs are
    returned untouched.

    Args:
        series_a: First series (must expose `diff()` / `dropna()`).
        series_b: Second series, differenced the same number of times.
        n: Number of differencing passes.

    Returns:
        Tuple `(differenced_a, differenced_b)`.
    """
    def _difference(series):
        # Drop NaNs after each pass rather than once at the end.
        for _ in range(n):
            series = series.diff().dropna()
        return series

    return _difference(series_a), _difference(series_b)

def get_optimal_lag(data):
    """Select the VAR lag order with the lowest AIC.

    Candidate lags run from 1 up to `min(len(data) // 2, 15)`. A VAR model is
    fitted at each candidate and the lag whose fit reports the smallest AIC is
    returned; ties resolve to the smallest lag.

    Args:
        data: Multivariate data accepted by `VAR` (rows are observations).

    Returns:
        The selected lag order as a 1-based integer.

    Raises:
        ValueError: If `data` has fewer than 2 observations, so there is no
            candidate lag to evaluate. Errors raised while fitting are
            propagated unchanged.
    """
    max_lag = min(len(data) // 2, 15)
    if max_lag < 1:
        # Without this guard min() below fails on an empty list with a
        # message that says nothing about the real cause.
        raise ValueError(
            f"Need at least 2 observations to select a VAR lag, got {len(data)}."
        )

    # The model only depends on the data, so build it once and refit per lag.
    model = VAR(data)
    aic_values = [model.fit(lag).aic for lag in range(1, max_lag + 1)]
    return aic_values.index(min(aic_values)) + 1

# Master list to collect all results: run_granger_analysis() appends one dict
# per (Stock_A, Stock_B) pair on every call, and the accumulated rows are
# written to a single CSV after all day ranges have been processed.
all_results = []

def run_granger_analysis(day_range):
    """Run pairwise Granger-causality tests over the last `day_range` days.

    Downloads hourly "High" prices for two hard-coded ticker groups, finds how
    many differencing passes each ticker needs (`get_max_diff`), and then, for
    every (groupA, groupB) pair, differences both series by the larger of the
    two counts, picks a VAR lag by AIC (`get_optimal_lag`) and tests causality
    in both directions at that lag. One result dict per pair is appended to
    the module-level `all_results` list.

    Args:
        day_range: Length of the lookback window in calendar days, counted
            back from the fixed end date 2025-04-28.

    Returns:
        None. Results are accumulated in `all_results`; progress is printed.
    """
    print(f"\n{'='*30} Running for last {day_range} days {'='*30}")

    groupA = ["AAPL", "GOOG", "MSFT", "NVDA", "META", "AMZN", "TSLA"]
    groupB = ["BBAI", "SOUN", "QUBT", "OKLO", "RKLB", "AI", "AKAM"]
    stocks = groupA + groupB

    end_date = datetime(2025, 4, 28)
    start_date = end_date - timedelta(days=day_range)

    # Timestamps are discarded; rows are identified by position from here on.
    data = yf.download(stocks, start=start_date, end=end_date, interval="1h")["High"].reset_index(drop=True)

    max_diff_per_stock = {}
    for stock in stocks:
        print(f"\nChecking maximum differencing for {stock}...")
        max_diff_per_stock[stock] = get_max_diff(data[stock])

    for stock_a in groupA:
        for stock_b in groupB:
            print(f"\nGranger Causality test between {stock_a} and {stock_b}:")

            # Difference both series the same number of times so they stay
            # comparable; the larger requirement of the two wins.
            series_a, series_b = diff_n_times(
                data[stock_a], data[stock_b],
                max(max_diff_per_stock[stock_a], max_diff_per_stock[stock_b])
            )

            # Align the two series once. Checking the aligned frame also
            # covers pairs whose non-missing rows do not overlap, not just
            # pairs where one series is empty on its own.
            pair_data = pd.DataFrame({stock_a: series_a, stock_b: series_b}).dropna()
            if pair_data.empty:
                print(f"Skipping {stock_a} vs {stock_b} due to empty series.")
                continue

            # Both directions use the same observations, so the lag search is
            # done once and the recorded Optimal_Lag applies to both tests.
            optimal_lag = get_optimal_lag(pair_data)

            # grangercausalitytests checks whether the SECOND column helps
            # predict the FIRST, hence the column order in each call.

            # Test A causes B
            granger_result_ab = grangercausalitytests(
                pair_data[[stock_b, stock_a]], [optimal_lag], verbose=False
            )
            p_ab = granger_result_ab[optimal_lag][0]['ssr_ftest'][1]

            # Test B causes A
            granger_result_ba = grangercausalitytests(
                pair_data[[stock_a, stock_b]], [optimal_lag], verbose=False
            )
            p_ba = granger_result_ba[optimal_lag][0]['ssr_ftest'][1]

            # Append results to global list
            all_results.append({
                "Days": day_range,
                "Stock_A": stock_a,
                "Stock_B": stock_b,
                "Optimal_Lag": optimal_lag,
                "P_Value_A_to_B": round(p_ab, 4),
                "Causal_A_to_B": "Yes" if p_ab < 0.05 else "No",
                "P_Value_B_to_A": round(p_ba, 4),
                "Causal_B_to_A": "Yes" if p_ba < 0.05 else "No"
            })

# Run the analysis for multiple day ranges. Each value is a lookback window in
# calendar days, counted back from the end date fixed inside
# run_granger_analysis; every call appends its rows to all_results.
day_ranges = [30, 60, 90, 180, 360, 720]
for days in day_ranges:
    run_granger_analysis(days)

# Save all results into one CSV file. The relative path resolves against the
# current working directory; index=False leaves out the row-index column.
all_results_df = pd.DataFrame(all_results)
all_results_df.to_csv("all_granger_results.csv", index=False)
print("\n✅ All results saved to 'all_granger_results.csv'")



YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  14 of 14 completed



Checking maximum differencing for AAPL...
Attempt 0: ADF Statistic = -1.781085832464729, p-value = 0.3898916565525684
Attempt 1: ADF Statistic = -11.285198800761595, p-value = 1.4237333661747784e-20
Series is now stationary.

Checking maximum differencing for GOOG...
Attempt 0: ADF Statistic = -2.033082025526113, p-value = 0.27225874829682783
Attempt 1: ADF Statistic = -11.522610784509283, p-value = 4.0061198075340634e-21
Series is now stationary.

Checking maximum differencing for MSFT...
Attempt 0: ADF Statistic = -1.610575319667589, p-value = 0.47767362460909585
Attempt 1: ADF Statistic = -4.598574011444424, p-value = 0.00012995363157943391
Series is now stationary.

Checking maximum differencing for NVDA...
Attempt 0: ADF Statistic = -2.2457887215645993, p-value = 0.19004826726600754
Attempt 1: ADF Statistic = -4.728316550943215, p-value = 7.442787968633953e-05
Series is now stationary.

Checking maximum differencing for META...
Attempt 0: ADF Statistic = -2.120904391466736, p-val

[*********************100%***********************]  14 of 14 completed



Checking maximum differencing for AAPL...
Attempt 0: ADF Statistic = -1.7584499262832298, p-value = 0.40127639305411744
Attempt 1: ADF Statistic = -8.082441977532183, p-value = 1.4492484150162045e-12
Series is now stationary.

Checking maximum differencing for GOOG...
Attempt 0: ADF Statistic = -1.889682537183095, p-value = 0.3369302288738906
Attempt 1: ADF Statistic = -15.859822387771999, p-value = 9.2398284531186e-29
Series is now stationary.

Checking maximum differencing for MSFT...
Attempt 0: ADF Statistic = -2.9212134442335116, p-value = 0.04293619065926928
Series is now stationary.

Checking maximum differencing for NVDA...
Attempt 0: ADF Statistic = -2.2720200036925777, p-value = 0.18115090484977975
Attempt 1: ADF Statistic = -6.68419682833227, p-value = 4.266835115138362e-09
Series is now stationary.

Checking maximum differencing for META...
Attempt 0: ADF Statistic = -1.987627094686228, p-value = 0.29202439876972763
Attempt 1: ADF Statistic = -6.496881710353983, p-value = 1

[*********************100%***********************]  14 of 14 completed



Checking maximum differencing for AAPL...
Attempt 0: ADF Statistic = -1.2367929780790652, p-value = 0.6575715103692713
Attempt 1: ADF Statistic = -9.756962888935547, p-value = 7.743405640931824e-17
Series is now stationary.

Checking maximum differencing for GOOG...
Attempt 0: ADF Statistic = -1.6764081883350759, p-value = 0.44330683564024426
Attempt 1: ADF Statistic = -9.571589998168262, p-value = 2.2805991919228824e-16
Series is now stationary.

Checking maximum differencing for MSFT...
Attempt 0: ADF Statistic = -2.116600776192431, p-value = 0.23789195125730683
Attempt 1: ADF Statistic = -6.558165635351132, p-value = 8.512569487166703e-09
Series is now stationary.

Checking maximum differencing for NVDA...
Attempt 0: ADF Statistic = -1.4662691173564784, p-value = 0.550126681998717
Attempt 1: ADF Statistic = -21.266489573183165, p-value = 0.0
Series is now stationary.

Checking maximum differencing for META...
Attempt 0: ADF Statistic = -0.9421392391092874, p-value = 0.7737789265032

[*********************100%***********************]  14 of 14 completed



Checking maximum differencing for AAPL...
Attempt 0: ADF Statistic = -1.3472499115563519, p-value = 0.607287978517753
Attempt 1: ADF Statistic = -10.982665282594972, p-value = 7.391277946969822e-20
Series is now stationary.

Checking maximum differencing for GOOG...
Attempt 0: ADF Statistic = -1.182916038379399, p-value = 0.6809012902364383
Attempt 1: ADF Statistic = -9.957974651844253, p-value = 2.4156464576909826e-17
Series is now stationary.

Checking maximum differencing for MSFT...
Attempt 0: ADF Statistic = -1.5699809537566356, p-value = 0.49865319695974114
Attempt 1: ADF Statistic = -26.57090149225406, p-value = 0.0
Series is now stationary.

Checking maximum differencing for NVDA...
Attempt 0: ADF Statistic = -1.459581936137465, p-value = 0.553402497930744
Attempt 1: ADF Statistic = -19.78575720929928, p-value = 0.0
Series is now stationary.

Checking maximum differencing for META...
Attempt 0: ADF Statistic = -1.6147664886408373, p-value = 0.4754740713284508
Attempt 1: ADF St

[*********************100%***********************]  14 of 14 completed



Checking maximum differencing for AAPL...
Attempt 0: ADF Statistic = -2.538093740748357, p-value = 0.10647862384304785
Attempt 1: ADF Statistic = -15.121264221190218, p-value = 7.45429958302826e-28
Series is now stationary.

Checking maximum differencing for GOOG...
Attempt 0: ADF Statistic = -2.019115045230883, p-value = 0.278255217228779
Attempt 1: ADF Statistic = -10.101324196628946, p-value = 1.0576117587994333e-17
Series is now stationary.

Checking maximum differencing for MSFT...
Attempt 0: ADF Statistic = -1.9868693922780587, p-value = 0.2923599237321233
Attempt 1: ADF Statistic = -14.962754047122548, p-value = 1.2326188699322497e-27
Series is now stationary.

Checking maximum differencing for NVDA...
Attempt 0: ADF Statistic = -2.8642931963347484, p-value = 0.049660561207900224
Series is now stationary.

Checking maximum differencing for META...
Attempt 0: ADF Statistic = -1.792460935925799, p-value = 0.3842110014021131
Attempt 1: ADF Statistic = -8.104682523056551, p-value =

[*********************100%***********************]  14 of 14 completed



Checking maximum differencing for AAPL...
Attempt 0: ADF Statistic = -1.763671162206232, p-value = 0.39864119478488486
Attempt 1: ADF Statistic = -10.916477391039296, p-value = 1.0643618528107436e-19
Series is now stationary.

Checking maximum differencing for GOOG...
Attempt 0: ADF Statistic = -1.8197774597857923, p-value = 0.3706878774426035
Attempt 1: ADF Statistic = -11.910132026727554, p-value = 5.320679057119158e-22
Series is now stationary.

Checking maximum differencing for MSFT...
Attempt 0: ADF Statistic = -2.0535817466837276, p-value = 0.26358430479915895
Attempt 1: ADF Statistic = -11.037512227019866, p-value = 5.46991755169715e-20
Series is now stationary.

Checking maximum differencing for NVDA...
Attempt 0: ADF Statistic = -1.5259270139952639, p-value = 0.5206488634053015
Attempt 1: ADF Statistic = -11.43445804159633, p-value = 6.3982123836785115e-21
Series is now stationary.

Checking maximum differencing for META...
Attempt 0: ADF Statistic = -1.6167646163743206, p-va