# 05 Significance Tests

In [6]:
# Load the autoreload extension and set it to mode 2 so that edits to
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys

# Add the parent directory to the import path; the notebook later imports
# from `src.utils`, which presumably lives there — TODO confirm.
sys.path.append("../")

import jupyter_black

# Activate jupyter_black for this session (presumably formats cells with Black).
jupyter_black.load()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Wilcoxon signed-rank test

In [11]:
import pandas as pd
import numpy as np
from scipy.stats import wilcoxon
from src.utils import inplace_normalise_df, inplace_zscore_df

## Get the results

In [8]:
# Granger-causality results; the generic "Lag" column is tagged with the
# method name so it stays distinguishable once the frames are merged.
df_data_granger = pd.read_csv(
    "../data/results/granger_causality_p_values.csv"
).rename(columns={"Lag": "Lag Granger"})

# Transfer-entropy results, with the lag column tagged the same way.
df_data_te = pd.read_csv("../data/results/transfer_entropy_bits.csv").rename(
    columns={"Lag": "Lag TE"}
)

# Correlation table; the unnamed first CSV column is relabelled "index".
df_data_corr = pd.read_csv("../data/results/correlation.csv").rename(
    columns={"Unnamed: 0": "index"}
)

## Transfer Entropy vs Pearson's Correlation

In [12]:
# Reshape the wide correlation table into one row per (from, to) pair.
df_corr_all_combinations = df_data_corr.melt(
    id_vars="index",
    var_name="to_column",
    value_name="Correlation",
).rename(columns={"index": "from_column"})

# Attach the correlation of each pair to its transfer-entropy row; only
# pairs present in both tables survive the inner join.
df_te_corr = pd.merge(
    df_data_te,
    df_corr_all_combinations,
    how="inner",
    on=["from_column", "to_column"],
)

# Only the magnitude of the correlation is compared, so drop the sign.
df_te_corr["Correlation"] = df_te_corr["Correlation"].abs()

# The helper works in place (as its name suggests) and, per the displayed
# output, adds a `<column>_normalized` column for each measure.
inplace_normalise_df(df_te_corr, column="TE")
inplace_normalise_df(df_te_corr, column="Correlation")

df_te_corr

Unnamed: 0,Lag TE,TE,from_column,to_column,Correlation,TE_normalized,Correlation_normalized
0,111,4.092433,dew_point_temp_c,real_hum_pct,0.139494,1.0,0.145081
1,111,4.075979,temp_c,real_hum_pct,0.220182,0.995495,0.232046
2,128,3.815822,press_kpa,real_hum_pct,0.231424,0.924268,0.244162
3,137,3.617468,temp_c,press_kpa,0.236389,0.869962,0.249514
4,121,3.560052,dew_point_temp_c,press_kpa,0.320616,0.854242,0.340291
5,162,2.788516,temp_c,dew_point_temp_c,0.932714,0.643007,1.0
6,110,2.592789,wind_speed_kmh,real_hum_pct,0.092743,0.589419,0.094694
7,155,2.400088,wind_speed_kmh,temp_c,0.061876,0.536661,0.061426
8,166,2.373941,wind_speed_kmh,dew_point_temp_c,0.095685,0.529502,0.097865
9,137,2.075443,wind_speed_kmh,press_kpa,0.356613,0.447778,0.379089


In [10]:
# Paired Wilcoxon signed-rank test on the raw TE and absolute-correlation
# values of each (from_column, to_column) pair.
statistic, p_value = wilcoxon(
    x=df_te_corr["TE"],
    y=df_te_corr["Correlation"],
)

# Report the statistic and its p-value, one per line.
print(f"Wilcoxon statistic: {statistic}", f"P-value: {p_value}", sep="\n")

Wilcoxon statistic: 0.0
P-value: 6.103515625e-05


In [13]:
# Same paired test, this time on the normalised columns rather than the
# raw values.
statistic, p_value = wilcoxon(
    x=df_te_corr["TE_normalized"],
    y=df_te_corr["Correlation_normalized"],
)

# Report the statistic and its p-value, one per line.
print(f"Wilcoxon statistic: {statistic}", f"P-value: {p_value}", sep="\n")

Wilcoxon statistic: 15.0
P-value: 0.018566712279734197




If the p-value is smaller than a chosen significance level (commonly 0.05), we can reject the null hypothesis and conclude that there is a significant difference between the Pearson's correlation and Transfer Entropy results.



## Transfer Entropy vs Granger Causality

In [14]:
# Keep only the (from_column, to_column) pairs reported by both methods.
df_te_granger = pd.merge(
    df_data_te,
    df_data_granger,
    how="inner",
    on=["from_column", "to_column"],
)

# Normalise both measures; the helper works in place (as its name suggests).
# NOTE(review): the Granger file name suggests this column holds p-values,
# where smaller means stronger evidence — the opposite direction to TE.
# Confirm that comparing the two directly is intended.
inplace_normalise_df(df_te_granger, column="TE")
inplace_normalise_df(df_te_granger, column="GrangerCausality")

df_te_granger

Unnamed: 0,Lag TE,TE,from_column,to_column,Lag Granger,GrangerCausality,TE_normalized,GrangerCausality_normalized
0,111,4.075979,temp_c,real_hum_pct,64,0.0,1.0,0.0
1,137,3.617468,temp_c,press_kpa,15,0.0,0.873898,0.0
2,121,3.560052,dew_point_temp_c,press_kpa,33,0.0,0.858108,0.0
3,162,2.788516,temp_c,dew_point_temp_c,0,0.0,0.645916,0.0
4,110,2.592789,wind_speed_kmh,real_hum_pct,30,0.0,0.592087,0.0
5,137,2.075443,wind_speed_kmh,press_kpa,46,0.0,0.449804,0.0
6,161,1.329664,visibility_km,dew_point_temp_c,64,0.0,0.244696,0.0
7,157,1.283395,visibility_km,temp_c,70,0.0,0.231971,0.0
8,105,1.225943,visibility_km,press_kpa,166,0.0,0.216171,0.0
9,14,0.439938,visibility_km,wind_speed_kmh,10,2e-20,0.0,1.0


In [15]:
# Row counts, in order: TE results, Granger results, pairs shared by both.
print(*map(len, (df_data_te, df_data_granger, df_te_granger)))

15 15 10


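Each method reports 15 relationships, but only 10 (from_column, to_column) pairs appear in both result sets. The remaining 5 relationships found by each method have no counterpart in the other and are therefore left out of the paired tests below.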

In [8]:
# Paired Wilcoxon signed-rank test on the raw TE and Granger values of the
# pairs present in both result sets.
statistic, p_value = wilcoxon(
    x=df_te_granger["TE"],
    y=df_te_granger["GrangerCausality"],
)

# Report the statistic and its p-value, one per line.
print(f"Wilcoxon statistic: {statistic}", f"P-value: {p_value}", sep="\n")


Wilcoxon statistic: 0.0
P-value: 0.001953125


In [17]:
# Same paired test on the normalised versions of both measures.
te_vs_granger = wilcoxon(
    x=df_te_granger["TE_normalized"],
    y=df_te_granger["GrangerCausality_normalized"],
)
statistic, p_value = te_vs_granger

# Report the statistic and its p-value, one per line.
print(f"Wilcoxon statistic: {statistic}", f"P-value: {p_value}", sep="\n")

Wilcoxon statistic: 9.5
P-value: 0.083984375


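If the p-value is smaller than a chosen significance level (commonly 0.05), we can reject the null hypothesis and conclude that there is a significant difference between the Granger Causality and Transfer Entropy results. With the outputs shown above, the raw values give p < 0.05, whereas the normalised values give p ≈ 0.084, so for the normalised comparison the null hypothesis is not rejected at the 0.05 level.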



## Lags from Transfer Entropy vs Granger Causality

In [19]:
# Paired Wilcoxon signed-rank test on the lags selected by each method,
# restricted to the relationships present in both result sets.
statistic, p_value = wilcoxon(df_te_granger["Lag TE"], df_te_granger["Lag Granger"])

# Display the test results. `wilcoxon` returns the signed-rank statistic,
# not an F-statistic, so the label matches the other test cells.
print(f"Wilcoxon statistic: {statistic}")
print(f"P-value: {p_value}")

F-statistic: 3.0
P-value: 0.009765625


If the p-value is smaller than a chosen significance level (commonly 0.05), we can reject the null hypothesis and conclude that there is a significant difference between the lags reported by Granger Causality and the lags reported by Transfer Entropy.

