In [22]:
import pandas as pd
import numpy as np
from numpy import ma, atleast_2d, pi, sqrt, sum, transpose


def get_entropy(df):
    hist, bins = np.histogram(df.dropna().values, bins=2, density=True)
    pdf = hist/hist.sum()
    ## log base 2 returns H(X) in bits
    return -np.sum( pdf * ma.log2(pdf).filled(0)) 


def non_linear_transfer_entropy(df: pd.DataFrame, endog: str, exog: str, lags=1):
    df = df[[exog, endog]]

    for col_name in list(df.columns):
        for t in range(1, lags + 1):
            df[col_name + '_lag' + str(t)] = df[col_name].shift(t)

    ## Initialise list to return TEs
    entropy = [0, 0]

    ## bidirectional
    for i, (X, Y) in enumerate({exog:endog, endog:exog}.items()):

        ### Entropy calculated using Probability Density Estimation:
            # Following: https://stat.ethz.ch/education/semesters/SS_2006/CompStat/sk-ch2.pdf
            # Also: https://www.cs.cmu.edu/~aarti/Class/10704_Spring15/lecs/lec5.pdf

        ## Note Lagged Terms
        X_lagged = X + '_lag' + str(lags)
        Y_lagged = Y + '_lag' + str(lags)

        ### Estimate PDF using Gaussian Kernels and use H(x) = p(x) log p(x)

        ## 1. H(Y, Y-t, X-t)  
        H1 = get_entropy(df[[Y, Y_lagged, X_lagged]])
        
        ## 2. H(Y-t, X-t)
        H2 = get_entropy(df[[X_lagged, Y_lagged]]) 
        ## 3. H(Y, Y-t)  
        H3 = get_entropy(df[[Y, Y_lagged]])
        ## 4. H(Y-t)  
        H4 = get_entropy(df[[Y_lagged]])                

        print(H1, H2, H3, H4)
        ### Calculate Conditonal Entropy using: H(Y|X-t,Y-t) = H(Y,X-t,Y-t) - H(X-t,Y-t)
        conditional_entropy_joint =  H1 - H2

        ### And Conditional Entropy independent of X(t) H(Y|Y-t) = H(Y,Y-t) - H(Y-t)            
        conditional_entropy_independent = H3 - H4

        ### Directional Transfer Entropy is the difference between the conditional entropies
        entropy[i] =  conditional_entropy_independent - conditional_entropy_joint
        
    return entropy

df = pd.read_csv("data/results_day_binned_with_states.csv")
endog = 'Anti-Regulation Fear-of-Regulation'
exog = 'Daily Background Checks'
df = df[[exog, endog]]

print(non_linear_transfer_entropy(df, endog, exog, lags = 2))

0.8645801663882439 0.985909442028652 0.12682573008667578 0.15935006268563445
0.9956608874301811 0.985909442028652 0.9276824136249767 0.8976844934141643
[0.08880494304144951, 0.02024647480928332]


In [24]:
variables = [
    'Pro-Regulation',
    'Anti-Regulation',
    'Self-Defense',
    'Fear-of-Regulation',
    'Pro-Regulation Self-Defense',
    'Anti-Regulation Self-defense',
    'Pro-Regulation Fear-of-Regulation',
    'Anti-Regulation Fear-of-Regulation',
    'Daily Background Checks'
]

df = pd.read_csv("data/results_day_binned_with_states.csv")

for endog in variables:
    for exog in variables:
        if endog == exog:
            continue
        te = non_linear_transfer_entropy(df, endog, exog, lags = 1)
        print(te)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col_name + '_lag' + str(t)] = df[col_name].shift(t)


0.11432167035215779 0.12565805017256743 0.12565805017256726 0.15790637433488172
0.11432167035215779 0.12565805017256743 0.12565805017256743 0.15790637433488186
[-0.020911944341904826, -0.020911944341904798]
0.13668132042261708 0.15790637433488186 0.12565805017256726 0.15790637433488172
0.13668132042261708 0.15790637433488186 0.12565805017256743 0.15790637433488186
[-0.011023270250049677, -0.01102327025004965]
0.13668132042261708 0.15790637433488172 0.12565805017256726 0.15790637433488172
0.13668132042261708 0.15790637433488172 0.12565805017256743 0.15790637433488172
[-0.011023270250049816, -0.01102327025004965]
0.11432167035215779 0.12565805017256726 0.12565805017256726 0.15790637433488172
0.11432167035215779 0.12565805017256726 0.12565805017256726 0.15790637433488172
[-0.020911944341904992, -0.020911944341904992]
0.09054385326964312 0.09054385326964312 0.12565805017256726 0.15790637433488172
0.06486635476598611 0.09054385326964312 0.12565805017256743 0.15790637433488172
[-0.0322483241