# Tuebingen cause-effect pairs dataset

In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import scipy.stats as st
import scipy.special as sp
import random
from sklearn.metrics import confusion_matrix

from fastkde import fastKDE

In [2]:
def estimator(x, alpha = 0.05):
    
    if(x.shape[0]%2!=0):
        x = x[:-1]
    
    estim, inf = np.split(x, 2) #split data up into two halves
    
    ## first split used for density estimation
    margin_x = fastKDE.pdf_at_points(var1 = estim[:,0], list_of_points = list(inf[:,0]))
    margin_y = fastKDE.pdf_at_points(var1 = estim[:,1], list_of_points = list(inf[:,1]))
    select = np.logical_and(margin_x > 0, margin_y > 0)
    margin_y = margin_y[select]
    margin_x = margin_x[select]
    
    h_x1 = -np.mean(np.log(margin_x))
    h_y1 = -np.mean(np.log(margin_y))
    
    ## second split used for density estimation
    margin_x = fastKDE.pdf_at_points(var1 = inf[:,0], list_of_points = list(estim[:,0]))
    margin_y = fastKDE.pdf_at_points(var1 = inf[:,1], list_of_points = list(estim[:,1]))
    select = np.logical_and(margin_x > 0, margin_y > 0)
    margin_y = margin_y[select]
    margin_x = margin_x[select]
    
    h_x2 = -np.mean(np.log(margin_x))
    h_y2 = -np.mean(np.log(margin_y))
    
    ## cross fitting
    h_x = (h_x1 + h_x2)/2
    h_y = (h_y1 + h_y2)/2
    delta = (h_x - h_y)
    
    ## variance estimation using monte carlo
    margin_x = fastKDE.pdf_at_points(var1 = x[:,0])
    margin_y = fastKDE.pdf_at_points(var1 = x[:,1])
    select = np.logical_and(margin_x > 0, margin_y > 0)
    margin_y = margin_y[select]
    margin_x = margin_x[select]
    
    covar = np.cov(np.log(margin_x), np.log(margin_y))
    
    delta_var = covar[0,0] + covar[1,1] - 2*covar[0,1]
    delta_sd = np.sqrt(delta_var)
    
    delta_lcb = delta - st.norm.ppf(1 - alpha/2)*delta_sd/np.sqrt(len(select)/2)
    delta_ucb = delta + st.norm.ppf(1 - alpha/2)*delta_sd/np.sqrt(len(select)/2)
    
    if(delta_lcb*delta_ucb < 0):
        decision = 0
    else:
        decision = delta_lcb/np.abs(delta_lcb)
    
    return ([decision, h_x, h_y, delta_lcb, delta, delta_ucb, delta_sd/np.sqrt(len(select)/2)])

## Load data (99 cause-effect pairs). 

In [3]:
from cdt.data import load_dataset
data, labels = load_dataset('tuebingen')

### ANM predictions for CEP

In [4]:
from cdt.causality.pairwise import ANM
anm = ANM()
dec_anm = [anm.predict_proba(data.iloc[i]) for i in range(0, 99)]

In [5]:
[sum(dec_anm/np.abs(dec_anm) == 1), sum(np.isnan(dec_anm/np.abs(dec_anm))), sum(dec_anm/np.abs(dec_anm) == -1)]

[51, 18, 30]

### CDS predictions for CEP

In [6]:
from cdt.causality.pairwise import CDS
cds = CDS()
dec_cds = [cds.predict_proba(data.iloc[i]) for i in range(0, 99)]

In [7]:
[sum(dec_cds/np.abs(dec_cds) == 1), sum(np.isnan(dec_cds/np.abs(dec_cds))), sum(dec_cds/np.abs(dec_cds) == -1)]

[67, 0, 32]

### EL predictions for CEP

In [8]:
def affine(x):
    return (x - np.min(x)/(np.max(x) - np.min(x)))

dec_el = [estimator(np.column_stack((affine(data.iloc[i]['A']), affine(data.iloc[i]['B']))))[0] for i in range(0, 99)]

In [9]:
[sum(dec_el/np.abs(dec_el) == 1), sum(np.isnan(dec_el/np.abs(dec_el))), sum(dec_el/np.abs(dec_el) == -1)]

[58, 7, 34]

### RECI predictions from CEP

In [10]:
from cdt.causality.pairwise import RECI
reci = RECI()
dec_reci = [reci.predict_proba(data.iloc[i]) for i in range(0, 99)]

In [11]:
[sum(dec_reci/np.abs(dec_reci) == 1), sum(np.isnan(dec_reci/np.abs(dec_reci))), sum(dec_reci/np.abs(dec_reci) == -1)]

[62, 0, 37]