In [2]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import scipy.stats as st
import scipy.special as sp
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.api as sm

from fastkde import fastKDE

In [3]:
def affine(x):
    y = (x - np.min(x))/(np.max(x) - np.min(x))
    return y

In [11]:
def estimator_strata(x, alpha = 0.05):
    
    if(x.shape[0]%2!=0):
        x = x[:-1]
        
    x = np.column_stack((affine(x[:,0]), affine(x[:,1])))    
    
    estim, inf = np.split(x, 2) #split data up into two halves
    
    ## first split used for density estimation
    margin_x = fastKDE.pdf_at_points(var1 = estim[:,0], list_of_points = list(inf[:,0]))
    margin_y = fastKDE.pdf_at_points(var1 = estim[:,1], list_of_points = list(inf[:,1]))
    select = np.logical_and(margin_x > 0, margin_y > 0)
    margin_y = margin_y[select]
    margin_x = margin_x[select]
    
    h_x1 = -np.mean(np.log(margin_x))
    h_y1 = -np.mean(np.log(margin_y))
    covar1 = np.cov(np.log(margin_x), np.log(margin_y))
    delta_var1 = covar1[0,0] + covar1[1,1] - 2*covar1[0,1]
    
    
    ## second split used for density estimation
    margin_x = fastKDE.pdf_at_points(var1 = inf[:,0], list_of_points = list(estim[:,0]))
    margin_y = fastKDE.pdf_at_points(var1 = inf[:,1], list_of_points = list(estim[:,1]))
    select = np.logical_and(margin_x > 0, margin_y > 0)
    margin_y = margin_y[select]
    margin_x = margin_x[select]
    
    h_x2 = -np.mean(np.log(margin_x))
    h_y2 = -np.mean(np.log(margin_y))
    covar2 = np.cov(np.log(margin_x), np.log(margin_y))
    delta_var2 = covar2[0,0] + covar2[1,1] - 2*covar2[0,1]
    
    ## cross fitting
    h_x = (h_x1 + h_x2)/2
    h_y = (h_y1 + h_y2)/2
    delta = (h_x - h_y)
    
    ## variance estimation using monte carlo
    delta_var = (delta_var1 + delta_var2)/2
    delta_sd = np.sqrt(delta_var)
    
    delta_lcb = delta - st.norm.ppf(1 - alpha/2)*delta_sd/np.sqrt(len(select))
    delta_ucb = delta + st.norm.ppf(1 - alpha/2)*delta_sd/np.sqrt(len(select))
    
    return ([delta, delta_lcb, delta_ucb])

In [5]:
def estimator(x, alpha = 0.05):
    
    if(x.shape[0]%2!=0):
        x = x[:-1]
    
    x = np.column_stack((affine(x[:,0]), affine(x[:,1])))
    
    estim, inf = np.split(x, 2) #split data up into two halves
    
    ## first split used for density estimation
    margin_x = fastKDE.pdf_at_points(var1 = estim[:,0], list_of_points = list(inf[:,0]))
    margin_y = fastKDE.pdf_at_points(var1 = estim[:,1], list_of_points = list(inf[:,1]))
    select = np.logical_and(margin_x > 0, margin_y > 0)
    margin_y = margin_y[select]
    margin_x = margin_x[select]
    
    h_x1 = -np.mean(np.log(margin_x))
    h_y1 = -np.mean(np.log(margin_y))
    covar1 = np.cov(np.log(margin_x), np.log(margin_y))
    delta_var1 = covar1[0,0] + covar1[1,1] - 2*covar1[0,1]
    
    
    ## second split used for density estimation
    margin_x = fastKDE.pdf_at_points(var1 = inf[:,0], list_of_points = list(estim[:,0]))
    margin_y = fastKDE.pdf_at_points(var1 = inf[:,1], list_of_points = list(estim[:,1]))
    select = np.logical_and(margin_x > 0, margin_y > 0)
    margin_y = margin_y[select]
    margin_x = margin_x[select]
    
    h_x2 = -np.mean(np.log(margin_x))
    h_y2 = -np.mean(np.log(margin_y))
    covar2 = np.cov(np.log(margin_x), np.log(margin_y))
    delta_var2 = covar2[0,0] + covar2[1,1] - 2*covar2[0,1]
    
    ## cross fitting
    h_x = (h_x1 + h_x2)/2
    h_y = (h_y1 + h_y2)/2
    delta = (h_x - h_y)
    
    ## variance estimation using monte carlo
    delta_var = (delta_var1 + delta_var2)/2
    delta_sd = np.sqrt(delta_var)
    
    delta_lcb = delta - st.norm.ppf(1 - alpha/2)*delta_sd/np.sqrt(len(select))
    delta_ucb = delta + st.norm.ppf(1 - alpha/2)*delta_sd/np.sqrt(len(select))
    
    return ([delta, delta_sd, x.shape[0]])

In [6]:
bps = pd.read_csv("/home/soumikp/enar_2023/data/outcome_bp.csv")

# FGF5 

In [7]:
dnam = pd.read_csv("/home/soumikp/enar_2023/data/dat_FGF5.csv").iloc[:,1:22].mean(axis=1)
folio = pd.read_csv("/home/soumikp/enar_2023/data/dat_FGF5.csv").iloc[:,23]
data = pd.concat([folio, dnam], axis=1)
data = pd.merge(data, bps, on = 'foliocc', how='inner').dropna()
data.columns = ['id', 'dnam', 'sex', 'bps', 'bpd']

temp = data[data.sex == 0]
strata1 = estimator(np.column_stack((temp['bps'], temp['dnam'])))
print('FGF5 vs BPS (sex = 0): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

temp = data[data.sex == 1]
strata2 = estimator(np.column_stack((temp['bps'], temp['dnam'])))
print('FGF5 vs BPS (sex = 1): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

delta = (strata1[2]*strata1[0] + strata2[2]*strata2[0])/(strata1[2] + strata2[2])
delta_var = (strata1[2]*strata1[1])/(strata1[2] + strata2[2])**2 + (strata2[2]*strata2[1])/(strata1[2] + strata2[2])**2
alpha = 0.05
delta_lcb = delta - st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
delta_ucb = delta + st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
print('FGF5 vs BPS (sex = 2): ' +  str([delta, delta_lcb, delta_ucb]))

temp = data
print('FGF5 vs BPS (sex = 3): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

temp = data[data.sex == 0]
strata1 = estimator(np.column_stack((temp['bpd'], temp['dnam'])))
print('FGF5 vs BPD (sex = 0): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

temp = data[data.sex == 1]
strata2 = estimator(np.column_stack((temp['bpd'], temp['dnam'])))
print('FGF5 vs BPD (sex = 1): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

delta = (strata1[2]*strata1[0] + strata2[2]*strata2[0])/(strata1[2] + strata2[2])
delta_var = (strata1[2]*strata1[1])/(strata1[2] + strata2[2])**2 + (strata2[2]*strata2[1])/(strata1[2] + strata2[2])**2
alpha = 0.05
delta_lcb = delta - st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
delta_ucb = delta + st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
print('FGF5 vs BPD (sex = 2): ' +  str([delta, delta_lcb, delta_ucb]))

temp = data
print('FGF5 vs BPD (sex = 3): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

FGF5 vs BPS (sex = 0): [0.3223948835919948, 0.09991462614716379, 0.5448751410368258]
FGF5 vs BPS (sex = 1): [0.5968485254061351, 0.4215411655372173, 0.7721558852750529]
FGF5 vs BPS (sex = 2): [0.4664567568814094, 0.37445978992597395, 0.5584537238368449]
FGF5 vs BPS (sex = 3): [0.5402736052315196, 0.40107135363812807, 0.6794758568249111]
FGF5 vs BPD (sex = 0): [0.2359196967205609, 0.022991044066117794, 0.448848349375004]
FGF5 vs BPD (sex = 1): [0.5466658424974875, 0.37967442932827167, 0.7136572556667033]
FGF5 vs BPD (sex = 2): [0.39903165829695536, 0.3091324269515348, 0.4889308896423759]
FGF5 vs BPD (sex = 3): [0.4252802834483475, 0.2954142326738052, 0.5551463342228898]


# ARHGAP42

In [8]:
dnam = pd.read_csv("/home/soumikp/enar_2023/data/dat_ARHGAP42.csv").iloc[:,1:44].mean(axis=1)
folio = pd.read_csv("/home/soumikp/enar_2023/data/dat_ARHGAP42.csv").iloc[:,45]

data = pd.concat([folio, dnam], axis=1)
data = pd.merge(data, bps, on = 'foliocc', how='inner').dropna()
data.columns = ['id', 'dnam', 'sex', 'bps', 'bpd']

temp = data[data.sex == 0]
strata1 = estimator(np.column_stack((temp['bps'], temp['dnam'])))
print('ARHGAP42 vs BPS (sex = 0): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

temp = data[data.sex == 1]
strata2 = estimator(np.column_stack((temp['bps'], temp['dnam'])))
print('ARHGAP42 vs BPS (sex = 1): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

delta = (strata1[2]*strata1[0] + strata2[2]*strata2[0])/(strata1[2] + strata2[2])
delta_var = (strata1[2]*strata1[1])/(strata1[2] + strata2[2])**2 + (strata2[2]*strata2[1])/(strata1[2] + strata2[2])**2
alpha = 0.05
delta_lcb = delta - st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
delta_ucb = delta + st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
print('ARHGAP42 vs BPS (sex = 2): ' +  str([delta, delta_lcb, delta_ucb]))

temp = data
print('ARHGAP42 vs BPS (sex = 3): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

temp = data[data.sex == 0]
strata1 = estimator(np.column_stack((temp['bpd'], temp['dnam'])))
print('ARHGAP42 vs BPD (sex = 0): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

temp = data[data.sex == 1]
strata2 = estimator(np.column_stack((temp['bpd'], temp['dnam'])))
print('ARHGAP42 vs BPD (sex = 1): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

delta = (strata1[2]*strata1[0] + strata2[2]*strata2[0])/(strata1[2] + strata2[2])
delta_var = (strata1[2]*strata1[1])/(strata1[2] + strata2[2])**2 + (strata2[2]*strata2[1])/(strata1[2] + strata2[2])**2
alpha = 0.05
delta_lcb = delta - st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
delta_ucb = delta + st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
print('ARHGAP42 vs BPD (sex = 2): ' +  str([delta, delta_lcb, delta_ucb]))

temp = data
print('ARHGAP42 vs BPD (sex = 3): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

ARHGAP42 vs BPS (sex = 0): [0.00550651357846435, -0.1984466803015549, 0.2094597074584836]
ARHGAP42 vs BPS (sex = 1): [0.14765372018318146, -0.018194216458762097, 0.313501656825125]
ARHGAP42 vs BPS (sex = 2): [0.08012018141312428, -0.008633431980155454, 0.16887379480640402]
ARHGAP42 vs BPS (sex = 3): [0.2379704650668123, 0.09762080508035476, 0.37832012505326984]
ARHGAP42 vs BPD (sex = 0): [-0.09226827343903699, -0.27158251562702906, 0.08704596874895507]
ARHGAP42 vs BPD (sex = 1): [0.10219583630837803, -0.05582813488436833, 0.2602198075011244]
ARHGAP42 vs BPD (sex = 2): [0.009806757347920322, -0.07508897828551754, 0.09470249298135817]
ARHGAP42 vs BPD (sex = 3): [0.11892547972798229, -0.007340235999588246, 0.24519119545555282]


## ATP2B1

In [9]:
dnam = pd.read_csv("/home/soumikp/enar_2023/data/dat_ATP2B1.csv").iloc[:,1:22].mean(axis=1)
folio = pd.read_csv("/home/soumikp/enar_2023/data/dat_ATP2B1.csv").iloc[:,23]

data = pd.concat([folio, dnam], axis=1)
data = pd.merge(data, bps, on = 'foliocc', how='inner').dropna()
data.columns = ['id', 'dnam', 'sex', 'bps', 'bpd']

temp = data[data.sex == 0]
strata1 = estimator(np.column_stack((temp['bps'], temp['dnam'])))
print('ATP2B1 vs BPS (sex = 0): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

temp = data[data.sex == 1]
strata2 = estimator(np.column_stack((temp['bps'], temp['dnam'])))
print('ATP2B1 vs BPS (sex = 1): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

delta = (strata1[2]*strata1[0] + strata2[2]*strata2[0])/(strata1[2] + strata2[2])
delta_var = (strata1[2]*strata1[1])/(strata1[2] + strata2[2])**2 + (strata2[2]*strata2[1])/(strata1[2] + strata2[2])**2
alpha = 0.05
delta_lcb = delta - st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
delta_ucb = delta + st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
print('ATP2B1 vs BPS (sex = 2): ' +  str([delta, delta_lcb, delta_ucb]))

temp = data
print('ATP2B1 vs BPS (sex = 3): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

temp = data[data.sex == 0]
strata1 = estimator(np.column_stack((temp['bpd'], temp['dnam'])))
print('ATP2B1 vs BPD (sex = 0): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

temp = data[data.sex == 1]
strata2 = estimator(np.column_stack((temp['bpd'], temp['dnam'])))
print('ATP2B1 vs BPD (sex = 1): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

delta = (strata1[2]*strata1[0] + strata2[2]*strata2[0])/(strata1[2] + strata2[2])
delta_var = (strata1[2]*strata1[1])/(strata1[2] + strata2[2])**2 + (strata2[2]*strata2[1])/(strata1[2] + strata2[2])**2
alpha = 0.05
delta_lcb = delta - st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
delta_ucb = delta + st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
print('ATP2B1 vs BPD (sex = 2): ' +  str([delta, delta_lcb, delta_ucb]))

temp = data
print('ATP2B1 vs BPD (sex = 3): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

ATP2B1 vs BPS (sex = 0): [0.25769311860148675, 0.0699192941339982, 0.4454669430689753]
ATP2B1 vs BPS (sex = 1): [0.155508943163371, -0.005713210887125841, 0.31673109721386783]
ATP2B1 vs BPS (sex = 2): [0.204056214252744, 0.11774801514503433, 0.2903644133604537]
ATP2B1 vs BPS (sex = 3): [0.2840248140417999, 0.15232803085126853, 0.41572159723233126]
ATP2B1 vs BPD (sex = 0): [0.16804451575791524, -0.002625798524282602, 0.33871483004011305]
ATP2B1 vs BPD (sex = 1): [0.11129572001718913, -0.04790764887940943, 0.2704990889137877]
ATP2B1 vs BPD (sex = 2): [0.13825683370243833, 0.05421643482060827, 0.22229723258426837]
ATP2B1 vs BPD (sex = 3): [0.17785716242626137, 0.051807817568096715, 0.30390650728442603]


## HSD11B2

In [10]:
dnam = pd.read_csv("/home/soumikp/enar_2023/data/dat_HSD11B2.csv").iloc[:,0:21].mean(axis=1)
folio = pd.read_csv("/home/soumikp/enar_2023/data/dat_HSD11B2.csv").iloc[:,22]

data = pd.concat([folio, dnam], axis=1)
data = pd.merge(data, bps, on = 'foliocc', how='inner').dropna()
data.columns = ['id', 'dnam', 'sex', 'bps', 'bpd']

temp = data[data.sex == 0]
strata1 = estimator(np.column_stack((temp['bps'], temp['dnam'])))
print('HSD11B2 vs BPS (sex = 0): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

temp = data[data.sex == 1]
strata2 = estimator(np.column_stack((temp['bps'], temp['dnam'])))
print('HSD11B2 vs BPS (sex = 1): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

delta = (strata1[2]*strata1[0] + strata2[2]*strata2[0])/(strata1[2] + strata2[2])
delta_var = (strata1[2]*strata1[1])/(strata1[2] + strata2[2])**2 + (strata2[2]*strata2[1])/(strata1[2] + strata2[2])**2
alpha = 0.05
delta_lcb = delta - st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
delta_ucb = delta + st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
print('HSD11B2 vs BPS (sex = 2): ' +  str([delta, delta_lcb, delta_ucb]))

temp = data
print('HSD11B2 vs BPS (sex = 3): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

temp = data[data.sex == 0]
strata1 = estimator(np.column_stack((temp['bpd'], temp['dnam'])))
print('HSD11B2 vs BPD (sex = 0): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

temp = data[data.sex == 1]
strata2 = estimator(np.column_stack((temp['bpd'], temp['dnam'])))
print('HSD11B2 vs BPD (sex = 1): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

delta = (strata1[2]*strata1[0] + strata2[2]*strata2[0])/(strata1[2] + strata2[2])
delta_var = (strata1[2]*strata1[1])/(strata1[2] + strata2[2])**2 + (strata2[2]*strata2[1])/(strata1[2] + strata2[2])**2
alpha = 0.05
delta_lcb = delta - st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
delta_ucb = delta + st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
print('HSD11B2 vs BPD (sex = 2): ' +  str([delta, delta_lcb, delta_ucb]))

temp = data
print('HSD11B2 vs BPD (sex = 3): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

HSD11B2 vs BPS (sex = 0): [0.5676164241276913, 0.39949571894117863, 0.735737129314204]
HSD11B2 vs BPS (sex = 1): [0.5628597629696817, 0.39465294635496423, 0.7310665795843991]
HSD11B2 vs BPS (sex = 2): [0.565129162377129, 0.480246884450896, 0.6500114403033619]
HSD11B2 vs BPS (sex = 3): [0.726278558585789, 0.5911744113217503, 0.8613827058498278]
HSD11B2 vs BPD (sex = 0): [0.4520478566845446, 0.29799025105801813, 0.6061054623110711]
HSD11B2 vs BPD (sex = 1): [0.51663018313585, 0.36202996536482324, 0.6712304009068767]
HSD11B2 vs BPD (sex = 2): [0.485818004485418, 0.40449810137859693, 0.5671379075922391]
HSD11B2 vs BPD (sex = 3): [0.5998491281313261, 0.47430656392632287, 0.7253916923363293]


## KCNK3

In [10]:
dnam = pd.read_csv("/home/soumikp/enar_2023/data/dat_KCNK3.csv").iloc[:,1:36].mean(axis=1)
folio = pd.read_csv("/home/soumikp/enar_2023/data/dat_KCNK3.csv").iloc[:,37]

data = pd.concat([folio, dnam], axis=1)
data = pd.merge(data, bps, on = 'foliocc', how='inner').dropna()
data.columns = ['id', 'dnam', 'sex', 'bps', 'bpd']

temp = data[data.sex == 0]
strata1 = estimator(np.column_stack((temp['bps'], temp['dnam'])))
print('KCNK3 vs BPS (sex = 0): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

temp = data[data.sex == 1]
strata2 = estimator(np.column_stack((temp['bps'], temp['dnam'])))
print('KCNK3 vs BPS (sex = 1): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

delta = (strata1[2]*strata1[0] + strata2[2]*strata2[0])/(strata1[2] + strata2[2])
delta_var = (strata1[2]*strata1[1])/(strata1[2] + strata2[2])**2 + (strata2[2]*strata2[1])/(strata1[2] + strata2[2])**2
alpha = 0.05
delta_lcb = delta - st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
delta_ucb = delta + st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
print('KCNK3 vs BPS (sex = 2): ' +  str([delta, delta_lcb, delta_ucb]))

temp = data
print('KCNK3 vs BPS (sex = 3): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

temp = data[data.sex == 0]
strata1 = estimator(np.column_stack((temp['bpd'], temp['dnam'])))
print('KCNK3 vs BPD (sex = 0): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

temp = data[data.sex == 1]
strata2 = estimator(np.column_stack((temp['bpd'], temp['dnam'])))
print('KCNK3 vs BPD (sex = 1): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

delta = (strata1[2]*strata1[0] + strata2[2]*strata2[0])/(strata1[2] + strata2[2])
delta_var = (strata1[2]*strata1[1])/(strata1[2] + strata2[2])**2 + (strata2[2]*strata2[1])/(strata1[2] + strata2[2])**2
alpha = 0.05
delta_lcb = delta - st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
delta_ucb = delta + st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
print('KCNK3 vs BPD (sex = 2): ' +  str([delta, delta_lcb, delta_ucb]))

temp = data
print('KCNK3 vs BPD (sex = 3): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

KCNK3 vs BPS (sex = 0): [0.1935569969711156, 0.05410932012198491, 0.33300467382024634]
KCNK3 vs BPS (sex = 1): [0.2760622160504427, 0.13552394095337622, 0.41660049114750913]
KCNK3 vs BPS (sex = 2): [0.23686433418899994, 0.14465664900705885, 0.329072019370941]
KCNK3 vs BPS (sex = 3): [0.4311330963279364, 0.3329586300405629, 0.5293075626153099]
KCNK3 vs BPD (sex = 0): [0.08579174015711732, -0.04338128805106006, 0.2149647683652947]
KCNK3 vs BPD (sex = 1): [0.23601861823811038, 0.10179596821169476, 0.37024126826452597]
KCNK3 vs BPD (sex = 2): [0.16464646160192975, 0.07516137032316907, 0.25413155288069045]
KCNK3 vs BPD (sex = 3): [0.31227550811329535, 0.21691221739289948, 0.40763879883369125]


## PRDM8

In [11]:
dnam = pd.read_csv("/home/soumikp/enar_2023/data/dat_PRDM8.csv").iloc[:, 1:52].mean(axis=1)
folio = pd.read_csv("/home/soumikp/enar_2023/data/dat_PRDM8.csv").iloc[:,53]

data = pd.concat([folio, dnam], axis=1)
data = pd.merge(data, bps, on = 'foliocc', how='inner').dropna()
data.columns = ['id', 'dnam', 'sex', 'bps', 'bpd']

temp = data[data.sex == 0]
strata1 = estimator(np.column_stack((temp['bps'], temp['dnam'])))
print('PRDM8 vs BPS (sex = 0): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

temp = data[data.sex == 1]
strata2 = estimator(np.column_stack((temp['bps'], temp['dnam'])))
print('PRDM8 vs BPS (sex = 1): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

delta = (strata1[2]*strata1[0] + strata2[2]*strata2[0])/(strata1[2] + strata2[2])
delta_var = (strata1[2]*strata1[1])/(strata1[2] + strata2[2])**2 + (strata2[2]*strata2[1])/(strata1[2] + strata2[2])**2
alpha = 0.05
delta_lcb = delta - st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
delta_ucb = delta + st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
print('PRDM8 vs BPS (sex = 2): ' +  str([delta, delta_lcb, delta_ucb]))

temp = data
print('PRDM8 vs BPS (sex = 3): ' +  str(estimator_strata(np.column_stack((temp['bps'], temp['dnam'])))))

temp = data[data.sex == 0]
strata1 = estimator(np.column_stack((temp['bpd'], temp['dnam'])))
print('PRDM8 vs BPD (sex = 0): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

temp = data[data.sex == 1]
strata2 = estimator(np.column_stack((temp['bpd'], temp['dnam'])))
print('PRDM8 vs BPD (sex = 1): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

delta = (strata1[2]*strata1[0] + strata2[2]*strata2[0])/(strata1[2] + strata2[2])
delta_var = (strata1[2]*strata1[1])/(strata1[2] + strata2[2])**2 + (strata2[2]*strata2[1])/(strata1[2] + strata2[2])**2
alpha = 0.05
delta_lcb = delta - st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
delta_ucb = delta + st.norm.ppf(1 - alpha/2)*np.sqrt(delta_var)
print('PRDM8 vs BPD (sex = 2): ' +  str([delta, delta_lcb, delta_ucb]))

temp = data
print('PRDM8 vs BPD (sex = 3): ' +  str(estimator_strata(np.column_stack((temp['bpd'], temp['dnam'])))))

PRDM8 vs BPS (sex = 0): [0.05329673426838899, -0.06670204457100361, 0.17329551310778157]
PRDM8 vs BPS (sex = 1): [-0.08982934259633235, -0.1982694569489983, 0.018610771756333605]
PRDM8 vs BPS (sex = 2): [-0.02183074669125401, -0.10494955465149813, 0.06128806126899011]
PRDM8 vs BPS (sex = 3): [-0.009461883842166396, -0.10146295135861184, 0.08253918367427905]
PRDM8 vs BPD (sex = 0): [-0.046736555651878064, -0.1488226440619987, 0.05534953275824257]
PRDM8 vs BPD (sex = 1): [-0.11926419826869628, -0.22115458447141778, -0.017373812065974756]
PRDM8 vs BPD (sex = 2): [-0.08480662093350294, -0.16349495841795647, -0.006118283449049397]
PRDM8 vs BPD (sex = 3): [-0.11747698417040797, -0.20359952974280543, -0.031354438598010514]
