In [1]:
import pandas as pd
import numpy as np
from firthlogist import FirthLogisticRegression
from statsmodels.stats.multitest import multipletests
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

In [33]:
# Gene symbols analysed below; logistic_regression() reads "<gene>.csv" for each.
genes = [
    "ACTN2",
    "ALPK3",
    "DES",
    "FLNC",
    "MYBPC3",
    "MYH6",
    "MYH7",
    "PTPN11",
    "TNNI3",
    "TTR",
]

# Per-gene results, keyed by gene symbol and filled in by logistic_regression().
deleterious_vals = dict()  # p-value of the 'deleterious' term
deleterious_odds = dict()  # exp(coefficient) of the 'deleterious' term
missense_vals = dict()     # p-value of the 'missense_variant' term
missense_odds = dict()     # exp(coefficient) of the 'missense_variant' term
def logistic_regression(gene, data_dir="/Users/uriel/Downloads/work_temp/cross_val_lifelines/"):
    """Fit a Firth logistic regression of HCM status for one gene and record the variant effects.

    Reads ``<data_dir>/<gene>.csv``, keeps the outcome plus the covariate
    columns, drops every row with a missing value in any of them, fits a
    ``FirthLogisticRegression`` and prints its summary table.  The p-value and
    odds ratio (``exp(coef)``) of the ``deleterious`` and ``missense_variant``
    terms are stored under ``gene`` in the module-level dicts
    ``deleterious_vals``, ``deleterious_odds``, ``missense_vals`` and
    ``missense_odds``.

    Parameters
    ----------
    gene : str
        Gene symbol; also the stem of the CSV file that is read.
    data_dir : str or path-like, optional
        Directory containing the per-gene CSV files.  Defaults to the
        location originally hard-coded in this notebook.

    Returns
    -------
    FirthLogisticRegression
        The fitted model, so callers can inspect it further.  Callers that
        ignore the return value behave exactly as before.
    """
    # Local import: keeps this cell runnable without touching the import cell.
    from pathlib import Path

    outcome = 'is_hcm'
    # Single source of truth for the covariates; the column order here defines
    # the order of the fitted coefficients and p-values.
    feature_names = ['sex', 'is_family_hist', 'age', 'principal_component_1',
                     'principal_component_4', 'prs_score', 'deleterious',
                     'missense_variant']
    columns = [outcome] + feature_names

    # Read only the columns the model needs.  The earlier explicit drop of
    # bookkeeping columns was redundant (the selection below discards them)
    # and raised KeyError whenever one of them was absent from a file.
    phenotypic_data = pd.read_csv(
        Path(data_dir) / f"{gene}.csv",
        usecols=columns,
        dtype={
            'is_family_hist': 'boolean',
            'is_hcm': 'boolean',
        },
    )

    # read_csv returns columns in file order; re-select to fix the order, then
    # keep complete cases only.
    phenotypic_data = phenotypic_data[columns].dropna()
    print(phenotypic_data)

    # Cast explicitly: mixing float columns with a nullable-boolean column
    # would otherwise hand the model an object-dtype array.  Safe after
    # dropna() because no missing values remain.
    X = phenotypic_data[feature_names].astype(float).to_numpy()
    y = phenotypic_data[outcome].astype(int).to_numpy()

    fl = FirthLogisticRegression()
    fl.fit(X, y)
    # Pass a copy so summary() cannot alter feature_names
    # (NOTE(review): it may append an intercept label to the list it is given - confirm).
    fl.summary(xname=list(feature_names))

    # Look the terms up by name instead of hard-coding positions 6 and 7.
    deleterious_idx = feature_names.index('deleterious')
    missense_idx = feature_names.index('missense_variant')

    deleterious_vals[gene] = fl.pvals_[deleterious_idx]
    deleterious_odds[gene] = np.exp(fl.coef_[deleterious_idx])
    missense_vals[gene] = fl.pvals_[missense_idx]
    missense_odds[gene] = np.exp(fl.coef_[missense_idx])

    return fl



# Fit the model for every gene in turn.  Each call records that gene's
# p-values and odds ratios in the module-level result dicts.
# The plt.show() that used to follow each call was removed: nothing in this
# cell creates a matplotlib figure, so it had no effect.
for gene in genes:
    print(gene)
    logistic_regression(gene)








ACTN2
        is_hcm  sex  is_family_hist        age  principal_component_1  \
0        False  1.0           False  69.535934               -10.8246   
1        False  0.0           False  55.865845               -12.4943   
2        False  0.0            True  67.118412               -11.9850   
3        False  1.0           False  70.283368                10.1654   
4        False  0.0            True  78.781656               -13.7975   
...        ...  ...             ...        ...                    ...   
502354   False  0.0            True  65.779603               -13.9005   
502355   False  1.0           False  58.450376               -13.1965   
502356   False  1.0           False  75.780972               -14.1409   
502357   False  0.0            True  72.950034               -12.7431   
502358   False  1.0            True  82.450376               -12.1356   

        principal_component_4  prs_score  deleterious  missense_variant  
0                    3.870260  -0.006924   

In [34]:
# Per-gene odds ratios (exp of the fitted coefficient) for the deleterious term.
print("Deleterious odds: ", deleterious_odds)

# Bonferroni-correct the per-gene p-values.  multipletests returns the
# reject flags first and the corrected p-values second.
p_adjusted = multipletests(
    [*deleterious_vals.values()],
    alpha=0.05,
    method='bonferroni',
)

# Re-attach gene symbols; dict iteration order matches the order in which the
# p-values were passed in above.
updated_dict = dict(zip(deleterious_vals, p_adjusted[1]))
print("Deleterious p-values: ", updated_dict)

# True where the corrected p-value is significant at alpha = 0.05.
p_val_true_dict = dict(zip(deleterious_vals, p_adjusted[0]))
print("Deleterious p-values < 0.05?: ",p_val_true_dict)


Deleterious odds:  {'ACTN2': 1.9870462903914552, 'ALPK3': 6.399748208115962, 'DES': 0.9510593917468904, 'FLNC': 1.4737647802094618, 'MYBPC3': 57.22947563498607, 'MYH6': 0.6664603600107166, 'MYH7': 1.7388415314136898, 'PTPN11': 6.13293034565486, 'TNNI3': 18.890508611150395, 'TTR': 2.7932310839759826}
Deleterious p-values:  {'ACTN2': 1.0, 'ALPK3': 0.00036357931027176536, 'DES': 1.0, 'FLNC': 1.0, 'MYBPC3': 4.475937491191588e-43, 'MYH6': 1.0, 'MYH7': 1.0, 'PTPN11': 0.27728496851315415, 'TNNI3': 0.15273249464803756, 'TTR': 1.0}
Deleterious p-values < 0.05?:  {'ACTN2': False, 'ALPK3': True, 'DES': False, 'FLNC': False, 'MYBPC3': True, 'MYH6': False, 'MYH7': False, 'PTPN11': False, 'TNNI3': False, 'TTR': False}


### Missense variants

In [35]:
# Per-gene odds ratios (exp of the fitted coefficient) for the missense term.
print("Missense odds: ", missense_odds)

# Bonferroni-correct the per-gene p-values.  multipletests returns the
# reject flags first and the corrected p-values second.
p_adjusted = multipletests(
    [*missense_vals.values()],
    alpha=0.05,
    method='bonferroni',
)

# Re-attach gene symbols; dict iteration order matches the order in which the
# p-values were passed in above.
updated_dict = dict(zip(missense_vals, p_adjusted[1]))
print("Missense p-values: ", updated_dict)

# True where the corrected p-value is significant at alpha = 0.05.
p_val_true_dict = dict(zip(missense_vals, p_adjusted[0]))
print("Missense p-values < 0.05?: ",p_val_true_dict)

Missense odds:  {'ACTN2': 1.3046945708950841, 'ALPK3': 1.7204788127784203, 'DES': 0.5848107109851669, 'FLNC': 1.8402843714651107, 'MYBPC3': 2.156318065164935, 'MYH6': 0.7980553668523375, 'MYH7': 7.77479436781506, 'PTPN11': 1.9944378757285073, 'TNNI3': 5.85256146941811, 'TTR': 2.1338383240364527}
Missense p-values:  {'ACTN2': 1.0, 'ALPK3': 0.13278087262537286, 'DES': 1.0, 'FLNC': 0.042246452875287416, 'MYBPC3': 0.008755476177748784, 'MYH6': 1.0, 'MYH7': 5.2515474061527155e-24, 'PTPN11': 1.0, 'TNNI3': 0.10761089476599858, 'TTR': 1.0}
Missense p-values < 0.05?:  {'ACTN2': False, 'ALPK3': False, 'DES': False, 'FLNC': True, 'MYBPC3': True, 'MYH6': False, 'MYH7': True, 'PTPN11': False, 'TNNI3': False, 'TTR': False}
