# Calculate partial correlations

In [143]:
import numpy as np
import pandas as pd
import pingouin as pg

Function: partial correlations adjusted for variables from other groups

In [140]:
def partial_correlations(df, y, groups_df):
    """Get the partial correlations between each x and a given y, adjusting for covariates.

    For every variable listed in ``groups_df["var"]`` that is also a column of
    ``df`` (other than ``y``), compute the Spearman partial correlation with
    ``y``. The covariates are all other listed variables present in ``df`` that
    belong to a *different* ``var_group``; variables sharing x's group are not
    adjusted for, and ``y`` itself is never used as a covariate.

    Parameters
    ----------
    df : pandas.DataFrame
        One column per variable. Every column must be convertible by
        ``pd.to_numeric`` (identifier columns have to be dropped beforehand).
        Rows containing NaN or +/-inf in any column are discarded.
    y : column label
        Name of the outcome column in ``df``.
    groups_df : pandas.DataFrame
        Must provide the columns ``"var"`` and ``"var_group"``.

    Returns
    -------
    pandas.DataFrame
        The frames returned by ``pg.partial_corr`` for each tested variable,
        concatenated in ``groups_df`` order, plus the columns ``"var"``,
        ``"group"`` and ``"p-adj"`` (Bonferroni-adjusted ``"p-val"``, capped
        at 1).

    Raises
    ------
    ValueError
        If no variable in ``groups_df["var"]`` (other than ``y``) is a column
        of ``df``, or if a column cannot be converted to numeric.
    """
    # Clean once up front and use the cleaned frame for every correlation.
    df_numeric = df.apply(pd.to_numeric).replace([np.inf, -np.inf], np.nan).dropna()
    available = set(df_numeric.columns)

    groups_dict = dict(zip(groups_df["var"], groups_df["var_group"]))
    # Listed variables that actually exist in the data, in groups_df order.
    candidates = [var for var in groups_df["var"] if var in available]

    correlations = []
    for var in candidates:
        if var == y:
            continue
        var_group = groups_dict.get(var)
        # Adjust for variables from other groups only; y must not be its own covariate.
        z = [n for n in candidates if n != y and groups_dict.get(n) != var_group]
        cor = pg.partial_corr(data=df_numeric, x=var, y=y, covar=z, method="spearman")
        cor["var"] = var
        cor["group"] = var_group
        correlations.append(cor)

    if not correlations:
        raise ValueError(
            "No variable in groups_df['var'] (other than y) is a column of df; "
            "nothing to correlate."
        )

    # Combine to single df
    correlations_df = pd.concat(correlations)

    # Bonferroni adjust: multiply by the number of tests, capping at 1 so the
    # result is still a valid probability.
    n_tests = len(correlations)
    correlations_df["p-adj"] = (correlations_df["p-val"] * n_tests).clip(upper=1.0)

    return correlations_df

Read data

In [154]:
# Load the imputed data set. The first CSV column is dropped by position
# (presumably a saved row index -- confirm against the file).
imputed = pd.read_csv("../data/imputed.csv").iloc[:, 1:]
# Use dots instead of spaces in column names (e.g. "mean phylop" -> "mean.phylop").
imputed = imputed.rename(columns=lambda name: name.replace(" ", "."))

# Variable -> group lookup table; same positional drop of the first column,
# then discard rows with any missing value.
groups_df = (
    pd.read_csv("../data/variable_groups.csv")
    .iloc[:, 1:]
    .dropna()
)

Run calculations

In [169]:
# One complete-case table per outcome: remove the other outcome's column, then
# drop rows that still contain missing values.
dN = imputed.drop(columns="mean.phylop").dropna()
phyloP = imputed.drop(columns="dN").dropna()

# "Systematic_ID" is removed before the call so it is not passed to
# partial_correlations along with the numeric variables.
cors_dN = partial_correlations(
    dN.drop(columns="Systematic_ID"),
    y="dN",
    groups_df=groups_df,
)
cors_phylop = partial_correlations(
    phyloP.drop(columns="Systematic_ID"),
    y="mean.phylop",
    groups_df=groups_df,
)

Combine output

In [170]:
# Keep the estimate, adjusted p-value and labels from each result, under
# outcome-specific column names.
# The rename runs BEFORE the column selection so this cell can be re-run:
# DataFrame.rename ignores labels that are no longer present, whereas selecting
# "r" / "p-adj" after a previous run had renamed them would raise KeyError.
cors_dN = cors_dN.rename(columns={"r": "cor_dN", "p-adj": "p_adj_dN"})[
    ["cor_dN", "p_adj_dN", "var", "group"]
]
cors_phylop = cors_phylop.rename(columns={"r": "cor_phylop", "p-adj": "p_adj_phylop"})[
    ["cor_phylop", "p_adj_phylop", "var", "group"]
]

# Inner join on the variable name: only variables tested against both outcomes
# are kept.
# NOTE(review): both inputs carry a "group" column, so the merged table holds
# "group_x" and "group_y" (pandas' default suffixes). Left unchanged so the
# schema of the exported CSV stays the same -- confirm whether consumers need both.
correlations = cors_dN.merge(cors_phylop, on="var")

In [172]:
# Save the merged table without the row index.
correlations.to_csv("../data/correlations.csv", index=False)