# Simulations to benchmark the fine-mapping

The simulations are based on a specific gnomAD LD matrix from a 1 Mb region on chromosome 7.

At each iteration of the simulation we randomly select n_causal causal variants and generate Z-scores. We then perform fine mapping using GentroPy functions and examine the output.

We expect all selected causal variants to be present in the detected credible sets.

In [1]:
import numpy as np

from gentropy.common.session import Session
from gentropy.finemapping_simulations import FineMappingSimulations

# Create the GentroPy session; its `spark` attribute is used further down to read the LD index parquet,
# and the session object itself is passed to every SimulationLoop call.
session = Session()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


24/05/21 18:05:22 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [2]:
# Directory holding the pre-computed inputs; edit this single constant to point at another copy of the data.
DATA_DIR = "/Users/yt4/Projects/ot_data/tmp"
# Number of leading variants kept for the simulations. The LD matrix and the LD index
# must be cut to the same size, so the value is defined once and reused.
N_VARIANTS_FOR_SIM = 500

ld_matrix = np.load(f"{DATA_DIR}/ld_matrix.npy")
ld_index = session.spark.read.parquet(f"{DATA_DIR}/ld_index")

# Top-left N x N sub-matrix (same result as slicing rows first and columns second).
ld_matrix_for_sim = ld_matrix[:N_VARIANTS_FOR_SIM, :N_VARIANTS_FOR_SIM]
# NOTE(review): this assumes limit() returns the index rows that correspond to the first N
# rows/columns of the matrix; no explicit ordering is applied here — confirm.
ld_index_for_sim = ld_index.limit(N_VARIANTS_FOR_SIM)

## Number of causal variants = 1

In [None]:
# One causal variant per simulation; `noise`, `run_carma` and `scale_noise` are not passed,
# so whatever defaults SimulationLoop defines apply.
n_causal = 1
x1 = FineMappingSimulations.SimulationLoop(
    session=session,
    ld_matrix_for_sim=ld_matrix_for_sim,
    ld_index=ld_index_for_sim,
    n_iter=100,
    n_causal=n_causal,
    he2_reggen=0.003,
    sample_size=100_000,
)

In [4]:
# Two-stage filter on the simulation results; rows with credibleSetIndex == 1
# are exempt from both thresholds.
# Stage 1: keep rows with pValueExponent <= -6.
x2 = x1[(x1["credibleSetIndex"] == 1) | (x1["pValueExponent"] <= -6)]
# Stage 2: of the remaining rows, keep those with purityMinR2 >= 0.25.
x3 = x2[(x2["credibleSetIndex"] == 1) | (x2["purityMinR2"] >= 0.25)]

{'successful_runs': 76, 'number_of_cs': 76, 'expected_results': 76, 'false_positives': 0.013157894736842105, 'accuracy': 0.9868421052631579, 'accuracy_lead': 0.6447368421052632, 'sensitivity': 0.9868421052631579}
{'successful_runs': 76, 'number_of_cs': 76, 'expected_results': 76, 'false_positives': 0.013157894736842105, 'accuracy': 0.9868421052631579, 'accuracy_lead': 0.6447368421052632, 'sensitivity': 0.9868421052631579}
{'successful_runs': 76, 'number_of_cs': 76, 'expected_results': 76, 'false_positives': 0.013157894736842105, 'accuracy': 0.9868421052631579, 'accuracy_lead': 0.6447368421052632, 'sensitivity': 0.9868421052631579}


## Number of causal variants = 3

In [None]:
# Three causal variants per simulation; `noise`, `run_carma` and `scale_noise` are not passed,
# so whatever defaults SimulationLoop defines apply.
n_causal = 3
x1 = FineMappingSimulations.SimulationLoop(
    session=session,
    ld_matrix_for_sim=ld_matrix_for_sim,
    ld_index=ld_index_for_sim,
    n_iter=100,
    n_causal=n_causal,
    he2_reggen=0.003,
    sample_size=100_000,
)

In [6]:
# Two-stage filter on the simulation results; rows with credibleSetIndex == 1
# are exempt from both thresholds.
# Stage 1: keep rows with pValueExponent <= -6.
x2 = x1[(x1["credibleSetIndex"] == 1) | (x1["pValueExponent"] <= -6)]
# Stage 2: of the remaining rows, keep those with purityMinR2 >= 0.25.
x3 = x2[(x2["credibleSetIndex"] == 1) | (x2["purityMinR2"] >= 0.25)]

{'successful_runs': 89, 'number_of_cs': 177, 'expected_results': 267, 'false_positives': 0.062146892655367235, 'accuracy': 0.9378531073446328, 'accuracy_lead': 0.6666666666666666, 'sensitivity': 0.6217228464419475}
{'successful_runs': 89, 'number_of_cs': 172, 'expected_results': 267, 'false_positives': 0.05232558139534884, 'accuracy': 0.9476744186046512, 'accuracy_lead': 0.6802325581395349, 'sensitivity': 0.6104868913857678}
{'successful_runs': 89, 'number_of_cs': 161, 'expected_results': 267, 'false_positives': 0.049689440993788817, 'accuracy': 0.9503105590062112, 'accuracy_lead': 0.6832298136645962, 'sensitivity': 0.5730337078651685}


## CARMA without noise, number of causal variants = 1

In [None]:
# One causal variant per simulation, with noise switched off and CARMA switched on.
n_causal = 1
x1 = FineMappingSimulations.SimulationLoop(
    session=session,
    ld_matrix_for_sim=ld_matrix_for_sim,
    ld_index=ld_index_for_sim,
    n_iter=100,
    n_causal=n_causal,
    he2_reggen=0.003,
    sample_size=100_000,
    noise=False,
    run_carma=True,
)

In [8]:
# Two-stage filter on the simulation results; rows with credibleSetIndex == 1
# are exempt from both thresholds.
# Stage 1: keep rows with pValueExponent <= -6.
x2 = x1[(x1["credibleSetIndex"] == 1) | (x1["pValueExponent"] <= -6)]
# Stage 2: of the remaining rows, keep those with purityMinR2 >= 0.25.
x3 = x2[(x2["credibleSetIndex"] == 1) | (x2["purityMinR2"] >= 0.25)]

{'successful_runs': 74, 'number_of_cs': 74, 'expected_results': 74, 'false_positives': 0.04054054054054054, 'accuracy': 0.9594594594594594, 'accuracy_lead': 0.7027027027027027, 'sensitivity': 0.9594594594594594}
{'successful_runs': 74, 'number_of_cs': 74, 'expected_results': 74, 'false_positives': 0.04054054054054054, 'accuracy': 0.9594594594594594, 'accuracy_lead': 0.7027027027027027, 'sensitivity': 0.9594594594594594}
{'successful_runs': 74, 'number_of_cs': 74, 'expected_results': 74, 'false_positives': 0.04054054054054054, 'accuracy': 0.9594594594594594, 'accuracy_lead': 0.7027027027027027, 'sensitivity': 0.9594594594594594}


## CARMA without noise, number of causal variants = 3

In [None]:
# Three causal variants per simulation, with noise switched off and CARMA switched on.
n_causal = 3
x1 = FineMappingSimulations.SimulationLoop(
    session=session,
    ld_matrix_for_sim=ld_matrix_for_sim,
    ld_index=ld_index_for_sim,
    n_iter=100,
    n_causal=n_causal,
    he2_reggen=0.003,
    sample_size=100_000,
    noise=False,
    run_carma=True,
)

In [10]:
# Two-stage filter on the simulation results; rows with credibleSetIndex == 1
# are exempt from both thresholds.
# Stage 1: keep rows with pValueExponent <= -6.
x2 = x1[(x1["credibleSetIndex"] == 1) | (x1["pValueExponent"] <= -6)]
# Stage 2: of the remaining rows, keep those with purityMinR2 >= 0.25.
x3 = x2[(x2["credibleSetIndex"] == 1) | (x2["purityMinR2"] >= 0.25)]

{'successful_runs': 91, 'number_of_cs': 172, 'expected_results': 273, 'false_positives': 0.10465116279069768, 'accuracy': 0.8953488372093024, 'accuracy_lead': 0.6453488372093024, 'sensitivity': 0.5641025641025641}
{'successful_runs': 91, 'number_of_cs': 162, 'expected_results': 273, 'false_positives': 0.09259259259259259, 'accuracy': 0.9074074074074074, 'accuracy_lead': 0.6666666666666666, 'sensitivity': 0.5384615384615384}
{'successful_runs': 91, 'number_of_cs': 150, 'expected_results': 273, 'false_positives': 0.07333333333333333, 'accuracy': 0.9266666666666666, 'accuracy_lead': 0.6933333333333334, 'sensitivity': 0.5091575091575091}


## Adding noise

### One variant, noise, no CARMA

In [None]:
# One causal variant per simulation, with noise switched on (scale_noise=2) and CARMA switched off.
# This run uses he2_reggen=0.005.
n_causal = 1
x1 = FineMappingSimulations.SimulationLoop(
    session=session,
    ld_matrix_for_sim=ld_matrix_for_sim,
    ld_index=ld_index_for_sim,
    n_iter=100,
    n_causal=n_causal,
    he2_reggen=0.005,
    sample_size=100_000,
    noise=True,
    scale_noise=2,
    run_carma=False,
)

In [16]:
# Two-stage filter on the simulation results; rows with credibleSetIndex == 1
# are exempt from both thresholds.
# Stage 1: keep rows with pValueExponent <= -6.
x2 = x1[(x1["credibleSetIndex"] == 1) | (x1["pValueExponent"] <= -6)]
# Stage 2: of the remaining rows, keep those with purityMinR2 >= 0.25.
x3 = x2[(x2["credibleSetIndex"] == 1) | (x2["purityMinR2"] >= 0.25)]

{'successful_runs': 76, 'number_of_cs': 115, 'expected_results': 76, 'false_positives': 0.48695652173913045, 'accuracy': 0.5130434782608696, 'accuracy_lead': 0.4, 'sensitivity': 0.7763157894736842}
{'successful_runs': 76, 'number_of_cs': 112, 'expected_results': 76, 'false_positives': 0.4732142857142857, 'accuracy': 0.5267857142857143, 'accuracy_lead': 0.4107142857142857, 'sensitivity': 0.7763157894736842}
{'successful_runs': 76, 'number_of_cs': 111, 'expected_results': 76, 'false_positives': 0.46846846846846846, 'accuracy': 0.5315315315315315, 'accuracy_lead': 0.4144144144144144, 'sensitivity': 0.7763157894736842}


### One variant, noise and CARMA

In [None]:
# One causal variant per simulation, with noise switched on (scale_noise=2) and CARMA switched on.
# This run uses he2_reggen=0.005.
n_causal = 1
x1 = FineMappingSimulations.SimulationLoop(
    session=session,
    ld_matrix_for_sim=ld_matrix_for_sim,
    ld_index=ld_index_for_sim,
    n_iter=100,
    n_causal=n_causal,
    he2_reggen=0.005,
    sample_size=100_000,
    noise=True,
    scale_noise=2,
    run_carma=True,
)

In [18]:
# Two-stage filter on the simulation results; rows with credibleSetIndex == 1
# are exempt from both thresholds.
# Stage 1: keep rows with pValueExponent <= -6.
x2 = x1[(x1["credibleSetIndex"] == 1) | (x1["pValueExponent"] <= -6)]
# Stage 2: of the remaining rows, keep those with purityMinR2 >= 0.25.
x3 = x2[(x2["credibleSetIndex"] == 1) | (x2["purityMinR2"] >= 0.25)]

{'successful_runs': 86, 'number_of_cs': 99, 'expected_results': 86, 'false_positives': 0.30303030303030304, 'accuracy': 0.696969696969697, 'accuracy_lead': 0.5353535353535354, 'sensitivity': 0.8023255813953488}
{'successful_runs': 86, 'number_of_cs': 95, 'expected_results': 86, 'false_positives': 0.2736842105263158, 'accuracy': 0.7263157894736842, 'accuracy_lead': 0.5578947368421052, 'sensitivity': 0.8023255813953488}
{'successful_runs': 86, 'number_of_cs': 93, 'expected_results': 86, 'false_positives': 0.26881720430107525, 'accuracy': 0.7311827956989247, 'accuracy_lead': 0.5698924731182796, 'sensitivity': 0.7906976744186046}
