In [None]:
pip install fastDFE

Collecting fastDFE
  Downloading fastdfe-1.1.9-py3-none-any.whl.metadata (2.4 kB)
Collecting biopython<1.82,>=1.80 (from fastDFE)
  Downloading biopython-1.81-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting cyvcf2<0.32.0,>=0.31.0 (from fastDFE)
  Downloading cyvcf2-0.31.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.5 kB)
Collecting jsonpickle<4.0.0,>=3.0.0 (from fastDFE)
  Downloading jsonpickle-3.4.2-py3-none-any.whl.metadata (8.1 kB)
Collecting multiprocess<0.71.0,>=0.70.12 (from fastDFE)
  Downloading multiprocess-0.70.17-py311-none-any.whl.metadata (7.2 kB)
Collecting coloredlogs (from cyvcf2<0.32.0,>=0.31.0->fastDFE)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting dill>=0.3.9 (from multiprocess<0.71.0,>=0.70.12->fastDFE)
  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->cyvcf2<0.32.0,>=0.31.0->fastDFE)
  Downloading humanfrie

In [None]:
import json

import fastdfe as fd
import numpy as np

# Input site-frequency spectra (11 counts each).
# In this dataset the CDS and intron entries of a variant class carry the same
# neutral spectrum, so each neutral spectrum is written out once per class.
neutral_sfs = {
    'SNP': [24357618, 523205, 160739, 82683, 59879, 45340, 48086, 39631, 66023, 77880, 2603219],
    'INDEL': [50791780, 646243, 90315, 29904, 15697, 9209, 7385, 5601, 6196, 10696, 451277],
    'SV': [60991876, 27797, 931, 121, 24, 16, 13, 10, 14, 21, 43480],
}

# Selected spectra, keyed "<class>_<region>"; insertion order fixes the order
# in which the variant classes are processed later on.
selected_sfs = {
    'SNP_cds': [4339976, 40928, 11627, 6013, 4111, 3343, 3100, 3010, 3507, 5666, 169099],
    'SNP_intron': [16026438, 482503, 154838, 85041, 59423, 48183, 43221, 42138, 45192, 64199, 2641580],
    'INDEL_cds': [9082532, 3620, 206, 68, 45, 25, 25, 23, 25, 60, 3167],
    'INDEL_intron': [90643795, 476493, 69578, 23624, 12294, 8008, 6383, 5405, 5441, 7292, 489105],
    'SV_cds': [9088460, 416, 20, 2, 1, 1, 0, 0, 0, 0, 683],
    'SV_intron': [40700010, 17035, 752, 69, 22, 25, 13, 17, 9, 20, 37820],
}

# Assemble one record per label. The neutral list is copied so that every
# record owns its own list, and the output prefix is simply the label itself.
variants = {
    label: {
        'sfs_neut': list(neutral_sfs[label.partition('_')[0]]),
        'sfs_sel': selected_counts,
        'output_prefix': label,
    }
    for label, selected_counts in selected_sfs.items()
}

# Settings shared by every inference; defined once because they do not change
# between loop iterations.
N_RUNS = 10          # value passed as n_runs to fd.BaseInference
N_BOOTSTRAPS = 100   # number of bootstrap samples drawn per variant class
CI_LEVEL = 0.05      # value passed as ci_level to get_discretized
# Bin edges used to discretize the inferred DFE
DFE_INTERVALS = np.array([-np.inf, -100., -10., -1., 0., 1., np.inf])

# Storage for results: one dict per variant class, in the order of `variants`
results = []

for key, data in variants.items():
    # Create Spectrum objects from the raw count lists
    sfs_neut = fd.Spectrum(data['sfs_neut'])
    sfs_sel = fd.Spectrum(data['sfs_sel'])

    # Create inference object
    inf = fd.BaseInference(
        sfs_neut=sfs_neut,
        sfs_sel=sfs_sel,
        n_runs=N_RUNS,
        fixed_params=dict(all=dict(eps=0, S_b=1, p_b=0)),
        # Bootstrapping is triggered explicitly below. Leaving this True as
        # well made every variant class bootstrap twice (two "Bootstrapping"
        # progress bars per class in the log), with the first set of
        # replicates never being used.
        do_bootstrap=False
    )

    # Run inference
    inf.run()

    # Run bootstrapping (once) with the requested number of samples
    inf.bootstrap(n_samples=N_BOOTSTRAPS)

    # Extract discretized DFE values and their bootstrap deviations
    discretized_values, discretized_deviations = inf.get_discretized(
        intervals=DFE_INTERVALS,
        confidence_intervals=True,  # Also return confidence-interval deviations
        ci_level=CI_LEVEL,
        bootstrap_type='percentile'  # Type of bootstrap
    )

    # Append results to the list; .tolist() turns the arrays into plain lists
    # so the record can be written with json.dump below
    results.append({
        'variant': key,
        'discretized_values': discretized_values.tolist(),
        'discretized_deviations': discretized_deviations.tolist()
    })

    # Save the inference object to its own file for each variant class
    inf.to_file(f"{data['output_prefix']}_serialized.json")

# Persist the collected results so they can be loaded elsewhere (e.g. in R)
# or used for further analysis
with open("dfe_results.json", "w") as f:
    json.dump(results, f)

# Print results for verification
print(results)

[32mINFO:Discretization: Precomputing linear DFE-SFS transformation using midpoint integration.[0m
Discretization>Precomputing: 100%|██████████| 9/9 [00:01<00:00,  4.78it/s]
BaseInference>Performing inference: 100%|██████████| 10/10 [00:01<00:00,  6.12it/s]
[32mINFO:BaseInference: Successfully finished optimization after 24 iterations and 99 function evaluations, obtaining a log-likelihood of -206.60891796253054.[0m
[32mINFO:BaseInference: Inferred parameters: {all.S_d: -63283.5167210076, all.b: 0.06973488759411901, all.p_b: 0.0, all.S_b: 1.0, all.eps: 0.0}.[0m
BaseInference>Bootstrapping: 100%|██████████| 100/100 [00:08<00:00, 12.14it/s]
BaseInference>Bootstrapping: 100%|██████████| 100/100 [00:15<00:00,  6.32it/s]
[32mINFO:Discretization: Precomputing linear DFE-SFS transformation using midpoint integration.[0m
Discretization>Precomputing: 100%|██████████| 9/9 [00:03<00:00,  2.50it/s]
BaseInference>Performing inference: 100%|██████████| 10/10 [00:00<00:00, 26.81it/s]
[32mINF

[{'variant': 'SNP_cds', 'discretized_values': [0.4506758709686284, 0.08147468306803793, 0.06939269954500453, 0.39845674641832884, 0.0, 0.0], 'discretized_deviations': [[0.0018308411043616024, 0.00019858127761993027, 9.968100038286465e-05, 0.0019563116382325108, 0.0, 0.0], [0.001696343933768385, 0.0001830320584652878, 9.122640109589242e-05, 0.0024397558321282986, 0.0, 0.0]]}, {'variant': 'SNP_intron', 'discretized_values': [0.0, 4.281704979458567e-08, 0.0022161918061784913, 0.9977837653767702, 0.0, 0.0], 'discretized_deviations': [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 1.5543122344752192e-15, 0.0, 0.0]]}, {'variant': 'INDEL_cds', 'discretized_values': [0.9435151173470896, 0.03292890501314899, 0.013733055704049604, 0.009822921935711416, 0.0, 0.0], 'discretized_deviations': [[0.0010988019579107888, 0.00040688553110984627, 0.0002452447109451278, 0.00026772686679030905, 0.0, 0.0], [0.0009348994041633585, 0.000517987280583776, 0.00031344913472917957, 0.00034467482843705127, 0.0, 0.0