# Import libraries

In [1]:
import subprocess
import os
from helpers import parse_variables
import pandas as pd
import numpy as np

# Load simulation parameters

In [2]:
# Read the simulation settings. The result is bound to `params` rather than
# `dict` so the built-in `dict` type is not shadowed for the rest of the
# kernel session.
# NOTE(review): parse_variables is assumed to return a mapping of setting
# name -> string value; confirm against helpers.py.
params = parse_variables('geno_simulation.txt')
G = int(params['G'])
L = int(params['L'])
c = int(params['c'])
k = int(params['k'])
M = float(params['M'])
HWE = int(params['HWE'])

nr_humans = int(params['nr_humans'])
nr_snps = int(params['nr_snps'])
bottleneck_nr = int(params['bottleneck_nr'])

# Allele-frequency thresholds (lower/upper bound per SNP class); they are
# also part of the genotype pickle file names below.
very_rare_threshold_L = float(params['very_rare_threshold_L'])
very_rare_threshold_H = float(params['very_rare_threshold_H'])

rare_threshold_L = float(params['rare_threshold_L'])
rare_threshold_H = float(params['rare_threshold_H'])

common_threshold_L = float(params['common_threshold_L'])
common_threshold_H = float(params['common_threshold_H'])

# True division is kept on purpose: number_of_snps stays a float, as before.
number_of_snps = (G*L)/2 # one locus per chromosome
number_of_individuals = c*k*k

# Root directory of this simulation run; every input below lives under it.
data_dir = f"data/G{G}_L{L}_c{c}_k{k}_M{M}_HWE{HWE}"

# NOTE: pickle files can execute arbitrary code on load; only read files
# produced by this project's own pipeline.
very_rare = pd.read_pickle(f"{data_dir}/genotype/01_veryrare_genotype_AF_{very_rare_threshold_L}_{very_rare_threshold_H}.pkl")
rare = pd.read_pickle(f"{data_dir}/genotype/01_rare_genotype_AF_{rare_threshold_L}_{rare_threshold_H}.pkl")
common = pd.read_pickle(f"{data_dir}/genotype/01_common_genotype_AF_{common_threshold_L}_{common_threshold_H}.pkl")

# Tag every column with its SNP class (VR / R / C) and halve the values.
very_rare = very_rare.add_prefix('VR')/2
rare = rare.add_prefix('R')/2
common = common.add_prefix('C')/2

# Side-by-side table of all SNP classes. The rescaling 2*x - 1 applied to the
# halved values equals the original stored values minus 1.
complete = pd.concat([common, rare, very_rare], axis=1)
complete = ((complete*2)-1)

In [3]:
# Directory holding the bottleneck pickles for this simulation run.
path_bottle = f"data/G{G}_L{L}_c{c}_k{k}_M{M}_HWE{HWE}/phenotype/abyss_bottleneck"


def _bottleneck_size(file_name):
    """Return the integer stored in the third '_'-separated field of file_name.

    Returns None when the field is missing or not an integer, so unrelated
    files in the directory (checkpoints, hidden files, ...) are skipped
    instead of aborting the lookup.
    """
    try:
        return int(file_name.split("_")[2])
    except (IndexError, ValueError):
        return None


# Sorted so that the pick is deterministic if several files match.
bottle_candidates = sorted(
    f for f in os.listdir(path_bottle) if _bottleneck_size(f) == bottleneck_nr
)
if not bottle_candidates:
    raise FileNotFoundError(
        f"No bottleneck file with size {bottleneck_nr} found in {path_bottle}"
    )
bottle_file = bottle_candidates[0]

# The fourth '_'-separated field has the form '<number>seconds...'; keep the number.
elapsed_time_bottleneck = float(bottle_file.split('_')[3].split('seconds')[0])
bottle = pd.read_pickle(f"{path_bottle}/{bottle_file}")

In [4]:
# Display the bottleneck table read from the pickle (pandas truncates the view).
bottle

Unnamed: 0,dim1,dim2,dim3,dim4,dim5,dim6,dim7,dim8,dim9,dim10,...,dim56,dim57,dim58,dim59,dim60,dim61,dim62,dim63,dim64,cluster
0,0.455359,0.388592,0.191418,0.458930,0.661687,0.144426,0.416784,0.411217,0.602725,0.047508,...,-0.054351,0.128195,0.163453,0.397887,0.549206,0.344904,0.159588,0.854892,0.564665,0
1,0.398619,0.427502,0.188442,0.426922,0.623239,0.180459,0.358597,0.353137,0.564396,0.073455,...,-0.054978,0.144686,0.173839,0.364902,0.490640,0.345759,0.173041,0.746469,0.515783,0
2,0.415138,0.415163,0.199577,0.428840,0.630878,0.199640,0.382206,0.368302,0.611632,0.093413,...,-0.028839,0.168215,0.198402,0.367392,0.516175,0.350618,0.196854,0.734936,0.518452,0
3,0.446739,0.425477,0.169041,0.467861,0.639894,0.153074,0.406852,0.402006,0.587955,0.047552,...,-0.076800,0.114113,0.158178,0.404257,0.540886,0.333449,0.143392,0.814889,0.529119,0
4,0.402221,0.399577,0.192166,0.405225,0.635604,0.200144,0.370177,0.354940,0.606097,0.091730,...,-0.038359,0.168506,0.214195,0.346864,0.504147,0.372922,0.196239,0.727235,0.518504,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,0.277684,0.625397,0.240928,0.396594,0.549671,0.390021,0.263985,0.225398,0.630699,0.206685,...,0.076396,0.272116,0.217016,0.315906,0.392289,0.269825,0.295036,0.374932,0.390425,0
796,0.268830,0.626732,0.216540,0.374135,0.578263,0.399622,0.259443,0.215734,0.656260,0.221935,...,0.050029,0.261970,0.239817,0.297749,0.387003,0.294427,0.281333,0.359910,0.379922,0
797,0.269964,0.621104,0.231321,0.384872,0.570897,0.381842,0.257174,0.217777,0.640618,0.220924,...,0.062267,0.258651,0.232238,0.306227,0.383843,0.292831,0.280791,0.378925,0.393973,0
798,0.267053,0.639698,0.226458,0.409782,0.528988,0.356534,0.246967,0.216326,0.600068,0.237072,...,0.049454,0.237244,0.227488,0.327080,0.372514,0.288172,0.261581,0.368331,0.374303,0


In [5]:
# Directory whose entries are listed by the loading loop; each entry name is
# converted with int() and matched against bottle['cluster'].
# The trailing "/" plus the "/" added when building sub-paths yields "//".
# NOTE(review): presumably harmless on POSIX; confirm on other platforms.
path_pops_estimated_lds = f"data/G{G}_L{L}_c{c}_k{k}_M{M}_HWE{HWE}/genotype/LD_blocks_estimated_mafs/"

In [6]:
# Display the product of the L and G simulation parameters.
L*G

10000

In [7]:
def _sorted_block_files(directory, kind):
    """List the estimate files of one kind in directory, ordered by block number.

    A file is selected when the 7th '_'-separated token of its name equals
    kind ('q2', 'p2' or '2pq'). Names with fewer tokens are skipped. The
    result is sorted by the integer that starts the file name.
    """
    selected = []
    for file_name in os.listdir(directory):
        tokens = file_name.split("_")
        if len(tokens) > 6 and tokens[6] == kind:
            selected.append(file_name)
    return sorted(selected, key=lambda name: int(name.split('_')[0]))


def _read_blocks(directory, file_names):
    """Unpickle every file in file_names (in the given order) from directory."""
    return [pd.read_pickle(directory + '/' + name) for name in file_names]


# Column order shared by all combined frames.
snp_order = list(complete.columns)

q2s_pop = []
twopqs_pop = []
p2s_pop = []

for pop in os.listdir(path_pops_estimated_lds):
    print(pop)
    # Not used by the loading itself; kept because a later cell displays it.
    bottle_index = bottle[bottle['cluster']==int(pop)]
    path_estimated_lds = path_pops_estimated_lds + "/" + pop

    q2_files = _sorted_block_files(path_estimated_lds, 'q2')
    p2_files = _sorted_block_files(path_estimated_lds, 'p2')
    twopq_files = _sorted_block_files(path_estimated_lds, '2pq')

    q2_blocks = _read_blocks(path_estimated_lds, q2_files)
    p2_blocks = _read_blocks(path_estimated_lds, p2_files)
    twopq_blocks = _read_blocks(path_estimated_lds, twopq_files)

    # Join the blocks side by side, then reorder columns to match `complete`.
    q2s_pop.append(pd.concat(q2_blocks, axis=1)[snp_order])
    p2s_pop.append(pd.concat(p2_blocks, axis=1)[snp_order])
    twopqs_pop.append(pd.concat(twopq_blocks, axis=1)[snp_order])

    # Later inspection cells read the last block loaded, so keep those names bound.
    q2, p2, twopq = q2_blocks[-1], p2_blocks[-1], twopq_blocks[-1]


# Stack the populations on top of each other and order rows by index.
q2s = pd.concat(q2s_pop, axis=0).sort_index()

# Bug fix: this previously sorted q2s, so p2s was silently a copy of q2s.
p2s = pd.concat(p2s_pop, axis=0).sort_index()

twopqs = pd.concat(twopqs_pop, axis=0).sort_index()

0


In [17]:
# Display the rows of `bottle` selected for the last population visited by the
# loading loop (`bottle_index` is a variable left over from that loop).
bottle_index

Unnamed: 0,dim1,dim2,dim3,dim4,dim5,dim6,dim7,dim8,dim9,dim10,...,dim56,dim57,dim58,dim59,dim60,dim61,dim62,dim63,dim64,cluster
0,0.455359,0.388592,0.191418,0.458930,0.661687,0.144426,0.416784,0.411217,0.602725,0.047508,...,-0.054351,0.128195,0.163453,0.397887,0.549206,0.344904,0.159588,0.854892,0.564665,0
1,0.398619,0.427502,0.188442,0.426922,0.623239,0.180459,0.358597,0.353137,0.564396,0.073455,...,-0.054978,0.144686,0.173839,0.364902,0.490640,0.345759,0.173041,0.746469,0.515783,0
2,0.415138,0.415163,0.199577,0.428840,0.630878,0.199640,0.382206,0.368302,0.611632,0.093413,...,-0.028839,0.168215,0.198402,0.367392,0.516175,0.350618,0.196854,0.734936,0.518452,0
3,0.446739,0.425477,0.169041,0.467861,0.639894,0.153074,0.406852,0.402006,0.587955,0.047552,...,-0.076800,0.114113,0.158178,0.404257,0.540886,0.333449,0.143392,0.814889,0.529119,0
4,0.402221,0.399577,0.192166,0.405225,0.635604,0.200144,0.370177,0.354940,0.606097,0.091730,...,-0.038359,0.168506,0.214195,0.346864,0.504147,0.372922,0.196239,0.727235,0.518504,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,0.277684,0.625397,0.240928,0.396594,0.549671,0.390021,0.263985,0.225398,0.630699,0.206685,...,0.076396,0.272116,0.217016,0.315906,0.392289,0.269825,0.295036,0.374932,0.390425,0
796,0.268830,0.626732,0.216540,0.374135,0.578263,0.399622,0.259443,0.215734,0.656260,0.221935,...,0.050029,0.261970,0.239817,0.297749,0.387003,0.294427,0.281333,0.359910,0.379922,0
797,0.269964,0.621104,0.231321,0.384872,0.570897,0.381842,0.257174,0.217777,0.640618,0.220924,...,0.062267,0.258651,0.232238,0.306227,0.383843,0.292831,0.280791,0.378925,0.393973,0
798,0.267053,0.639698,0.226458,0.409782,0.528988,0.356534,0.246967,0.216326,0.600068,0.237072,...,0.049454,0.237244,0.227488,0.327080,0.372514,0.288172,0.261581,0.368331,0.374303,0


In [16]:
# Display the list of per-population q2 frames collected by the loading loop.
q2s_pop

[snps  CV107_AF_0.245625  CV109_AF_0.256875  CV113_AF_0.2275  CV114_AF_0.25875  \
 0              0.083540           0.143493         0.046064          0.097430   
 1              0.087888           0.134960         0.047565          0.099633   
 2              0.081336           0.125597         0.045521          0.090652   
 3              0.083344           0.139513         0.048004          0.096597   
 4              0.084522           0.130256         0.046812          0.096611   
 ..                  ...                ...              ...               ...   
 795            0.067334           0.047018         0.036337          0.050808   
 796            0.064289           0.043349         0.036197          0.050773   
 797            0.065798           0.046406         0.035340          0.051595   
 798            0.067267           0.045395         0.035911          0.051610   
 799            0.066205           0.042688         0.030731          0.048733   
 
 snps  CV118_A

In [9]:
# Directory used for the combined estimate pickles (removed and rewritten in the following cells).
path_output = f"data/G{G}_L{L}_c{c}_k{k}_M{M}_HWE{HWE}/genotype/"

In [10]:
# Delete any previously written estimate files. os.remove replaces the former
# `rm -rf` shell call: no shell is spawned, it works on every platform, and
# real failures (e.g. permissions) raise instead of being ignored.
for stale_name in (
    "estimated_q2s_via_esti_pop.pkl",
    "estimated_p2s_via_esti_pop.pkl",
    "estimated_2pqs_via_esti_pop.pkl",
):
    try:
        os.remove(f"{path_output}/{stale_name}")
    except FileNotFoundError:
        # Nothing to clean up, matching the behaviour of `rm -f`.
        pass

0

In [11]:
# Persist each combined estimate table to its own pickle under path_output.
pickle_targets = (
    (q2s, f"{path_output}/estimated_q2s_via_esti_pop.pkl"),
    (p2s, f"{path_output}/estimated_p2s_via_esti_pop.pkl"),
    (twopqs, f"{path_output}/estimated_2pqs_via_esti_pop.pkl"),
)
for estimate_frame, destination in pickle_targets:
    estimate_frame.to_pickle(destination)

In [12]:
# Display the sorted 2pq file names of the last population visited by the loading loop.
twopq_files

['0_2047_maf_0.01312_0.28813.pkl_esti_2pq_via_esti_pop_117.896seconds.pkl',
 '1_2049_maf_0.01375_0.28813.pkl_esti_2pq_via_esti_pop_176.669seconds.pkl',
 '2_2045_maf_0.01312_0.29.pkl_esti_2pq_via_esti_pop_149.578seconds.pkl',
 '3_2048_maf_0.01375_0.2825.pkl_esti_2pq_via_esti_pop_166.73seconds.pkl']

In [13]:
# Display the sorted p2 file names of the last population visited by the loading loop.
p2_files

['0_2047_maf_0.01312_0.28813.pkl_esti_p2_via_esti_pop_116.06seconds.pkl',
 '1_2049_maf_0.01375_0.28813.pkl_esti_p2_via_esti_pop_157.681seconds.pkl',
 '2_2045_maf_0.01312_0.29.pkl_esti_p2_via_esti_pop_154.812seconds.pkl',
 '3_2048_maf_0.01375_0.2825.pkl_esti_p2_via_esti_pop_126.955seconds.pkl']

In [15]:
# Element-wise sum of the last q2, p2 and 2pq blocks read by the loading loop.
# NOTE(review): presumably a sanity check that the three estimates add up to
# roughly 1 per entry; confirm the intended tolerance.
q2 + p2 + twopq

snps,CV118_AF_0.2475,CV135_AF_0.228125,CV139_AF_0.245625,CV176_AF_0.25125,CV177_AF_0.24625,CV460_AF_0.215,CV602_AF_0.265,CV626_AF_0.220625,CV633_AF_0.23625,CV650_AF_0.27875,...,VRV9903_AF_0.01875,VRV9923_AF_0.02625,VRV9924_AF_0.03375,VRV9930_AF_0.023125,VRV9932_AF_0.019375,VRV9933_AF_0.034375,VRV9962_AF_0.018125,VRV9967_AF_0.031875,VRV9972_AF_0.019375,VRV9988_AF_0.0425
0,1.017496,0.976301,0.926622,1.111181,1.124911,1.082091,1.062157,1.045741,0.962637,1.115378,...,0.989014,0.962884,1.031152,1.033616,0.989014,1.078134,1.016955,1.034678,1.006662,0.971039
1,1.006230,0.980473,0.930024,1.057786,1.100194,1.074846,1.056426,1.028637,0.978685,1.088584,...,0.985035,0.970559,1.024206,1.021607,0.991960,1.062613,1.009215,1.020639,1.006170,0.970025
2,1.011413,0.992637,0.946531,1.074153,1.106654,1.072196,1.056156,1.033109,0.974662,1.085423,...,0.986681,0.965748,1.024094,1.024143,0.990172,1.059137,1.013627,1.014696,1.007878,0.969559
3,1.012545,0.976536,0.922517,1.088473,1.110972,1.085652,1.069460,1.045686,0.982301,1.117902,...,0.986563,0.968283,1.023842,1.025680,0.987391,1.070669,1.008879,1.033857,1.003411,0.976316
4,0.997423,0.976818,0.926585,1.054390,1.093659,1.070655,1.049127,1.023887,0.969731,1.079990,...,0.989678,0.967928,1.022540,1.021054,0.988997,1.062015,1.011752,1.008896,1.010213,0.971427
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,1.005056,1.038795,1.060976,1.022607,1.030247,0.995456,1.081292,1.031868,0.991979,1.063479,...,0.965910,0.974146,1.030191,1.004053,1.001789,0.996425,1.002842,0.942523,1.004715,0.984121
796,1.003057,1.026645,1.050274,1.017814,1.019629,0.991024,1.077515,1.037613,0.985837,1.066520,...,0.970045,0.980265,1.031976,1.007114,1.001122,1.002010,1.001643,0.927223,1.008584,0.991194
797,1.004282,1.027806,1.054537,1.016059,1.025946,0.986625,1.085038,1.026888,0.982668,1.065219,...,0.967784,0.974869,1.030105,1.004399,1.002097,0.998003,0.998869,0.935232,1.007574,0.984727
798,0.998574,1.030090,1.057506,1.008546,1.025235,0.999695,1.096918,1.040666,1.001199,1.077307,...,0.967175,0.979670,1.031484,1.003596,1.007414,1.005512,0.998565,0.938940,1.004004,0.981934
