#### Test file parameters: rho_5 theta_0.01 sample_size_20 depth_4 genome_size_10000 seed_1
#### The steps that follow the generation of a pairwise table:


In [78]:
#!/usr/bin/env python

import numpy as np
import pandas as pd
from ldpop import rhos_from_string

import m_isolate_by_depth
import m_biallelic_filter_pairwise_table
import m_pairwise_lookup_format_pyrho
import m_custom_hap_sets_and_merge
import m_pij_grid_vectorised
import m_pairwise_rho_estimator_intp_rect_biv

# --- Depth under test and its likelihood lookup table -----------------------
# Change `depth` to try a different depth; the lookup table path is built
# from it, so a matching lk_downsampled_<depth>.csv has to exist.
depth = 50
lookup_table = f"/Volumes/Backup/Lookup_tables/Lookup_tables_m_0.01_r_0-100/lk_downsampled_{depth}.csv"

# --- Rho grid ----------------------------------------------------------------
# The range string is expanded into the rho values by ldpop's
# rhos_from_string.
# NOTE(review): "101,100" presumably means 101 grid points up to rho = 100 —
# confirm against the ldpop documentation.
lookup_table_rho_range = "101,100"
lookup_table_rho_vals = rhos_from_string(lookup_table_rho_range)

# --- Input -------------------------------------------------------------------
pairwise_table_file = "../Recom_Est_Output/pairwise_table.pkl"

# --- Remaining pipeline settings ---------------------------------------------
# recom_tract_len is passed to the p_ij grid step further down.
# NOTE(review): depth_range, n_resamples and num_cores are not referenced by
# the cells visible in this walkthrough — presumably used by the full
# multi-depth / bootstrap pipeline; verify before removing.
recom_tract_len = 500
depth_range = "3,200"
n_resamples = 50
num_cores = 4

#### Load pairwise table

In [79]:
# Load the pickled pairwise table from `pairwise_table_file`.
# In the output shown below, each row is labelled by a pair of positions and
# the columns AA..TT hold counts for the 16 two-site nucleotide combinations.
# NOTE(review): pd.read_pickle can execute arbitrary code while unpickling,
# so only point this at files that come from a trusted source.
pairwise_table = pd.read_pickle(pairwise_table_file)
pairwise_table  # bare last expression so the notebook displays the table

Unnamed: 0,AA,AC,AG,AT,CA,CC,CG,CT,GA,GC,GG,GT,TA,TC,TG,TT
"(42, 50)",0,0,0,0,0,1,0,3,0,0,0,0,0,10,0,0
"(42, 76)",0,0,0,0,0,1,3,0,0,0,0,0,0,0,10,0
"(42, 91)",0,0,0,0,0,1,0,3,0,0,0,0,0,10,0,0
"(42, 106)",0,0,0,0,0,1,3,0,0,0,0,0,0,0,10,0
"(42, 107)",0,0,0,0,3,0,1,0,0,0,0,0,0,0,10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"(9908, 9950)",0,0,0,0,0,5,0,0,0,0,4,0,0,0,0,0
"(9908, 9968)",0,0,0,0,0,1,0,2,0,4,0,0,0,0,0,0
"(9946, 9950)",0,0,0,0,0,0,0,0,0,1,0,0,0,4,4,0
"(9946, 9968)",0,0,0,0,0,0,0,0,0,1,0,0,0,4,0,2


#### Isolate a single depth for testing

In [80]:
# Restrict the pairwise table to the single depth chosen in the first cell.
# NOTE(review): in the output shown below every row's counts sum to `depth`
# (50) — presumably that is the selection criterion; confirm in
# m_isolate_by_depth.
pairwise_table_slice = m_isolate_by_depth.main(pairwise_table, depth)
pairwise_table_slice  # display the depth-specific slice

Unnamed: 0,AA,AC,AG,AT,CA,CC,CG,CT,GA,GC,GG,GT,TA,TC,TG,TT
"(564, 688)",4,0,0,0,1,0,0,0,22,23,0,0,0,0,0,0
"(709, 803)",23,0,18,0,9,0,0,0,0,0,0,0,0,0,0,0
"(803, 933)",0,0,0,36,0,0,0,0,0,12,0,2,0,0,0,0
"(2185, 2243)",0,0,0,0,0,0,0,0,0,0,0,0,8,0,42,0
"(2755, 2885)",0,0,0,0,0,2,8,0,0,40,0,0,0,0,0,0
"(3021, 3117)",0,0,0,0,0,37,0,0,0,3,10,0,0,0,0,0
"(3037, 3132)",0,0,0,0,15,1,0,0,0,0,0,0,10,0,24,0
"(3038, 3132)",0,0,0,0,0,0,0,0,19,1,24,0,6,0,0,0
"(3633, 3679)",0,0,0,0,0,0,0,4,0,33,0,13,0,0,0,0
"(4782, 4866)",0,0,0,0,1,0,25,15,0,0,0,0,0,0,0,9


#### Perform bi-allelic filtering

In [81]:
# Run the bi-allelic filter on the depth slice.
# A copy is handed over so that `pairwise_table_slice` itself stays available
# unchanged for inspection (assumes it is a pandas object whose .copy() is a
# deep copy by default — TODO confirm).
# NOTE(review): in the displayed output the pair (564, 688), which shows three
# different bases at its first site, is no longer present — presumably pairs
# with more than two alleles at a site are dropped; verify in the module.
pairwise_biallelic_table = m_biallelic_filter_pairwise_table.main(pairwise_table_slice.copy())
pairwise_biallelic_table  # display the filtered table

Unnamed: 0,AA,AC,AG,AT,CA,CC,CG,CT,GA,GC,GG,GT,TA,TC,TG,TT
"(709, 803)",23,0,18,0,9,0,0,0,0,0,0,0,0,0,0,0
"(803, 933)",0,0,0,36,0,0,0,0,0,12,0,2,0,0,0,0
"(2755, 2885)",0,0,0,0,0,2,8,0,0,40,0,0,0,0,0,0
"(3021, 3117)",0,0,0,0,0,37,0,0,0,3,10,0,0,0,0,0
"(3633, 3679)",0,0,0,0,0,0,0,4,0,33,0,13,0,0,0,0
"(5932, 6026)",0,0,0,0,0,0,0,0,0,0,2,0,0,0,7,41
"(6817, 6947)",0,0,30,1,0,0,0,0,0,0,0,19,0,0,0,0
"(7825, 7932)",4,0,0,0,0,0,0,0,33,0,0,13,0,0,0,0
"(8160, 8280)",0,0,0,0,3,0,24,0,0,0,0,0,23,0,0,0
"(8205, 8312)",0,0,0,0,3,0,0,0,13,0,34,0,0,0,0,0


#### Convert to lookup format to match against likelihood tables

In [82]:
# Re-express the bi-allelic nucleotide counts in the lookup-table format.
# In the output shown below the 16 nucleotide columns are replaced by the
# four columns 00, 01, 10 and 11, one row per variant pair.
# NOTE(review): how alleles are mapped to 0/1 is decided inside
# m_pairwise_lookup_format_pyrho — check there before relying on the coding.
lookup_formatted_table = m_pairwise_lookup_format_pyrho.main(pairwise_biallelic_table.copy())
lookup_formatted_table  # display the reformatted counts

Unnamed: 0,00,01,10,11
"(709, 803)",0.0,9.0,18.0,23.0
"(803, 933)",2.0,12.0,36.0,0.0
"(2755, 2885)",0.0,40.0,8.0,2.0
"(3021, 3117)",10.0,3.0,0.0,37.0
"(3633, 3679)",13.0,33.0,4.0,0.0
"(5932, 6026)",41.0,7.0,0.0,2.0
"(6817, 6947)",19.0,0.0,1.0,30.0
"(7825, 7932)",13.0,33.0,0.0,4.0
"(8160, 8280)",0.0,23.0,24.0,3.0
"(8205, 8312)",34.0,13.0,0.0,3.0


#### Merge the lookup-formatted table with the likelihood table

In [83]:
# Match the variant pairs against the likelihood lookup table.
# Copies of both tables are passed in, together with the rho grid and the
# path of the depth-specific lookup table. The call returns two objects: the
# merged table displayed below and `table_ids_for_eq3`, which is handed to
# the final likelihood step later on.
# In the displayed output there is one row per variant pair, one column per
# rho value and a trailing `d_ij` column.
# NOTE(review): the d_ij values shown equal the distance between the two
# positions of each pair (e.g. 803 - 709 = 94) — confirm in the module.
merged_eq3_table, table_ids_for_eq3 = m_custom_hap_sets_and_merge.main(
    pairwise_biallelic_table.copy(),
    lookup_formatted_table.copy(),
    lookup_table_rho_vals,
    lookup_table,
)
merged_eq3_table  # display only the merged table

Unnamed: 0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.000000000000001,8.0,9.0,...,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,100.0,d_ij
0,-63.407341,-63.450302,-63.522497,-63.601174,-63.680284,-63.757721,-63.832687,-63.904903,-63.974319,-64.040991,...,-66.056109,-66.065108,-66.073972,-66.082705,-66.091308,-66.099786,-66.108141,-66.116374,-66.12449,94
1,-48.60208,-48.141035,-48.447715,-48.823685,-49.196044,-49.549756,-49.882467,-50.19525,-50.490037,-50.768823,...,-60.091752,-60.147231,-60.202079,-60.256303,-60.309915,-60.362922,-60.415335,-60.467162,-60.51841,130
2,-43.12399,-42.730268,-42.916198,-43.160218,-43.40302,-43.632947,-43.848457,-44.050521,-44.240637,-44.42026,...,-50.564476,-50.603554,-50.642259,-50.680597,-50.718575,-50.756198,-50.793471,-50.8304,-50.866989,130
3,-48.702983,-48.407871,-48.634291,-48.918166,-49.20144,-49.471564,-49.726316,-49.966314,-50.19292,-50.407579,...,-57.711433,-57.755634,-57.799351,-57.842591,-57.885361,-57.92767,-57.969523,-58.010927,-58.051891,96
4,-53.223861,-53.219663,-53.265636,-53.321441,-53.378234,-53.433492,-53.486485,-53.537084,-53.58537,-53.631493,...,-55.188806,-55.197601,-55.206291,-55.21488,-55.223369,-55.231759,-55.240052,-55.24825,-55.256354,46
5,-40.280738,-40.23451,-40.246883,-40.270463,-40.295885,-40.320729,-40.344367,-40.366703,-40.387809,-40.407797,...,-41.084918,-41.089108,-41.093256,-41.097363,-41.10143,-41.105458,-41.109447,-41.113398,-41.117311,94
6,-51.02859,-50.302686,-50.864879,-51.507634,-52.138238,-52.738021,-53.304006,-53.837558,-54.341262,-54.817897,...,-69.190704,-69.258878,-69.326017,-69.392143,-69.457276,-69.52144,-69.584653,-69.646938,-69.708312,130
7,-52.296261,-52.300391,-52.310913,-52.324088,-52.338394,-52.3531,-52.367825,-52.382359,-52.396587,-52.410446,...,-52.86197,-52.864087,-52.866174,-52.868231,-52.870258,-52.872257,-52.874228,-52.876171,-52.878088,107
8,-57.104125,-56.751465,-57.172512,-57.678934,-58.192202,-58.692653,-59.174533,-59.636498,-60.0788,-60.502286,...,-73.019559,-73.07432,-73.128266,-73.181416,-73.233786,-73.285396,-73.336261,-73.386397,-73.43582,120
9,-50.828903,-50.817438,-50.845209,-50.881132,-50.918273,-50.954617,-50.989556,-51.022953,-51.054839,-51.085307,...,-52.122976,-52.128956,-52.134867,-52.140712,-52.14649,-52.152205,-52.157856,-52.163444,-52.168971,107


#### Calculate p_ij values for variant pairs

In [84]:
# Build the grid of p_ij values from the recombination tract length, the rho
# grid and a copy of the merged table.
# In the output shown below there is one row per variant pair and one column
# per rho value; rows with the same d_ij in the merged table share the same
# values.
# NOTE(review): the exact p_ij formula lives in m_pij_grid_vectorised —
# presumably a function of rho, d_ij and the tract length; verify there.
p_ij_grid = m_pij_grid_vectorised.main(recom_tract_len, lookup_table_rho_vals, merged_eq3_table.copy())
p_ij_grid  # display the grid

Unnamed: 0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,100.0
0,0.0,0.342771,0.685541,1.028312,1.371082,1.713853,2.056624,2.399394,2.742165,3.084935,...,31.192123,31.534894,31.877664,32.220435,32.563206,32.905976,33.248747,33.591517,33.934288,34.277059
1,0.0,0.457897,0.915794,1.37369,1.831587,2.289484,2.747381,3.205278,3.663175,4.121071,...,41.668611,42.126508,42.584405,43.042302,43.500199,43.958096,44.415992,44.873889,45.331786,45.789683
2,0.0,0.457897,0.915794,1.37369,1.831587,2.289484,2.747381,3.205278,3.663175,4.121071,...,41.668611,42.126508,42.584405,43.042302,43.500199,43.958096,44.415992,44.873889,45.331786,45.789683
3,0.0,0.349386,0.698773,1.048159,1.397545,1.746931,2.096318,2.445704,2.79509,3.144476,...,31.79415,32.143536,32.492922,32.842309,33.191695,33.541081,33.890468,34.239854,34.58924,34.938626
4,0.0,0.17579,0.351579,0.527369,0.703159,0.878949,1.054738,1.230528,1.406318,1.582107,...,15.996863,16.172652,16.348442,16.524232,16.700022,16.875811,17.051601,17.227391,17.40318,17.57897
5,0.0,0.342771,0.685541,1.028312,1.371082,1.713853,2.056624,2.399394,2.742165,3.084935,...,31.192123,31.534894,31.877664,32.220435,32.563206,32.905976,33.248747,33.591517,33.934288,34.277059
6,0.0,0.457897,0.915794,1.37369,1.831587,2.289484,2.747381,3.205278,3.663175,4.121071,...,41.668611,42.126508,42.584405,43.042302,43.500199,43.958096,44.415992,44.873889,45.331786,45.789683
7,0.0,0.385303,0.770606,1.15591,1.541213,1.926516,2.311819,2.697123,3.082426,3.467729,...,35.062594,35.447897,35.8332,36.218504,36.603807,36.98911,37.374413,37.759717,38.14502,38.530323
8,0.0,0.426744,0.853489,1.280233,1.706977,2.133721,2.560466,2.98721,3.413954,3.840699,...,38.833729,39.260474,39.687218,40.113962,40.540706,40.967451,41.394195,41.820939,42.247684,42.674428
9,0.0,0.385303,0.770606,1.15591,1.541213,1.926516,2.311819,2.697123,3.082426,3.467729,...,35.062594,35.447897,35.8332,36.218504,36.603807,36.98911,37.374413,37.759717,38.14502,38.530323


#### Get final pairwise (variant pairs) likelihoods

In [85]:
# Produce the final per-pair likelihoods for this depth.
# Inputs are copies of the merged table, the table ids returned by the merge
# step and the p_ij grid, plus the lookup table path and the depth.
# In the displayed output there is one row per variant pair and one column
# per rho value.
# NOTE(review): judging by the module name the values are obtained by
# interpolation (rectangular bivariate) — confirm in
# m_pairwise_rho_estimator_intp_rect_biv.
interpolated_eq2_df = m_pairwise_rho_estimator_intp_rect_biv.main(
    merged_eq3_table.copy(),
    table_ids_for_eq3.copy(),
    p_ij_grid.copy(),
    lookup_table,
    depth,
)
interpolated_eq2_df  # display the resulting likelihood table

Unnamed: 0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,100.0
0,-63.407341,-63.422067,-63.436793,-63.452346,-63.477093,-63.501839,-63.526952,-63.55392,-63.580888,-63.607893,...,-65.036863,-65.047368,-65.057873,-65.068195,-65.078416,-65.088636,-65.098658,-65.108606,-65.118553,-65.128288
1,-48.60208,-48.390969,-48.179858,-48.255638,-48.396066,-48.556552,-48.728708,-48.900122,-49.070624,-49.238869,...,-56.154402,-56.204892,-56.254754,-56.304539,-56.353562,-56.402585,-56.450869,-56.499079,-56.546717,-56.594138
2,-43.12399,-42.943706,-42.763422,-42.799748,-42.884885,-42.986838,-43.098574,-43.21006,-43.321238,-43.430858,...,-47.910164,-47.943299,-47.976044,-48.00874,-48.040962,-48.073183,-48.104946,-48.136663,-48.168024,-48.199251
3,-48.702983,-48.599875,-48.496767,-48.418775,-48.497884,-48.576992,-48.661634,-48.760816,-48.859998,-48.959093,...,-53.665635,-53.701983,-53.737893,-53.773804,-53.809324,-53.844524,-53.879723,-53.914454,-53.94897,-53.983487
4,-53.223861,-53.223123,-53.222385,-53.221647,-53.220909,-53.220171,-53.222179,-53.230261,-53.238342,-53.246424,...,-53.90649,-53.912459,-53.918424,-53.924389,-53.930354,-53.936319,-53.942229,-53.948006,-53.953783,-53.95956
5,-40.280738,-40.264893,-40.249047,-40.23486,-40.239101,-40.243343,-40.248218,-40.256301,-40.264384,-40.272623,...,-40.703714,-40.707033,-40.710352,-40.713627,-40.716878,-40.720129,-40.723333,-40.72652,-40.729706,-40.732843
6,-51.02859,-50.696201,-50.363812,-50.512772,-50.770199,-51.050946,-51.345262,-51.637083,-51.925834,-52.210855,...,-63.684077,-63.761942,-63.838615,-63.91514,-63.990215,-64.065289,-64.138955,-64.21248,-64.284914,-64.356933
7,-52.296261,-52.297852,-52.299443,-52.302031,-52.306086,-52.31014,-52.315021,-52.320097,-52.325267,-52.330779,...,-52.653839,-52.65623,-52.658621,-52.660979,-52.66331,-52.665642,-52.667919,-52.670194,-52.672449,-52.674669
8,-57.104125,-56.95363,-56.803134,-56.869456,-57.049136,-57.240231,-57.456344,-57.672457,-57.891403,-58.110437,...,-68.088163,-68.154476,-68.220118,-68.285319,-68.349307,-68.413295,-68.475812,-68.538206,-68.59971,-68.66057
9,-50.828903,-50.824486,-50.820068,-50.821768,-50.832468,-50.843168,-50.85641,-50.870252,-50.884194,-50.898504,...,-51.605119,-51.610481,-51.615844,-51.621145,-51.6264,-51.631656,-51.63681,-51.641962,-51.647077,-51.652129


#### ...Collect pairwise likelihoods across depths, bootstrap and perform final sums
Here we are only looking at a single depth (the `depth` value set in the first cell).