#### Test file parameters: rho_5 theta_0.01 sample_size_20 depth_4 genome_size_10000 seed_1
#### The steps that follow the generation of a pairwise table:


In [70]:
#!/usr/bin/env python

import numpy as np
import pandas as pd
from ldpop import rhos_from_string

import m_isolate_by_depth
import m_biallelic_filter_pairwise_table
import m_pairwise_lookup_format_pyrho
import m_custom_hap_sets_and_merge
import m_pij_grid_vectorised
import m_pairwise_rho_estimator_intp_rect_biv

# --- Run configuration ------------------------------------------------------
# Depth to walk through in this notebook. The lookup table file name below is
# keyed on this value, so a matching lk_downsampled_<depth>.csv must exist in
# lookup_table_dir before changing it.
depth = 5

recom_tract_len = 500   # passed to m_pij_grid_vectorised.main further down
depth_range = "3,200"   # not referenced by the cells shown in this walkthrough
n_resamples = 50        # not referenced by the cells shown in this walkthrough
num_cores = 4           # not referenced by the cells shown in this walkthrough

# Pickled pairwise table produced by the step that precedes this notebook.
pairwise_table_file = "../Recom_Est_Output/pairwise_table.pkl"

# rho grid for the lookup tables, as an ldpop grid string; with "101,100" the
# merged table below has one column per rho value from 0.0 to 100.0.
lookup_table_rho_range = "101,100"
lookup_table_rho_vals = rhos_from_string(lookup_table_rho_range)

# Directory holding the per-depth likelihood lookup tables. This is a
# machine-specific absolute path: edit this one constant to point at your own
# copy of the tables instead of rewriting the file-name template.
lookup_table_dir = "/Volumes/Backup/Lookup_tables/Lookup_tables_m_0.01_r_0-100"
lookup_table = f"{lookup_table_dir}/lk_downsampled_{depth}.csv"

#### Load pairwise table

In [71]:
# Load the pickled pairwise table: one row per site pair, one column per
# nucleotide pair (AA ... TT), as shown in the output below.
# NOTE(review): unpickling can execute arbitrary code, so only load a
# pairwise_table.pkl that was produced by your own run of the pipeline.
pairwise_table = pd.read_pickle(pairwise_table_file)
pairwise_table

Unnamed: 0,AA,AC,AG,AT,CA,CC,CG,CT,GA,GC,GG,GT,TA,TC,TG,TT
"(42, 50)",0,0,0,0,0,1,0,3,0,0,0,0,0,10,0,0
"(42, 76)",0,0,0,0,0,1,3,0,0,0,0,0,0,0,10,0
"(42, 91)",0,0,0,0,0,1,0,3,0,0,0,0,0,10,0,0
"(42, 106)",0,0,0,0,0,1,3,0,0,0,0,0,0,0,10,0
"(42, 107)",0,0,0,0,3,0,1,0,0,0,0,0,0,0,10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"(9908, 9950)",0,0,0,0,0,5,0,0,0,0,4,0,0,0,0,0
"(9908, 9968)",0,0,0,0,0,1,0,2,0,4,0,0,0,0,0,0
"(9946, 9950)",0,0,0,0,0,0,0,0,0,1,0,0,0,4,4,0
"(9946, 9968)",0,0,0,0,0,0,0,0,0,1,0,0,0,4,0,2


#### Isolate a single depth for testing

In [72]:
# Restrict the pairwise table to a single depth. In the output below, the
# counts in every retained row sum to `depth` -- presumably depth is the number
# of reads spanning both sites; confirm against m_isolate_by_depth.
pairwise_table_slice = m_isolate_by_depth.main(pairwise_table, depth)
pairwise_table_slice

Unnamed: 0,AA,AC,AG,AT,CA,CC,CG,CT,GA,GC,GG,GT,TA,TC,TG,TT
"(50, 344)",0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,1
"(198, 458)",0,0,0,0,2,0,3,0,0,0,0,0,0,0,0,0
"(198, 466)",0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0
"(2744, 3037)",0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0
"(2744, 3038)",0,0,0,0,0,0,4,1,0,0,0,0,0,0,0,0
"(2755, 3037)",0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0
"(2755, 3038)",0,0,0,0,0,0,0,0,0,0,4,1,0,0,0,0
"(3267, 3527)",0,1,0,3,0,0,0,0,0,0,0,1,0,0,0,0
"(3267, 3532)",1,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0
"(3633, 3891)",0,0,0,0,0,0,0,0,0,2,3,0,0,0,0,0


#### Perform bi-allelic filtering

In [73]:
# A copy is passed so that pairwise_table_slice (displayed above) cannot be
# modified by the filter. Judging by the output, pairs in which a site shows
# only one allele -- e.g. (198, 458) -- are dropped; the exact rule lives in
# m_biallelic_filter_pairwise_table (TODO confirm).
pairwise_biallelic_table = m_biallelic_filter_pairwise_table.main(pairwise_table_slice.copy())
pairwise_biallelic_table

Unnamed: 0,AA,AC,AG,AT,CA,CC,CG,CT,GA,GC,GG,GT,TA,TC,TG,TT
"(50, 344)",0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,1
"(3267, 3527)",0,1,0,3,0,0,0,0,0,0,0,1,0,0,0,0
"(3267, 3532)",1,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0
"(3689, 3969)",0,0,0,0,2,2,0,0,0,0,0,0,0,1,0,0
"(5249, 5534)",2,0,0,0,0,0,0,0,2,0,0,1,0,0,0,0
"(7234, 7522)",1,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0
"(7555, 7840)",1,1,0,0,0,0,0,0,3,0,0,0,0,0,0,0
"(7566, 7856)",0,0,0,0,0,0,0,0,2,0,0,0,1,0,0,2
"(7579, 7856)",2,0,0,0,0,0,0,0,1,0,0,2,0,0,0,0
"(7873, 8166)",0,0,0,0,0,0,0,0,1,0,2,0,2,0,0,0


#### Convert to lookup format to match against likelihood tables

In [74]:
# Recode the 16 nucleotide-pair count columns into the four haplotype count
# columns 00 / 01 / 10 / 11 shown in the output below. Which allele is coded
# 0 vs 1 is decided inside m_pairwise_lookup_format_pyrho (not visible here).
# A copy is passed so pairwise_biallelic_table stays untouched for later cells.
lookup_formatted_table = m_pairwise_lookup_format_pyrho.main(pairwise_biallelic_table.copy())
lookup_formatted_table

Unnamed: 0,00,01,10,11
"(50, 344)",1.0,0.0,2.0,2.0
"(3267, 3527)",1.0,0.0,3.0,1.0
"(3267, 3532)",1.0,0.0,3.0,1.0
"(3689, 3969)",1.0,0.0,2.0,2.0
"(5249, 5534)",1.0,2.0,0.0,2.0
"(7234, 7522)",1.0,0.0,3.0,1.0
"(7555, 7840)",0.0,3.0,1.0,1.0
"(7566, 7856)",2.0,1.0,0.0,2.0
"(7579, 7856)",2.0,1.0,0.0,2.0
"(7873, 8166)",0.0,2.0,2.0,1.0


#### Merge the lookup-formatted table with the likelihood (lookup) table

In [75]:
# Look each pair's haplotype configuration up in the likelihood table for this
# depth. The call returns two objects: the merged table displayed below (one
# column per rho value plus d_ij, which matches the distance between the two
# site positions, e.g. 344 - 50 = 294) and table_ids_for_eq3, which is passed
# on to the final interpolation step. Copies are handed over so the input
# frames shown in earlier cells are left as displayed.
merged_eq3_table, table_ids_for_eq3 = m_custom_hap_sets_and_merge.main(
    pairwise_biallelic_table.copy(),
    lookup_formatted_table.copy(),
    lookup_table_rho_vals,
    lookup_table,
)
merged_eq3_table

Unnamed: 0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.000000000000001,8.0,9.0,...,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,100.0,d_ij
0,-14.730968,-14.635944,-14.593298,-14.569315,-14.554192,-14.543955,-14.536679,-14.531316,-14.52725,-14.524097,...,-14.507561,-14.507557,-14.507554,-14.507551,-14.507547,-14.507544,-14.507541,-14.507538,-14.507536,294
1,-13.643803,-13.589609,-13.554835,-13.531097,-13.513994,-13.50114,-13.491157,-13.483195,-13.476709,-13.471329,...,-13.422153,-13.422084,-13.422016,-13.42195,-13.421884,-13.42182,-13.421758,-13.421696,-13.421636,260
2,-13.643803,-13.589609,-13.554835,-13.531097,-13.513994,-13.50114,-13.491157,-13.483195,-13.476709,-13.471329,...,-13.422153,-13.422084,-13.422016,-13.42195,-13.421884,-13.42182,-13.421758,-13.421696,-13.421636,265
3,-14.730968,-14.635944,-14.593298,-14.569315,-14.554192,-14.543955,-14.536679,-14.531316,-14.52725,-14.524097,...,-14.507561,-14.507557,-14.507554,-14.507551,-14.507547,-14.507544,-14.507541,-14.507538,-14.507536,280
4,-14.730968,-14.635944,-14.593298,-14.569315,-14.554192,-14.543955,-14.536679,-14.531316,-14.52725,-14.524097,...,-14.507561,-14.507557,-14.507554,-14.507551,-14.507547,-14.507544,-14.507541,-14.507538,-14.507536,285
5,-13.643803,-13.589609,-13.554835,-13.531097,-13.513994,-13.50114,-13.491157,-13.483195,-13.476709,-13.471329,...,-13.422153,-13.422084,-13.422016,-13.42195,-13.421884,-13.42182,-13.421758,-13.421696,-13.421636,288
6,-14.709313,-14.503764,-14.459224,-14.449067,-14.449188,-14.452646,-14.457045,-14.461492,-14.465657,-14.469434,...,-14.505944,-14.50597,-14.505996,-14.506021,-14.506045,-14.506069,-14.506092,-14.506115,-14.506136,285
7,-15.394012,-15.154997,-15.127278,-15.140172,-15.163228,-15.188054,-15.211969,-15.234133,-15.254373,-15.272757,...,-15.540587,-15.541161,-15.541725,-15.542277,-15.542819,-15.543351,-15.543873,-15.544386,-15.544889,290
8,-15.394012,-15.154997,-15.127278,-15.140172,-15.163228,-15.188054,-15.211969,-15.234133,-15.254373,-15.272757,...,-15.540587,-15.541161,-15.541725,-15.542277,-15.542819,-15.543351,-15.543873,-15.544386,-15.544889,277
9,-15.394012,-15.154997,-15.127278,-15.140172,-15.163228,-15.188054,-15.211969,-15.234133,-15.254373,-15.272757,...,-15.540587,-15.541161,-15.541725,-15.542277,-15.542819,-15.543351,-15.543873,-15.544386,-15.544889,293


#### Calculate p_ij values for variant pairs

In [76]:
# Build the p_ij grid: one row per variant pair, one column per rho value.
# NOTE(review): the displayed values are consistent with
# 2 * rho * (1 - exp(-d_ij / recom_tract_len)), e.g. d_ij = 294 and rho = 1
# gives 0.889 -- confirm against m_pij_grid_vectorised before relying on it.
p_ij_grid = m_pij_grid_vectorised.main(recom_tract_len, lookup_table_rho_vals, merged_eq3_table.copy())
p_ij_grid

Unnamed: 0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,100.0
0,0.0,0.889126,1.778252,2.667378,3.556504,4.44563,5.334755,6.223881,7.113007,8.002133,...,80.910457,81.799583,82.688709,83.577835,84.466961,85.356087,86.245213,87.134338,88.023464,88.91259
1,0.0,0.810959,1.621918,2.432877,3.243836,4.054795,4.865753,5.676712,6.487671,7.29863,...,73.79726,74.608219,75.419178,76.230137,77.041096,77.852055,78.663014,79.473973,80.284932,81.09589
2,0.0,0.82279,1.64558,2.46837,3.29116,4.11395,4.93674,5.75953,6.58232,7.405111,...,74.873896,75.696686,76.519476,77.342266,78.165056,78.987846,79.810636,80.633426,81.456216,82.279006
3,0.0,0.857582,1.715164,2.572746,3.430327,4.287909,5.145491,6.003073,6.860655,7.718237,...,78.03995,78.897532,79.755114,80.612696,81.470278,82.32786,83.185442,84.043023,84.900605,85.758187
4,0.0,0.868949,1.737898,2.606847,3.475796,4.344746,5.213695,6.082644,6.951593,7.820542,...,79.07437,79.943319,80.812268,81.681218,82.550167,83.419116,84.288065,85.157014,86.025963,86.894912
5,0.0,0.875715,1.75143,2.627145,3.50286,4.378576,5.254291,6.130006,7.005721,7.881436,...,79.690075,80.56579,81.441505,82.31722,83.192935,84.068651,84.944366,85.820081,86.695796,87.571511
6,0.0,0.868949,1.737898,2.606847,3.475796,4.344746,5.213695,6.082644,6.951593,7.820542,...,79.07437,79.943319,80.812268,81.681218,82.550167,83.419116,84.288065,85.157014,86.025963,86.894912
7,0.0,0.880203,1.760407,2.64061,3.520813,4.401016,5.28122,6.161423,7.041626,7.921829,...,80.098497,80.978701,81.858904,82.739107,83.61931,84.499514,85.379717,86.25992,87.140123,88.020327
8,0.0,0.850707,1.701414,2.55212,3.402827,4.253534,5.104241,5.954947,6.805654,7.656361,...,77.414315,78.265022,79.115729,79.966435,80.817142,81.667849,82.518556,83.369262,84.219969,85.070676
9,0.0,0.886902,1.773804,2.660706,3.547608,4.43451,5.321412,6.208314,7.095215,7.982117,...,80.708076,81.594978,82.48188,83.368782,84.255683,85.142585,86.029487,86.916389,87.803291,88.690193


#### Get final pairwise (variant pairs) likelihoods

In [77]:
# Final per-pair likelihoods on the rho grid. The estimator is given the merged
# likelihood table, the table ids returned by the merge step, the p_ij grid,
# and the lookup table path and depth. Presumably it interpolates the lookup
# likelihoods at each pair's p_ij values (the module name suggests a
# rectangular bivariate interpolation) -- confirm in the module itself.
# Copies are passed so the frames displayed above are not altered.
interpolated_eq2_df = m_pairwise_rho_estimator_intp_rect_biv.main(
    merged_eq3_table.copy(),
    table_ids_for_eq3.copy(),
    p_ij_grid.copy(),
    lookup_table,
    depth,
)
interpolated_eq2_df

Unnamed: 0,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,...,91.0,92.0,93.0,94.0,95.0,96.0,97.0,98.0,99.0,100.0
0,-14.730968,-14.64648,-14.602755,-14.577292,-14.560899,-14.54963,-14.541519,-14.535478,-14.530856,-14.527244,...,-14.507611,-14.507606,-14.507601,-14.507597,-14.507593,-14.507588,-14.507584,-14.50758,-14.507576,-14.507573
1,-13.643803,-13.599854,-13.567983,-13.54456,-13.526927,-13.513289,-13.502866,-13.494384,-13.487274,-13.481258,...,-13.423734,-13.423648,-13.423563,-13.42348,-13.423399,-13.42332,-13.423242,-13.423166,-13.423091,-13.423017
2,-13.643803,-13.599213,-13.56716,-13.543717,-13.526118,-13.512529,-13.501953,-13.493557,-13.486521,-13.480568,...,-13.42362,-13.423535,-13.423451,-13.42337,-13.42329,-13.423211,-13.423134,-13.423059,-13.422985,-13.422913
3,-14.730968,-14.649478,-14.605445,-14.579562,-14.562807,-14.551244,-14.542896,-14.536662,-14.532063,-14.528396,...,-14.507628,-14.507623,-14.507618,-14.507613,-14.507608,-14.507603,-14.507599,-14.507595,-14.50759,-14.507586
4,-14.730968,-14.648397,-14.604476,-14.578744,-14.562119,-14.550663,-14.5424,-14.536236,-14.531576,-14.52798,...,-14.507622,-14.507617,-14.507612,-14.507607,-14.507602,-14.507598,-14.507593,-14.507589,-14.507585,-14.507581
5,-13.643803,-13.596345,-13.563479,-13.539948,-13.522497,-13.509128,-13.498601,-13.490122,-13.483158,-13.477478,...,-13.423146,-13.423065,-13.422987,-13.42291,-13.422834,-13.42276,-13.422688,-13.422617,-13.422548,-13.422479
6,-14.709313,-14.530701,-14.470898,-14.45306,-14.449125,-14.45038,-14.453586,-14.457412,-14.461277,-14.46491,...,-14.505518,-14.505552,-14.505586,-14.505618,-14.50565,-14.505681,-14.505711,-14.50574,-14.505768,-14.505796
7,-15.394012,-15.183631,-15.133919,-15.135538,-15.15218,-15.173184,-15.194779,-15.215547,-15.234976,-15.25279,...,-15.53276,-15.53341,-15.534046,-15.53467,-15.535283,-15.535884,-15.536475,-15.537054,-15.537624,-15.538183
8,-15.394012,-15.190681,-15.135554,-15.134397,-15.14946,-15.169523,-15.190547,-15.210891,-15.229826,-15.247418,...,-15.530697,-15.531364,-15.53202,-15.532662,-15.533291,-15.533908,-15.534515,-15.53511,-15.535695,-15.53627
9,-15.394012,-15.18203,-15.133548,-15.135797,-15.152798,-15.174015,-15.195741,-15.216586,-15.23606,-15.254011,...,-15.53321,-15.533856,-15.534489,-15.53511,-15.535719,-15.536318,-15.536905,-15.53748,-15.538045,-15.5386


#### ...Collect pairwise likelihoods across depths, bootstrap and perform final sums
Note: this notebook only walks through a single depth (the value of `depth` set in the first code cell).