# Import libraries

In [1]:
import os
import shutil
import subprocess

import numpy as np
import pandas as pd

from helpers import parse_variables

# Load simulation parameters and run create_geno.R

In [2]:
# Parse the simulation parameters from the config file.
# Stored as `params` (not `dict`) so the builtin `dict` type is not shadowed.
params = parse_variables('geno_simulation.txt')
G = int(params['G'])
L = int(params['L'])
c = int(params['c'])
k = int(params['k'])
M = float(params['M'])

# R commands to run: source the config file, override G, L, c, k and M with the
# values parsed above, then source create_geno.R.
commands = [
    "source('geno_simulation.txt')",
    f"G <- {G}",
    f"L <- {L}",
    f"c <- {c}",
    f"k <- {k}",
    f"M <- {M}",
    "source('create_geno.R', echo=TRUE)",
]

# Concatenate commands into a single string
r_script = ";".join(commands)

# Run the R script, capturing stdout and stderr as text
result = subprocess.run(['Rscript', '-e', r_script], capture_output=True, text=True)

# Report a failed run instead of silently ignoring it; execution continues so the
# cell keeps its original non-raising behaviour.
if result.returncode != 0:
    print("Error executing R script:")
    print(result.stderr)

In [3]:
# Load the complete genotype table.
# The parameters are re-parsed here so this cell does not rely on values left over
# from an earlier cell. Stored as `params` (not `dict`) to avoid shadowing the builtin.
params = parse_variables('geno_simulation.txt')
G = int(params['G'])
L = int(params['L'])
c = int(params['c'])
k = int(params['k'])
M = float(params['M'])

# Threshold bounds for each class; they are part of the pickle file names below
very_rare_threshold_L = float(params['very_rare_threshold_L'])
very_rare_threshold_H = float(params['very_rare_threshold_H'])

rare_threshold_L = float(params['rare_threshold_L'])
rare_threshold_H = float(params['rare_threshold_H'])

common_threshold_L = float(params['common_threshold_L'])
common_threshold_H = float(params['common_threshold_H'])

number_of_snps = (G*L)/2 # one loci per chromosome
number_of_individuals = c*k*k

# Directory holding the genotype pickles for this parameter combination
genotype_dir = f"data/G{G}_L{L}_c{c}_k{k}_M{M}/genotype"

# NOTE: pd.read_pickle can execute arbitrary code while loading; only read files
# produced by this pipeline.
very_rare = pd.read_pickle(f"{genotype_dir}/02_veryrare_genotype_AF_{very_rare_threshold_L}_{very_rare_threshold_H}.pkl")
rare = pd.read_pickle(f"{genotype_dir}/02_rare_genotype_AF_{rare_threshold_L}_{rare_threshold_H}.pkl")
common = pd.read_pickle(f"{genotype_dir}/02_common_genotype_AF_{common_threshold_L}_{common_threshold_H}.pkl")

# Prefix the column names with the class (VR / R / C) and halve the values
very_rare = very_rare.rename(columns=lambda x: 'VR' + x)/2
rare = rare.rename(columns=lambda x: 'R' + x)/2
common = common.rename(columns=lambda x: 'C' + x)/2

# Combine the three classes column-wise, then rescale as 2*x - 1.
# NOTE(review): presumably the raw genotypes are coded 0/1/2, which would make the
# final values -1/0/1 - confirm against the files.
complete = pd.concat([common, rare, very_rare], axis=1)
complete = ((complete*2)-1)

In [4]:
# Directory read by the following cells for the per-file q2 / p2 / 2pq pickles
path_output = "/".join([
    "data",
    f"G{G}_L{L}_c{c}_k{k}_M{M}",
    "genotype",
    "LD_blocks_estimated_mafs",
])

In [5]:
def _has_tag(file_name, tag, position=6):
    """Return True when the `position`-th "_"-separated field of `file_name` equals `tag`.

    File names with too few fields simply do not match, instead of raising IndexError.
    """
    fields = file_name.split("_")
    return len(fields) > position and fields[position] == tag


# List the directory once; sorting makes the resulting file order deterministic
_listing = sorted(os.listdir(path_output))

q2_files = [f for f in _listing if _has_tag(f, 'q2')]
p2_files = [f for f in _listing if _has_tag(f, 'p2')]
twopq_files = [f for f in _listing if _has_tag(f, '2pq')]

In [6]:
# Read every q2 pickle and place the frames side by side (column-wise)
q2s = pd.concat(
    [pd.read_pickle(f"{path_output}/{file_name}") for file_name in q2_files],
    axis=1,
)

# Sort the rows by index, then select the columns of `complete`, in its order
q2s = q2s.sort_index()[list(complete.columns)]

In [7]:
# Read every p2 pickle and place the frames side by side (column-wise)
p2s = pd.concat(
    [pd.read_pickle(f"{path_output}/{file_name}") for file_name in p2_files],
    axis=1,
)

# Sort the rows by index, then select the columns of `complete`, in its order
p2s = p2s.sort_index()[list(complete.columns)]

In [8]:
# Read every 2pq pickle and place the frames side by side (column-wise)
twopqs = pd.concat(
    [pd.read_pickle(f"{path_output}/{file_name}") for file_name in twopq_files],
    axis=1,
)

# Sort the rows by index, then select the columns of `complete`, in its order
twopqs = twopqs.sort_index()[list(complete.columns)]

In [9]:
# Take the element-wise gap between 1 and (2pq + p2 + q2) and add one third of it
# to each of the three frames.
# NOTE(review): the author originally marked this step "Maybe?" - confirm that
# this correction is wanted.
error = (1 - (twopqs + p2s + q2s)) / 3
q2s, p2s, twopqs = [estimate + error for estimate in (q2s, p2s, twopqs)]

In [10]:
# Delete the per-file pickle directory now that its contents are loaded in memory.
# shutil.rmtree avoids passing an interpolated string to a shell; ignore_errors=True
# keeps the `rm -rf` behaviour of not failing when the directory is already gone.
# Note: once this has run, the cells that read from `path_output` cannot be re-run
# until the directory is regenerated.
shutil.rmtree(path_output, ignore_errors=True)

0

In [11]:
# Write each estimated frame to its own pickle in the genotype directory
_frames_by_label = {"q2s": q2s, "p2s": p2s, "2pqs": twopqs}
for _label, _frame in _frames_by_label.items():
    _frame.to_pickle(f"data/G{G}_L{L}_c{c}_k{k}_M{M}/genotype/estimated_{_label}_via_esti_pop.pkl")