In [1]:
import numpy as np
import pandas as pd
import pickle as pkl

In [2]:
def H1_H2_H12(genotype_window):
    """Compute haplotype-homozygosity statistics for one window.

    Every distinct entry of ``genotype_window`` is counted as one haplotype
    class; its frequency is its count divided by ``len(genotype_window)``.

    Parameters
    ----------
    genotype_window : sequence of hashable
        One entry per sample. Must be non-empty.

    Returns
    -------
    tuple
        ``(H1, H2, H12, H2/H1, maxP, genotype_dict)`` where

        * ``H1``   - sum of squared class frequencies,
        * ``H2``   - ``H1`` minus the squared largest frequency,
        * ``H12``  - ``H1 + 2 * p1 * p2`` with ``p1``/``p2`` the two largest
          frequencies (``p2`` is taken as 0 when only one class exists),
        * ``maxP`` - the largest class frequency,
        * ``genotype_dict`` - mapping of each distinct entry to its count.

    Raises
    ------
    IndexError
        If ``genotype_window`` is empty (there is no largest frequency).
    """
    genotype_dict = {}
    for genotype in genotype_window:
        genotype_dict[genotype] = genotype_dict.get(genotype, 0) + 1

    # Class frequencies in ascending order, so the largest sit at the end.
    AF_list = np.sort(
        np.array(list(genotype_dict.values())) / len(genotype_window)
    )

    H1 = np.power(AF_list, 2).sum()
    maxP = AF_list[-1]
    # A window in which all samples are identical has a single class; the
    # second-largest frequency is then 0 instead of an out-of-range index.
    second_P = AF_list[-2] if AF_list.size > 1 else 0.0

    H2 = H1 - maxP**2
    H12 = H1 + 2*maxP*second_P
    return H1,H2,H12,H2/H1,maxP,genotype_dict

In [3]:
def get_left_right_idx(left,right,positions):
    """Translate a coordinate interval into a pair of indices into ``positions``.

    Parameters
    ----------
    left, right : float
        Lower and upper coordinate of the interval.
    positions : sequence of float
        Coordinates to search; must contain at least one element.

    Returns
    -------
    tuple
        ``(left_idx, right_idx)``.

        * A bound smaller than ``positions[0]`` maps to index ``0`` and a
          bound larger than ``positions[-1]`` maps to ``len(positions) - 1``.
        * Otherwise ``left_idx`` is the index of the first position that is
          ``>= left`` and ``right_idx`` is one less than the index of the
          first position that is ``>= right`` (so it may be ``-1``, or
          smaller than ``left_idx`` when no position lies in the interval).
    """
    first_pos = positions[0]
    last_pos = positions[-1]
    final_index = len(positions) - 1

    def _edge_index(bound):
        # Bounds outside the covered range are pinned to the nearest end.
        if bound < first_pos:
            return 0
        if bound > last_pos:
            return final_index
        return None

    left_idx = _edge_index(left)
    right_idx = _edge_index(right)

    if left_idx is None:
        left_idx = next(
            (idx for idx, coord in enumerate(positions) if coord >= left),
            None,
        )
    if right_idx is None:
        # Step back by one so the index refers to the last position < right.
        right_idx = next(
            (idx - 1 for idx, coord in enumerate(positions) if coord >= right),
            None,
        )
    return left_idx, right_idx

In [6]:
def get_summary_dict(positions,genotype_list,
                     step_size=500/200000,
                     window_size=4000/200000,
                     x_scale=100000):
    """Slide a window over the unit interval and collect haplotype statistics.

    Windows start at ``0, step_size, 2*step_size, ...`` and each spans
    ``window_size``; ``int((1 - window_size) / step_size) + 1`` windows are
    evaluated. For every window the matching slice of each genotype is passed
    to ``H1_H2_H12``.

    Parameters
    ----------
    positions : sequence of float
        Site coordinates, forwarded to ``get_left_right_idx``.
    genotype_list : sequence
        One sliceable genotype per sample, indexed consistently with
        ``positions``.
    step_size : float, optional
        Distance between consecutive window starts. Must be positive.
    window_size : float, optional
        Width of each window.
    x_scale : float, optional
        Factor applied to each window midpoint before it is stored in
        ``x_list``.
        NOTE(review): the default step/window are expressed as fractions of
        200000 while the default scale is 100000 - confirm this is intended.

    Returns
    -------
    tuple
        ``(summary_dict, x_list)``. ``summary_dict`` maps ``'H1'``, ``'H2'``,
        ``'H12'``, ``'H2_H1'`` and ``'maxP'`` to per-window lists;
        ``x_list`` holds the scaled window midpoints.

    Raises
    ------
    ValueError
        If ``step_size`` is not positive.
    """
    if step_size <= 0:
        raise ValueError('step_size must be positive')

    summary_dict = {
        'H1':[],
        'H2':[],
        'H12':[],
        'H2_H1':[],
        'maxP':[]
    }
    x_list = []

    n_windows = int((1-window_size)/step_size)+1
    for window_no in range(n_windows):
        # Derive each start from the window number rather than accumulating
        # step_size, so floating-point error does not build up.
        left = window_no*step_size
        right = left + window_size
        x_list.append((left+window_size/2)*x_scale)

        left_idx, right_idx = get_left_right_idx(left,right,positions)
        # right_idx is inclusive, hence the +1 on the slice end.
        genotype_window = [
            genotype[left_idx:right_idx+1] for genotype in genotype_list
        ]

        H1,H2,H12,H2_H1,maxP,_ = H1_H2_H12(genotype_window)
        summary_dict['H1'].append(H1)
        summary_dict['H2'].append(H2)
        summary_dict['H12'].append(H12)
        summary_dict['H2_H1'].append(H2_H1)
        summary_dict['maxP'].append(maxP)
    return summary_dict,x_list
    

In [13]:
# Build, for each simulation version, a list holding one per-window H12
# profile for every (replicate, generation) genome file.
H12_dict = {}
for version in ['Pseudo','Neutral','AdapTrack','AdapTrack_env20','Adaptive']:
    print(version)
    H12_list = []
    for rep in range(1,31):
        # Progress counter; the carriage return keeps it on a single line.
        print(rep, end='\r', flush=True)
        target_dir = f'./data/Simulation_selsweep/rep{rep}/{version}_20samples/'
        for N_gen in range(80000,100001,1000):
            print(N_gen,end='\r',flush=True)
            with open(target_dir + f'{version}_genome_{N_gen}.txt','r') as f:
                lines = f.readlines()
            # Third line: skip its first space-separated token and parse the
            # remaining tokens as floats.
            positions = [
                float(token) for token in lines[2].strip().split(' ')[1:]
            ]
            # Every line from the fourth onwards is one genotype string.
            genotype_list = [row.strip() for row in lines[3:]]
            summary_dict, x_list = get_summary_dict(positions,genotype_list)
            H12_list.append(summary_dict['H12'])
    H12_dict[version] = H12_list

AdapTrack
AdapTrack_env20
Adaptive
100000

In [11]:
# with open('./data/H12_dict.pkl','wb') as f:
#     pkl.dump(H12_dict,f)