In [None]:
from wgs import *

# --- Simulation parameters ---------------------------------------------------
# Every value below except generations_of_random_mating is passed positionally
# to runsim.EnhancedPopulation; their exact semantics are defined there.
generations_of_selection = 5
generations_of_random_mating = 3  # only used by the interim_random_mating calls below
selection_population_size = 2000
proportion_of_individuals_saved = 0.05
overshoot_as_proportion = 0.50
individuals_per_breeding_subpop = 5
initial_heritability = 0.7
meta_pop_sample_size = 100  # also passed to every initial_meta_population call

ep = runsim.EnhancedPopulation(generations_of_selection, selection_population_size, proportion_of_individuals_saved,
                               overshoot_as_proportion, individuals_per_breeding_subpop, initial_heritability,
                               meta_pop_sample_size)
# --- Founder population --------------------------------------------------------
# The prefounders are loaded from disk instead of being regenerated; the
# commented-out call is the alternative that builds them from scratch.
#prefounders = ep.initialize_prefounders(20, 1)
prefounders = sim.loadPopulation('prefounders.pop')
# presumably re-numbers individual IDs starting over (reset=True) -- confirm against sim.tagID
sim.tagID(prefounders, reset=True)
# Reads 'ae.csv' for the prefounder population; presumably allele effects -- TODO confirm
helpers.ae_file_reader(prefounders, 'ae.csv')
ep.initialize_population_data_storage(prefounders)
# Hard-coded founder selection; the breeding steps below operate on a clone so
# that `prefounders` itself is not the object handed to generate_f_one/f_two.
founder_indices = [2, 9, 14, 3, 7, 5, 19, 22]
pop = prefounders.clone()
ep.arbitrary_ordering_of_founders(prefounders, pop, founder_indices)
ep.generate_f_one(pop)
ep.generate_f_two(pop)
ep.mate_and_merge(pop)
# --- Replicates ----------------------------------------------------------------
# Four replicate populations are used below, indexed 0-3:
#   0 -> selection after interim random mating
#   1 -> drift after interim random mating
#   2 -> selection without interim random mating ("no_rm")
#   3 -> drift without interim random mating ("no_rm")
replicates = sim.Simulator(pop, 4)
# Only replicates 0 and 1 receive the interim random-mating generations.
ep.interim_random_mating(replicates.population(0), generations_of_random_mating)
ep.interim_random_mating(replicates.population(1), generations_of_random_mating)
# One meta-population per replicate, each built with meta_pop_sample_size.
selection_meta_pop = ep.initial_meta_population(replicates.population(0), meta_pop_sample_size)
selection_meta_no_rm = ep.initial_meta_population(replicates.population(2), meta_pop_sample_size)
drift_meta_pop = ep.initial_meta_population(replicates.population(1), meta_pop_sample_size)
drift_meta_no_rm = ep.initial_meta_population(replicates.population(3), meta_pop_sample_size)
# Run the selection / drift scenarios; each call receives the replicate and its
# meta-population (presumably the meta-population is updated in place -- confirm).
ep.pure_selection(replicates.population(0), selection_meta_pop)
ep.pure_selection(replicates.population(2), selection_meta_no_rm)
ep.pure_drift(replicates.population(1), drift_meta_pop)
ep.pure_drift(replicates.population(3), drift_meta_no_rm)

In [None]:
# One output file per replicate, in the order yielded by replicates.populations().
# NOTE(review): the file names say "meta", yet the objects saved here are the
# replicate populations rather than the *_meta_pop objects -- confirm intent.
meta_filenames = [
    'post_selection_meta.pop',
    'post_drift_meta.pop',
    'post_selection_no_rm.pop',
    'post_drift_no_rm.pop',
]
for filename, population in zip(meta_filenames, replicates.populations()):
    population.save(filename)

In [None]:
# Drop sub-population 0 from each meta-population, in the same order as before.
# This mutates the populations in place, so re-running the cell removes another
# sub-population each time.
for meta_population in (selection_meta_pop, selection_meta_no_rm, drift_meta_pop, drift_meta_no_rm):
    meta_population.removeSubPops(0)

In [None]:
# Each stage handles the four scenarios in the same fixed order:
# selection, drift, selection without random mating, drift without random mating.

# Stage 1: splitlet arrays from the lineage triplets, flipped top-to-bottom.
selection_splitlets, drift_splitlets, selection_norm_splitlets, drift_norm_splitlets = [
    np.flipud(helpers.lineage_triplets_to_splitlets(meta_population))
    for meta_population in (selection_meta_pop, drift_meta_pop, selection_meta_no_rm, drift_meta_no_rm)
]

# Stage 2: splitlet frequencies for each scenario.
selection_sf, drift_sf, selection_norm_sf, drift_norm_sf = [
    helpers.splitlet_frequencies(splitlets)
    for splitlets in (selection_splitlets, drift_splitlets, selection_norm_splitlets, drift_norm_splitlets)
]

# Stage 3: helpers.max_finder applied to each frequency table
# (presumably picks the most frequent, i.e. major, entry per locus -- confirm).
selection_major, drift_major, selection_norm_major, drift_norm_major = [
    helpers.max_finder(frequencies)
    for frequencies in (selection_sf, drift_sf, selection_norm_sf, drift_norm_sf)
]

In [None]:
# All four major-allele collections must have the same length: collapsing the
# lengths into a set leaves exactly one element only when they all agree.
assert len({
    len(selection_major),
    len(drift_major),
    len(selection_norm_major),
    len(drift_norm_major),
}) == 1, "Meta-population sizes are unequal."

In [None]:
def multiple_replicate_genotype_matrix(major_allele_dict=None):
    """
    Return the values of a major-allele mapping as a one-dimensional integer numpy array.

    NOTE: despite its name, this function does not build a genotype matrix or compute
    eigenvectors/eigenvalues across replicates; it only converts the major alleles.

    Parameters:
        major_allele_dict: mapping whose values can be converted to int. When omitted,
            the module-level variable ``major_allele_dict`` is used, which keeps the
            original zero-argument call working.

    Returns:
        numpy array with dtype int holding the mapping's values in iteration order.

    Raises:
        NameError: no mapping was passed and no module-level ``major_allele_dict`` exists.
    """
    if major_allele_dict is None:
        # Backward compatibility: the original body read this name straight from
        # the notebook namespace instead of taking it as an argument.
        try:
            major_allele_dict = globals()['major_allele_dict']
        except KeyError:
            raise NameError("name 'major_allele_dict' is not defined")
    return np.array(list(major_allele_dict.values()), dtype=int)

In [None]:
def write_eigenanalysis_to_file(eigenvectors, eigenvalues, eigenvector_filename, eigenvalue_filename):
    """
    Save eigenvectors and eigenvalues to two plain-text files via numpy.savetxt.

    Parameters:
        eigenvectors: array-like written with three decimal places ('%0.3f').
        eigenvalues: array-like; its transpose is written in scientific notation
            with three decimal places ('%.3e').
        eigenvector_filename: destination path (or file handle) for the eigenvectors.
        eigenvalue_filename: destination path (or file handle) for the eigenvalues.

    Returns:
        None. Both files are written as a side effect.
    """
    np.savetxt(eigenvector_filename, eigenvectors, fmt='%0.3f')
    # The format used to be '%3e.', i.e. a width-3 '%e' followed by a literal dot,
    # which appended "." to every number and made the file unreadable by np.loadtxt.
    np.savetxt(eigenvalue_filename, np.asarray(eigenvalues).T, fmt='%.3e')

In [None]:
# Eigen-analysis inputs for the selection scenario: meta-population, major alleles, splitlets.
# genotype_matrix is not defined in the visible part of this notebook; its four
# return values are unpacked as eigenvectors, eigenvalues, m and n.
# NOTE(review): m and n look like matrix dimensions -- confirm against genotype_matrix.
selection_evecs, evals, m, n = genotype_matrix(selection_meta_pop, selection_major, selection_splitlets)

In [None]:
# Reduce the eigenvalue matrix to its diagonal.
# The dimensionality guard makes this cell safe to re-run: the unconditional
# call raised ValueError the second time, once evals had already become 1-D.
if np.ndim(evals) >= 2:
    evals = np.diagonal(evals)

In [None]:
# One (meta-population, major alleles, splitlets) triple per scenario, in the order
# selection, drift, selection without random mating, drift without random mating.
genotype_matrix_inputs = list(zip(
    (selection_meta_pop, drift_meta_pop, selection_meta_no_rm, drift_meta_no_rm),
    (selection_major, drift_major, selection_norm_major, drift_norm_major),
    (selection_splitlets, drift_splitlets, selection_norm_splitlets, drift_norm_splitlets),
))

In [None]:
# Run genotype_matrix once per scenario and collect the first two return values
# (eigenvectors and eigenvalues) in scenario order.
eigvecs, eigvals = [], []
for meta_population, major_alleles, splitlets in genotype_matrix_inputs:
    # m and n are still rebound on every pass, as before; only the last pair survives.
    eigvec, eigval, m, n = genotype_matrix(meta_population, major_alleles, splitlets)
    eigvecs.append(eigvec)
    eigvals.append(eigval)

In [None]:
# Plot styling for every figure drawn after this cell: 'paper' context,
# 'darkgrid' style and the 'Set2' colour palette.
sns.set(context='paper', style='darkgrid', palette='Set2')

In [None]:
import matplotlib.lines as mlines

In [None]:
# Plot from the eigenvectors of all four scenarios, passing the output name
# 'multiple_replicate_structure.pdf'. The plotting function is not defined in the
# visible part of this notebook; presumably it writes the figure to that file -- confirm.
plot_multiple_replicate_population_structure(eigvecs, 'multiple_replicate_structure.pdf')

In [None]:
# Diagonals of the eigenvalue matrices for scenarios 0 to 3, in that order.
eigz, eigo, eigtw, eigth = [np.diagonal(eigvals[scenario]) for scenario in range(4)]

In [None]:
# For each scenario, print the first eigenvalue as a fraction of the eigenvalue sum.
# NOTE(review): this is the top component's share only if the eigenvalues are
# sorted in descending order -- confirm against genotype_matrix.
for scenario in range(4):
    eigenvalues = np.diagonal(eigvals[scenario])
    print(eigenvalues[0] / np.sum(eigenvalues))

In [None]:
# Read the first two columns of the saved selection eigenvectors.
# NOTE(review): no visible cell writes 'selection_eigenvectors.txt'; presumably it
# was produced with write_eigenanalysis_to_file in an earlier session -- confirm.
selvecs = np.loadtxt('selection_eigenvectors.txt', usecols=[0,1])

In [None]:
# First two columns of the saved eigenvectors for the remaining three scenarios,
# read in the order drift, selection without random mating, drift without random mating.
driftvecs, selnormvecs, driftnormvecs = [
    np.loadtxt(eigenvector_file, usecols=[0, 1])
    for eigenvector_file in (
        'drift_eigenvectors.txt',
        'selection_no_rm_eigenvectors.txt',
        'drift_no_rm_eigenvectors.txt',
    )
]

In [None]:
# Combine the first two eigenvector columns into a single array with one row per column.
# The columns must be wrapped in a list: np.array's second positional parameter
# is dtype, so np.array(a, b) raised TypeError instead of stacking the two.
np.array([selvecs[:, 0], selvecs[:, 1]])

In [None]:
# Sanity check: number of rows loaded into selvecs.
len(selvecs)

In [None]:
# Sanity check: number of rows loaded into driftvecs.
len(driftvecs)

In [None]:
# Distribution plot of the diagonal of the first scenario's eigenvalue matrix.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases in favour of
# histplot/displot -- check the installed seaborn version before upgrading.
sns.distplot(np.diagonal(eigvals[0]))

In [None]:
# Display the currently open matplotlib figure(s).
plt.show()

In [None]:
# NOTE(review): `combine` is not assigned in any visible cell; unless it is defined
# elsewhere, this cell raises NameError on a fresh kernel -- confirm or remove.
combine.shape

In [None]:
# Plot from the first two eigenvector columns of all four scenarios.
# plot_principal_components is not defined in the visible part of this notebook;
# presumably the last argument is the output file name -- confirm.
# NOTE(review): "scenaries" in the file name looks like a typo for "scenarios";
# left unchanged because renaming the output file could break downstream users.
plot_principal_components(selvecs, driftvecs, selnormvecs, driftnormvecs, 'four_diff_scenaries_pop_structure.pdf')