In [1]:
import simuOpt
simuOpt.setOptions(alleleType='short', quiet=True, numThreads=4)
import simuPOP as sim
import numpy as np
import pandas as pd
import random
#from bokeh.plotting import figure, show
#from bokeh.io import output_notebook, export_svgs
#from scipy import linalg
from saegus import analyze, operators, parameters
np.set_printoptions(suppress=True, precision=5)

In [2]:
example_pop = sim.loadPopulation('example_pop.pop')

In [3]:
example_pop.addInfoFields(['ind_id', 'mother_id', 'father_id', 'g', 'p'])

In [4]:
sim.tagID(example_pop)

In [5]:
sim.stat(example_pop, numOfSegSites=sim.ALL_AVAIL, vars=['segSites'])

In [6]:
segregating_loci = example_pop.dvars().segSites

In [7]:
qtl = sorted(random.sample(segregating_loci, 20))

In [None]:
trait = parameters.Trait()

In [None]:
allele_effects_table = trait.construct_allele_effects_table(example_pop, qtl, random.expovariate, 1)

In [None]:
allele_effects_array = trait.construct_ae_array(allele_effects_table, qtl)

In [None]:
heritability = 0.7

In [None]:
operators.calculate_g(example_pop, allele_effects_array)

In [None]:
operators.calculate_error_variance(example_pop, heritability)

In [None]:
operators.calculate_p(example_pop)

In [8]:
sim.stat(example_pop, alleleFreq=sim.ALL_AVAIL)

In [9]:
allele_states = analyze.gather_allele_data(example_pop)

In [None]:
np.array(segregating_loci).shape

In [10]:
gwas = analyze.GWAS(example_pop, np.array(segregating_loci, dtype=np.int_), allele_states[:, 3], 'example')

In [11]:
count_matrix = gwas.calculate_count_matrix()

In [22]:
M = np.matrix((-1)*count_matrix + 1)

In [23]:
print(M)

[[ 0  0  0 ...,  0  0  0]
 [ 1  1  1 ...,  0  1  1]
 [ 0  1  1 ...,  0  1  1]
 ..., 
 [ 1  0  1 ..., -1  0 -1]
 [ 1 -1  1 ...,  0  1  1]
 [ 1  1  1 ...,  0  0  1]]


In [27]:
M*M.T

matrix([[34209, 12727, 16903, ..., 13230, 12727, 17480],
        [12727, 31348, 13006, ..., 12628, 13243, 13176],
        [16903, 13006, 29276, ..., 15411, 12571, 18255],
        ..., 
        [13230, 12628, 15411, ..., 27720, 13895, 15570],
        [12727, 13243, 12571, ..., 13895, 27546, 12879],
        [17480, 13176, 18255, ..., 15570, 12879, 29214]])

In [30]:
gwas.segregating_

(42837,)

In [35]:
P = np.array(list(example_pop.dvars().alleleFreq[locus][allele]
         for locus, allele in zip(segregating_loci, gwas.segregating_minor_alleles)))

In [38]:
P

array([ 0.31905,  0.21905,  0.0619 , ...,  0.46667,  0.2619 ,  0.26667])

In [65]:
Z = np.matrix(np.zeros((105, 42837)))

(105, 1)

In [69]:
Z[0, :] = M[0, :] - P

matrix([[-0.31905, -0.21905, -0.0619 , ..., -0.46667, -0.2619 , -0.26667],
        [ 0.     ,  0.     ,  0.     , ...,  0.     ,  0.     ,  0.     ],
        [ 0.     ,  0.     ,  0.     , ...,  0.     ,  0.     ,  0.     ],
        ..., 
        [ 0.     ,  0.     ,  0.     , ...,  0.     ,  0.     ,  0.     ],
        [ 0.     ,  0.     ,  0.     , ...,  0.     ,  0.     ,  0.     ],
        [ 0.     ,  0.     ,  0.     , ...,  0.     ,  0.     ,  0.     ]])

In [48]:
[M[:, i] - P[i] for i in range()

matrix([[-0.31905, -0.21905, -0.0619 , ..., -0.46667, -0.2619 , -0.26667],
        [ 0.68095,  0.78095,  0.9381 , ...,  0.53333,  0.7381 ,  0.73333],
        [-0.31905, -0.21905, -0.0619 , ..., -0.46667, -0.2619 , -0.26667],
        ..., 
        [ 0.68095,  0.78095,  0.9381 , ...,  0.53333,  0.7381 ,  0.73333],
        [ 0.68095,  0.78095,  0.9381 , ...,  0.53333,  0.7381 ,  0.73333],
        [ 0.68095,  0.78095,  0.9381 , ...,  0.53333,  0.7381 ,  0.73333]])

matrix([[-0.31905, -0.21905, -0.0619 , ..., -0.46667, -0.2619 , -0.26667],
        [ 0.68095,  0.78095,  0.9381 , ..., -0.46667,  0.7381 ,  0.73333],
        [-0.31905,  0.78095,  0.9381 , ..., -0.46667,  0.7381 ,  0.73333],
        ..., 
        [ 0.68095, -0.21905,  0.9381 , ..., -1.46667, -0.2619 , -1.26667],
        [ 0.68095, -1.21905,  0.9381 , ..., -0.46667,  0.7381 ,  0.73333],
        [ 0.68095,  0.78095,  0.9381 , ..., -0.46667, -0.2619 ,  0.73333]])

In [52]:
Z[:, 0] = M[:, 0] - P

ValueError: could not broadcast input array from shape (105,42837) into shape (105,1)

In [73]:
for i in range(105):
    Z[i, :] = M[i, :] - P

In [78]:
G = (Z*Z.T)/(2*np.sum((P*(1-P))))

In [80]:
2*np.sum((P*(1-P)))

14147.803673469387

In [79]:
G

matrix([[ 2.11118,  0.63565,  0.88774, ...,  0.66449,  0.64983,  0.93576],
        [ 0.63565,  1.99468,  0.65515, ...,  0.6648 ,  0.72916,  0.6744 ],
        [ 0.88774,  0.65515,  1.76207, ...,  0.81843,  0.63858,  0.99032],
        ..., 
        [ 0.66449,  0.6648 ,  0.81843, ...,  1.72482,  0.76853,  0.8369 ],
        [ 0.64983,  0.72916,  0.63858, ...,  0.76853,  1.75432,  0.66759],
        [ 0.93576,  0.6744 ,  0.99032, ...,  0.8369 ,  0.66759,  1.77216]])

In [None]:
print(gwas.individual_names)

In [None]:
gwas.trait_formatter(trait_file_name='example_trait.txt')

In [None]:
sim.stat(example_pop, alleleFreq=sim.ALL_AVAIL)

In [None]:
allele_states[]

In [None]:
minor_alleles = np.array(allele_states[:, 3], dtype=np.int8)
major_alleles = np.array(allele_states[:, 4], dtype=np.int8)

In [None]:
for i, ind in enumerate(example_pop.individuals()):
    np.equal(ind.genotype(ploidy=0). minor_alleles)

In [None]:
example_ind = example_pop.individual(0)

In [None]:
np.equal(example_ind.genotype(ploidy=0), minor_alleles)
np.equal(example_ind.genotype(ploidy=1), minor_alleles)

In [None]:
np.equal()

In [None]:
true_minor = (-1/2)
false_minor = (1/2)

In [None]:
zygosity_M = 

In [None]:
segregating_minor_alleles = minor_alleles[segregating_loci]

In [None]:
count_matrix = np.array(gwas.calculate_count_matrix(segregating_minor_alleles, segregating_loci), dtype=np.int8)

In [None]:
count_matrix.shape

In [None]:
print(count_matrix)

The rows of v are the eigenvectors of a^T * a. The columns of u are the eigenvectors of a * a^T. For row i in v and column i in u the corresponding eigenvalue is s[i] ** 2

In [None]:
count_matrix = np.zeros((105, 42837), dtype=np.int8)

In [None]:
for i, ind in enumerate(example_pop.individuals()):
    ageno = np.array(ind.genotype(ploidy=0), dtype=np.int8)[segregating_loci]
    bgeno = np.array(ind.genotype(ploidy=1), dtype=np.int8)[segregating_loci]
    acomps = np.array(np.equal(segregating_minor_alleles, ageno), dtype=np.int8)
    bcomps = np.array(np.equal(segregating_minor_alleles, bgeno), dtype=np.int8)
    comp_count = acomps + bcomps
    count_matrix[i, :] = comp_count

In [None]:
column_means = np.apply_along_axis(np.mean, axis=0, arr=count_matrix)

In [None]:
print(column_means)

In [None]:
print(count_matrix)

In [None]:
shifted = np.array([count_matrix[:, i] - column_means[i] for i in range(42837)]).T

In [None]:
print(shifted)

In [None]:
P = column_means/2

In [None]:
scale = np.sqrt(P*(1-P))

In [None]:
M = np.matrix(np.array([shifted[:, i] / scale[i] for i in range(42837)]).T)

In [None]:
print(M)

In [None]:
X = (1/42837)*(M * M.T)

In [None]:
print(X)

In [None]:
eigendata = linalg.eig(X)

In [None]:
eigenvalues = np.array(eigendata[0], dtype=np.float)

In [None]:
eigenvalues

In [None]:
eigenvectors = np.array(eigendata[1], dtype=np.float)

In [None]:
print(eigenvalues)

In [None]:
print(eigenvectors)

In [None]:
sum_eigen_values = np.sum(eigenvalues)

In [None]:
eigenvalues[0]/sum_eigen_values

In [None]:
structure_covariates = np.array([eigenvalues[0]*eigenvectors[:, 0], eigenvalues[1]*eigenvectors[:, 1]]).T

In [None]:
output_matrix = pd.DataFrame(structure_covariates, 
                             index=gwas.individual_names)

In [None]:
with open('example_structure.txt', 'w') as f:
    f.write(structure_header)
    output_matrix.to_csv(f, sep='\t', index=True, header=False)

In [None]:
hapmap_columns = ['rs', 'alleles', 'chrom', 'pos',
                 'strand', 'assembly', 'center',
                 'center', 'protLSID', 'assayLSID',
                 'panelLSID', 'QCode'] + list(gwas.individual_names)

In [None]:
hapmap_columns

In [None]:
hapmap_matrix = pd.DataFrame(columns=hapmap_columns)

In [None]:
hapmap_matrix.rs = segregating_loci

In [None]:
hapmap_matrix.alleles = segregating_minor_alleles

In [None]:
chromosomes = np.array([example_pop.chromLocusPair(locus)[0] + 1
 for locus in segregating_loci], dtype=np.int8)

In [None]:
chromosomes

In [None]:
hapmap_matrix.chrom = chromosomes

In [None]:
hapmap_matrix.chrom

In [None]:
hapmap_matrix.pos = np.arange(42837)

In [None]:
hapmap_matrix.loc[:, 'strand':'QCode'] = np.core.defchararray.array(
    [['NA']*len(hapmap_matrix.pos)]*8).T

In [None]:
hapmap_matrix.loc[:, 'strand':'QCode'] = np.core.defchararray.array([['NA']*42837]*8).T

In [None]:
for i, ind in enumerate(example_pop.individuals()):
    hapmap_matrix.loc[:, gwas.individual_names[i]] = [
        ''.join(sorted(gwas.int_to_snp_conversions[a] +
                      gwas.int_to_snp_conversions[b]))
        for a, b, in zip(np.array(ind.genotype(ploidy=0))[segregating_loci], 
                         np.array(ind.genotype(ploidy=1))[segregating_loci])
    ]

In [None]:
print(np.array(hapmap_matrix))

In [None]:
with open('example_hapmap.txt', 'w') as hmp_file:
    hapmap_matrix.to_csv(hmp_file, sep='\t', index=False)

## Kinship Matrix

In [None]:
minor_alleles = np.array(allele_states[:, 3], dtype=np.int8)

In [None]:
major_alleles = np.array(allele_states[:, 4], dtype=np.int8)

In [None]:
major_alleles

In [None]:
minor_alleles[0], minor_alleles[0]

In [None]:
major_alleles[0], major_alleles[0]

In [None]:
exgenotype.T[0]

In [None]:
(-1/2)

In [None]:
comparisons = np.zeros((42837, 5))

In [None]:
minor_alleles[0]

In [None]:
comparisons[0, minor_alleles[0]] = -1/2

In [None]:
for locus, allele in enumerate(minor_alleles[segregating_loci]):
    comparisons[locus, allele] = -1/2

In [None]:
comparisons

In [None]:
for locus, allele in enumerate(major_alleles[segregating_loci]):
    comparisons[locus, allele] = 1/2

In [None]:
comparisons

In [None]:
alpha_geno = exgenotype[0][segregating_loci]

In [None]:
comparisons[range(42837), alpha_geno]

In [None]:
exgenotype[0]

In [None]:
major_alleles

In [None]:
comps = np.array(np.equal(major_alleles[segregating_loci], alpha_geno), dtype=np.int8)

In [None]:
comps

In [None]:
for i, ind in enumerate(example_pop.individuals()):
    VanRadenM[i, :] =     comparisons[:, np.array(ind.genotype(ploidy=0))[segregating_loci]] +\
    comparisons[:, np.array(ind.genotype(ploidy=1))[segregating_loci]]

In [None]:
(comparisons[:, ind.genotype(ploidy=0)[segregating_loci[i]]] +
 comparisons[:,  for i in range(42837))

In [None]:
gc.enable()

In [None]:
gc.collect()

In [None]:
gc.get_stats()

In [None]:
comparisons[:, alpha_genotype] + comparisons[:, omega_genotype]

In [None]:
comparisons[0, alpha_genotype]

In [None]:
VanRadenM = np.zeros((105, 42837))

In [None]:
VanRadenM[0, :] = 

In [None]:
minor_alleles

In [None]:
example_ind = example_pop.individual(0)

In [None]:
exgenotype = np.array([example_ind.genotype(ploidy=0), example_ind.genotype(ploidy=1)])

In [None]:
exgenotype