In [1]:
# simuOpt options are applied when simuPOP is imported, so setOptions is
# called before `import simuPOP` (see the simuPOP documentation for simuOpt).
import simuOpt
simuOpt.setOptions(alleleType='short', quiet=True, numThreads=4)
import simuPOP as sim
import numpy as np
import pandas as pd
import random
import h5py
#from bokeh.plotting import figure, show
#from bokeh.io import output_notebook, export_svgs
#from scipy import linalg
from saegus import analyze, operators, parameters
# Print arrays in fixed-point notation with 8 decimal places.
np.set_printoptions(suppress=True, precision=8)

In [2]:
# Load the saved simuPOP population that every later cell operates on.
example_pop = sim.loadPopulation('example_pop.pop')

In [3]:
# Add the information fields used by this notebook: ID/pedigree fields plus
# 'g' and 'p' (presumably filled by operators.calculate_g / calculate_p in the
# trait section — confirm in saegus).
example_pop.addInfoFields(['ind_id', 'mother_id', 'father_id', 'g', 'p'])

In [4]:
# Assign individual IDs (per the simuPOP docs, tagID writes to 'ind_id' by default).
sim.tagID(example_pop)

In [5]:
# Compute segregating-site statistics; the variables named in `vars` are
# stored in example_pop.dvars() and read by later cells.
sim.stat(example_pop, numOfSegSites=sim.ALL_AVAIL, 
         vars=['numOfSegSites','segSites', 'fixedSites'])

In [6]:
# Compute allele frequencies for all available loci; read later through
# example_pop.dvars().alleleFreq.
sim.stat(example_pop, alleleFreq=sim.ALL_AVAIL)

In [7]:
# Loci reported as segregating by the stat call above.
segregating_loci = example_pop.dvars().segSites

In [8]:
# saegus helper; the result is indexed as a 2-D array later (allele_states[:, 3]).
allele_states = analyze.gather_allele_data(example_pop)

In [9]:
# saegus helper taking the population and the allele-state table from the previous cell.
allele_frequencies = analyze.gather_allele_frequencies(example_pop, allele_states)

In [None]:
gwas = analyze.GWAS(example_pop, np.array(segregating_loci, dtype=np.int_), allele_states[:, 3], 'example')

In [None]:
count_matrix = gwas.calculate_count_matrix('example_count_matrix.txt')

### Hapmap

In [None]:
# Write the genotype data in hapmap format under the given file name.
gwas.hapmap_formatter(hapmap_file_name='example_hapmap.txt')

### Structure

In [None]:
eigenvalues, eigenvectors = gwas.pop_struct_eigendecomp(count_matrix)

In [None]:
gwas.population_structure_formatter(eigenvalues, eigenvectors, 
                         pop_struct_file_name='example_structure.txt', number_of_pcs=2)

### Kinship

In [None]:
# Kinship matrix computed by saegus from the count matrix.
# NOTE(review): the TASSEL config cell references 'example_kinship.txt', but no
# visible cell writes that file — confirm whether calc_kinship_matrix saves it
# or whether a write step is missing.
k = gwas.calc_kinship_matrix(count_matrix)

In [None]:
# Display the kinship matrix.
k

In [None]:
# NOTE(review): mid-notebook import of a module not listed in the first cell;
# consider moving it to the import cell so dependencies are visible up front.
import genocompute

In [None]:
%%timeit
# NOTE(review): P is first assigned further down in this notebook, so this cell
# raises NameError when the notebook is run top to bottom on a fresh kernel.
genocompute.compute_D(P)

In [None]:
# NOTE(review): called with no arguments and the return value is discarded —
# confirm the intended usage.
genocompute.diag_matrix_mult()

In [None]:
# NOTE(review): D is not assigned in any visible cell (the compute_D result
# above is only timed, not stored), so this and the next cell fail on a fresh kernel.
D

In [None]:
print(D[10])

In [None]:
# 42837 is hard-coded throughout this section (presumably the number of
# segregating loci — TODO confirm and derive it from the data instead).
2*(1/42837)*P[9]

In [None]:
1/42837

In [None]:
# Import the submodule explicitly: a bare `import scipy` does not guarantee
# that `scipy.sparse` is loaded on every SciPy version. This statement still
# binds the top-level name `scipy`, so later `scipy.sparse...` references work.
import scipy.sparse

In [None]:
# Sparse square matrix of order 42837 with ones on the main diagonal; the
# column count defaults to the row count, so it is not repeated.
# Despite the name `ones`, only the diagonal entries are 1.
ones = scipy.sparse.eye(42837)

In [None]:
# Display the sparse matrix created in the previous cell.
ones

In [None]:
# Element-wise (-1 * count) + 1, i.e. one minus each entry of the count matrix,
# wrapped in np.matrix (NumPy's 2-D matrix type).
V = np.matrix(((-1)*count_matrix) + 1)

In [None]:
P = np.array([example_pop.dvars().alleleFreq[locus][allele]
                  for locus, allele in zip(gwas.segregating_loci,
                       gwas.segregating_minor_alleles)])

In [None]:
P

In [None]:
# Element-wise 2*P*(1 - P), scaled by 1/42837.
# NOTE(review): 42837 is hard-coded (presumably the number of segregating
# loci, i.e. len(P) — TODO confirm and compute it from the data).
ProP = (1/42837)*2*P*(1 - P)

In [None]:
ProP

In [None]:
# Same values wrapped as an np.matrix; the result is displayed, not stored.
np.matrix(ProP)

In [None]:
# NOTE(review): a dense 42837 x 42837 array of the default float64 dtype is
# roughly 14.7 GB, and the result is not assigned to anything — this looks
# like a leftover experiment; confirm before running.
np.zeros((42837, 42837))

In [None]:
# NumPy has no `sparse` submodule, so the original `np.sparse` raised
# AttributeError. Sparse matrix types live in scipy.sparse; it is imported here
# explicitly so this cell does not depend on another cell having loaded it.
import scipy.sparse
scipy.sparse

In [None]:
# FIXME: this cell held the incomplete expression `Z * `, which is a
# SyntaxError, and Z is not assigned in any visible cell. It is kept as a
# comment until the intended right-hand operand is known (a later cell
# computes Z*Z.T, which may be what was being typed here).
# Z *

In [None]:
# Element-wise complement of P.
Pc = 1 - P

In [None]:
# NOTE(review): Z is not assigned in any visible cell, so this raises NameError
# on a fresh kernel — confirm where Z is supposed to come from.
Z

In [None]:
# Subtract (Pc - 0.5) from every row of V. NumPy broadcasting applies the 1-D
# vector to all rows in one step, replacing the original per-individual Python
# loop over a preallocated zero matrix. The values are identical, and the result
# is still a float64 np.matrix, so `Zc*Zc.T` remains a matrix product.
# assumes V has one row per individual and one column per segregating locus
# (the shape the original preallocated) — TODO confirm
Zc = np.matrix(V - (Pc - 0.5), dtype=np.float64)

In [None]:
# Display the centred matrix.
Zc

In [None]:
# Zc is an np.matrix, so `*` here is a matrix product of Zc with its transpose.
ZcZcT = Zc*Zc.T

In [None]:
# NOTE(review): Z is not assigned in any visible cell; this cell raises
# NameError on a fresh kernel — confirm the intended definition of Z.
ZZT = Z*Z.T

In [None]:
ZZT

In [None]:
ZcZcT/(2*sum_of_Pc)

In [None]:
P

In [None]:
sum_of_Pc = np.sum((Pc*(1-Pc)))

In [None]:
2*sum_of_Pc

### Trait

In [None]:
# Seed Python's RNG so the QTL sample (and any draws made through
# random.expovariate, which is handed to the allele-effects builder) are
# repeatable across kernel restarts; the original cell was unseeded.
# NOTE(review): this seeds only the standard-library `random` module; if the
# saegus operators draw from another generator (NumPy, simuPOP), seed those
# too — TODO confirm.
TRAIT_RANDOM_SEED = 42
NUMBER_OF_QTL = 20
random.seed(TRAIT_RANDOM_SEED)

# Pick the QTL among the segregating loci and keep them in sorted order.
qtl = sorted(random.sample(segregating_loci, NUMBER_OF_QTL))

# Build allele effects for the chosen QTL; random.expovariate and its
# argument 1 are passed through to saegus.
trait = parameters.Trait()
allele_effects_table = trait.construct_allele_effects_table(example_pop, qtl, random.expovariate, 1)
allele_effects_array = trait.construct_ae_array(allele_effects_table, qtl)

# The saegus operators are called in this order: g, then error variance for
# the chosen heritability, then p.
heritability = 0.7
operators.calculate_g(example_pop, allele_effects_array)
operators.calculate_error_variance(example_pop, heritability)
operators.calculate_p(example_pop)

# Format the trait data for TASSEL under the given file name.
tassel_trait = gwas.trait_formatter(trait_file_name='example_trait.txt')

In [None]:
# Open the output file with an explicit read/write mode. h5py 3.x opens files
# read-only when no mode is given, which makes every assignment below fail;
# 'a' (read/write, create if missing) matches the older default this cell
# relied on. The file stays open and is closed in the next cell.
# NOTE(review): in 'a' mode, re-running this cell against an existing file
# fails because the dataset names already exist — delete the file first, or
# switch to 'w' if overwriting is intended.
example_trait_data = h5py.File('8517example_trait_data.hdf5', 'a')
example_trait_data['allele/states'] = allele_states
# Column labels are stored as an attribute on the 'allele' group. np.bytes_ is
# the same type as the np.string_ alias, which NumPy 2.0 removed.
example_trait_data['allele'].attrs['columns'] = list(
    map(np.bytes_, ['locus', 'alpha', 'omega', 'minor', 'major']))
example_trait_data['allele/effects'] = allele_effects_table
example_trait_data['allele/frequencies'] = allele_frequencies
example_trait_data['qtl'] = np.array(qtl)
example_trait_data['segregating_loci'] = segregating_loci

In [None]:
example_trait_data.close()

In [None]:
np.savetxt('example_qtl.txt', qtl, fmt='%d',)

In [None]:
# NOTE(review): `allele` is not assigned in any visible cell, so this raises
# NameError on a fresh kernel. It looks like a leftover — confirm, then remove
# it or replace it with the intended name.
allele

### Config File

In [None]:
# Generate the TASSEL configuration, pointing it at the files named in the
# earlier cells of this notebook.
gwas.tassel_gwas_config(
    config_template='example_config_file.xml',
    hapmap_file_name='example_hapmap.txt',
    kinship_file_name='example_kinship.txt',
    trait_file_name='example_trait.txt',
    structure_file_name='example_structure.txt',
    output_prefix='example_',
)

In [None]:
# Display the fixed sites recorded by the earlier sim.stat call.
example_pop.dvars().fixedSites

In [None]:
# NOTE(review): `allele_group` is not assigned in any visible cell, so this
# raises NameError on a fresh kernel. It is presumably the 'allele' group of the
# HDF5 file written above, which has already been closed by this point —
# confirm. Also note that np.string_ was removed in NumPy 2.0 (np.bytes_ is
# the same type).
allele_group['states'].attrs['columns'] = list(map(np.string_, ['locus',  # metadata attached to dataset
                            'alpha', 'omega', 'minor', 'major' ]))
