In [1]:
import simuOpt
simuOpt.setOptions(alleleType='short', quiet=True, numThreads=4)
import simuPOP as sim
import numpy as np
import pandas as pd
import random
from saegus import analyze, operators, parameters
np.set_printoptions(suppress=True, precision=5)

In [2]:
example_pop = sim.loadPopulation('example_pop.pop')

In [4]:
example_pop.addInfoFields(['ind_id', 'mother_id', 'father_id', 'g', 'p'])

In [5]:
sim.tagID(example_pop)

In [6]:
sim.stat(example_pop, numOfSegSites=sim.ALL_AVAIL, vars=['segSites'])

In [7]:
segregating_loci = example_pop.dvars().segSites

In [8]:
qtl = sorted(random.sample(segregating_loci, 20))

In [9]:
trait = parameters.Trait()

In [10]:
allele_effects_table = trait.construct_allele_effects_table(example_pop, qtl, random.expovariate, 1)

In [11]:
allele_effects_array = trait.construct_ae_array(allele_effects_table, qtl)

In [12]:
heritability = 0.7

In [13]:
operators.calculate_g(example_pop, allele_effects_array)

In [14]:
operators.calculate_error_variance(example_pop, heritability)

In [15]:
operators.calculate_p(example_pop)

In [16]:
gwas = analyze.GWAS(example_pop, segregating_loci, 'example')

In [18]:
gwas.individual_names

array(['I1', 'I2', 'I3', 'I4', 'I5', 'I6', 'I7', 'I8', 'I9', 'I10', 'I11',
       'I12', 'I13', 'I14', 'I15', 'I16', 'I17', 'I18', 'I19', 'I20',
       'I21', 'I22', 'I23', 'I24', 'I25', 'I26', 'I27', 'I28', 'I29',
       'I30', 'I31', 'I32', 'I33', 'I34', 'I35', 'I36', 'I37', 'I38',
       'I39', 'I40', 'I41', 'I42', 'I43', 'I44', 'I45', 'I46', 'I47',
       'I48', 'I49', 'I50', 'I51', 'I52', 'I53', 'I54', 'I55', 'I56',
       'I57', 'I58', 'I59', 'I60', 'I61', 'I62', 'I63', 'I64', 'I65',
       'I66', 'I67', 'I68', 'I69', 'I70', 'I71', 'I72', 'I73', 'I74',
       'I75', 'I76', 'I77', 'I78', 'I79', 'I80', 'I81', 'I82', 'I83',
       'I84', 'I85', 'I86', 'I87', 'I88', 'I89', 'I90', 'I91', 'I92',
       'I93', 'I94', 'I95', 'I96', 'I97', 'I98', 'I99', 'I100', 'I101',
       'I102', 'I103', 'I104', 'I105'], 
      dtype='<U4')

In [19]:
gwas.trait_formatter(trait_file_name='example_trait.txt')

Unnamed: 0,0,1
0,I1,37.504949302655376
1,I2,32.2765649387724
2,I3,31.74542968992492
3,I4,40.70513322362167
4,I5,50.8160772768153
5,I6,45.71957486090434
6,I7,48.962719841127836
7,I8,45.71098110400288
8,I9,48.17893195119641
9,I10,46.673284351052175


In [20]:
sim.stat(example_pop, alleleFreq=sim.ALL_AVAIL)

In [21]:
allele_states = analyze.gather_allele_data(example_pop)

In [29]:
minor_alleles = np.array(allele_states[:, 3], dtype=np.int8)

In [35]:
segregating_minor_alleles = minor_alleles[segregating_loci]

In [36]:
ccm = gwas.calculate_count_matrix(segregating_minor_alleles, segregating_loci)

In [38]:
ccm.shape

(105, 42837)

In [317]:
print(ccm)

[[1 1 1 ..., 1 1 1]
 [0 0 0 ..., 1 0 0]
 [1 0 0 ..., 1 0 0]
 ..., 
 [0 1 0 ..., 2 1 2]
 [0 2 0 ..., 1 0 0]
 [0 0 0 ..., 1 1 0]]


In [40]:
shift = np.apply_along_axis(np.mean, axis=1, arr=ccm)
p_vector = np.divide(shift, 2)
scale = np.sqrt(np.multiply(p_vector, (1-p_vector)))

In [43]:
shift_matrix = np.zeros((ccm.shape[0], ccm.shape[1]))
scale_matrix = np.zeros((ccm.shape[0], ccm.shape[1]))

In [45]:
for i in range(42837):
    shift_matrix[:, i] = shift
    scale_matrix[:, i] = scale

In [58]:
ccm.shape

(105, 42837)

In [87]:
corrected_matrix = (ccm - shift_matrix)/scale_matrix
# singular value decomposition using scipy linalg module
s = linalg.svd(corrected_matrix, compute_uv=False, check_finite=False)

In [89]:
linalg.svdvals(corrected_matrix)

array([ 1443.83593,   985.55428,   728.16896,   679.60093,   618.2437 ,
         532.4782 ,   465.58015,   437.75929,   399.27515,   373.38854,
         349.36788,   346.0276 ,   336.1584 ,   334.58799,   331.89053,
         326.486  ,   323.45468,   319.49672,   313.31786,   307.09202,
         306.38246,   297.8251 ,   296.58476,   294.49568,   293.43124,
         290.08975,   281.68802,   280.90928,   279.3363 ,   275.83053,
         274.77253,   270.1265 ,   269.13951,   267.79877,   263.75742,
         261.98142,   261.44743,   259.4777 ,   258.11526,   256.01914,
         253.54783,   252.91525,   249.43858,   248.85473,   247.21952,
         244.64417,   241.06522,   239.11959,   237.44958,   236.37641,
         234.69801,   234.10778,   231.82163,   231.31942,   229.9256 ,
         228.10331,   227.07792,   226.08737,   225.27356,   222.16924,
         221.27317,   219.99582,   218.54394,   216.33351,   215.82817,
         215.05244,   213.63928,   213.25249,   212.32354,   211

In [92]:
orthonormal = linalg.orth(corrected_matrix)

In [93]:
orthonormal.shape

(105, 105)

In [94]:
orthonormal[:, 0]

array([-0.10837, -0.0774 , -0.11713, -0.09912, -0.07769, -0.07381,
       -0.11519, -0.08334, -0.10273, -0.11686, -0.07545, -0.11555,
       -0.0779 , -0.08523, -0.11697, -0.07348, -0.0746 , -0.0921 ,
       -0.07395, -0.1134 , -0.11777, -0.11701, -0.07583, -0.11716,
       -0.119  , -0.07226, -0.07313, -0.11559, -0.07353, -0.11075,
       -0.08314, -0.11586, -0.11607, -0.08095, -0.11326, -0.09878,
       -0.11836, -0.11212, -0.07466, -0.08264, -0.07165, -0.08299,
       -0.08307, -0.11471, -0.11791, -0.11531, -0.11509, -0.07397,
       -0.09485, -0.08092, -0.08257, -0.11611, -0.083  , -0.11833,
       -0.08178, -0.08164, -0.08106, -0.11821, -0.11706, -0.08269,
       -0.116  , -0.07958, -0.11367, -0.12068, -0.11281, -0.07617,
       -0.09353, -0.11773, -0.08083, -0.07541, -0.08285, -0.07389,
       -0.08012, -0.11601, -0.09273, -0.11622, -0.09857, -0.11443,
       -0.07271, -0.1174 , -0.07578, -0.08251, -0.11319, -0.12039,
       -0.11421, -0.0816 , -0.11626, -0.11406, -0.11167, -0.08

In [96]:
orthonormal[:, 1]

array([-0.05967,  0.08603, -0.09201,  0.06214,  0.08708,  0.07107,
       -0.07818,  0.11354, -0.01028, -0.07496,  0.09255, -0.08606,
        0.09317,  0.1465 , -0.08518,  0.08024,  0.07397,  0.00542,
        0.07425, -0.08027, -0.1012 , -0.08681,  0.08481, -0.08498,
       -0.09133,  0.07599,  0.0785 , -0.08546,  0.10415, -0.06604,
        0.18051, -0.08426, -0.08456,  0.08627, -0.0831 ,  0.0014 ,
       -0.09111, -0.07485,  0.06854,  0.10326,  0.0841 ,  0.10081,
        0.17584, -0.08778, -0.08626, -0.08063, -0.08175,  0.07535,
       -0.01109,  0.08698,  0.09612, -0.08217,  0.18397, -0.09498,
        0.17281,  0.13128,  0.09532, -0.09169, -0.09128,  0.18845,
       -0.09174,  0.16245, -0.07441, -0.09746, -0.08657,  0.08161,
        0.02007, -0.09412,  0.16888,  0.07098,  0.11734,  0.08925,
        0.09215, -0.08869, -0.01877, -0.09127,  0.00282, -0.08408,
        0.0894 , -0.0825 ,  0.0822 ,  0.1489 , -0.08047, -0.09765,
       -0.09056,  0.10827, -0.09572, -0.0877 , -0.07183,  0.09

In [None]:
from bokeh import 

In [90]:
linalg.diagsvd(linalg.svdvals(corrected_matrix), 42837, 105)

array([[ 1443.83593,     0.     ,     0.     , ...,     0.     ,
            0.     ,     0.     ],
       [    0.     ,   985.55428,     0.     , ...,     0.     ,
            0.     ,     0.     ],
       [    0.     ,     0.     ,   728.16896, ...,     0.     ,
            0.     ,     0.     ],
       ..., 
       [    0.     ,     0.     ,     0.     , ...,     0.     ,
            0.     ,     0.     ],
       [    0.     ,     0.     ,     0.     , ...,     0.     ,
            0.     ,     0.     ],
       [    0.     ,     0.     ,     0.     , ...,     0.     ,
            0.     ,     0.     ]])

In [60]:
sigma_matrix = linalg.diagsvd(s, ccm.shape[1], ccm.shape[0])

In [99]:
svd_result = np.linalg.svd(corrected_matrix, full_matrices=False)

In [102]:
svd_result[0].shape

(105, 105)

In [104]:
svd_result[2].shape

(105, 42837)

In [106]:
eigenvectors = svd_result[2]

In [111]:
eigenvectors[0]

array([-0.00298,  0.00165,  0.0058 , ..., -0.00637, -0.00044, -0.00015])

In [112]:
v = svd_result[2]

In [113]:
u = svd_result[0]

The rows of v are the eigenvectors of a^T * a. The columns of u are the eigenvectors of a * a^T. For row i in v and column i in u the corresponding eigenvalue is s[i] ** 2

In [123]:
eigenvalues = np.square(svd_result[1])

In [126]:
svd_result[1]

array([ 1443.83593,   985.55428,   728.16896,   679.60093,   618.2437 ,
         532.4782 ,   465.58015,   437.75929,   399.27515,   373.38854,
         349.36788,   346.0276 ,   336.1584 ,   334.58799,   331.89053,
         326.486  ,   323.45468,   319.49672,   313.31786,   307.09202,
         306.38246,   297.8251 ,   296.58476,   294.49568,   293.43124,
         290.08975,   281.68802,   280.90928,   279.3363 ,   275.83053,
         274.77253,   270.1265 ,   269.13951,   267.79877,   263.75742,
         261.98142,   261.44743,   259.4777 ,   258.11526,   256.01914,
         253.54783,   252.91525,   249.43858,   248.85473,   247.21952,
         244.64417,   241.06522,   239.11959,   237.44958,   236.37641,
         234.69801,   234.10778,   231.82163,   231.31942,   229.9256 ,
         228.10331,   227.07792,   226.08737,   225.27356,   222.16924,
         221.27317,   219.99582,   218.54394,   216.33351,   215.82817,
         215.05244,   213.63928,   213.25249,   212.32354,   211

In [118]:
eigenvalues[0]*eigenvectors[1]

array([ -8540.81186,  11142.10935,  -7172.1559 , ...,  11507.09766,
          130.50313,   6139.55542])

In [121]:
eigenvalues[0] = np.sum(eigenvalues)

In [125]:
eigenvalues

array([ 2084662.19109,   971317.23948,   530230.03572,   461857.4293 ,
         382225.26658,   283533.02845,   216764.8777 ,   191633.19569,
         159420.6422 ,   139419.00335,   122057.91378,   119735.10203,
         113002.46769,   111949.12298,   110151.32113,   106593.11024,
         104622.92863,   102078.15311,    98168.08241,    94305.51099,
          93870.21186,    88699.78741,    87962.51742,    86727.70845,
          86101.89449,    84152.06235,    79348.1393 ,    78910.02576,
          78028.76924,    76082.47938,    75499.94084,    72968.32653,
          72436.07671,    71716.17905,    69567.97644,    68634.26335,
          68354.76061,    67328.67536,    66623.48993,    65545.79788,
          64286.50402,    63966.12205,    62219.60447,    61928.67822,
          61117.49069,    59850.7706 ,    58112.44052,    57178.17723,
          56382.30311,    55873.80499,    55083.15774,    54806.45485,
          53741.26854,    53508.6746 ,    52865.78302,    52031.11819,
      

In [63]:
sigma_matrix.shape

(42837, 105)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
        0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [54]:
eigenvectors

array([ 1443.83593,   985.55428,   728.16896,   679.60093,   618.2437 ,
         532.4782 ,   465.58015,   437.75929,   399.27515,   373.38854,
         349.36788,   346.0276 ,   336.1584 ,   334.58799,   331.89053,
         326.486  ,   323.45468,   319.49672,   313.31786,   307.09202,
         306.38246,   297.8251 ,   296.58476,   294.49568,   293.43124,
         290.08975,   281.68802,   280.90928,   279.3363 ,   275.83053,
         274.77253,   270.1265 ,   269.13951,   267.79877,   263.75742,
         261.98142,   261.44743,   259.4777 ,   258.11526,   256.01914,
         253.54783,   252.91525,   249.43858,   248.85473,   247.21952,
         244.64417,   241.06522,   239.11959,   237.44958,   236.37641,
         234.69801,   234.10778,   231.82163,   231.31942,   229.9256 ,
         228.10331,   227.07792,   226.08737,   225.27356,   222.16924,
         221.27317,   219.99582,   218.54394,   216.33351,   215.82817,
         215.05244,   213.63928,   213.25249,   212.32354,   211

In [None]:
sum_of_eigenvalues = np.sum(eigenvalues)
fraction_of_variance = np.divide(eigenvalues, sum_of_eigenvalues)
eigen_data = {'vectors': eigenvectors, 'values': eigenvalues,
                  'fraction_variance': fraction_of_variance}

In [None]:
linalg.svd()

In [135]:
corrected_matrix

array([[ 1.29963,  1.29963,  1.29963, ...,  1.29963,  1.29963,  1.29963],
       [-1.19916, -1.19916, -1.19916, ...,  1.06826, -1.19916, -1.19916],
       [ 1.32883, -1.07238, -1.07238, ...,  1.32883, -1.07238, -1.07238],
       ..., 
       [-1.17233,  1.11984, -1.17233, ...,  3.41201,  1.11984,  3.41201],
       [-1.23525,  3.23821, -1.23525, ...,  1.00148, -1.23525, -1.23525],
       [-1.08349, -1.08349, -1.08349, ...,  1.30415,  1.30415, -1.08349]])

In [138]:
p = figure()

In [143]:
corrected_matrix.shape

(105, 42837)

In [152]:
len(example_pop.dvars().segSites)

42837

In [155]:
ccm = np.array(ccm, dtype=np.int8)

In [164]:
ccm

array([[1, 1, 1, ..., 1, 1, 1],
       [0, 0, 0, ..., 1, 0, 0],
       [1, 0, 0, ..., 1, 0, 0],
       ..., 
       [0, 1, 0, ..., 2, 1, 2],
       [0, 2, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 1, 1, 0]], dtype=int8)

In [161]:
cmatrix = np.zeros((105, 42837), dtype=np.int8)

In [174]:
for i, ind in enumerate(example_pop.individuals()):
    ageno = np.array(ind.genotype(ploidy=0), dtype=np.int8)[segregating_loci]
    bgeno = np.array(ind.genotype(ploidy=1), dtype=np.int8)[segregating_loci]
    acomps = np.array(np.equal(segregating_minor_alleles, ageno), dtype=np.int8)
    bcomps = np.array(np.equal(segregating_minor_alleles, bgeno), dtype=np.int8)
    comp_count = acomps + bcomps
    cmatrix[i, :] = comp_count

In [193]:
column_means = np.apply_along_axis(np.mean, axis=0, arr=cmatrix)

In [197]:
print(column_means)

[ 0.6381   0.4381   0.12381 ...,  0.93333  0.52381  0.53333]


In [234]:
cmatrix

array([[1, 1, 1, ..., 1, 1, 1],
       [0, 0, 0, ..., 1, 0, 0],
       [1, 0, 0, ..., 1, 0, 0],
       ..., 
       [0, 1, 0, ..., 2, 1, 2],
       [0, 2, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 1, 1, 0]], dtype=int8)

In [229]:
shifted = np.array([cmatrix[:, i] - column_means[i] for i in range(42837)]).T

In [235]:
print(shifted)

[[ 0.3619   0.5619   0.87619 ...,  0.06667  0.47619  0.46667]
 [-0.6381  -0.4381  -0.12381 ...,  0.06667 -0.52381 -0.53333]
 [ 0.3619  -0.4381  -0.12381 ...,  0.06667 -0.52381 -0.53333]
 ..., 
 [-0.6381   0.5619  -0.12381 ...,  1.06667  0.47619  1.46667]
 [-0.6381   1.5619  -0.12381 ...,  0.06667 -0.52381 -0.53333]
 [-0.6381  -0.4381  -0.12381 ...,  0.06667  0.47619 -0.53333]]


In [237]:
P = column_means/2

In [244]:
scale = np.sqrt(P*(1-P))

In [256]:
M = np.matrix(np.array([shifted[:, i] / scale[i] for i in range(42837)]).T)

In [257]:
print(M)

[[ 0.77644  1.35857  3.63591 ...,  0.13363  1.08306  1.05529]
 [-1.36899 -1.05922 -0.51377 ...,  0.13363 -1.19137 -1.20605]
 [ 0.77644 -1.05922 -0.51377 ...,  0.13363 -1.19137 -1.20605]
 ..., 
 [-1.36899  1.35857 -0.51377 ...,  2.13809  1.08306  3.31662]
 [-1.36899  3.77636 -0.51377 ...,  0.13363 -1.19137 -1.20605]
 [-1.36899 -1.05922 -0.51377 ...,  0.13363  1.08306 -1.20605]]


In [254]:
(1/42837)*M

array([[ 0.00002,  0.00003,  0.00008, ...,  0.     ,  0.00003,  0.00002],
       [-0.00003, -0.00002, -0.00001, ...,  0.     , -0.00003, -0.00003],
       [ 0.00002, -0.00002, -0.00001, ...,  0.     , -0.00003, -0.00003],
       ..., 
       [-0.00003,  0.00003, -0.00001, ...,  0.00005,  0.00003,  0.00008],
       [-0.00003,  0.00009, -0.00001, ...,  0.     , -0.00003, -0.00003],
       [-0.00003, -0.00002, -0.00001, ...,  0.     ,  0.00003, -0.00003]])

In [259]:
X = (1/42837)*(M * M.T)

In [262]:
print(X)

[[ 2.31946 -0.23581  0.01629 ..., -0.22222 -0.15755  0.11152]
 [-0.23581  2.86221 -0.24749 ..., -0.08434  0.17242 -0.21032]
 [ 0.01629 -0.24749  1.52916 ..., -0.01163 -0.23874  0.17398]
 ..., 
 [-0.22222 -0.08434 -0.01163 ...,  2.15091  0.22036  0.02144]
 [-0.15755  0.17242 -0.23874 ...,  0.22036  2.70727 -0.17811]
 [ 0.11152 -0.21032  0.17398 ...,  0.02144 -0.17811  1.55631]]


In [286]:
eigendata = linalg.eig(X)

In [292]:
eigenvalues = np.array(eigendata[0], dtype=np.float)

  """Entry point for launching an IPython kernel.


In [294]:
eigenvectors = np.array(eigendata[1], dtype=np.float)

In [295]:
print(eigenvectors)

[[-0.08314 -0.0391  -0.035   ..., -0.01993  0.00391 -0.00073]
 [ 0.08093  0.15168 -0.20354 ...,  0.02106  0.08078  0.10215]
 [-0.1107  -0.01949  0.00632 ..., -0.08702  0.0631  -0.05064]
 ..., 
 [-0.01524  0.028    0.00578 ..., -0.12245 -0.03194 -0.12669]
 [ 0.07456  0.08529 -0.06012 ...,  0.02345 -0.04345 -0.02508]
 [-0.10475 -0.01612  0.00005 ..., -0.07645 -0.05062  0.0532 ]]


In [296]:
print(eigenvalues)

[ 24.35496  14.43709  10.95      9.87834   8.96032   5.66113   5.13832
   4.69216   3.68163   3.48883   3.30212   0.        3.14576   3.06948
   2.95014   2.93696   2.82475   2.68146   2.64672   2.56109   2.5032
   2.43017   2.44561   0.45029   2.33185   2.26826   2.22576   2.20675
   0.52016   0.53806   0.55489   2.14117   0.59063   0.61377   0.65783
   2.12336   2.09859   2.06955   2.02005   2.00959   0.69339   1.99597
   0.71133   0.73246   1.93089   0.75211   1.90489   0.76822   1.89395
   1.86385   1.82935   0.80614   0.82226   0.81851   0.85179   0.86758
   1.78954   1.76452   0.89322   1.74747   1.71413   0.88218   0.91721
   0.93122   0.93741   0.95435   1.67932   1.65778   0.97081   1.01248
   1.64478   1.02182   1.60899   1.04379   1.08803   1.07039   1.06256
   1.16961   1.5986    1.13442   1.57835   1.19701   1.20852   1.13875
   1.56733   1.24321   1.57096   1.49243   1.5277    1.25801   1.27303
   1.44836   1.45728   1.41486   1.54286   1.31309   1.3893    1.36034
   1.46

In [288]:
eigenvalues

array([ 24.35496+0.j,  14.43709+0.j,  10.95000+0.j,   9.87834+0.j,
         8.96032+0.j,   5.66113+0.j,   5.13832+0.j,   4.69216+0.j,
         3.68163+0.j,   3.48883+0.j,   3.30212+0.j,   0.00000+0.j,
         3.14576+0.j,   3.06948+0.j,   2.95014+0.j,   2.93696+0.j,
         2.82475+0.j,   2.68146+0.j,   2.64672+0.j,   2.56109+0.j,
         2.50320+0.j,   2.43017+0.j,   2.44561+0.j,   0.45029+0.j,
         2.33185+0.j,   2.26826+0.j,   2.22576+0.j,   2.20675+0.j,
         0.52016+0.j,   0.53806+0.j,   0.55489+0.j,   2.14117+0.j,
         0.59063+0.j,   0.61377+0.j,   0.65783+0.j,   2.12336+0.j,
         2.09859+0.j,   2.06955+0.j,   2.02005+0.j,   2.00959+0.j,
         0.69339+0.j,   1.99597+0.j,   0.71133+0.j,   0.73246+0.j,
         1.93089+0.j,   0.75211+0.j,   1.90489+0.j,   0.76822+0.j,
         1.89395+0.j,   1.86385+0.j,   1.82935+0.j,   0.80614+0.j,
         0.82226+0.j,   0.81851+0.j,   0.85179+0.j,   0.86758+0.j,
         1.78954+0.j,   1.76452+0.j,   0.89322+0.j,   1.74747+

In [282]:
eigenvectors = eigendata[1]

In [284]:
print(eigenvectors)

[[-0.09759 -0.05676 -0.02138 ...,  0.035    0.0391  -0.08314]
 [-0.09759  0.0021   0.00156 ...,  0.20354 -0.15168  0.08093]
 [-0.09759  0.10219 -0.09886 ..., -0.00632  0.01949 -0.1107 ]
 ..., 
 [-0.09759  0.007   -0.01118 ..., -0.00578 -0.028   -0.01524]
 [-0.09759  0.00054 -0.00478 ...,  0.06012 -0.08529  0.07456]
 [-0.09759 -0.18173 -0.06451 ..., -0.00005  0.01612 -0.10475]]


In [285]:
eigenvalues[-1] * eigenvectors[:, -1]

array([-2.02482,  1.97097, -2.6962 ,  0.84177,  2.0406 ,  1.66108,
       -2.40613,  2.11743, -0.75642, -2.35453,  2.16698, -2.53924,
        1.83827,  2.95061, -2.56161,  1.89005,  1.74837, -0.25756,
        1.76071, -2.41473, -2.86635, -2.58557,  1.99133, -2.55591,
       -2.67065,  1.90744,  1.84266, -2.55653,  2.44165, -2.16026,
        3.75216, -2.55362, -2.53788,  1.65159, -2.52492, -0.49618,
       -2.68008, -2.28834,  1.70432,  2.02596,  2.06723,  1.93848,
        3.72435, -2.62214, -2.59927, -2.45411, -2.49021,  1.7809 ,
       -0.66636,  1.63043,  1.80297, -2.47125,  3.75597, -2.75059,
        3.62891,  2.71953,  1.7938 , -2.66145, -2.68045,  3.96874,
       -2.65409,  3.50942, -2.38225, -2.81748, -2.54489,  1.87352,
       -0.05592, -2.73591,  3.62292,  1.63186,  2.17426,  2.09993,
        1.78728, -2.60013, -0.7729 , -2.66278, -0.48228, -2.52339,
        2.13256, -2.519  ,  1.86885,  3.03094, -2.42255, -2.80514,
       -2.64434,  2.10264, -2.73322, -2.60979, -2.2384 ,  1.86

In [298]:
sum_eigen_values = np.sum(eigenvalues)

In [301]:
eigenvalues[0]/sum_eigen_values

0.10414233649249059

In [343]:
del p

In [344]:
p = figure(title="PCA Plot of Components PC1 x PC2")

In [345]:
p.output_backend = "svg"

In [346]:
p.xaxis.axis_label = "PC1"

In [347]:
p.yaxis.axis_label = "PC2"

In [348]:
p.circle(eigenvalues[0]*eigenvectors[:, 0], eigenvalues[1]*eigenvectors[:, 1])

In [331]:
output_notebook()

In [332]:
show(p)

In [334]:
from bokeh.io import export_svgs

In [351]:
export_svgs(p, filename="PC1xPC2.svg")

['PC1xPC2.svg']

In [358]:
structure_covariates = np.array([eigenvalues[0]*eigenvectors[:, 0], eigenvalues[1]*eigenvectors[:, 1]]).T

In [374]:
output_matrix = pd.DataFrame(structure_covariates, 
                             index=gwas.individual_names)

In [379]:
structure_header = "<Covariate>\t\t\n<Trait>\td1\td2\n"

In [380]:
with open('example_structure.txt', 'w') as f:
    f.write(structure_header)
    output_matrix.to_csv(f, sep='\t', index=True, header=False)