In [1]:
# Developing a matrix-based approach to Pagel's lambda test
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from src.phylogenetic_signal import PagelsLambda
from src.ihmp import get_diffs, pagels_dataframe

In [2]:
# Load the table of diffs for the "ibd" dataset, limited with top_n=10.
# The recorded output below shows 10 OTU-id columns, with rows indexed by
# (site, patient, visit).
# NOTE(review): later recorded outputs report 100 columns per row
# ("Reshaped X to (69, 100, 1)", flattened length 6900), so those cells were
# presumably run with a different top_n -- re-run top to bottom to confirm.
diffs = get_diffs("ibd", top_n=10)
diffs

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,589277,589282,589071,851865,583117,568118,1111294,535375,370287,571178
site,patient,visit,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
feces,3002,6,-0.056920,-6.712384,1.365257,-2.937793,0.703366,1.233550,-2.230747,0.608226,-1.885603,-0.208031
feces,3002,8,0.150511,7.954029,0.507177,3.734545,-0.224880,-14.576182,-18.181474,0.344970,-13.973186,-15.904207
feces,3002,9,0.025742,-7.033436,-3.030668,-4.468496,0.028617,13.541401,18.831685,-0.412801,13.135937,14.091447
feces,3003,5,-0.810930,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,21.521774,0.000000,0.000000
feces,3003,6,-1.064244,20.588497,20.780836,21.224825,19.037478,12.972068,14.383053,-0.987078,18.848681,13.665214
feces,...,...,...,...,...,...,...,...,...,...,...,...
feces,2021,11,-0.609224,-0.483634,19.597311,20.342645,-1.966021,13.443480,14.083517,0.023898,18.417355,14.776664
feces,2026,4,-0.368932,0.072582,-3.990053,0.223622,-0.560287,0.912596,1.321320,-2.468158,-0.308956,-4.590431
feces,2027,4,0.000000,0.000000,0.000000,21.079941,0.000000,-1.252763,0.000000,0.000000,0.000000,0.000000
feces,2014,9,-4.543204,-6.209652,-4.471137,-5.842596,14.426603,14.847106,18.490956,15.360912,14.471055,0.856289


In [3]:
# Compute one lambda value per row of `diffs` using the annotated Greengenes
# tree; the call also hands back the tree object that later cells reuse.
lams, tree = pagels_dataframe(
    diffs,
    tree_path="./greengenes/data/gg_13_5_otus_99_annotated.tree",
)

# Preview the first few lambdas, then the shape of the underlying array.
print(lams.head(), lams.values.shape, sep="\n")

100%|██████████| 69/69 [00:00<00:00, 152.91it/s]

site   patient  visit
feces  3002     6        0.000000
                8        0.866957
                9        0.870424
       3003     5        0.000000
                6        0.000000
Name: lambda, dtype: float64
(69,)





In [5]:
# Now we modify PagelsLambda to take matrices of values, and go with that.
# Sanity check: run the batched MLE over every row at once, then compare its
# result for the first row against the single-sample MLE on the same data.

from src.phylogenetic_signal import PagelsLambdaMulti, PagelsLambda

plm = PagelsLambdaMulti(tree)
pl = PagelsLambda(tree)

# Append a trailing singleton axis: (rows, cols) -> (rows, cols, 1).
# The shape is taken from `diffs` itself instead of being hard-coded as
# (69, 10, 1), so this cell keeps working when top_n or the number of
# samples changes.
x = diffs.values.reshape(*diffs.shape, 1)
z0s, sigma2s, lls = plm.mle(x, plm.C)

# Batched estimates for the first row.
print(z0s[0].item(), sigma2s[0].item(), lls[0].item())

# Single-sample estimates for the same row, for comparison.
# NOTE(review): in the recorded output sigma2 and ll agree to about four
# significant figures, but z0 differs (-0.0013 vs -0.0399) -- worth checking.
z0, sigma2, ll = pl.mle(x[0, :, :], pl.C)
print(z0, sigma2, ll)

-0.0012623491281545326 1942.995182543964 -29.80356493632882
-0.039912876512239494 1942.974136785006 -29.803510778003744


In [None]:
# Fit the multi-sample model on the whole 2-D matrix of diffs in one call.
# NOTE: this is very slow, so the matrix-based approach may not have been
# worth the effort. The recorded output reports the input being reshaped to
# (69, 100, 1).

plm.fit(diffs.values)

ndim 2
Reshaped X to (69, 100, 1)
(69, 100, 1)
(100,)


In [None]:
# Inspect the `lam` attribute of the model after the fit
# (presumably the lambda estimated by fit -- confirm in PagelsLambdaMulti).
plm.lam

0.07882134444992613

In [None]:
# Call rescale_cov on the model with lam=0.1.
# NOTE(review): the recorded output shows autoreload failing to reload
# src.phylogenetic_signal, ending in
# "TypeError: rescale_cov() missing 1 required positional argument: 'self'".
# The kernel state after this cell is therefore suspect; restart the kernel
# and re-run before trusting results from here on.
plm.rescale_cov(lam=0.1)

[autoreload of src.phylogenetic_signal failed: Traceback (most recent call last):
  File "/home/phil/miniconda3/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 257, in check
    superreload(m, reload, self.old_objects)
  File "/home/phil/miniconda3/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 480, in superreload
    update_generic(old_obj, new_obj)
  File "/home/phil/miniconda3/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 377, in update_generic
    update(a, b)
  File "/home/phil/miniconda3/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 329, in update_class
    if update_generic(old_obj, new_obj):
  File "/home/phil/miniconda3/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 377, in update_generic
    update(a, b)
  File "/home/phil/miniconda3/lib/python3.9/site-packages/IPython/extensions/autoreload.py", line 289, in update_function
    setattr(old, name, getattr(new, name))
Value

TypeError: rescale_cov() missing 1 required positional argument: 'self'

In [None]:
# Inspect `lam` again after the rescale_cov call.
# NOTE(review): the recorded value here is nan, whereas the earlier
# inspection showed ~0.0788 -- the model state appears to have been
# invalidated in between; confirm after a clean re-run.
plm.lam

nan

In [None]:
# Flatten the matrix of diffs into a single 1-D vector and show its length.
x = diffs.values.reshape(-1)
x.shape

(6900,)

In [None]:
# Reference implementation: run the single-sample MLE once per row of `diffs`
# and collect the estimates, so they can be compared with the batched results.
pl = PagelsLambda(tree)

z0s_single = []
sigma2s_single = []
lls_single = []
for row in diffs.values:
    # Each row becomes a column vector. Using -1 instead of a hard-coded 100
    # keeps this working for any number of columns (i.e. any top_n).
    z0, sigma2, ll = pl.mle(row.reshape(-1, 1), pl.C)
    z0s_single.append(z0)
    sigma2s_single.append(sigma2)
    lls_single.append(ll)

# Preview the first four estimates of each quantity.
# In the recorded output each sigma2 is a 1x1 array rather than a scalar.
print(z0s_single[:4])
print(sigma2s_single[:4])
print(lls_single[:4])

[0.06562551836789537, -13.719899331138247, 9.821876058480438, 3.053426899433133e-07]
[array([[1851.53769498]]), array([[3526.91389165]]), array([[4213.27385025]]), array([[1484.46178027]])]
[-320.07212336440216, -352.2924611936505, -361.1832984548204, -309.02391279958687]


In [None]:
# Element-wise ratio of the per-row z0 estimates to the batched z0s;
# values near 1 would mean the two implementations agree.
# NOTE(review): the recorded ratios range from ~1e-10 to ~1e6, so they do not
# agree. z0s was computed in an earlier cell from a (69, 10, 1) reshape, while
# z0s_single was computed from 100-column rows -- possibly a stale-variable
# comparison. Confirm by restarting the kernel and running top to bottom.
np.array(z0s_single) / z0s

array([3.00229686e+01, 1.31223008e+06, 6.72507352e+05, 6.49954855e-10,
       3.56654716e+05, 5.86479269e+04, 1.74083955e+05, 2.27649979e+04,
       3.68966367e+04, 1.38526888e+06, 9.69386813e-03, 1.46517375e+06,
       4.90706588e+01, 4.87229901e+03, 9.11692187e+04, 1.49479045e+06,
       3.85243694e-06, 3.27067470e+05, 5.15835511e+03, 9.69150989e+05,
       8.54913923e+02, 9.15911341e+05, 9.33362248e+05, 1.03372746e+06,
       1.55718819e+06, 1.98523623e+05, 1.02882646e+06, 1.51719739e+04,
       5.35436918e+02, 1.43772559e-02, 1.61179114e+06, 1.60887649e+06,
       7.82640744e+03, 5.75472425e+04, 6.39453227e+05, 3.55361693e+05,
       2.29403326e-07, 7.02080466e+02, 6.92564431e+01, 4.12465683e+03,
       1.48893006e+06, 2.47239795e+03, 7.44757903e+03, 1.01354648e+03,
       1.27121582e+06, 1.06320363e+06, 1.81914806e+06, 1.57135415e+06,
       7.13094783e+02, 1.23239155e+06, 7.36440426e-07, 2.85025240e+02,
       1.38340997e+03, 1.66445270e+02, 2.61054300e-01, 9.94944590e+05,
      