In [1]:
import pandas as pd
import MarineDNA as md
import seaborn as sn
import numpy as np

In [2]:
# Tab-separated read-count table, flipped so that each row is a sample
# and each column is an ASV.
file1 = "../../../Data/Flyer2018_16S_table_counts.tsv"
asvs1 = pd.read_csv(file1, sep="\t", index_col=0).T

# The raw data

In [3]:
# Preview the first rows (samples) of the count table.
asvs1.head()

ASV ID,495c1bd1608a1dad54d3e2824ce899ef,a900b6678ce86851fb16bfafb87f3326,c8e360969108fa2125a3d56eb4dad24f,72143fd9e63fe40c1258948d2f0d79c3,7b6b178fad5599c0e9a734e4fb09fd64,4bbec3bb723375416616a87d785ac74a,0c35cfa523aa27921ef8544a16d1cd36,7ec69f2c62aad60e060e588ef687bdd0,61e9a50f4346bb3a5b16179b8eca71fa,a140195871278e8fcf9447e42bad8786,...,995cc65bcfa53a868c42615004e99ad3,46b90aab075ecd8e4db549da708550d8,c4e1933274329209b7cf24daf18dfe0d,aa9e141a5e2781d280406c513bf34d45,d7682f536589fc5f920533513dd0002b,674933a0d44342a0647f7a5b4591f26e,bebe1b9a7e9aaa78172c1208111f4570,0128431733f67d02efad766d717fe6fd,41102a7dd1f4647ba5477c947daabc0e,51440f89c391fb32f9ee895db22bf8f8
CN18Fc12_8_eDNA,552,210,145,130,156,49,0,89,190,97,...,0,0,0,0,0,0,0,0,0,0
CN18Fc19_5_eDNA,7415,1933,2089,1830,1742,488,234,595,767,918,...,0,0,0,0,0,0,0,0,0,0
CN18Fc21_6_eDNA,8749,2808,2530,2516,1761,787,632,1162,1545,1252,...,0,0,0,0,0,0,0,0,0,0
CN18Fc22_6_eDNA,8152,1967,2086,2178,1855,510,353,750,988,904,...,0,0,0,0,0,0,0,0,0,0
CN18Fc24_6_eDNA,7124,1671,2343,2256,1812,720,308,888,1179,824,...,0,0,0,0,0,0,0,0,0,0


# Extract an example row (one sample) of read counts for a handful of columns (ASVs)

In [4]:
# Six ASV columns (positions 50-55) for the first five samples.
asvs1.iloc[:5, 50:56]

ASV ID,9cdadd8a7359a3163fb31ad06be74e8c,f936cc0095df2ce79b485df5f7fe631a,45aae5b06129baf3325f68a675e9c8e2,defcb02ec20f29352cb1b1b267f162a4,e1453e7b5954ac141ec0b8c91939512d,2ed7d51e061664183c05fbbb56c0787e
CN18Fc12_8_eDNA,0,4,16,8,0,31
CN18Fc19_5_eDNA,229,32,185,162,0,157
CN18Fc21_6_eDNA,309,44,191,242,13,244
CN18Fc22_6_eDNA,421,58,206,163,0,178
CN18Fc24_6_eDNA,245,42,166,213,0,218


In [5]:
# Counts of the first sample (row 0) for the six ASV columns at positions 50-55.
ex_row = asvs1.iloc[0,50:56]
ex_row

ASV ID
9cdadd8a7359a3163fb31ad06be74e8c     0
f936cc0095df2ce79b485df5f7fe631a     4
45aae5b06129baf3325f68a675e9c8e2    16
defcb02ec20f29352cb1b1b267f162a4     8
e1453e7b5954ac141ec0b8c91939512d     0
2ed7d51e061664183c05fbbb56c0787e    31
Name: CN18Fc12_8_eDNA, dtype: int64

# Total number of counts across the selected ASVs for the example sample

In [6]:
# Sum over the six selected ASVs only; this is the denominator used in the cells below.
ex_row.sum()

59

# The observed relative proportions (each count as a fraction of the example total)

In [7]:
# Each ASV's count as a fraction of the example row's total.
print(ex_row.div(ex_row.sum()))

ASV ID
9cdadd8a7359a3163fb31ad06be74e8c    0.000000
f936cc0095df2ce79b485df5f7fe631a    0.067797
45aae5b06129baf3325f68a675e9c8e2    0.271186
defcb02ec20f29352cb1b1b267f162a4    0.135593
e1453e7b5954ac141ec0b8c91939512d    0.000000
2ed7d51e061664183c05fbbb56c0787e    0.525424
Name: CN18Fc12_8_eDNA, dtype: float64


# Compute the Beta distribution parameters (alpha = count + 1, beta = total - count + 1) for each ASV's relative proportion

In [8]:
# Per-ASV Beta parameters: alpha is the ASV's own count plus one,
# beta is the combined count of the other selected ASVs plus one.
alpha = 1 + ex_row
beta = 1 + (ex_row.sum() - ex_row)
print(alpha, beta, sep="\n\n")

ASV ID
9cdadd8a7359a3163fb31ad06be74e8c     1
f936cc0095df2ce79b485df5f7fe631a     5
45aae5b06129baf3325f68a675e9c8e2    17
defcb02ec20f29352cb1b1b267f162a4     9
e1453e7b5954ac141ec0b8c91939512d     1
2ed7d51e061664183c05fbbb56c0787e    32
Name: CN18Fc12_8_eDNA, dtype: int64

ASV ID
9cdadd8a7359a3163fb31ad06be74e8c    60
f936cc0095df2ce79b485df5f7fe631a    56
45aae5b06129baf3325f68a675e9c8e2    44
defcb02ec20f29352cb1b1b267f162a4    52
e1453e7b5954ac141ec0b8c91939512d    60
2ed7d51e061664183c05fbbb56c0787e    29
Name: CN18Fc12_8_eDNA, dtype: int64


In [9]:
# One independent Beta(alpha_i, beta_i) draw per ASV; the raw draws need not sum to 1.
beta_draw = np.random.beta(alpha, beta)
print(beta_draw, beta_draw.sum(), sep="\n")
print()
# Rescale the draws so the sampled proportions sum to 1.
unit_draw = np.divide(beta_draw, beta_draw.sum())
print(unit_draw, unit_draw.sum(), sep="\n")

[0.01540924 0.03859206 0.25445195 0.12597038 0.00072764 0.583481  ]
1.0186322648862953

[0.01512738 0.03788615 0.24979765 0.1236662  0.00071433 0.57280828]
1.0


# Function to draw a random sample from a row

In [10]:
def betaRow(row, rng=None):
    """Draw one random set of relative abundances for a row of counts.

    Each entry ``x`` of ``row`` is replaced by an independent draw from
    ``Beta(x + 1, N - x + 1)``, where ``N`` is the sum of the row. The
    draws are then rescaled so that they sum to 1.

    Parameters
    ----------
    row : array-like
        Counts for one sample (pandas Series, NumPy array, or list).
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, NumPy's global ``np.random``
        state is used, exactly as before. Pass a seeded generator to get
        reproducible draws.

    Returns
    -------
    numpy.ndarray
        Sampled proportions, in the same order as ``row``.
    """
    counts = np.asarray(row)
    # Fall back to the module-level functions so existing callers keep
    # their behaviour (including any np.random.seed they rely on).
    sampler = np.random if rng is None else rng
    ran_row = sampler.beta(counts + 1, counts.sum() - counts + 1)
    # The Beta draws are independent, so they only sum to 1 after rescaling.
    return ran_row / ran_row.sum()

In [11]:
# No seed is set in the visible cells, so these values differ from run to run.
betaRow(ex_row)

array([0.01751629, 0.03796048, 0.25836395, 0.18905139, 0.0076531 ,
       0.48945478])

# Function to draw a random sample for the full data frame

In [12]:
# Apply the MarineDNA sampler to the whole sample-by-ASV count table.
# NOTE(review): the values displayed below are all negative, so ranRelPct
# presumably returns log-scaled proportions rather than raw ones — confirm
# against the MarineDNA source.
ran_sample = md.ranRelPct(asvs1)
ran_sample.head()

Unnamed: 0,495c1bd1608a1dad54d3e2824ce899ef,a900b6678ce86851fb16bfafb87f3326,c8e360969108fa2125a3d56eb4dad24f,72143fd9e63fe40c1258948d2f0d79c3,7b6b178fad5599c0e9a734e4fb09fd64,4bbec3bb723375416616a87d785ac74a,0c35cfa523aa27921ef8544a16d1cd36,7ec69f2c62aad60e060e588ef687bdd0,61e9a50f4346bb3a5b16179b8eca71fa,a140195871278e8fcf9447e42bad8786,...,995cc65bcfa53a868c42615004e99ad3,46b90aab075ecd8e4db549da708550d8,c4e1933274329209b7cf24daf18dfe0d,aa9e141a5e2781d280406c513bf34d45,d7682f536589fc5f920533513dd0002b,674933a0d44342a0647f7a5b4591f26e,bebe1b9a7e9aaa78172c1208111f4570,0128431733f67d02efad766d717fe6fd,41102a7dd1f4647ba5477c947daabc0e,51440f89c391fb32f9ee895db22bf8f8
CN18Fc12_8_eDNA,-2.402741,-3.374966,-3.838382,-3.91137,-3.61224,-5.093827,-10.600477,-4.363215,-3.566411,-4.343962,...,-8.32555,-9.928938,-9.183758,-8.96742,-10.834805,-11.771702,-8.4159,-8.858444,-8.672677,-10.488615
CN18Fc19_5_eDNA,-1.599508,-3.049147,-3.021787,-3.115934,-3.17101,-4.481994,-5.182626,-4.248335,-4.011664,-3.799011,...,-12.49721,-10.170128,-12.3381,-11.606844,-10.701643,-10.688939,-12.686909,-10.237003,-12.102322,-9.991301
CN18Fc21_6_eDNA,-1.867571,-3.101671,-3.242713,-3.207179,-3.598384,-4.396055,-4.648621,-4.040823,-3.728737,-3.937653,...,-9.997324,-12.094258,-12.348176,-12.004633,-11.448648,-11.708489,-12.643347,-12.084556,-12.125963,-10.712216
CN18Fc22_6_eDNA,-1.699511,-3.226939,-3.14227,-3.155018,-3.290708,-4.642974,-4.96114,-4.13603,-4.001549,-3.961205,...,-11.194968,-16.422884,-10.54452,-9.776486,-13.263532,-9.718059,-13.662441,-10.992633,-11.457783,-10.81423
CN18Fc24_6_eDNA,-1.882898,-3.43685,-3.082224,-3.140842,-3.370288,-4.289929,-5.063632,-4.032275,-3.847984,-4.156647,...,-11.187793,-10.924223,-10.069167,-10.219007,-10.412752,-11.335183,-9.938371,-10.92367,-10.6709,-9.989883
