# Nobel Twin Study, Fecal Data Processing
Notebook for performing any data processing or data adjusting activities.

**Author: Tobin Groth (tgroth@ucsd.edu)**

In [35]:
import pandas as pd
import qiime2
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from skbio.stats import composition

# Working directory against which every relative path in this notebook
# ('./feature_tables/...', '../old_analyses/...', 'songbird_analysis/...') is resolved.
# The original machine-specific location remains the default; set the
# FECAL_ANALYSIS_DIR environment variable to run the notebook from another checkout.
os.chdir(os.environ.get('FECAL_ANALYSIS_DIR',
                        '/Users/tgroth/Google Drive/knight_twin_NAFLD/fecal_analysis'))

### Creating Consistent Fecal Feature Table
In the old analysis, the set of samples was not consistent across our analyses. By using the samples from the old combined analysis, we can ensure that our fecal, serum, and multi-omic analyses all contain the same samples. We pull the sample IDs from the 'fecal-combined-ft.tsv' feature table found in the old multi-omic machine learning analysis folder.

In [3]:
# Load the merged fecal feature table artifact and expose it as a DataFrame.
fecal_table = qiime2.Artifact.load('./feature_tables/merged-fecal-ft.qza')
fecal_tablepd = fecal_table.view(pd.DataFrame)

# Report the table dimensions, then preview the first few rows.
print(fecal_tablepd.shape)
fecal_tablepd.head(n=3)

(189, 985)


Unnamed: 0,00130daba4ed7c3091cebe843923b656,0035e736dc271e0ffa69ce089124dd3a,00b990effdb1e3547be73fc221c47da9,00bf884546e085e1d7341e1b4d5b4c7f,00d4834e98bff02228bc6022ad218b38,013d75d35d7937c40332f8492e0ac45d,0178e96df0dc4d287475a41115a80ff5,018aa6775f054d00a3af02b1f142bd7f,019f30f6e7668fa412b214ddaa35c150,0285b154e241302fc53e9f2aa1cb74f2,...,fbda8d94f713b714f135c441853dc12d,fc6931386367002a15c8854e10f1411f,fc69c5f3c21f8a3c984509dfa751197a,fc81ed8373e77dc1051c32775cbe8156,fc9687d47db7ef11369d303f56b588c2,fc9ed967c73557234a3a59e5f3921dcb,fd166de6a32d83ea5d14ccf2bb4a04c9,ff56dd5a9e0f7171e04d3f576d03fd87,ff6c014126b685e755c395c10017f17d,fff3d0fb371c075ad5340cf5738683da
TW-CO-002_BG5_01_29911.mzXML,0.0,0.0,0.0,0.0,6801.6965,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,16166.5755,0.0,0.0,0.0,0.0,0.0,0.0
TW-BT-001_BC11_01_29853.mzXML,0.0,0.0,0.0,0.0,33493.3525,0.0,0.0,0.0,76748.562,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TW-CA-002_RC6_01_29817.mzXML,0.0,0.0,0.0,0.0,6085.211,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6156.011,0.0,0.0,0.0,0.0,0.0,76366.6675


In [4]:
# Combined feature table from the old multi-omic ML analysis; the first
# column of the TSV becomes the row index.
comb_samp = pd.read_csv(
    '../old_analyses/combined_analysis/multiomic_ml_analysis/fecal-combined-ft.tsv',
    sep='\t',
    index_col=0,
)
comb_samp.head(n=3)

Unnamed: 0,fb758530086adf438d5ebe8f83847f2d,0ba75f593218ef056eefaeab285ae839,cdf8d23687cfc324485e2bd3e0094ff2,d2ef3361261cc1d835ad59394bfe161c,4deeef4aa3231abf469afec860b7b1ea,f2f3dfa3c15062c6cc6fdda001f9ab52,db77705855bc4611e9c4fedca4b4f926,c0095c69fee5c767903814b366fcb860,0366e39a2c8c5aeaa60e42839a8b33e9,31b4a92e41933b13e3182509ebcd71e6,...,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGATAGGTCAGTCTGTCTTAAAAGTTCGGGGCTTAACCCCGTGATGGGATGGAAACTGCCAATCTAGAGTATCGGAGAGGAAAGTGGAATTCCTAGT,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTAAGCAAGTCTGAAGTGAAAGCCCGGGGCTCAACCCCGGTACTGCTTTGGAAACTGTTTGACTTGAGTGCAGGAGAGGTAAGTGGAATTCCTAG,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGCAGCCGGGCCGGCAAGTCAGATGTGAAATCTGGAGGCTTAACCTCCAAACTGCATTTGAAACTGTAGGTCTTGAGTACCGGAGAGGTTATCGGAATTCCTTG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGAAGAGCAAGTCTGATGTGAAAGGCTGGGGCTTAACCCCAGGACTGCATTGGAAACTGTTTTTCTAGAGTGCCGGAGAGGTAAGCGGAATTCCTAG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG,TACGTAGGTGGCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTGCGTAGGTGGCGTACTAAGTCTGTAGTAAAAGGCAATGGCTCAACCATTGTAAGCTATGGAAACTGGTATGCTGGAGTGCAGAAGAGGGCGATGGAATTCCATGT,TACGTAGGTGGCGAGCGTTATCCGGAATTATTGGGCGTAAAGAGGGAGCAGGCGGCGGCAGAGGTCTGTGGTGAAAGACTGAAGCTTAACTTCAGTAAGCCATAGAAACCGGGCTGCTAGAGTGCAGGAGAGGATCGTGGAATTCCATGT,AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCCATGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATGTCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGAAGCAAGTCTGAAGTGAAAACCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTTTTGCTAGAGTGTCGGAGAGGTAAGTGGAATTCCTAG
TW-CO-002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,14.0,0.0,19.0,10.0,0.0,0.0
TW-BT-001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0
TW-CA-002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,8.0,3.0,0.0,0.0


In [5]:
# Filter the fecal feature table down to the samples in the combined table.
# Fecal row labels look like 'TW-CO-002_BG5_01_29911.mzXML'; keep only the
# part before the first underscore so they line up with comb_samp's index.
fecal_copy = fecal_tablepd.copy()
fecal_index = list(fecal_copy.index)
# str.split leaves a label without '_' unchanged, whereas slicing with
# str.find (which returns -1 on a miss) would silently drop its last character.
fecal_index_adj = [idx.split('_', 1)[0] for idx in fecal_index]
fecal_copy.index = fecal_index_adj
# Select rows in the order given by the combined table's index.
fecal_filt = fecal_copy.loc[comb_samp.index]
print(fecal_filt.shape)
fecal_filt.head(3)

(171, 985)


Unnamed: 0,00130daba4ed7c3091cebe843923b656,0035e736dc271e0ffa69ce089124dd3a,00b990effdb1e3547be73fc221c47da9,00bf884546e085e1d7341e1b4d5b4c7f,00d4834e98bff02228bc6022ad218b38,013d75d35d7937c40332f8492e0ac45d,0178e96df0dc4d287475a41115a80ff5,018aa6775f054d00a3af02b1f142bd7f,019f30f6e7668fa412b214ddaa35c150,0285b154e241302fc53e9f2aa1cb74f2,...,fbda8d94f713b714f135c441853dc12d,fc6931386367002a15c8854e10f1411f,fc69c5f3c21f8a3c984509dfa751197a,fc81ed8373e77dc1051c32775cbe8156,fc9687d47db7ef11369d303f56b588c2,fc9ed967c73557234a3a59e5f3921dcb,fd166de6a32d83ea5d14ccf2bb4a04c9,ff56dd5a9e0f7171e04d3f576d03fd87,ff6c014126b685e755c395c10017f17d,fff3d0fb371c075ad5340cf5738683da
TW-CO-002,0.0,0.0,0.0,0.0,6801.6965,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,16166.5755,0.0,0.0,0.0,0.0,0.0,0.0
TW-BT-001,0.0,0.0,0.0,0.0,33493.3525,0.0,0.0,0.0,76748.562,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TW-CA-002,0.0,0.0,0.0,0.0,6085.211,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6156.011,0.0,0.0,0.0,0.0,0.0,76366.6675


**When creating the serum table, TW-BI-001 and TW-DI-002 were duplicated, so we have to drop them from our table to maintain consistent samples. CIR4-001 had a 'Missing' BMI, so it will also be dropped. FS-CIR2-002 has NaN for its feature data, so it will be dropped as well.**

In [6]:
# Row labels (sample IDs) to remove from the filtered fecal table.
samples_to_drop = ['TW-BI-001', 'TW-DI-002', 'CIR4-001', 'FS-CIR2-002']
fecal_filt_drop = fecal_filt.drop(labels=samples_to_drop, axis=0)
fecal_filt_drop.shape

(167, 985)

**The final sample count for our feature tables is 167.**

In [7]:
#adding sampleid index name
idx = fecal_filt_drop.index
new_idx = idx.rename('sampleid')
fecal_filt_drop.index = new_idx
fecal_filt_drop.head(3)

Unnamed: 0_level_0,00130daba4ed7c3091cebe843923b656,0035e736dc271e0ffa69ce089124dd3a,00b990effdb1e3547be73fc221c47da9,00bf884546e085e1d7341e1b4d5b4c7f,00d4834e98bff02228bc6022ad218b38,013d75d35d7937c40332f8492e0ac45d,0178e96df0dc4d287475a41115a80ff5,018aa6775f054d00a3af02b1f142bd7f,019f30f6e7668fa412b214ddaa35c150,0285b154e241302fc53e9f2aa1cb74f2,...,fbda8d94f713b714f135c441853dc12d,fc6931386367002a15c8854e10f1411f,fc69c5f3c21f8a3c984509dfa751197a,fc81ed8373e77dc1051c32775cbe8156,fc9687d47db7ef11369d303f56b588c2,fc9ed967c73557234a3a59e5f3921dcb,fd166de6a32d83ea5d14ccf2bb4a04c9,ff56dd5a9e0f7171e04d3f576d03fd87,ff6c014126b685e755c395c10017f17d,fff3d0fb371c075ad5340cf5738683da
sampleid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TW-CO-002,0.0,0.0,0.0,0.0,6801.6965,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,16166.5755,0.0,0.0,0.0,0.0,0.0,0.0
TW-BT-001,0.0,0.0,0.0,0.0,33493.3525,0.0,0.0,0.0,76748.562,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TW-CA-002,0.0,0.0,0.0,0.0,6085.211,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6156.011,0.0,0.0,0.0,0.0,0.0,76366.6675


In [9]:
# Write the sample-matched fecal table to disk as a tab-separated file.
fecal_filt_drop.to_csv(
    './feature_tables/fecal-ft-matched.tsv',
    sep='\t',
)

In [13]:
# Re-read the matched TSV (first column as index) and package it as a
# QIIME 2 FeatureTable[Frequency] artifact saved next to the TSV.
fecal_table = pd.read_csv(
    './feature_tables/fecal-ft-matched.tsv',
    sep='\t',
    index_col=0,
)
qiime_table = qiime2.Artifact.import_data("FeatureTable[Frequency]", fecal_table)
qiime_table.save('./feature_tables/fecal-ft-matched.qza')

'./feature_tables/fecal-ft-matched.qza'

## Creating Min/Max (0-1) Normalized Table

In [18]:
# Reload the matched fecal table from disk and print its total NaN count
# so missing values are caught before normalization.
# (The former mid-cell `fecal_table.head(3)` was removed: only a cell's last
# expression is displayed, so its result was silently discarded.)
fecal_table = pd.read_csv('./feature_tables/fecal-ft-matched.tsv',sep='\t',index_col=0)
print(fecal_table.isna().sum().sum())

0


In [30]:
# Rescale the table with sklearn's MinMaxScaler, then re-attach the
# original sample index and feature column labels to the scaled values.
scaler = MinMaxScaler()
mnorm_values = scaler.fit_transform(fecal_table)
fecal_mnorm = pd.DataFrame(
    mnorm_values,
    index=fecal_table.index,
    columns=fecal_table.columns,
)

# Total NaN count after scaling, followed by a preview of the result.
print(fecal_mnorm.isna().sum().sum())
fecal_mnorm.head(n=3)

0


Unnamed: 0_level_0,00130daba4ed7c3091cebe843923b656,0035e736dc271e0ffa69ce089124dd3a,00b990effdb1e3547be73fc221c47da9,00bf884546e085e1d7341e1b4d5b4c7f,00d4834e98bff02228bc6022ad218b38,013d75d35d7937c40332f8492e0ac45d,0178e96df0dc4d287475a41115a80ff5,018aa6775f054d00a3af02b1f142bd7f,019f30f6e7668fa412b214ddaa35c150,0285b154e241302fc53e9f2aa1cb74f2,...,fbda8d94f713b714f135c441853dc12d,fc6931386367002a15c8854e10f1411f,fc69c5f3c21f8a3c984509dfa751197a,fc81ed8373e77dc1051c32775cbe8156,fc9687d47db7ef11369d303f56b588c2,fc9ed967c73557234a3a59e5f3921dcb,fd166de6a32d83ea5d14ccf2bb4a04c9,ff56dd5a9e0f7171e04d3f576d03fd87,ff6c014126b685e755c395c10017f17d,fff3d0fb371c075ad5340cf5738683da
sampleid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TW-CO-002,0.0,0.0,0.0,0.0,0.068351,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.20242,0.0,0.0,0.0,0.0,0.0,0.0
TW-BT-001,0.0,0.0,0.0,0.0,0.336579,0.0,0.0,0.0,0.159474,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TW-CA-002,0.0,0.0,0.0,0.0,0.061151,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.077079,0.0,0.0,0.0,0.0,0.0,0.925834


In [31]:
# Persist the min/max-normalized table twice: as a TSV and as a QIIME 2 artifact.
fecal_mnorm.to_csv(
    './feature_tables/fecal-ft-matched-mnorm.tsv',
    sep='\t',
)
qiime_table = qiime2.Artifact.import_data("FeatureTable[Frequency]", fecal_mnorm)
qiime_table.save('./feature_tables/fecal-ft-matched-mnorm.qza')

'./feature_tables/fecal-ft-matched-mnorm.qza'

## Generating Scaled Table

In [33]:
# Start again from the unnormalized matched table on disk.
fecal_table = pd.read_csv(
    './feature_tables/fecal-ft-matched.tsv',
    sep='\t',
    index_col=0,
)
fecal_table.head(n=3)

Unnamed: 0_level_0,00130daba4ed7c3091cebe843923b656,0035e736dc271e0ffa69ce089124dd3a,00b990effdb1e3547be73fc221c47da9,00bf884546e085e1d7341e1b4d5b4c7f,00d4834e98bff02228bc6022ad218b38,013d75d35d7937c40332f8492e0ac45d,0178e96df0dc4d287475a41115a80ff5,018aa6775f054d00a3af02b1f142bd7f,019f30f6e7668fa412b214ddaa35c150,0285b154e241302fc53e9f2aa1cb74f2,...,fbda8d94f713b714f135c441853dc12d,fc6931386367002a15c8854e10f1411f,fc69c5f3c21f8a3c984509dfa751197a,fc81ed8373e77dc1051c32775cbe8156,fc9687d47db7ef11369d303f56b588c2,fc9ed967c73557234a3a59e5f3921dcb,fd166de6a32d83ea5d14ccf2bb4a04c9,ff56dd5a9e0f7171e04d3f576d03fd87,ff6c014126b685e755c395c10017f17d,fff3d0fb371c075ad5340cf5738683da
sampleid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TW-CO-002,0.0,0.0,0.0,0.0,6801.6965,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,16166.5755,0.0,0.0,0.0,0.0,0.0,0.0
TW-BT-001,0.0,0.0,0.0,0.0,33493.3525,0.0,0.0,0.0,76748.562,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TW-CA-002,0.0,0.0,0.0,0.0,6085.211,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6156.011,0.0,0.0,0.0,0.0,0.0,76366.6675


In [36]:
# Standardize the feature table with sklearn's StandardScaler, then rebuild a
# DataFrame carrying the original sample index and feature column labels.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(fecal_table.values)
table_scaled = pd.DataFrame(
    data_scaled,
    index=fecal_table.index,
    columns=fecal_table.columns,
)

# Dimensions of the scaled table, followed by a preview.
print(table_scaled.shape)
table_scaled.head(n=3)

(167, 985)


Unnamed: 0_level_0,00130daba4ed7c3091cebe843923b656,0035e736dc271e0ffa69ce089124dd3a,00b990effdb1e3547be73fc221c47da9,00bf884546e085e1d7341e1b4d5b4c7f,00d4834e98bff02228bc6022ad218b38,013d75d35d7937c40332f8492e0ac45d,0178e96df0dc4d287475a41115a80ff5,018aa6775f054d00a3af02b1f142bd7f,019f30f6e7668fa412b214ddaa35c150,0285b154e241302fc53e9f2aa1cb74f2,...,fbda8d94f713b714f135c441853dc12d,fc6931386367002a15c8854e10f1411f,fc69c5f3c21f8a3c984509dfa751197a,fc81ed8373e77dc1051c32775cbe8156,fc9687d47db7ef11369d303f56b588c2,fc9ed967c73557234a3a59e5f3921dcb,fd166de6a32d83ea5d14ccf2bb4a04c9,ff56dd5a9e0f7171e04d3f576d03fd87,ff6c014126b685e755c395c10017f17d,fff3d0fb371c075ad5340cf5738683da
sampleid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TW-CO-002,-0.110911,-0.198953,-0.167157,-0.096873,-0.338103,-0.197523,-0.084783,-0.241498,-0.182904,-0.127075,...,-0.110074,-0.235743,-0.096713,1.39657,-0.122678,-0.312546,-0.187024,-0.111917,-0.108236,-0.135164
TW-BT-001,-0.110911,-0.198953,-0.167157,-0.096873,0.995705,-0.197523,-0.084783,-0.241498,1.210357,-0.127075,...,-0.110074,-0.235743,-0.096713,-0.262231,-0.122678,-0.312546,-0.187024,-0.111917,-0.108236,-0.135164
TW-CA-002,-0.110911,-0.198953,-0.167157,-0.096873,-0.373907,-0.197523,-0.084783,-0.241498,-0.182904,-0.127075,...,-0.110074,-0.235743,-0.096713,0.369418,-0.122678,-0.312546,-0.187024,-0.111917,-0.108236,7.17927


In [38]:
# Wrap the standardized table in a QIIME 2 artifact and write it to disk.
# NOTE(review): table_scaled contains negative values yet is imported under the
# FeatureTable[Frequency] semantic type - confirm downstream QIIME 2 steps accept this.
qiime_scaled_table = qiime2.Artifact.import_data(
    "FeatureTable[Frequency]",
    table_scaled,
)
qiime_scaled_table.save('./feature_tables/fecal-ft-matched-scaled.qza')

'./feature_tables/fecal-ft-matched-scaled.qza'

In [39]:
# Tab-separated copy of the standardized feature table.
table_scaled.to_csv(
    './feature_tables/fecal-ft-matched-scaled.tsv',
    sep='\t',
)

## Generating CLR Transformed Table

In [40]:
# Start again from the unnormalized matched table on disk.
fecal_table = pd.read_csv(
    './feature_tables/fecal-ft-matched.tsv',
    sep='\t',
    index_col=0,
)
fecal_table.head(n=3)

Unnamed: 0_level_0,00130daba4ed7c3091cebe843923b656,0035e736dc271e0ffa69ce089124dd3a,00b990effdb1e3547be73fc221c47da9,00bf884546e085e1d7341e1b4d5b4c7f,00d4834e98bff02228bc6022ad218b38,013d75d35d7937c40332f8492e0ac45d,0178e96df0dc4d287475a41115a80ff5,018aa6775f054d00a3af02b1f142bd7f,019f30f6e7668fa412b214ddaa35c150,0285b154e241302fc53e9f2aa1cb74f2,...,fbda8d94f713b714f135c441853dc12d,fc6931386367002a15c8854e10f1411f,fc69c5f3c21f8a3c984509dfa751197a,fc81ed8373e77dc1051c32775cbe8156,fc9687d47db7ef11369d303f56b588c2,fc9ed967c73557234a3a59e5f3921dcb,fd166de6a32d83ea5d14ccf2bb4a04c9,ff56dd5a9e0f7171e04d3f576d03fd87,ff6c014126b685e755c395c10017f17d,fff3d0fb371c075ad5340cf5738683da
sampleid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TW-CO-002,0.0,0.0,0.0,0.0,6801.6965,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,16166.5755,0.0,0.0,0.0,0.0,0.0,0.0
TW-BT-001,0.0,0.0,0.0,0.0,33493.3525,0.0,0.0,0.0,76748.562,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TW-CA-002,0.0,0.0,0.0,0.0,6085.211,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6156.011,0.0,0.0,0.0,0.0,0.0,76366.6675


In [41]:
# CLR-transform the feature table with scikit-bio: zeros are first handled by
# multiplicative replacement, then the centered log-ratio is taken.
data = fecal_table.values
data_mr = composition.multiplicative_replacement(data)
data_clr = composition.clr(data_mr)

# Re-attach the original sample index and feature column labels.
table_clr = pd.DataFrame(
    data_clr,
    index=fecal_table.index,
    columns=fecal_table.columns,
)
print(table_clr.shape)
table_clr.head(n=3)

(167, 985)


Unnamed: 0_level_0,00130daba4ed7c3091cebe843923b656,0035e736dc271e0ffa69ce089124dd3a,00b990effdb1e3547be73fc221c47da9,00bf884546e085e1d7341e1b4d5b4c7f,00d4834e98bff02228bc6022ad218b38,013d75d35d7937c40332f8492e0ac45d,0178e96df0dc4d287475a41115a80ff5,018aa6775f054d00a3af02b1f142bd7f,019f30f6e7668fa412b214ddaa35c150,0285b154e241302fc53e9f2aa1cb74f2,...,fbda8d94f713b714f135c441853dc12d,fc6931386367002a15c8854e10f1411f,fc69c5f3c21f8a3c984509dfa751197a,fc81ed8373e77dc1051c32775cbe8156,fc9687d47db7ef11369d303f56b588c2,fc9ed967c73557234a3a59e5f3921dcb,fd166de6a32d83ea5d14ccf2bb4a04c9,ff56dd5a9e0f7171e04d3f576d03fd87,ff6c014126b685e755c395c10017f17d,fff3d0fb371c075ad5340cf5738683da
sampleid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
TW-CO-002,-1.216757,-1.216757,-1.216757,-1.216757,6.153988,-1.216757,-1.216757,-1.216757,-1.216757,-1.216757,...,-1.216757,-1.216757,-1.216757,7.019762,-1.216757,-1.216757,-1.216757,-1.216757,-1.216757,-1.216757
TW-BT-001,-1.400544,-1.400544,-1.400544,-1.400544,6.991458,-1.400544,-1.400544,-1.400544,7.820646,-1.400544,...,-1.400544,-1.400544,-1.400544,-1.400544,-1.400544,-1.400544,-1.400544,-1.400544,-1.400544,-1.400544
TW-CA-002,-0.911016,-0.911016,-0.911016,-0.911016,6.331897,-0.911016,-0.911016,-0.911016,-0.911016,-0.911016,...,-0.911016,-0.911016,-0.911016,6.343465,-0.911016,-0.911016,-0.911016,-0.911016,-0.911016,8.861582


In [42]:
# Wrap the CLR-transformed table in a QIIME 2 artifact and write it to disk.
# NOTE(review): table_clr contains negative values yet is imported under the
# FeatureTable[Frequency] semantic type - confirm downstream QIIME 2 steps accept this.
qiime_clr_table = qiime2.Artifact.import_data(
    "FeatureTable[Frequency]",
    table_clr,
)
qiime_clr_table.save('./feature_tables/fecal-ft-matched-clr.qza')

# Tab-separated copy of the CLR-transformed feature table.
table_clr.to_csv(
    './feature_tables/fecal-ft-matched-clr.tsv',
    sep='\t',
)

## Exporting Songbird Differentials

In [43]:
# Load the songbird differentials artifact for the adv_fibrosis model and
# view it as a DataFrame.
sb_diff = qiime2.Artifact.load(
    'songbird_analysis/f_ATTRIBUTE_adv_fibrosis_dp_0.1_lr_0.0001_e_5000/differentials.qza'
)
sb_diffpd = sb_diff.view(pd.DataFrame)
sb_diffpd.head(n=3)

Unnamed: 0_level_0,Intercept,ATTRIBUTE_adv_fibrosis[T.1],ATTRIBUTE_adv_fibrosis[T.Missing: Not collected]
featureid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0035e736dc271e0ffa69ce089124dd3a,0.56053,0.683476,-0.095773
00d4834e98bff02228bc6022ad218b38,0.323889,0.604817,0.004227
018aa6775f054d00a3af02b1f142bd7f,-0.768673,-0.811345,0.004227


In [44]:
# Export the songbird differentials as a tab-separated file.
sb_diffpd.to_csv(
    './songbird_analysis/fecal-sb-differentials.tsv',
    sep='\t',
)