# MultiOmic Data Processing
Notebook for setting up MultiOmic and Combined feature tables. Any other data processing will also be done here.

In [1]:
import pandas as pd
import os
import numpy as np
import qiime2
import biom
from sklearn.preprocessing import StandardScaler

# Work from the multiomic_analysis directory so that the relative paths used in the
# rest of the notebook resolve. The location can be overridden by setting the
# MULTIOMIC_ANALYSIS_DIR environment variable; when it is unset the original
# hard-coded path is used, so existing behaviour is unchanged.
os.chdir(os.environ.get(
    'MULTIOMIC_ANALYSIS_DIR',
    '/Users/tgroth/Google Drive/knight_twin_NAFLD/multiomic_analysis',
))

## Creating Filtered Serum Table
Filtering serum table to only contain the best performing features

In [2]:
# Read the serum feature table (QIIME 2 artifact) and view it as a DataFrame;
# the preview shows samples as rows and hashed feature IDs as columns.
serum_table = qiime2.Artifact.load(
    '../serum_analysis/feature_tables/serum-ft-hashed-matched.qza'
)
serum_tablepd = serum_table.view(pd.DataFrame)
serum_tablepd.head(n=3)

Unnamed: 0,0004069a1f2c3f2b14511a4f367e485d,0013ada35a72573fde974722d6f46269,0074f0d3d22016bcdf28ee53c8a5062b,012ade16945709e69650957b8fdeb021,015bcc773d2fe77d5ddf3533aff00a9f,0171c4a4201b9604da3efc9305fc5264,017460a73612042af020d72a944567b4,01809bb355414a0015fd764cba9c5014,01a15bb425333e9bb45a57970a4fd744,01f940cc9606a206fc0f5f4be686142a,...,fe8cbb81d1abec83ba4cc2e9f82dd552,febe3e034a4a2cb784dec089610dd34c,fecd9732b400a12637130cb39f076b17,ff0f37b5dc7b583b0990818a3ca3bd85,ff47eed94e12e32213900023846a2b25,fface4fa9e4315823b284edf46814f27,ffc858cfa01fda2a2b2c0a1344a0b87a,ffe5d35f6eb7b6d40c0b901f6576ca3c,ffeb115b9cba8e7fde3b05c27b47a913,fff8ef7ac71077d6ddc2c538ad270385
TW-CO-002,55809.107,16961.251,76560.9505,8627.2465,9663.3085,173.5485,409293.0,16418.4475,62555.667,0.0,...,0.0,25607.2775,8736.1815,2628.3335,8261.911,115.805,200.1885,1438.5715,32596.2895,0.0
TW-BT-001,141374.9595,37664.9385,51482.278,42985.9195,944092.969,349.6905,3351079.0,29538.136,87325.661,0.0,...,0.0,53155.7155,22630.8375,5028.326,5144.877,219.7935,140.6745,2776.502,212928.942,0.0
TW-CA-002,14916.993,14055.226,40373.087,25469.6935,152745.316,169.015,683127.4,42557.0155,70843.1185,318.784,...,0.0,59003.971,29346.5395,2493.696,1345.321,0.0,2407450.0,7916.8855,22446.481,0.0


In [3]:
# Table of the best-performing serum features (one row per feature; the
# `hashedID` column holds the IDs used as column names in the serum table).
serum_feats = pd.read_csv(
    '../serum_analysis/ml_analysis/featImp_analysis/feat1-featimp-taxa.tsv',
    sep='\t',
)
serum_feats.head(n=3)

Unnamed: 0,feat importance,hashedID,featueID,kingdom,superclass,class,subclass,direct_parent
0,0.064646,0004069a1f2c3f2b14511a4f367e485d,1440,unclassified,unclassified,unclassified,unclassified,unclassified
1,0.023834,0013ada35a72573fde974722d6f46269,762,unclassified,unclassified,unclassified,unclassified,unclassified
2,0.049712,0074f0d3d22016bcdf28ee53c8a5062b,4423,unclassified,unclassified,unclassified,unclassified,unclassified


In [21]:
# Keep only the serum columns whose hashed ID is listed in the feature table.
serum_filtpd = serum_tablepd.filter(items=serum_feats['hashedID'], axis=1)
print(serum_filtpd.shape)

(167, 49)


In [22]:
# Save the filtered serum table as a tab-separated file (re-loaded in later sections).
serum_filtpd.to_csv(
    './feature_tables/serum-ft-mlfeat.tsv',
    sep='\t',
)

## Creating Filtered Fecal Table
Filtering fecal feature table to only contain the best performing machine learning features

In [4]:
# Read the fecal feature table (QIIME 2 artifact) and view it as a DataFrame;
# the preview shows samples as rows and hashed feature IDs as columns.
fecal_table = qiime2.Artifact.load(
    '../fecal_analysis/feature_tables/fecal-ft-matched.qza'
)
fecal_tablepd = fecal_table.view(pd.DataFrame)
fecal_tablepd.head(n=3)

Unnamed: 0,00130daba4ed7c3091cebe843923b656,0035e736dc271e0ffa69ce089124dd3a,00b990effdb1e3547be73fc221c47da9,00bf884546e085e1d7341e1b4d5b4c7f,00d4834e98bff02228bc6022ad218b38,013d75d35d7937c40332f8492e0ac45d,0178e96df0dc4d287475a41115a80ff5,018aa6775f054d00a3af02b1f142bd7f,019f30f6e7668fa412b214ddaa35c150,0285b154e241302fc53e9f2aa1cb74f2,...,fbda8d94f713b714f135c441853dc12d,fc6931386367002a15c8854e10f1411f,fc69c5f3c21f8a3c984509dfa751197a,fc81ed8373e77dc1051c32775cbe8156,fc9687d47db7ef11369d303f56b588c2,fc9ed967c73557234a3a59e5f3921dcb,fd166de6a32d83ea5d14ccf2bb4a04c9,ff56dd5a9e0f7171e04d3f576d03fd87,ff6c014126b685e755c395c10017f17d,fff3d0fb371c075ad5340cf5738683da
TW-CO-002,0.0,0.0,0.0,0.0,6801.6965,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,16166.5755,0.0,0.0,0.0,0.0,0.0,0.0
TW-BT-001,0.0,0.0,0.0,0.0,33493.3525,0.0,0.0,0.0,76748.562,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TW-CA-002,0.0,0.0,0.0,0.0,6085.211,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6156.011,0.0,0.0,0.0,0.0,0.0,76366.6675


In [5]:
# Table of the best-performing fecal features (one row per feature; the
# `hashedID` column holds the IDs used as column names in the fecal table).
fecal_feats = pd.read_csv(
    '../fecal_analysis/ml_analysis/sb_analysis/sb-topbot-featimp-taxa.tsv',
    sep='\t',
)
fecal_feats.head(n=3)

Unnamed: 0,feat importance,hashedID,featueID,ATTRIBUTE_adv_fibrosis[T.1],kingdom,superclass,class,subclass,direct_parent
0,0.020503,cdf8d23687cfc324485e2bd3e0094ff2,1019,4.605243,Organic compounds,Lipids and lipid-like molecules,Fatty Acyls,Fatty acid esters,Fatty acid esters
1,0.028817,fb758530086adf438d5ebe8f83847f2d,1022,4.28027,unclassified,unclassified,unclassified,unclassified,unclassified
2,0.014119,d2ef3361261cc1d835ad59394bfe161c,1018,4.253552,unclassified,unclassified,unclassified,unclassified,unclassified


In [23]:
# Keep only the fecal columns whose hashed ID is listed in the feature table.
fecal_filtpd = fecal_tablepd.filter(items=fecal_feats['hashedID'], axis=1)
print(fecal_filtpd.shape)

(167, 50)


In [24]:
# Save the filtered fecal table as a tab-separated file (re-loaded in later sections).
fecal_filtpd.to_csv(
    './feature_tables/fecal-ft-mlfeat.tsv',
    sep='\t',
)

## Creating Feature Filtered Microbiome Table
Creating a microbiome feature table that only contains the top 27 performing features as identified in a previous analysis. Will also rename the index to match our TX-XX-XXX/CIRXX-XXX format and filter samples to match the metabolite tables.

In [6]:
# Read the fecal microbiome table (QIIME 2 artifact) and view it as a DataFrame;
# the preview shows samples as rows and sequence-named features as columns.
micro_table = qiime2.Artifact.load(
    './feature_tables/reference-hit9k-wtax.qza'
)
micro_tablepd = micro_table.view(pd.DataFrame)
micro_tablepd.head(n=3)

Unnamed: 0,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGG,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGCTTCCCAAGTCCCTCTTAAAAGTGCGGGGCTTAACCCCGTGATGGGAAGGAAACTGGGAAGCTGGAGTATCGGAGAGGAAAGTGGAATTCCTAGT,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTCTGTCAAGTCGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCCGAAACTGGCAGGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGG,AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCCATGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCGGACGCTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGGTGTCTTGAGTACAGTAGAGGCAGGCGGAATTCGTGG,AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCTATGGGCTCAACCCATAAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGG,TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGATGCTCAACATCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCATAAAGTGGGCGGAATTCGTGG,TACGTAGGTGGCGAGCGTTATCCGGAATGATTGGGCGTAAAGGGTGCGTAGGTGGCAGAACAAGTCTGGAGTAAAAGGTATGGGCTCAACCCGTACTGGCTCTGGAAACTGTTCAGCTAGAGAACAGAAGAGGACGGCGGAACTCCATGT,TACGTAGGTGGCGAGCGTTATCCGGAATCATTGGGCGTAAAGAGGGAGCAGGCGGCCGCAAGGGTCTGTGGTGAAAGACCGAAGCTAAACTTCGGTAAGCCATGGAAACCGGGCGGCTAGAGTGCGGAAGAGGATCGTGGAATTCCATGT,AACGTAGGGTGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAAACCATGGGCTCAACCCATGAATTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGATGGAATTCCCGG,...,TACGTAGGTGGCAAGCGTTATCCGGATTCATTGGGCGTAAAGAGCGCGTAGGCGGCCTCTCAAGCGGGATCTCTAATCCGAGGGCTCAACCCCCGGCCGGATCCCGAACTGGGAGGCTCGAGTTCAGTAGAGGCAGGCGGAATTCCCGGT,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGCAGCCGGGCCGACAAGTCAGATGTGAAATCTGGAGGCTTAACCTCCAAACTGCATTTGAAACTGTAGGTCTTGAGTACCGGAGAGGTTATCGGAATTCCTTG,TACGGAGGATCCGAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCTGTGTAAGTCTGAAGTGAAAGGCATGGGCTCAACCTGTGGACTGCTTTGGAAACTGTGCAGCTAGAGTGTCGGAGAGGTAAGTGGAATTCCTAG,TACGGAGGATCCGAGCGTTATC
CGGATTTATTGGGTTTAAAGGGTGCGTAGACGGACTGGCAAGTCTGATGTGAAAGGCGGGGGCTCAACCCCTGGACTGCATTGGAAACTGTTAGTCTTGAGTGCCGGAGAGGTAAGCGGAATTCCTAG,TACGTATGGTGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCACGGCAAGCCAGATGTGAAAGCCCGGGGCTCAACCCCGGTACTGCATTGGAAACTGTCGGACTAGAGTGTCGGAGGGGTAAGTGGAATTCCTAG,TACGGAGGATCCGAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCTGTGTAAGTCTGAAGTGAAAGCCCGGGGCTCAACCCCGGGACTGCTTTGGAAACTGTTCATCTAGAGTGCTGGAGAGGTAAGTGGAATTCCTAG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGATGCAAGCCAGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCTTTGGAAACTATGCAGCTAGAGTGTCGGAGAGGTAAGTGGAATTCCCAG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCTTTGCAAGTCTGACGTGAAACTCCGGGGCTCAACTCCGGAACTGCGTTGGGAACTGTAAGGCTTGAGTGCCGGAGAGGTAAGCGGAATTCCTAG,TACGTATGGTGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGATTGGCAAGTTGGGAGTGAAATCTATGGGCTCAACCCATAAATTGCTTTCAAAACTGTCAGTCTTGAGTGGTGTAGAGGTAGGCGGAATTCCCGG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTAAGCAAGTCTGAAGTGAAAGCCCGGGGCTCAACCCCGGTACTGCTTTGGAAACTGTTTGACTTGAGTGCCGGAGAGGTAAGCGGAATTCCTAG
11635.CIR42.002,7886.0,484.0,131.0,60.0,28.0,37.0,22.0,24.0,20.0,22.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11635.TW.BE.001,66.0,20.0,2.0,41.0,305.0,424.0,0.0,0.0,0.0,96.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11635.FS.CIR2.001,6.0,0.0,0.0,0.0,485.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Feature-importance table for the microbiome model. Its `_feature_id` column
# holds the feature names; the preview shows it also lists non-sequence
# entries such as 'age'.
micro_feats = pd.read_csv(
    './imp.features.G1PvG3P_ageSexBMI.csv'
)
micro_feats.head(n=3)

Unnamed: 0,index,taxonomy,_feature_id,imp.score
0,TACGTAGGTGGCGAGCGTTATCCGGAATTATTGGGCGTAAAGAGGG...,k__Bacteria; p__Firmicutes; c__Erysipelotrichi...,TACGTAGGTGGCGAGCGTTATCCGGAATTATTGGGCGTAAAGAGGG...,0.124039
1,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAG...,k__Bacteria; p__Firmicutes; c__Clostridia; o__...,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAG...,0.084473
2,age,,age,0.073885


In [25]:
# Rename the microbiome sample IDs so they match the metabolite tables.
# Raw IDs look like '<study>.<sample>' with dots inside the sample part
# (e.g. '11635.TW.BE.001'), whereas the metabolite tables use 'TW-BE-001'.
# Everything up to and including the first '.' is dropped, rather than slicing
# a fixed 6 characters, so a prefix of a different length is not silently
# mangled. An ID without any '.' is kept as-is.
old_index = micro_tablepd.index
new_index = [idx.split('.', 1)[-1].replace('.', '-') for idx in old_index]
# Rebuild the frame with the same columns and values under the new row labels.
micro_reidx = pd.DataFrame(index=new_index, columns=micro_tablepd.columns, data=micro_tablepd.values)
micro_reidx.head(3)

Unnamed: 0,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTTGTTAAGTCAGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCTGATACTGGCAAGCTTGAGTCTCGTAGAGGGGGGTAGAATTCCAGG,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGCTTCCCAAGTCCCTCTTAAAAGTGCGGGGCTTAACCCCGTGATGGGAAGGAAACTGGGAAGCTGGAGTATCGGAGAGGAAAGTGGAATTCCTAGT,TACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTCTGTCAAGTCGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATCCGAAACTGGCAGGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGG,AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCCATGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCGGACGCTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGGTGTCTTGAGTACAGTAGAGGCAGGCGGAATTCGTGG,AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCTATGGGCTCAACCCATAAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGG,TACGGAAGGTCCGGGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGCCGGAGATTAAGCGTGTTGTGAAATGTAGATGCTCAACATCTGCACTGCAGCGCGAACTGGTTTCCTTGAGTACGCATAAAGTGGGCGGAATTCGTGG,TACGTAGGTGGCGAGCGTTATCCGGAATGATTGGGCGTAAAGGGTGCGTAGGTGGCAGAACAAGTCTGGAGTAAAAGGTATGGGCTCAACCCGTACTGGCTCTGGAAACTGTTCAGCTAGAGAACAGAAGAGGACGGCGGAACTCCATGT,TACGTAGGTGGCGAGCGTTATCCGGAATCATTGGGCGTAAAGAGGGAGCAGGCGGCCGCAAGGGTCTGTGGTGAAAGACCGAAGCTAAACTTCGGTAAGCCATGGAAACCGGGCGGCTAGAGTGCGGAAGAGGATCGTGGAATTCCATGT,AACGTAGGGTGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAAACCATGGGCTCAACCCATGAATTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGATGGAATTCCCGG,...,TACGTAGGTGGCAAGCGTTATCCGGATTCATTGGGCGTAAAGAGCGCGTAGGCGGCCTCTCAAGCGGGATCTCTAATCCGAGGGCTCAACCCCCGGCCGGATCCCGAACTGGGAGGCTCGAGTTCAGTAGAGGCAGGCGGAATTCCCGGT,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGCAGCCGGGCCGACAAGTCAGATGTGAAATCTGGAGGCTTAACCTCCAAACTGCATTTGAAACTGTAGGTCTTGAGTACCGGAGAGGTTATCGGAATTCCTTG,TACGGAGGATCCGAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCTGTGTAAGTCTGAAGTGAAAGGCATGGGCTCAACCTGTGGACTGCTTTGGAAACTGTGCAGCTAGAGTGTCGGAGAGGTAAGTGGAATTCCTAG,TACGGAGGATCCGAGCGTTATC
CGGATTTATTGGGTTTAAAGGGTGCGTAGACGGACTGGCAAGTCTGATGTGAAAGGCGGGGGCTCAACCCCTGGACTGCATTGGAAACTGTTAGTCTTGAGTGCCGGAGAGGTAAGCGGAATTCCTAG,TACGTATGGTGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCACGGCAAGCCAGATGTGAAAGCCCGGGGCTCAACCCCGGTACTGCATTGGAAACTGTCGGACTAGAGTGTCGGAGGGGTAAGTGGAATTCCTAG,TACGGAGGATCCGAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCTGTGTAAGTCTGAAGTGAAAGCCCGGGGCTCAACCCCGGGACTGCTTTGGAAACTGTTCATCTAGAGTGCTGGAGAGGTAAGTGGAATTCCTAG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGATGCAAGCCAGATGTGAAAGCCCGGGGCTCAACCCCGGGACTGCTTTGGAAACTATGCAGCTAGAGTGTCGGAGAGGTAAGTGGAATTCCCAG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCTTTGCAAGTCTGACGTGAAACTCCGGGGCTCAACTCCGGAACTGCGTTGGGAACTGTAAGGCTTGAGTGCCGGAGAGGTAAGCGGAATTCCTAG,TACGTATGGTGCAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGATTGGCAAGTTGGGAGTGAAATCTATGGGCTCAACCCATAAATTGCTTTCAAAACTGTCAGTCTTGAGTGGTGTAGAGGTAGGCGGAATTCCCGG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTAAGCAAGTCTGAAGTGAAAGCCCGGGGCTCAACCCCGGTACTGCTTTGGAAACTGTTTGACTTGAGTGCCGGAGAGGTAAGCGGAATTCCTAG
CIR42-002,7886.0,484.0,131.0,60.0,28.0,37.0,22.0,24.0,20.0,22.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TW-BE-001,66.0,20.0,2.0,41.0,305.0,424.0,0.0,0.0,0.0,96.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
FS-CIR2-001,6.0,0.0,0.0,0.0,485.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
# Restrict the microbiome rows to the sample IDs found in the fecal metabolite table.
samples = fecal_tablepd.index
micro_sfilt = micro_reidx.filter(items=samples, axis='index')
micro_sfilt.shape

(167, 5709)

In [29]:
# Keep only the microbiome columns named in the feature-importance table.
# Entries of `_feature_id` that are not columns of the table are ignored by
# `filter(items=...)`.
micro_ffilt = micro_sfilt.filter(items=micro_feats['_feature_id'], axis=1)
print(micro_ffilt.shape)

(167, 27)


In [30]:
# Save the sample- and feature-filtered microbiome table as a tab-separated file.
micro_ffilt.to_csv(
    './feature_tables/microbiome-ft-mlfeat.tsv',
    sep='\t',
)

## Creating Combined Table of Fecal and Serum Features

In [16]:
# Rebuild the filtered fecal and serum tables in memory from the full tables.
# NOTE(review): these are the same filters applied in the two "Creating Filtered
# ... Table" sections above, and the next cell reloads both tables from disk.
fecal_filtpd = fecal_tablepd.filter(items=fecal_feats['hashedID'], axis=1)
print(fecal_filtpd.shape)

serum_filtpd = serum_tablepd.filter(items=serum_feats['hashedID'], axis=1)
print(serum_filtpd.shape)

(167, 50)
(167, 49)


In [33]:
# Read the saved filtered metabolite tables back in; the first column of each
# file (the sample IDs) becomes the index.
fecal_filtpd = pd.read_csv(
    './feature_tables/fecal-ft-mlfeat.tsv',
    sep='\t',
    index_col=0,
)
print(fecal_filtpd.shape)
serum_filtpd = pd.read_csv(
    './feature_tables/serum-ft-mlfeat.tsv',
    sep='\t',
    index_col=0,
)
print(serum_filtpd.shape)

(167, 50)
(167, 49)


In [34]:
# Place the serum and fecal feature columns side by side (serum first).
# pd.concat aligns the rows on the sample index; the printed shape should keep
# the same number of rows as the inputs.
combined_tablepd = pd.concat(
    [serum_filtpd, fecal_filtpd],
    axis='columns',
)
print(combined_tablepd.shape)
combined_tablepd.head(n=3)

(167, 99)


Unnamed: 0,0004069a1f2c3f2b14511a4f367e485d,0013ada35a72573fde974722d6f46269,0074f0d3d22016bcdf28ee53c8a5062b,012ade16945709e69650957b8fdeb021,015bcc773d2fe77d5ddf3533aff00a9f,0171c4a4201b9604da3efc9305fc5264,017460a73612042af020d72a944567b4,01809bb355414a0015fd764cba9c5014,01a15bb425333e9bb45a57970a4fd744,01f940cc9606a206fc0f5f4be686142a,...,d49b933216498ccb080d8aa7ce5d3902,35def52a34fbd75d08ef99a87b5251da,c105049a81dcab19d86f141a691ab5fb,28a02f8e5fcae1d3b8d41678bf1276fd,2ed762ffceda0c2dc67e9ae95d1fb858,178929d3e7f149a19dbd6b0edc66633b,076048a24f2e01c5f6b07bfcde6bbcd5,d2db4c8806f5f6a40fee293cb4f88f7d,7d77e5a1807f89e7ee7cf109a12d0048,8e1376b2f43f2b926aa075c7f8c32783
TW-CO-002,55809.107,16961.251,76560.9505,8627.2465,9663.3085,173.5485,409293.0,16418.4475,62555.667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
TW-BT-001,141374.9595,37664.9385,51482.278,42985.9195,944092.969,349.6905,3351079.0,29538.136,87325.661,0.0,...,104090.6035,0.0,5868.7155,28165.106,0.0,0.0,27263.175,0.0,0.0,21152.0465
TW-CA-002,14916.993,14055.226,40373.087,25469.6935,152745.316,169.015,683127.4,42557.0155,70843.1185,318.784,...,0.0,0.0,6582.587,0.0,0.0,0.0,5959.218,0.0,0.0,567411.4285


In [42]:
# Save the unscaled combined serum + fecal table as a tab-separated file.
combined_tablepd.to_csv(
    './feature_tables/combined-ft.tsv',
    sep='\t',
)

In [35]:
# Standardise every column of the combined serum + fecal table with a single
# StandardScaler, then wrap the result back into a DataFrame that keeps the
# original sample index and feature names.
# NOTE(review): the scaler is fitted on all rows of the table; if a train/test
# split is made downstream, confirm that this is intended.
scaler = StandardScaler()
data_scaled = scaler.fit(combined_tablepd.values).transform(combined_tablepd.values)
combined_scaled = pd.DataFrame(
    data_scaled,
    index=combined_tablepd.index,
    columns=combined_tablepd.columns,
)
combined_scaled.head(n=3)

Unnamed: 0,0004069a1f2c3f2b14511a4f367e485d,0013ada35a72573fde974722d6f46269,0074f0d3d22016bcdf28ee53c8a5062b,012ade16945709e69650957b8fdeb021,015bcc773d2fe77d5ddf3533aff00a9f,0171c4a4201b9604da3efc9305fc5264,017460a73612042af020d72a944567b4,01809bb355414a0015fd764cba9c5014,01a15bb425333e9bb45a57970a4fd744,01f940cc9606a206fc0f5f4be686142a,...,d49b933216498ccb080d8aa7ce5d3902,35def52a34fbd75d08ef99a87b5251da,c105049a81dcab19d86f141a691ab5fb,28a02f8e5fcae1d3b8d41678bf1276fd,2ed762ffceda0c2dc67e9ae95d1fb858,178929d3e7f149a19dbd6b0edc66633b,076048a24f2e01c5f6b07bfcde6bbcd5,d2db4c8806f5f6a40fee293cb4f88f7d,7d77e5a1807f89e7ee7cf109a12d0048,8e1376b2f43f2b926aa075c7f8c32783
TW-CO-002,1.057551,0.71369,0.101371,0.133053,-0.286215,-0.318617,0.065534,-0.279881,0.117167,-0.301339,...,-0.156162,-0.199077,-0.213105,-0.19671,-0.211812,-0.192874,-0.213467,-0.144658,-0.350927,-0.184709
TW-BT-001,4.324877,3.21735,-0.021519,3.134937,6.154245,-0.31619,4.300775,0.274048,0.371661,-0.301339,...,8.26051,-0.199077,0.179278,2.081977,-0.211812,-0.192874,1.263011,-0.144658,-0.350927,0.189496
TW-CA-002,-0.503911,0.362269,-0.075956,1.604561,0.699963,-0.31868,0.459769,0.823721,0.202315,-0.280759,...,-0.156162,-0.199077,0.227008,-0.19671,-0.211812,-0.192874,0.109264,-0.144658,-0.350927,9.853495


In [36]:
# Save the scaled combined table as a tab-separated file.
combined_scaled.to_csv(
    './feature_tables/combined-ft-scaled.tsv',
    sep='\t',
)

## Creating Full MultiOmic Table
Creating table with fecal metabolites, serum metabolites and fecal microbiome

In [40]:
# Read the three saved filtered tables (fecal metabolites, serum metabolites,
# fecal microbiome); the first column of each file becomes the index.
fecal_filtpd = pd.read_csv(
    './feature_tables/fecal-ft-mlfeat.tsv',
    sep='\t',
    index_col=0,
)
print(fecal_filtpd.shape)

serum_filtpd = pd.read_csv(
    './feature_tables/serum-ft-mlfeat.tsv',
    sep='\t',
    index_col=0,
)
print(serum_filtpd.shape)

micro_filtpd = pd.read_csv(
    './feature_tables/microbiome-ft-mlfeat.tsv',
    sep='\t',
    index_col=0,
)
print(micro_filtpd.shape)

(167, 50)
(167, 49)
(167, 27)


In [41]:
# Place the fecal, serum and microbiome feature columns side by side (in that
# order). pd.concat aligns the rows on the sample index; the printed shape
# should keep the same number of rows as the inputs.
full_multi_tablepd = pd.concat(
    [fecal_filtpd, serum_filtpd, micro_filtpd],
    axis='columns',
)
print(full_multi_tablepd.shape)
full_multi_tablepd.head(n=3)

(167, 126)


Unnamed: 0,cdf8d23687cfc324485e2bd3e0094ff2,fb758530086adf438d5ebe8f83847f2d,d2ef3361261cc1d835ad59394bfe161c,0ba75f593218ef056eefaeab285ae839,3d7f155024af16ecbe4c9baefdd571be,c0095c69fee5c767903814b366fcb860,54e3ef405d9d82cfed4858f3191c2522,6ba039413a4b798ba234b05c568ff300,53cb1796cbb423bcd59853acdf63befe,f7dd0dbd9e8f975d44295498a1c3aa85,...,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGATAGGTCAGTCTGTCTTAAAAGTTCGGGGCTTAACCCCGTGATGGGATGGAAACTGCCAATCTAGAGTATCGGAGAGGAAAGTGGAATTCCTAGT,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTAAGCAAGTCTGAAGTGAAAGCCCGGGGCTCAACCCCGGTACTGCTTTGGAAACTGTTTGACTTGAGTGCAGGAGAGGTAAGTGGAATTCCTAG,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGCAGCCGGGCCGGCAAGTCAGATGTGAAATCTGGAGGCTTAACCTCCAAACTGCATTTGAAACTGTAGGTCTTGAGTACCGGAGAGGTTATCGGAATTCCTTG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGAAGAGCAAGTCTGATGTGAAAGGCTGGGGCTTAACCCCAGGACTGCATTGGAAACTGTTTTTCTAGAGTGCCGGAGAGGTAAGCGGAATTCCTAG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG,TACGTAGGTGGCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTGCGTAGGTGGCGTACTAAGTCTGTAGTAAAAGGCAATGGCTCAACCATTGTAAGCTATGGAAACTGGTATGCTGGAGTGCAGAAGAGGGCGATGGAATTCCATGT,TACGTAGGTGGCGAGCGTTATCCGGAATTATTGGGCGTAAAGAGGGAGCAGGCGGCGGCAGAGGTCTGTGGTGAAAGACTGAAGCTTAACTTCAGTAAGCCATAGAAACCGGGCTGCTAGAGTGCAGGAGAGGATCGTGGAATTCCATGT,AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCCATGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATGTCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGAAGCAAGTCTGAAGTGAAAACCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTTTTGCTAGAGTGTCGGAGAGGTAAGTGGAATTCCTAG
TW-CO-002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,14.0,0.0,19.0,10.0,0.0,0.0
TW-BT-001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0
TW-CA-002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,8.0,3.0,0.0,0.0


In [43]:
# Save the unscaled full multi-omic table as a tab-separated file.
full_multi_tablepd.to_csv(
    './feature_tables/full-multiomic-ft.tsv',
    sep='\t',
)

In [45]:
# Standardise every column of the full multi-omic feature table with a single
# StandardScaler, then wrap the result back into a DataFrame that keeps the
# original sample index and feature names.
# NOTE(review): the scaler is fitted on all rows of the table; if a train/test
# split is made downstream, confirm that this is intended.
scaler = StandardScaler()
data_scaled = scaler.fit(full_multi_tablepd.values).transform(full_multi_tablepd.values)
full_multi_scaled = pd.DataFrame(
    data_scaled,
    index=full_multi_tablepd.index,
    columns=full_multi_tablepd.columns,
)
full_multi_scaled.head(n=3)

Unnamed: 0,cdf8d23687cfc324485e2bd3e0094ff2,fb758530086adf438d5ebe8f83847f2d,d2ef3361261cc1d835ad59394bfe161c,0ba75f593218ef056eefaeab285ae839,3d7f155024af16ecbe4c9baefdd571be,c0095c69fee5c767903814b366fcb860,54e3ef405d9d82cfed4858f3191c2522,6ba039413a4b798ba234b05c568ff300,53cb1796cbb423bcd59853acdf63befe,f7dd0dbd9e8f975d44295498a1c3aa85,...,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGATAGGTCAGTCTGTCTTAAAAGTTCGGGGCTTAACCCCGTGATGGGATGGAAACTGCCAATCTAGAGTATCGGAGAGGAAAGTGGAATTCCTAGT,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTAAGCAAGTCTGAAGTGAAAGCCCGGGGCTCAACCCCGGTACTGCTTTGGAAACTGTTTGACTTGAGTGCAGGAGAGGTAAGTGGAATTCCTAG,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGCAGCCGGGCCGGCAAGTCAGATGTGAAATCTGGAGGCTTAACCTCCAAACTGCATTTGAAACTGTAGGTCTTGAGTACCGGAGAGGTTATCGGAATTCCTTG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGAAGAGCAAGTCTGATGTGAAAGGCTGGGGCTTAACCCCAGGACTGCATTGGAAACTGTTTTTCTAGAGTGCCGGAGAGGTAAGCGGAATTCCTAG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG,TACGTAGGTGGCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTGCGTAGGTGGCGTACTAAGTCTGTAGTAAAAGGCAATGGCTCAACCATTGTAAGCTATGGAAACTGGTATGCTGGAGTGCAGAAGAGGGCGATGGAATTCCATGT,TACGTAGGTGGCGAGCGTTATCCGGAATTATTGGGCGTAAAGAGGGAGCAGGCGGCGGCAGAGGTCTGTGGTGAAAGACTGAAGCTTAACTTCAGTAAGCCATAGAAACCGGGCTGCTAGAGTGCAGGAGAGGATCGTGGAATTCCATGT,AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCCATGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATGTCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGAAGCAAGTCTGAAGTGAAAACCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTTTTGCTAGAGTGTCGGAGAGGTAAGTGGAATTCCTAG
TW-CO-002,-0.097968,-0.129545,-0.109753,-0.113338,-0.197325,-0.202576,-0.235192,-0.261255,-0.260321,-0.243276,...,-0.090252,-0.139397,-0.481841,-0.166574,-0.409194,-0.284873,0.638761,-0.418581,-0.24482,-0.281017
TW-BT-001,-0.097968,-0.129545,-0.109753,-0.113338,-0.197325,-0.202576,-0.235192,-0.261255,-0.260321,-0.243276,...,-0.090252,-0.139397,-0.481841,-0.166574,-0.462937,-0.284873,-0.453259,-0.464552,-0.238921,-0.281017
TW-CA-002,-0.097968,-0.129545,-0.109753,-0.113338,-0.197325,-0.202576,-0.235192,-0.261255,-0.260321,-0.243276,...,-0.090252,-0.139397,-0.481841,-0.166574,-0.471895,-0.284873,0.006539,-0.450761,-0.24482,-0.281017


In [47]:
# exporting scaled table
full_multi_scaled.to_csv(
    './feature_tables/full-multiomic-ft-scaled.tsv',
    sep='\t',
)

## Creating Serum Multi-Omic Table

In [48]:
# Read the saved filtered serum and microbiome tables; the first column of
# each file becomes the index.
serum_filtpd = pd.read_csv(
    './feature_tables/serum-ft-mlfeat.tsv',
    sep='\t',
    index_col=0,
)
print(serum_filtpd.shape)

micro_filtpd = pd.read_csv(
    './feature_tables/microbiome-ft-mlfeat.tsv',
    sep='\t',
    index_col=0,
)
print(micro_filtpd.shape)

(167, 49)
(167, 27)


In [49]:
# Place the serum and microbiome feature columns side by side (serum first).
# pd.concat aligns the rows on the sample index; the printed shape should keep
# the same number of rows as the inputs.
serum_multi_tablepd = pd.concat(
    [serum_filtpd, micro_filtpd],
    axis='columns',
)
print(serum_multi_tablepd.shape)
serum_multi_tablepd.head(n=3)

(167, 76)


Unnamed: 0,0004069a1f2c3f2b14511a4f367e485d,0013ada35a72573fde974722d6f46269,0074f0d3d22016bcdf28ee53c8a5062b,012ade16945709e69650957b8fdeb021,015bcc773d2fe77d5ddf3533aff00a9f,0171c4a4201b9604da3efc9305fc5264,017460a73612042af020d72a944567b4,01809bb355414a0015fd764cba9c5014,01a15bb425333e9bb45a57970a4fd744,01f940cc9606a206fc0f5f4be686142a,...,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGATAGGTCAGTCTGTCTTAAAAGTTCGGGGCTTAACCCCGTGATGGGATGGAAACTGCCAATCTAGAGTATCGGAGAGGAAAGTGGAATTCCTAGT,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTAAGCAAGTCTGAAGTGAAAGCCCGGGGCTCAACCCCGGTACTGCTTTGGAAACTGTTTGACTTGAGTGCAGGAGAGGTAAGTGGAATTCCTAG,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGCAGCCGGGCCGGCAAGTCAGATGTGAAATCTGGAGGCTTAACCTCCAAACTGCATTTGAAACTGTAGGTCTTGAGTACCGGAGAGGTTATCGGAATTCCTTG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGAAGAGCAAGTCTGATGTGAAAGGCTGGGGCTTAACCCCAGGACTGCATTGGAAACTGTTTTTCTAGAGTGCCGGAGAGGTAAGCGGAATTCCTAG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG,TACGTAGGTGGCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTGCGTAGGTGGCGTACTAAGTCTGTAGTAAAAGGCAATGGCTCAACCATTGTAAGCTATGGAAACTGGTATGCTGGAGTGCAGAAGAGGGCGATGGAATTCCATGT,TACGTAGGTGGCGAGCGTTATCCGGAATTATTGGGCGTAAAGAGGGAGCAGGCGGCGGCAGAGGTCTGTGGTGAAAGACTGAAGCTTAACTTCAGTAAGCCATAGAAACCGGGCTGCTAGAGTGCAGGAGAGGATCGTGGAATTCCATGT,AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCCATGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATGTCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGAAGCAAGTCTGAAGTGAAAACCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTTTTGCTAGAGTGTCGGAGAGGTAAGTGGAATTCCTAG
TW-CO-002,55809.107,16961.251,76560.9505,8627.2465,9663.3085,173.5485,409293.0,16418.4475,62555.667,0.0,...,0.0,0.0,0.0,0.0,14.0,0.0,19.0,10.0,0.0,0.0
TW-BT-001,141374.9595,37664.9385,51482.278,42985.9195,944092.969,349.6905,3351079.0,29538.136,87325.661,0.0,...,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0
TW-CA-002,14916.993,14055.226,40373.087,25469.6935,152745.316,169.015,683127.4,42557.0155,70843.1185,318.784,...,0.0,0.0,0.0,0.0,0.0,0.0,8.0,3.0,0.0,0.0


In [52]:
# Save the unscaled serum multi-omic table as a tab-separated file.
serum_multi_tablepd.to_csv(
    './feature_tables/serum-multiomic-ft.tsv',
    sep='\t',
)

In [51]:
# Standardise every column of the serum multi-omic table (serum metabolites +
# microbiome) with a single StandardScaler, then wrap the result back into a
# DataFrame that keeps the original sample index and feature names.
# NOTE(review): the scaler is fitted on all rows of the table; if a train/test
# split is made downstream, confirm that this is intended.
scaler = StandardScaler()
data_scaled = scaler.fit(serum_multi_tablepd.values).transform(serum_multi_tablepd.values)
serum_multi_scaled = pd.DataFrame(
    data_scaled,
    index=serum_multi_tablepd.index,
    columns=serum_multi_tablepd.columns,
)
serum_multi_scaled.head(n=3)

Unnamed: 0,0004069a1f2c3f2b14511a4f367e485d,0013ada35a72573fde974722d6f46269,0074f0d3d22016bcdf28ee53c8a5062b,012ade16945709e69650957b8fdeb021,015bcc773d2fe77d5ddf3533aff00a9f,0171c4a4201b9604da3efc9305fc5264,017460a73612042af020d72a944567b4,01809bb355414a0015fd764cba9c5014,01a15bb425333e9bb45a57970a4fd744,01f940cc9606a206fc0f5f4be686142a,...,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGATAGGTCAGTCTGTCTTAAAAGTTCGGGGCTTAACCCCGTGATGGGATGGAAACTGCCAATCTAGAGTATCGGAGAGGAAAGTGGAATTCCTAGT,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTAAGCAAGTCTGAAGTGAAAGCCCGGGGCTCAACCCCGGTACTGCTTTGGAAACTGTTTGACTTGAGTGCAGGAGAGGTAAGTGGAATTCCTAG,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGCAGCCGGGCCGGCAAGTCAGATGTGAAATCTGGAGGCTTAACCTCCAAACTGCATTTGAAACTGTAGGTCTTGAGTACCGGAGAGGTTATCGGAATTCCTTG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGAAGAGCAAGTCTGATGTGAAAGGCTGGGGCTTAACCCCAGGACTGCATTGGAAACTGTTTTTCTAGAGTGCCGGAGAGGTAAGCGGAATTCCTAG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG,TACGTAGGTGGCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTGCGTAGGTGGCGTACTAAGTCTGTAGTAAAAGGCAATGGCTCAACCATTGTAAGCTATGGAAACTGGTATGCTGGAGTGCAGAAGAGGGCGATGGAATTCCATGT,TACGTAGGTGGCGAGCGTTATCCGGAATTATTGGGCGTAAAGAGGGAGCAGGCGGCGGCAGAGGTCTGTGGTGAAAGACTGAAGCTTAACTTCAGTAAGCCATAGAAACCGGGCTGCTAGAGTGCAGGAGAGGATCGTGGAATTCCATGT,AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCCATGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATGTCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGAAGCAAGTCTGAAGTGAAAACCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTTTTGCTAGAGTGTCGGAGAGGTAAGTGGAATTCCTAG
TW-CO-002,1.057551,0.71369,0.101371,0.133053,-0.286215,-0.318617,0.065534,-0.279881,0.117167,-0.301339,...,-0.090252,-0.139397,-0.481841,-0.166574,-0.409194,-0.284873,0.638761,-0.418581,-0.24482,-0.281017
TW-BT-001,4.324877,3.21735,-0.021519,3.134937,6.154245,-0.31619,4.300775,0.274048,0.371661,-0.301339,...,-0.090252,-0.139397,-0.481841,-0.166574,-0.462937,-0.284873,-0.453259,-0.464552,-0.238921,-0.281017
TW-CA-002,-0.503911,0.362269,-0.075956,1.604561,0.699963,-0.31868,0.459769,0.823721,0.202315,-0.280759,...,-0.090252,-0.139397,-0.481841,-0.166574,-0.471895,-0.284873,0.006539,-0.450761,-0.24482,-0.281017


In [53]:
# Save the scaled serum multi-omic table as a tab-separated file.
serum_multi_scaled.to_csv(
    './feature_tables/serum-multiomic-ft-scaled.tsv',
    sep='\t',
)

## Creating Fecal MultiOmic Table

In [54]:
# Read the saved filtered fecal and microbiome tables; the first column of
# each file becomes the index.
fecal_filtpd = pd.read_csv(
    './feature_tables/fecal-ft-mlfeat.tsv',
    sep='\t',
    index_col=0,
)
print(fecal_filtpd.shape)

micro_filtpd = pd.read_csv(
    './feature_tables/microbiome-ft-mlfeat.tsv',
    sep='\t',
    index_col=0,
)
print(micro_filtpd.shape)

(167, 50)
(167, 27)


In [55]:
# Place the fecal and microbiome feature columns side by side (fecal first).
# pd.concat aligns the rows on the sample index; the printed shape should keep
# the same number of rows as the inputs.
fecal_multi_tablepd = pd.concat(
    [fecal_filtpd, micro_filtpd],
    axis='columns',
)
print(fecal_multi_tablepd.shape)
fecal_multi_tablepd.head(n=3)

(167, 77)


Unnamed: 0,cdf8d23687cfc324485e2bd3e0094ff2,fb758530086adf438d5ebe8f83847f2d,d2ef3361261cc1d835ad59394bfe161c,0ba75f593218ef056eefaeab285ae839,3d7f155024af16ecbe4c9baefdd571be,c0095c69fee5c767903814b366fcb860,54e3ef405d9d82cfed4858f3191c2522,6ba039413a4b798ba234b05c568ff300,53cb1796cbb423bcd59853acdf63befe,f7dd0dbd9e8f975d44295498a1c3aa85,...,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGATAGGTCAGTCTGTCTTAAAAGTTCGGGGCTTAACCCCGTGATGGGATGGAAACTGCCAATCTAGAGTATCGGAGAGGAAAGTGGAATTCCTAGT,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTAAGCAAGTCTGAAGTGAAAGCCCGGGGCTCAACCCCGGTACTGCTTTGGAAACTGTTTGACTTGAGTGCAGGAGAGGTAAGTGGAATTCCTAG,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGCAGCCGGGCCGGCAAGTCAGATGTGAAATCTGGAGGCTTAACCTCCAAACTGCATTTGAAACTGTAGGTCTTGAGTACCGGAGAGGTTATCGGAATTCCTTG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGAAGAGCAAGTCTGATGTGAAAGGCTGGGGCTTAACCCCAGGACTGCATTGGAAACTGTTTTTCTAGAGTGCCGGAGAGGTAAGCGGAATTCCTAG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG,TACGTAGGTGGCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTGCGTAGGTGGCGTACTAAGTCTGTAGTAAAAGGCAATGGCTCAACCATTGTAAGCTATGGAAACTGGTATGCTGGAGTGCAGAAGAGGGCGATGGAATTCCATGT,TACGTAGGTGGCGAGCGTTATCCGGAATTATTGGGCGTAAAGAGGGAGCAGGCGGCGGCAGAGGTCTGTGGTGAAAGACTGAAGCTTAACTTCAGTAAGCCATAGAAACCGGGCTGCTAGAGTGCAGGAGAGGATCGTGGAATTCCATGT,AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCCATGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATGTCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGAAGCAAGTCTGAAGTGAAAACCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTTTTGCTAGAGTGTCGGAGAGGTAAGTGGAATTCCTAG
TW-CO-002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,14.0,0.0,19.0,10.0,0.0,0.0
TW-BT-001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,2.0,0.0
TW-CA-002,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,8.0,3.0,0.0,0.0


In [56]:
# Save the unscaled fecal multi-omic table as a tab-separated file.
fecal_multi_tablepd.to_csv(
    './feature_tables/fecal-multiomic-ft.tsv',
    sep='\t',
)

In [57]:
# Standardise every column of the fecal multi-omic table (fecal metabolites +
# microbiome) with a single StandardScaler, then wrap the result back into a
# DataFrame that keeps the original sample index and feature names.
# NOTE(review): the scaler is fitted on all rows of the table; if a train/test
# split is made downstream, confirm that this is intended.
scaler = StandardScaler()
data_scaled = scaler.fit(fecal_multi_tablepd.values).transform(fecal_multi_tablepd.values)
fecal_multi_scaled = pd.DataFrame(
    data_scaled,
    index=fecal_multi_tablepd.index,
    columns=fecal_multi_tablepd.columns,
)
fecal_multi_scaled.head(n=3)

Unnamed: 0,cdf8d23687cfc324485e2bd3e0094ff2,fb758530086adf438d5ebe8f83847f2d,d2ef3361261cc1d835ad59394bfe161c,0ba75f593218ef056eefaeab285ae839,3d7f155024af16ecbe4c9baefdd571be,c0095c69fee5c767903814b366fcb860,54e3ef405d9d82cfed4858f3191c2522,6ba039413a4b798ba234b05c568ff300,53cb1796cbb423bcd59853acdf63befe,f7dd0dbd9e8f975d44295498a1c3aa85,...,TACGTAGGTGGCAAGCGTTGTCCGGAATTATTGGGCGTAAAGCGCGCGCAGGCGGATAGGTCAGTCTGTCTTAAAAGTTCGGGGCTTAACCCCGTGATGGGATGGAAACTGCCAATCTAGAGTATCGGAGAGGAAAGTGGAATTCCTAGT,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGTTAAGCAAGTCTGAAGTGAAAGCCCGGGGCTCAACCCCGGTACTGCTTTGGAAACTGTTTGACTTGAGTGCAGGAGAGGTAAGTGGAATTCCTAG,TACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGTGTAAAGGGCGTGCAGCCGGGCCGGCAAGTCAGATGTGAAATCTGGAGGCTTAACCTCCAAACTGCATTTGAAACTGTAGGTCTTGAGTACCGGAGAGGTTATCGGAATTCCTTG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGAAGAGCAAGTCTGATGTGAAAGGCTGGGGCTTAACCCCAGGACTGCATTGGAAACTGTTTTTCTAGAGTGCCGGAGAGGTAAGCGGAATTCCTAG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGGTGGATTGTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGAAACTGGCAGTCTTGAGTACAGTAGAGGTGGGCGGAATTCGTGG,TACGTAGGTGGCAAGCGTTATCCGGAATCATTGGGCGTAAAGGGTGCGTAGGTGGCGTACTAAGTCTGTAGTAAAAGGCAATGGCTCAACCATTGTAAGCTATGGAAACTGGTATGCTGGAGTGCAGAAGAGGGCGATGGAATTCCATGT,TACGTAGGTGGCGAGCGTTATCCGGAATTATTGGGCGTAAAGAGGGAGCAGGCGGCGGCAGAGGTCTGTGGTGAAAGACTGAAGCTTAACTTCAGTAAGCCATAGAAACCGGGCTGCTAGAGTGCAGGAGAGGATCGTGGAATTCCATGT,AACGTAGGTCACAAGCGTTGTCCGGAATTACTGGGTGTAAAGGGAGCGCAGGCGGGAAGACAAGTTGGAAGTGAAATCCATGGGCTCAACCCATGAACTGCTTTCAAAACTGTTTTTCTTGAGTAGTGCAGAGGTAGGCGGAATTCCCGG,TACGGAGGATCCGAGCGTTATCCGGATTTATTGGGTTTAAAGGGAGCGTAGATGGATGTTTAAGTCAGTTGTGAAAGTTTGCGGCTCAACCGTAAAATTGCAGTTGATACTGGATGTCTTGAGTGCAGTTGAGGCAGGCGGAATTCGTGG,TACGTAGGGGGCAAGCGTTATCCGGATTTACTGGGTGTAAAGGGAGCGTAGACGGCGAAGCAAGTCTGAAGTGAAAACCCAGGGCTCAACCCTGGGACTGCTTTGGAAACTGTTTTGCTAGAGTGTCGGAGAGGTAAGTGGAATTCCTAG
TW-CO-002,-0.097968,-0.129545,-0.109753,-0.113338,-0.197325,-0.202576,-0.235192,-0.261255,-0.260321,-0.243276,...,-0.090252,-0.139397,-0.481841,-0.166574,-0.409194,-0.284873,0.638761,-0.418581,-0.24482,-0.281017
TW-BT-001,-0.097968,-0.129545,-0.109753,-0.113338,-0.197325,-0.202576,-0.235192,-0.261255,-0.260321,-0.243276,...,-0.090252,-0.139397,-0.481841,-0.166574,-0.462937,-0.284873,-0.453259,-0.464552,-0.238921,-0.281017
TW-CA-002,-0.097968,-0.129545,-0.109753,-0.113338,-0.197325,-0.202576,-0.235192,-0.261255,-0.260321,-0.243276,...,-0.090252,-0.139397,-0.481841,-0.166574,-0.471895,-0.284873,0.006539,-0.450761,-0.24482,-0.281017


In [60]:
# Save the scaled fecal multi-omic table as a tab-separated file.
fecal_multi_scaled.to_csv(
    './feature_tables/fecal-multiomic-ft-scaled.tsv',
    sep='\t',
)