## **Attempting Multi-class Regression via Random Forest**

what I want to do:
- use the relative abundances from baseline to predict relative abundances at day 3 (bloom day)
- may just start with a couple microbe families and then go from there?

In [27]:
import pandas as pd
import numpy as np
import random as rnd
from functools import reduce

from sklearn.multioutput import MultiOutputRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score, mean_squared_error

**Functions**

In [2]:
def match_ids(input_table,
              id_col,
              id_dict):
    input_table[id_col] = input_table[id_col].map(id_dict) 
    return(input_table)


def long_to_wide(input_table,
                 abun_col,
                 tax_col):
    mini_input_table = input_table.loc[:, ("mouse_id", abun_col, tax_col)]
    output_table = mini_input_table.pivot_table(abun_col, "mouse_id", tax_col)
    return(output_table)

## will run the desired ml model (or list of models via a for loop)
## output is a python dictionary (aka "named list") of a few dataframes
def run_models(wanted_model,
               x_train,
               y_train,
               x_test,
               y_test):
    
    model_out = {}
    wanted_model.fit(x_train, y_train)
    model_y_pred = wanted_model.predict(x_test)
    r2_model = round(wanted_model.score(x_train, y_train) * 100, 2)
    mse_model =  mean_squared_error(y_test, model_y_pred)
    model_out.update({"y_pred": model_y_pred,
                      "r2_score": r2_model,
                      "mse_score": mse_model})
    return(model_out)

**File paths**

In [3]:
start_tax_relAbund_fp = "../data/newExp_ml_out.tsv"
meta_fp = "../data/ml_approved_metadata.tsv"
meta_dict_fp = "../data/meta_dict_keys.tsv"

In [4]:
start_tax_relAbund = pd.read_csv(start_tax_relAbund_fp, sep="\t")
meta = pd.read_csv(meta_fp, sep="\t")
meta_dict = pd.read_csv(meta_dict_fp, sep="\t")

In [5]:
## encoding the mouse ids here the same as the mouse ids in the metadata 
mouse_id_key = dict(zip(meta_dict.mouse_id, meta_dict.assigned_num))

tax_relAbund = match_ids(input_table=start_tax_relAbund,
                         id_col="mouse_id",
                         id_dict=mouse_id_key)

tax_relAbund = tax_relAbund.dropna(subset=["abund_bloomDay", "rel_abund_bloomDay"])

tax_relAbund

Unnamed: 0.1,Unnamed: 0,abund_baseline,abund_bloomDay,asv,mouse_id,rel_abund_baseline,rel_abund_bloomDay,sampleid_baseline,sampleid_bloomDay,tax_class,tax_domain,tax_family,tax_genus,tax_order,tax_phylum,tax_species
0,0,690.0,0.0,2b7b5b3f7fc005ae8c623d6d61947eca,23,0.006900,0.000000,CDD02.Tc.HFLF.3.00,CDD02.Tc.HFLF.3.18,c__Bacteroidia,d__Bacteria,f__Muribaculaceae,g__Muribaculaceae,o__Bacteroidales,p__Bacteroidota,s__uncultured_Bacteroidales
1,1,1504.0,0.0,2b7b5b3f7fc005ae8c623d6d61947eca,14,0.015040,0.000000,CDD02.Tc.LFLF.4.00,CDD02.Tc.LFLF.4.18,c__Bacteroidia,d__Bacteria,f__Muribaculaceae,g__Muribaculaceae,o__Bacteroidales,p__Bacteroidota,s__uncultured_Bacteroidales
2,2,1135.0,0.0,2b7b5b3f7fc005ae8c623d6d61947eca,28,0.011351,0.000000,CDD02.Tc.Chow.4.00,CDD02.Tc.Chow.4.18,c__Bacteroidia,d__Bacteria,f__Muribaculaceae,g__Muribaculaceae,o__Bacteroidales,p__Bacteroidota,s__uncultured_Bacteroidales
3,3,1092.0,178.0,2b7b5b3f7fc005ae8c623d6d61947eca,27,0.010920,0.001788,CDD02.Tc.Chow.2.00,CDD02.Tc.Chow.2.18,c__Bacteroidia,d__Bacteria,f__Muribaculaceae,g__Muribaculaceae,o__Bacteroidales,p__Bacteroidota,s__uncultured_Bacteroidales
4,4,1541.0,0.0,2b7b5b3f7fc005ae8c623d6d61947eca,38,0.015410,0.000000,CDD02.Tc.Chow.1.00,CDD02.Tc.Chow.1.18,c__Bacteroidia,d__Bacteria,f__Muribaculaceae,g__Muribaculaceae,o__Bacteroidales,p__Bacteroidota,s__uncultured_Bacteroidales
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56245,56245,0.0,0.0,8f1a7336ff8430574c3cbd890815fad5,5,0.000000,0.000000,CDD02.CR.LFHF.1.00,CDD02.CR.LFHF.1.18,c__Bacilli,d__Bacteria,f__Erysipelotrichaceae,g__[Clostridium]_innocuum_group,o__Erysipelotrichales,p__Firmicutes,
56246,56246,0.0,0.0,8f1a7336ff8430574c3cbd890815fad5,49,0.000000,0.000000,CDD02.CR.LFHF.3.00,CDD02.CR.LFHF.3.18,c__Bacilli,d__Bacteria,f__Erysipelotrichaceae,g__[Clostridium]_innocuum_group,o__Erysipelotrichales,p__Firmicutes,
56247,56247,0.0,0.0,8f1a7336ff8430574c3cbd890815fad5,25,0.000000,0.000000,CDD02.CR.HFLF.4.00,CDD02.CR.HFLF.4.18,c__Bacilli,d__Bacteria,f__Erysipelotrichaceae,g__[Clostridium]_innocuum_group,o__Erysipelotrichales,p__Firmicutes,
56248,56248,0.0,0.0,8f1a7336ff8430574c3cbd890815fad5,34,0.000000,0.000000,CDD02.CR.LFHF.4.00,CDD02.CR.LFHF.4.18,c__Bacilli,d__Bacteria,f__Erysipelotrichaceae,g__[Clostridium]_innocuum_group,o__Erysipelotrichales,p__Firmicutes,


In [6]:
## lists of wanted microbes
bloomDay_list = [" f__Tannerellaceae", " f__Enterobacteriaceae", " f__Morganellaceae"]
cdiff_list = [" g__Clostridia_UCG-014", " g__Clostridia_vadinBB60_group", " g__Clostridioides", 
              " g__Clostridium_sensu_stricto_1"]

In [7]:
baseline_relAbund = long_to_wide(input_table=tax_relAbund,
                                 abun_col="rel_abund_baseline",
                                 tax_col="tax_family")

baseline_relAbund = baseline_relAbund.drop(bloomDay_list, axis=1)
baseline_relAbund

tax_family,f__AKAU3644,f__Acholeplasmataceae,f__Akkermansiaceae,f__Alicyclobacillaceae,f__Anaerofustaceae,f__Anaerovoracaceae,f__Atopobiaceae,f__Bacillaceae,f__Bacteroidaceae,f__Beggiatoaceae,...,f__Saccharimonadaceae,f__Sphingomonadaceae,f__Spirosomaceae,f__Streptococcaceae,f__Sutterellaceae,f__UCG-010,f__Weeksellaceae,f__Xanthomonadaceae,f__[Eubacterium]_coprostanoligenes_group,f__uncultured
mouse_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.000365,4.4e-05,0.0,0.0,8.1e-05,0.0,0.0,0.004029,0.0,...,0.0,0.0,0.0,0.0,0.0,7.3e-05,0.0,0.0,0.001216,0.00034
1,0.0,0.00019,0.001686,0.0,0.0,0.000138,0.0,0.0,4e-06,0.0,...,0.0,0.0,0.0,0.0,0.0,7.4e-05,0.0,0.0,0.002102,0.0
2,0.0,0.00036,0.001619,0.0,7.5e-05,0.000103,0.0,0.0,0.004322,0.0,...,0.0,0.0,0.0,4.3e-05,0.000237,0.000109,0.0,0.0,0.001314,0.00024
3,0.0,0.000145,0.005177,0.0,0.0,0.000119,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000127,0.0,0.0,0.003171,0.0
4,0.0,8.5e-05,0.003157,0.0,0.0,6.2e-05,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00014,0.0
5,0.0,0.00028,0.004306,0.0,0.0,0.000113,0.0,0.0,5e-06,0.0,...,0.0,0.0,0.0,0.0,0.0,8e-05,0.0,0.0,0.00452,0.0
6,0.0,0.000645,0.007202,0.0,0.0,7.3e-05,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.000109,0.0,0.0,0.003071,0.0
8,0.0,0.0,0.001215,0.0,0.0,0.000154,0.0,0.0,0.00255,0.0,...,0.0,0.0,0.0,0.0,0.00143,6.1e-05,0.0,0.0,0.00041,0.0
9,0.0,0.0,0.000205,0.0,0.0,7.9e-05,0.0,0.0,0.001637,0.0,...,5e-05,0.0,0.0,6e-05,0.000257,5.9e-05,0.0,0.0,0.000272,0.0
10,0.0,0.00013,0.008524,0.0,0.0,8.6e-05,0.0,0.0,8e-06,0.0,...,0.0,0.0,0.0,0.0,0.0,8.4e-05,0.0,0.0,0.003895,0.0


In [8]:
filt_bloomDay_relAbund = tax_relAbund[tax_relAbund["tax_family"].isin(bloomDay_list)]

filt_bloomDay_relAbund

Unnamed: 0.1,Unnamed: 0,abund_baseline,abund_bloomDay,asv,mouse_id,rel_abund_baseline,rel_abund_bloomDay,sampleid_baseline,sampleid_bloomDay,tax_class,tax_domain,tax_family,tax_genus,tax_order,tax_phylum,tax_species
5650,5650,0.0,0.0,ec636dcc0ffdbc2aaa67ef136f2976d4,23,0.0,0.0,CDD02.Tc.HFLF.3.00,CDD02.Tc.HFLF.3.18,c__Gammaproteobacteria,d__Bacteria,f__Morganellaceae,g__Proteus,o__Enterobacterales,p__Proteobacteria,
5651,5651,0.0,0.0,ec636dcc0ffdbc2aaa67ef136f2976d4,14,0.0,0.0,CDD02.Tc.LFLF.4.00,CDD02.Tc.LFLF.4.18,c__Gammaproteobacteria,d__Bacteria,f__Morganellaceae,g__Proteus,o__Enterobacterales,p__Proteobacteria,
5652,5652,0.0,0.0,ec636dcc0ffdbc2aaa67ef136f2976d4,28,0.0,0.0,CDD02.Tc.Chow.4.00,CDD02.Tc.Chow.4.18,c__Gammaproteobacteria,d__Bacteria,f__Morganellaceae,g__Proteus,o__Enterobacterales,p__Proteobacteria,
5653,5653,0.0,0.0,ec636dcc0ffdbc2aaa67ef136f2976d4,27,0.0,0.0,CDD02.Tc.Chow.2.00,CDD02.Tc.Chow.2.18,c__Gammaproteobacteria,d__Bacteria,f__Morganellaceae,g__Proteus,o__Enterobacterales,p__Proteobacteria,
5654,5654,0.0,0.0,ec636dcc0ffdbc2aaa67ef136f2976d4,38,0.0,0.0,CDD02.Tc.Chow.1.00,CDD02.Tc.Chow.1.18,c__Gammaproteobacteria,d__Bacteria,f__Morganellaceae,g__Proteus,o__Enterobacterales,p__Proteobacteria,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50445,50445,0.0,0.0,93f068d8770d6572e2b23f4f6f736203,5,0.0,0.0,CDD02.CR.LFHF.1.00,CDD02.CR.LFHF.1.18,c__Gammaproteobacteria,d__Bacteria,f__Enterobacteriaceae,g__Escherichia-Shigella,o__Enterobacterales,p__Proteobacteria,
50446,50446,0.0,0.0,93f068d8770d6572e2b23f4f6f736203,49,0.0,0.0,CDD02.CR.LFHF.3.00,CDD02.CR.LFHF.3.18,c__Gammaproteobacteria,d__Bacteria,f__Enterobacteriaceae,g__Escherichia-Shigella,o__Enterobacterales,p__Proteobacteria,
50447,50447,0.0,0.0,93f068d8770d6572e2b23f4f6f736203,25,0.0,0.0,CDD02.CR.HFLF.4.00,CDD02.CR.HFLF.4.18,c__Gammaproteobacteria,d__Bacteria,f__Enterobacteriaceae,g__Escherichia-Shigella,o__Enterobacterales,p__Proteobacteria,
50448,50448,0.0,0.0,93f068d8770d6572e2b23f4f6f736203,34,0.0,0.0,CDD02.CR.LFHF.4.00,CDD02.CR.LFHF.4.18,c__Gammaproteobacteria,d__Bacteria,f__Enterobacteriaceae,g__Escherichia-Shigella,o__Enterobacterales,p__Proteobacteria,


In [9]:
wide_bloomDay_relAbund = long_to_wide(input_table=filt_bloomDay_relAbund,
                                      abun_col="rel_abund_bloomDay",
                                      tax_col="tax_family")

wide_bloomDay_relAbund

tax_family,f__Enterobacteriaceae,f__Morganellaceae,f__Tannerellaceae
mouse_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.136409,0.0,0.003145
1,0.0,0.012745,0.012846
2,0.177668,0.0,0.01087
3,0.0,0.099173,0.116872
4,0.0,0.112487,0.128278
5,0.0,0.146944,0.072854
6,0.012037,0.092189,0.124755
8,0.0,0.0,0.012986
9,0.052306,0.0,0.032206
10,0.0,0.065313,0.058337


**Multi-class regression attempt**

In [10]:
## training and testing datasets
x_train,x_test,y_train,y_test = train_test_split(baseline_relAbund,
                                                 wide_bloomDay_relAbund,
                                                 test_size=0.2,
                                                 random_state=42)

performance measurment metrics:
- oob score - out of bag score gives the models generalization performance
- mean squared error (MSE)
- r2 value

In [12]:
randF = RandomForestRegressor(n_estimators=100, random_state=0, oob_score=True) 
multi_randF = MultiOutputRegressor(randF)
lin_reg = LinearRegression()
multi_linReg = MultiOutputRegressor(lin_reg)

In [14]:
multi_randF_res = run_models(wanted_model=multi_randF,
                             x_train=x_train,
                             y_train=y_train,
                             x_test=x_test,
                             y_test=y_test)

randF_res = run_models(wanted_model=randF,
                       x_train=x_train,
                       y_train=y_train,
                       x_test=x_test,
                       y_test=y_test)

multi_linReg_res = run_models(wanted_model=multi_linReg,
                              x_train=x_train,
                              y_train=y_train,
                              x_test=x_test,
                              y_test=y_test) 

linReg_res = run_models(wanted_model=lin_reg,
                        x_train=x_train,
                        y_train=y_train,
                        x_test=x_test,
                        y_test=y_test)

In [30]:
model_results = pd.DataFrame({
    "regress_method": ["multi_rf", 
                       "rf", 
                       "multi_linReg", 
                       "lin_reg"],
    "r2": [multi_randF_res["r2_score"], 
           randF_res["r2_score"], 
           multi_linReg_res["r2_score"], 
           linReg_res["r2_score"]],
    "mse": [multi_randF_res["mse_score"], 
            randF_res["mse_score"], 
            multi_linReg_res["mse_score"], 
            linReg_res["mse_score"]]
})

model_results

Unnamed: 0,regress_method,r2,mse
0,multi_rf,91.04,0.002907
1,rf,90.88,0.002681
2,multi_linReg,100.0,1.080272
3,lin_reg,100.0,1.080272


In [18]:
y_multi_randF = multi_randF_res["y_pred"]
multi_y_pred_df = pd.DataFrame(data=y_multi_randF,
                               columns=bloomDay_list)

y_mouse_ids = list(y_test.index)
multi_y_pred_df["mouse_id"] = y_mouse_ids
multi_y_pred_df = multi_y_pred_df.melt(id_vars=["mouse_id"])
multi_y_pred_df.columns = ["mouse_id", "microbe", "multi_rf_pred"]

multi_y_pred_df

Unnamed: 0,mouse_id,microbe,multi_rf_pred
0,28,f__Tannerellaceae,0.106286
1,41,f__Tannerellaceae,0.153443
2,27,f__Tannerellaceae,0.097759
3,44,f__Tannerellaceae,0.116573
4,25,f__Tannerellaceae,0.004998
5,38,f__Tannerellaceae,0.111553
6,13,f__Tannerellaceae,0.120275
7,20,f__Tannerellaceae,0.016462
8,4,f__Tannerellaceae,0.015523
9,26,f__Tannerellaceae,0.086001


In [19]:
y_randF = randF_res["y_pred"]
rf_y_pred_df = pd.DataFrame(data=y_randF,
                            columns=bloomDay_list)

rf_y_pred_df["mouse_id"] = y_mouse_ids
rf_y_pred_df = rf_y_pred_df.melt(id_vars=["mouse_id"])
rf_y_pred_df.columns = ["mouse_id", "microbe", "rf_pred"]

rf_y_pred_df

Unnamed: 0,mouse_id,microbe,rf_pred
0,28,f__Tannerellaceae,0.113686
1,41,f__Tannerellaceae,0.156192
2,27,f__Tannerellaceae,0.088439
3,44,f__Tannerellaceae,0.09589
4,25,f__Tannerellaceae,0.001943
5,38,f__Tannerellaceae,0.089932
6,13,f__Tannerellaceae,0.128537
7,20,f__Tannerellaceae,0.023655
8,4,f__Tannerellaceae,0.02008
9,26,f__Tannerellaceae,0.079082


In [23]:
y_linReg = linReg_res["y_pred"]
linReg_y_pred_df = pd.DataFrame(data=y_linReg,
                            columns=bloomDay_list)

linReg_y_pred_df["mouse_id"] = y_mouse_ids
linReg_y_pred_df = linReg_y_pred_df.melt(id_vars=["mouse_id"])
linReg_y_pred_df.columns = ["mouse_id", "microbe", "lin_reg_pred"]

linReg_y_pred_df

Unnamed: 0,mouse_id,microbe,lin_reg_pred
0,28,f__Tannerellaceae,-0.422242
1,41,f__Tannerellaceae,0.022701
2,27,f__Tannerellaceae,-0.663667
3,44,f__Tannerellaceae,-0.803574
4,25,f__Tannerellaceae,0.106783
5,38,f__Tannerellaceae,0.460915
6,13,f__Tannerellaceae,0.404493
7,20,f__Tannerellaceae,-0.099124
8,4,f__Tannerellaceae,0.04235
9,26,f__Tannerellaceae,-0.234445


In [24]:
y_multi_linReg = multi_linReg_res["y_pred"]
multi_linReg_y_pred_df = pd.DataFrame(data=y_multi_linReg,
                                      columns=bloomDay_list)

multi_linReg_y_pred_df["mouse_id"] = y_mouse_ids
multi_linReg_y_pred_df = multi_linReg_y_pred_df.melt(id_vars=["mouse_id"])
multi_linReg_y_pred_df.columns = ["mouse_id", "microbe", "multi_linRreg_pred"]

multi_linReg_y_pred_df

Unnamed: 0,mouse_id,microbe,multi_linRreg_pred
0,28,f__Tannerellaceae,-0.422242
1,41,f__Tannerellaceae,0.022701
2,27,f__Tannerellaceae,-0.663667
3,44,f__Tannerellaceae,-0.803574
4,25,f__Tannerellaceae,0.106783
5,38,f__Tannerellaceae,0.460915
6,13,f__Tannerellaceae,0.404493
7,20,f__Tannerellaceae,-0.099124
8,4,f__Tannerellaceae,0.04235
9,26,f__Tannerellaceae,-0.234445


In [21]:
proc_y_test = y_test.copy()
proc_y_test["mouse_id"] = y_mouse_ids
proc_y_test = proc_y_test.melt(id_vars=["mouse_id"])
proc_y_test.columns = ["mouse_id", "microbe", "key"]

proc_y_test

Unnamed: 0,mouse_id,microbe,key
0,28,f__Enterobacteriaceae,0.0
1,41,f__Enterobacteriaceae,0.209209
2,27,f__Enterobacteriaceae,0.0
3,44,f__Enterobacteriaceae,0.075992
4,25,f__Enterobacteriaceae,0.0
5,38,f__Enterobacteriaceae,0.0
6,13,f__Enterobacteriaceae,0.19871
7,20,f__Enterobacteriaceae,0.0
8,4,f__Enterobacteriaceae,0.0
9,26,f__Enterobacteriaceae,0.219212


idk how to tell if the regression models are able to predict the relative abundance of these families at day 3 based on the relative abundances at baseline..
- they have really good r2 values and low mses for every model I run but I'm not sure if that's enough 
- i can't figure out how to get any other metrics from the models
- should I include the families I want to predict in the baseline data or not? (right now I'm not)

In [29]:
y_pred_dfs = [rf_y_pred_df, multi_y_pred_df, linReg_y_pred_df, multi_linReg_y_pred_df, proc_y_test]
y_pred_values = reduce(lambda df_left, df_right: pd.merge(df_left, df_right, how="left", on=["mouse_id", "microbe"]), y_pred_dfs)
y_pred_values

Unnamed: 0,mouse_id,microbe,rf_pred,multi_rf_pred,lin_reg_pred,multi_linRreg_pred,key
0,28,f__Tannerellaceae,0.113686,0.106286,-0.422242,-0.422242,0.003231
1,41,f__Tannerellaceae,0.156192,0.153443,0.022701,0.022701,0.000546
2,27,f__Tannerellaceae,0.088439,0.097759,-0.663667,-0.663667,0.006177
3,44,f__Tannerellaceae,0.09589,0.116573,-0.803574,-0.803574,3.4e-05
4,25,f__Tannerellaceae,0.001943,0.004998,0.106783,0.106783,0.142166
5,38,f__Tannerellaceae,0.089932,0.111553,0.460915,0.460915,0.007154
6,13,f__Tannerellaceae,0.128537,0.120275,0.404493,0.404493,0.0
7,20,f__Tannerellaceae,0.023655,0.016462,-0.099124,-0.099124,0.043426
8,4,f__Tannerellaceae,0.02008,0.015523,0.04235,0.04235,0.128278
9,26,f__Tannerellaceae,0.079082,0.086001,-0.234445,-0.234445,0.000544


trying out a different way of identifying mice that had a bloom in a particular microbe

thoughts:
- these percent changes are wild 
- idk how this is going to be more helpful

In [6]:
test_cols = ("mouse_id", "rel_abund_baseline", "rel_abund_bloomDay", "tax_family")

test_df = start_tax_relAbund.loc[:, test_cols]

test_df = test_df.loc[test_df["rel_abund_bloomDay"] >= 0.1]

sorted_test_df = test_df.sort_values(by=["tax_family", "rel_abund_bloomDay"], ascending=False).groupby("tax_family").head(15)

sorted_test_df["pct_change"] = ((abs(sorted_test_df["rel_abund_baseline"] - sorted_test_df["rel_abund_bloomDay"])) / sorted_test_df["rel_abund_baseline"]) * 100

sorted_test_df

Unnamed: 0,mouse_id,rel_abund_baseline,rel_abund_bloomDay,tax_family,pct_change
7147,CDD02.CR.HFLF.4,0.022210,0.710830,f__Tannerellaceae,3100.559282
7127,CDD02.CR.HFLF.2,0.024170,0.678243,f__Tannerellaceae,2706.164667
7107,CDD02.CR.LFLF.1,0.030912,0.672215,f__Tannerellaceae,2074.618062
7113,CDD02.CR.HFLF.3,0.029671,0.666047,f__Tannerellaceae,2144.781339
7112,CDD02.CR.LFLF.2,0.000380,0.654523,f__Tannerellaceae,172113.733100
...,...,...,...,...,...
13944,CDD02.CR.HFHF.5,0.049381,0.198499,f__Akkermansiaceae,301.971182
13922,CDD02.Tc.LFHF.3,0.003730,0.185221,f__Akkermansiaceae,4865.602070
13929,CDD02.Tc.LFHF.4,0.000440,0.179739,f__Akkermansiaceae,40746.427850
13939,CDD02.CR.LFHF.2,0.024249,0.121115,f__Akkermansiaceae,399.456227
