## **Data wrangling to get my data machine learning-approved!**

**Needed libraries**

In [1]:
import pandas as pd
import numpy as np
from functools import reduce

**Functions**

In [2]:
def long_to_wide(input_table,
                 abun_col,
                 tax_col):
    mini_input_table = input_table.loc[:, ("mouse_id", abun_col, tax_col)]
    output_table = mini_input_table.pivot_table(abun_col, "mouse_id", tax_col)
    return(output_table)


def encode_my_data(meta_table,
                   encode_col):
    output_list = []
    ## actually encoding my data 
    col_set = set(meta_table[encode_col])
    col_dict = dict(zip(col_set, range(len(col_set))))
    meta_table[encode_col] = meta_table[encode_col].map(col_dict)
    output_list.append(meta_table)

    ## setting up an dictionary key to reverse the encoding when i want to 
    inverse_dict = {}
    for key in col_dict.keys():
        tmp_val = col_dict[key]
        inverse_dict[tmp_val] = key 
    
    output_list.append(inverse_dict)
    return(output_list)

def match_ids(input_table,
              id_col,
              id_dict):
    input_table[id_col] = input_table[id_col].map(id_dict) 
    return(input_table)


**File paths**

In [3]:
family_deltas_fp = '../data/family_relabundDeltas.tsv'
long_relAbun_fp = '../data/newExp_ml_out.tsv'
mouse_blooms_fp = '../data/mouseBlooms_families.tsv'
meta_fp = '../../data/misc/proc_newExp_d15-d3_metadata.tsv'
surv_stat_fp = '../../data/misc/survival_status.tsv'
pos_cult_fp = '../../data/misc/pos_culture_status.tsv'

**The official start of the data wrangling**

In [4]:
## reading in needed files
family_deltas_table = pd.read_csv(family_deltas_fp, sep="\t")
long_relAbun = pd.read_csv(long_relAbun_fp, sep='\t')
mouse_blooms = pd.read_csv(mouse_blooms_fp, sep="\t")
metadata = pd.read_csv(meta_fp, sep="\t")
surv_stat = pd.read_csv(surv_stat_fp, sep="\t")
pos_cult = pd.read_csv(pos_cult_fp, sep="\t")

In [5]:
## taking metadata from long to wide so mouse ids aren't duplicated
metadata = (metadata.pivot(values="high_fat", 
                           index=["mouse_id", "diet", "vendor", "high_fiber"], 
                           columns=["day_post_inf"]).reset_index(level=[0,1,2,3]))

meta_cols = ["mouse_id", "diet", "vendor", "high_fiber", "baseline", "bloom_day"]
metadata.columns = meta_cols

metadata = metadata.dropna(subset=["bloom_day"])

metadata

Unnamed: 0,mouse_id,diet,vendor,high_fiber,baseline,bloom_day
0,CDD02.CR.Chow.1,Chow,charles_river,0,0.0,0.0
1,CDD02.CR.Chow.2,Chow,charles_river,0,0.0,0.0
2,CDD02.CR.Chow.3,Chow,charles_river,0,0.0,0.0
3,CDD02.CR.Chow.4,Chow,charles_river,0,0.0,0.0
4,CDD02.CR.Chow.5,Chow,charles_river,0,0.0,0.0
5,CDD02.CR.HFHF.1,HF/HF,charles_river,1,1.0,1.0
6,CDD02.CR.HFHF.2,HF/HF,charles_river,1,1.0,1.0
7,CDD02.CR.HFHF.3,HF/HF,charles_river,1,1.0,1.0
8,CDD02.CR.HFHF.4,HF/HF,charles_river,1,1.0,1.0
9,CDD02.CR.HFHF.5,HF/HF,charles_river,1,1.0,1.0


In [6]:
## adding chow to the high_fiber diets distinction (for science)
high_fiber_diets = ["Chow", "HF/HF", "LF/HF"]
metadata["chow_highFiber"] = np.where(metadata["diet"].isin(high_fiber_diets),
                            1,
                            0)

metadata

Unnamed: 0,mouse_id,diet,vendor,high_fiber,baseline,bloom_day,chow_highFiber
0,CDD02.CR.Chow.1,Chow,charles_river,0,0.0,0.0,1
1,CDD02.CR.Chow.2,Chow,charles_river,0,0.0,0.0,1
2,CDD02.CR.Chow.3,Chow,charles_river,0,0.0,0.0,1
3,CDD02.CR.Chow.4,Chow,charles_river,0,0.0,0.0,1
4,CDD02.CR.Chow.5,Chow,charles_river,0,0.0,0.0,1
5,CDD02.CR.HFHF.1,HF/HF,charles_river,1,1.0,1.0,1
6,CDD02.CR.HFHF.2,HF/HF,charles_river,1,1.0,1.0,1
7,CDD02.CR.HFHF.3,HF/HF,charles_river,1,1.0,1.0,1
8,CDD02.CR.HFHF.4,HF/HF,charles_river,1,1.0,1.0,1
9,CDD02.CR.HFHF.5,HF/HF,charles_river,1,1.0,1.0,1


In [7]:
## creating a bloom or no bloom column in the deltas
## bloom = true
## no bloom = false
bloom_mouseID_list = list(mouse_blooms["mouse_id"])

## metadata wrangling (to get in the right format for ml models)
mini_meta = metadata.loc[:, ("mouse_id", "diet", "vendor", "high_fiber", "chow_highFiber")]

mini_meta["bloom_status"] = np.where(mini_meta["mouse_id"].isin(bloom_mouseID_list),
                                     True,
                                     False)

meta_merge_list = [mini_meta, surv_stat, pos_cult]

mini_meta = reduce(lambda df_left, df_right: pd.merge(df_left, df_right, how="left", on=["mouse_id"]), meta_merge_list)
mini_meta = mini_meta.dropna(subset=["pos_culture"])

mini_meta

Unnamed: 0,mouse_id,diet,vendor,high_fiber,chow_highFiber,bloom_status,status,pos_culture
0,CDD02.CR.Chow.1,Chow,charles_river,0,1,False,survived,0.0
1,CDD02.CR.Chow.2,Chow,charles_river,0,1,False,survived,0.0
2,CDD02.CR.Chow.3,Chow,charles_river,0,1,False,didnt_survive,0.0
3,CDD02.CR.Chow.4,Chow,charles_river,0,1,True,survived,0.0
4,CDD02.CR.Chow.5,Chow,charles_river,0,1,False,survived,0.0
5,CDD02.CR.HFHF.1,HF/HF,charles_river,1,1,False,didnt_survive,1.0
6,CDD02.CR.HFHF.2,HF/HF,charles_river,1,1,False,didnt_survive,1.0
7,CDD02.CR.HFHF.3,HF/HF,charles_river,1,1,False,survived,0.0
8,CDD02.CR.HFHF.4,HF/HF,charles_river,1,1,False,didnt_survive,1.0
9,CDD02.CR.HFHF.5,HF/HF,charles_river,1,1,False,survived,1.0


**Changing all variables from categorical to numeric so the ml models don't get mad**

In [8]:
## mouse id bc python thinks they're all strings
meta_mouse_encode = encode_my_data(mini_meta,
                                   "mouse_id")

meta_encode = meta_mouse_encode.pop(0)
inverse_mouse_dict = meta_mouse_encode.pop(0)

In [9]:
## diet
meta_diet_encode = encode_my_data(meta_encode,
                                  "diet")

meta_encode = meta_diet_encode.pop(0)
inverse_diet_dict = meta_diet_encode.pop(0)

In [10]:
## vendor
meta_vendor_encode = encode_my_data(meta_encode,
                                    "vendor")

meta_encode = meta_vendor_encode.pop(0)
inverse_vendor_dict = meta_vendor_encode.pop(0)

In [11]:
## bloom status
meta_bloom_encode = encode_my_data(meta_encode,
                                   "bloom_status")

meta_encode = meta_bloom_encode.pop(0)
inverse_bloom_dict = meta_bloom_encode.pop(0)

In [12]:
## survival status
meta_surv_encode = encode_my_data(meta_encode,
                                 "status")

meta_encode = meta_surv_encode.pop(0)
inverse_surv_dict = meta_surv_encode.pop(0)

In [13]:
## creating dictionaries for the high_fiber and chow_highFiber columns

inverse_highFiber_dict = {0: False, 1: True}
inverse_chowHF_dict = {0: False, 1: True}
inverse_posCult_dict = {0: False, 1:True}

In [14]:
## putting together a dataframe as a key for all the variables I've encoded
## this is a monstrosity

mouse_key_df = pd.DataFrame(inverse_mouse_dict.items(),
                            columns=["assigned_num", "mouse_id"])
diet_key_df = pd.DataFrame(inverse_diet_dict.items(),
                           columns=["assigned_num", "diet"])
vendor_key_df = pd.DataFrame(inverse_vendor_dict.items(),
                             columns=["assigned_num", "vendor"])
bloom_key_df = pd.DataFrame(inverse_bloom_dict.items(),
                            columns=["assigned_num", "bloom_status"])
surv_key_df = pd.DataFrame(inverse_surv_dict.items(),
                           columns=["assigned_num", "status"])
high_fiber_key_df = pd.DataFrame(inverse_highFiber_dict.items(),
                                 columns=["assigned_num", "high_fiber"])
chow_highFiber_key_df = pd.DataFrame(inverse_chowHF_dict.items(),
                                     columns=["assigned_num", "chow_highFiber"])
posCult_key_df = pd.DataFrame(inverse_posCult_dict.items(),
                              columns=["assigned_num", "pos_culture"])

key_df_list = [mouse_key_df, diet_key_df, vendor_key_df, bloom_key_df, surv_key_df, high_fiber_key_df, chow_highFiber_key_df, posCult_key_df]
all_metaKeys_df = reduce(lambda df_left, df_right: pd.merge(df_left, df_right, how="left", on=["assigned_num"]), key_df_list)

all_metaKeys_df

Unnamed: 0,assigned_num,mouse_id,diet,vendor,bloom_status,status,high_fiber,chow_highFiber,pos_culture
0,0,CDD02.CR.LFHF.1,Chow,charles_river,False,didnt_survive,False,False,False
1,1,CDD02.CR.LFLF.2,LF/LF,taconic,True,survived,True,True,True
2,2,CDD02.CR.Chow.3,HF/LF,,,,,,
3,3,CDD02.Tc.Chow.4,LF/HF,,,,,,
4,4,CDD02.Tc.HFHF.2,HF/HF,,,,,,
5,5,CDD02.Tc.LFLF.3,,,,,,,
6,6,CDD02.CR.Chow.5,,,,,,,
7,7,CDD02.CR.HFLF.3,,,,,,,
8,8,CDD02.CR.LFHF.4,,,,,,,
9,9,CDD02.CR.LFLF.4,,,,,,,


**Putting training data into wide format** \
in this case, it's the relative abundance deltas between day -15 and day 3 for all detected bacterial families woo

In [15]:
## encoding the mouse ids here the same as the mouse ids in the metadata 
mouse_id_key = dict(zip(all_metaKeys_df.mouse_id, all_metaKeys_df.assigned_num))

family_deltas_table = match_ids(input_table=family_deltas_table,
                                id_col="mouse_id",
                                id_dict=mouse_id_key)
##family_deltas_table

## tax families relative abundance deltas long to wide format
family_deltas_wide = long_to_wide(input_table=family_deltas_table,
                                   abun_col="rel_abund_diff_d3-d15",
                                   tax_col="tax_family")

family_deltas_wide

tax_family,f__AKAU3644,f__Acholeplasmataceae,f__Akkermansiaceae,f__Alicyclobacillaceae,f__Anaerofustaceae,f__Anaerovoracaceae,f__Atopobiaceae,f__Bacillaceae,f__Bacteroidaceae,f__Beggiatoaceae,...,f__Sphingomonadaceae,f__Spirosomaceae,f__Streptococcaceae,f__Sutterellaceae,f__Tannerellaceae,f__UCG-010,f__Weeksellaceae,f__Xanthomonadaceae,f__[Eubacterium]_coprostanoligenes_group,f__uncultured
mouse_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.0,0.0,-0.00056,-0.04267,0.0,0.0,-0.00203,0.0,0.0,-8e-05,0.0,...,0.0,0.0,0.0,0.0,0.279866,-0.00056,0.0,0.0,-0.0226,0.0
1.0,0.0,0.0,-0.00075,0.0,0.0,-0.00187,0.0,0.0,-0.05691,0.0,...,0.0,0.0,-0.00026,-0.00021,0.654143,-0.00056,0.0,0.0,-0.00032,-0.00013
2.0,0.0,0.000148,0.358823,0.0,0.0,0.002724,0.000121,0.0,0.002038,0.0,...,0.0,0.0,0.0,0.0,0.114847,-0.00088,0.0,0.0,-0.020661,0.0
3.0,0.0,0.0,0.001634,0.0,-0.00023,-0.00297,0.0,0.0,0.534085,0.0,...,0.0,0.0,0.01596,0.056777,0.014594,0.0,0.0,0.0,-0.00152,0.0
4.0,0.0,0.0,-0.00024,0.0,-0.0001,-0.00338,0.0,0.0,-0.02775,0.0,...,0.0,0.0,-7e-05,0.0007,0.002391,0.0,0.0,0.0,-0.00013,0.0
5.0,0.0,0.0,-0.00361,0.0,0.0,-0.00121,0.0,0.0,0.550971,0.0,...,0.0,0.0,0.00013,-0.00077,-0.00212,0.0,0.0,0.0,-0.00053,0.0
6.0,0.0,-0.00044,0.059851,0.0,0.0,0.00578,0.0,0.0,0.00073,0.0,...,0.0,0.0,0.0,0.0,0.278905,-0.000811,0.0,0.0,-0.010361,0.0
7.0,0.0,-0.00027,-0.058112,0.0,0.0,-0.00132,0.0,0.0,0.00014,0.0,...,0.0,0.0,0.0,0.0,0.636376,-0.00023,0.0,0.0,-0.00547,0.0
8.0,0.0,-0.0001,-0.000601,0.0,0.0,-0.003491,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.318388,0.0,0.0,0.0,-0.008872,0.0
9.0,0.0,-0.00026,-0.09407,0.0,0.0,-0.0022,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.573255,-0.00078,0.0,0.0,-0.01502,0.0


matching other files that are already in wide format to the numeric mouse ids

In [16]:
## baseline family relative abundances
baselineFam_long = long_relAbun.loc[:, ("mouse_id", "rel_abund_baseline", "tax_family")]
baselineFam_long = match_ids(input_table=baselineFam_long,
                             id_col="mouse_id",
                             id_dict=mouse_id_key)

baselineFam_wide = long_to_wide(input_table=baselineFam_long,
                                abun_col="rel_abund_baseline",
                                tax_col="tax_family")

baselineFam_wide

tax_family,f__AKAU3644,f__Acholeplasmataceae,f__Akkermansiaceae,f__Alicyclobacillaceae,f__Anaerofustaceae,f__Anaerovoracaceae,f__Atopobiaceae,f__Bacillaceae,f__Bacteroidaceae,f__Beggiatoaceae,...,f__Sphingomonadaceae,f__Spirosomaceae,f__Streptococcaceae,f__Sutterellaceae,f__Tannerellaceae,f__UCG-010,f__Weeksellaceae,f__Xanthomonadaceae,f__[Eubacterium]_coprostanoligenes_group,f__uncultured
mouse_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.0,0.0,0.00028,0.004306,0.0,0.0,0.000113,0.0,0.0,5e-06,0.0,...,0.0,0.0,0.0,0.0,0.01688,8e-05,0.0,0.0,0.00452,0.0
1.0,0.0,0.0,7.5e-05,0.0,0.0,0.000104,0.0,0.0,0.003348,0.0,...,0.0,0.0,8.7e-05,7e-05,7.6e-05,8e-05,0.0,0.0,6.4e-05,0.00013
2.0,0.0,0.00036,0.00324,0.0,0.0,0.000128,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.011411,0.000126,0.0,0.0,0.004132,0.0
3.0,0.0,0.0,6.8e-05,0.0,0.000115,0.000165,0.0,0.0,0.002912,0.0,...,0.0,0.0,0.0,0.00016,0.000312,0.0,0.0,0.0,0.000304,0.0
4.0,0.0,0.0,7.1e-05,0.0,5e-05,0.000188,0.0,0.0,0.001632,0.0,...,0.0,0.0,8.3e-05,4.3e-05,6.6e-05,0.0,0.0,0.0,2.6e-05,0.0
5.0,0.0,0.0,0.000361,0.0,0.0,6.7e-05,0.0,0.0,0.004976,0.0,...,0.0,0.0,0.0,0.000257,0.000458,0.0,0.0,0.0,0.000106,0.0
6.0,0.0,0.00022,0.000626,0.0,0.0,4.3e-05,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.003814,0.000116,0.0,0.0,0.002072,0.0
7.0,0.0,0.000135,0.005814,0.0,0.0,7.3e-05,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.005934,3.3e-05,0.0,0.0,0.001094,0.0
8.0,0.0,5e-05,0.000423,0.0,0.0,0.000194,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.004527,0.0,0.0,0.0,0.001774,0.0
9.0,0.0,0.00013,0.009416,0.0,0.0,0.000122,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.007562,0.000111,0.0,0.0,0.003004,0.0


In [17]:
## bloom day family relative abundances
bloomDay_fam_long = long_relAbun.loc[:, ("mouse_id", "rel_abund_bloomDay", "tax_family")]
bloomDay_fam_long = match_ids(input_table=bloomDay_fam_long,
                              id_col="mouse_id",
                              id_dict=mouse_id_key)

bloomDay_fam_wide = long_to_wide(input_table=bloomDay_fam_long,
                                abun_col="rel_abund_bloomDay",
                                tax_col="tax_family")

bloomDay_fam_wide

tax_family,f__AKAU3644,f__Acholeplasmataceae,f__Akkermansiaceae,f__Alicyclobacillaceae,f__Anaerofustaceae,f__Anaerovoracaceae,f__Atopobiaceae,f__Bacillaceae,f__Bacteroidaceae,f__Beggiatoaceae,...,f__Sphingomonadaceae,f__Spirosomaceae,f__Streptococcaceae,f__Sutterellaceae,f__Tannerellaceae,f__UCG-010,f__Weeksellaceae,f__Xanthomonadaceae,f__[Eubacterium]_coprostanoligenes_group,f__uncultured
mouse_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.0,0.0,0.0,3.9e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.072854,0.0,0.0,0.0,0.0,0.0
1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.130905,0.0,0.0,0.0,0.0,0.0
2.0,0.0,0.000434,0.039123,0.0,0.0,0.000279,0.000121,0.0,0.00012,0.0,...,0.0,0.0,0.0,0.0,0.03438,0.0,0.0,0.0,0.0,0.0
3.0,0.0,0.0,0.000231,0.0,0.0,0.0,0.0,0.0,0.034328,0.0,...,0.0,0.0,0.00532,0.019086,0.003231,0.0,0.0,0.0,0.0,0.0
4.0,0.0,0.0,4.7e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,6e-05,0.000277,0.000544,0.0,0.0,0.0,0.0,0.0
5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.037386,0.0,...,0.0,0.0,4.3e-05,0.0,3.4e-05,0.0,0.0,0.0,0.0,0.0
6.0,0.0,0.0,0.006611,0.0,0.0,0.000364,0.0,0.0,4.3e-05,0.0,...,0.0,0.0,0.0,0.0,0.059595,0.0,0.0,0.0,0.0,0.0
7.0,0.0,0.0,3e-06,0.0,0.0,0.0,0.0,0.0,8e-06,0.0,...,0.0,0.0,0.0,0.0,0.133209,0.0,0.0,0.0,0.0,0.0
8.0,0.0,0.0,0.000363,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.068205,0.0,0.0,0.0,0.0,0.0
9.0,0.0,0.0,9e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.122213,0.0,0.0,0.0,0.0,0.0


In [18]:
## joining metadata with wide family deltas table to make sure that it works

family_deltas_meta = family_deltas_wide.merge(meta_encode, how="left", on=["mouse_id"])

family_deltas_meta["mouse_id"] = family_deltas_meta["mouse_id"].map(inverse_mouse_dict)

family_deltas_meta

Unnamed: 0,mouse_id,f__AKAU3644,f__Acholeplasmataceae,f__Akkermansiaceae,f__Alicyclobacillaceae,f__Anaerofustaceae,f__Anaerovoracaceae,f__Atopobiaceae,f__Bacillaceae,f__Bacteroidaceae,...,f__Xanthomonadaceae,f__[Eubacterium]_coprostanoligenes_group,f__uncultured,diet,vendor,high_fiber,chow_highFiber,bloom_status,status,pos_culture
0,CDD02.CR.LFHF.1,0.0,-0.00056,-0.04267,0.0,0.0,-0.00203,0.0,0.0,-8e-05,...,0.0,-0.0226,0.0,3,0,1,1,0,1,0.0
1,CDD02.CR.LFLF.2,0.0,0.0,-0.00075,0.0,0.0,-0.00187,0.0,0.0,-0.05691,...,0.0,-0.00032,-0.00013,1,0,0,0,1,0,1.0
2,CDD02.CR.Chow.3,0.0,0.000148,0.358823,0.0,0.0,0.002724,0.000121,0.0,0.002038,...,0.0,-0.020661,0.0,0,0,0,1,0,0,0.0
3,CDD02.Tc.Chow.4,0.0,0.0,0.001634,0.0,-0.00023,-0.00297,0.0,0.0,0.534085,...,0.0,-0.00152,0.0,0,1,0,1,1,0,0.0
4,CDD02.Tc.HFHF.2,0.0,0.0,-0.00024,0.0,-0.0001,-0.00338,0.0,0.0,-0.02775,...,0.0,-0.00013,0.0,4,1,1,1,1,1,1.0
5,CDD02.Tc.LFLF.3,0.0,0.0,-0.00361,0.0,0.0,-0.00121,0.0,0.0,0.550971,...,0.0,-0.00053,0.0,1,1,0,0,1,0,1.0
6,CDD02.CR.Chow.5,0.0,-0.00044,0.059851,0.0,0.0,0.00578,0.0,0.0,0.00073,...,0.0,-0.010361,0.0,0,0,0,1,0,1,0.0
7,CDD02.CR.HFLF.3,0.0,-0.00027,-0.058112,0.0,0.0,-0.00132,0.0,0.0,0.00014,...,0.0,-0.00547,0.0,2,0,0,0,1,1,0.0
8,CDD02.CR.LFHF.4,0.0,-0.0001,-0.000601,0.0,0.0,-0.003491,0.0,0.0,0.0,...,0.0,-0.008872,0.0,3,0,1,1,0,1,0.0
9,CDD02.CR.LFLF.4,0.0,-0.00026,-0.09407,0.0,0.0,-0.0022,0.0,0.0,0.0,...,0.0,-0.01502,0.0,1,0,0,0,1,0,0.0


**Saving my outputs**

In [19]:
family_deltas_wide.to_csv('../data/family_deltas_wide.tsv', sep='\t')
baselineFam_wide.to_csv('../data/baselineFamily_wide.tsv', sep='\t')
bloomDay_fam_wide.to_csv('../data/bloomDay_family_wide.tsv', sep='\t')
meta_encode.to_csv('../data/ml_approved_metadata.tsv', sep='\t')
all_metaKeys_df.to_csv('../data/meta_dict_keys.tsv', sep='\t')