<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Donor-recipient-matching" data-toc-modified-id="Donor-recipient-matching-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Donor recipient matching</a></span></li><li><span><a href="#Age-sex-matching-table" data-toc-modified-id="Age-sex-matching-table-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Age sex matching table</a></span></li><li><span><a href="#Before-and-after-perturbation" data-toc-modified-id="Before-and-after-perturbation-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Before and after perturbation</a></span><ul class="toc-item"><li><span><a href="#Week-0-vs-Week-10" data-toc-modified-id="Week-0-vs-Week-10-3.1"><span class="toc-item-num">3.1&nbsp;&nbsp;</span>Week 0 vs Week 10</a></span></li><li><span><a href="#Week-0-vs-Week-18" data-toc-modified-id="Week-0-vs-Week-18-3.2"><span class="toc-item-num">3.2&nbsp;&nbsp;</span>Week 0 vs Week 18</a></span></li><li><span><a href="#Week-0-vs-week-100" data-toc-modified-id="Week-0-vs-week-100-3.3"><span class="toc-item-num">3.3&nbsp;&nbsp;</span>Week 0 vs week 100</a></span></li></ul></li><li><span><a href="#Week-10-vs-Week-18" data-toc-modified-id="Week-10-vs-Week-18-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Week 10 vs Week 18</a></span><ul class="toc-item"><li><span><a href="#week-18-vs-week-100" data-toc-modified-id="week-18-vs-week-100-4.1"><span class="toc-item-num">4.1&nbsp;&nbsp;</span>week 18 vs week 100</a></span></li></ul></li><li><span><a href="#Below-is-scratch-work" data-toc-modified-id="Below-is-scratch-work-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Below is scratch work</a></span></li><li><span><a href="#Standard-Kang-dataset-against-other-datasets" data-toc-modified-id="Standard-Kang-dataset-against-other-datasets-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Standard Kang dataset against other datasets</a></span></li><li><span><a href="#Merge-Kang-et-al-with-combined-metadata-table" data-toc-modified-id="Merge-Kang-et-al-with-combined-metadata-table-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Merge Kang et al with combined metadata table</a></span></li><li><span><a href="#Synchronize-biom-tables" data-toc-modified-id="Synchronize-biom-tables-8"><span class="toc-item-num">8&nbsp;&nbsp;</span>Synchronize biom tables</a></span></li><li><span><a href="#Split-by-time" data-toc-modified-id="Split-by-time-9"><span class="toc-item-num">9&nbsp;&nbsp;</span>Split by time</a></span></li><li><span><a href="#Split-by-donor" data-toc-modified-id="Split-by-donor-10"><span class="toc-item-num">10&nbsp;&nbsp;</span>Split by donor</a></span></li></ul></div>

In [1]:
from q2_matchmaker._matching import _matchmaker

import pandas as pd
import numpy as np

In [2]:
from scipy.spatial.distance import pdist, squareform
from scipy.optimize import linear_sum_assignment
import pandas as pd


def _standardize(x):
    return (x - x.min()) / (x.max() - x.min())


def _matchmaker(metadata, status, match_columns, types):
    """ Computes matching ids.
    Parameters
    ----------
    metadata : pd.DataFrame
        Sample metadata
    status : str
        Column for specifying case-control status
    match_columns : list of str
        List of metadata categories
    types : list of bool
        Specifies if it is categorical or not.
        True for categorical, False for continuous
    Returns
    -------
    pd.Series : List of matching ids
    """
    md = metadata.sort_values(by=status)
    dummies = []
    for col, cat in zip(match_columns, types):
        if cat:
            df = pd.get_dummies(md[col])
            dummies.append(df)
        else:
            df = pd.DataFrame(_standardize(md[col]))
            dummies.append(df)
    dm = sum(map(lambda x: squareform(pdist(x)) ** 2, dummies))
    i = (md[status].values == md[status].values[0]).sum()
    x, y = linear_sum_assignment(dm[:i, i:])
    y = y + i
    md.loc[md.index[x], 'matching_id'] = x
    md.loc[md.index[y], 'matching_id'] = x
    return md['matching_id']

def amplicon_to_ogu(table_, mapping):
    t = table_.to_dataframe()
    t = pd.merge(t, mapping['sequence'].reset_index(), left_index=True, right_on='sequence')
    t = t.groupby('genome').sum()
    t = biom.Table(t.values, list(t.index), list(t.columns))
    return t

Load Kang et al metadata

In [3]:
md = pd.read_table('Kang2017/sample_metadata.txt')
md0 = pd.read_table('Kang2017/stool-metadata.txt')
md1 = pd.read_excel('Kang2017/41598_2019_42183_MOESM2_ESM.xlsx', engine='openpyxl', skiprows=1)
md2 = pd.read_excel('Kang2017/41598_2019_42183_MOESM3_ESM.xlsx', engine='openpyxl', skiprows=2)

# some metadata massaging
md['week'] = md['weeks-since-experiment-start']
asd_donors = md0[['host_subject_id', 'bbt_donor_id']].drop_duplicates()
md = pd.merge(md, asd_donors, left_on='host_subject_id', right_on='host_subject_id', how='outer')
md = md.set_index('Run')
# filter out non stool samples
md = md.loc[md['collection-method'] == 'stool']

Load 16S to WGS mapping

In [4]:
# directory paths
amp_directory = 'Combined'
wgs_directory = '../sra_shotgun/Combined'

sam_file = f'{amp_directory}/age_sex_matched_seqs.sam'
aligns = pd.read_table(sam_file, header=None)
mapping = aligns[[0, 2]]
mapping.columns = ['sequence', 'GOTU']
taxonomy = pd.read_table('~/databases/wol/taxonomy/ranks.tsv', index_col=0)
taxid = pd.read_table('~/databases/wol/taxonomy/taxid.map', header=None, dtype=str)
taxid.columns = ['GOTU', 'genome']
mapping = pd.merge(mapping, taxid, left_on='GOTU', right_on='GOTU')
mapping = pd.merge(mapping, taxonomy, left_on='GOTU', right_index=True)
mapping = mapping.set_index('genome')

# load metagenome counts from all WGS datasets
# and filter out all non-sensical mappings
threshold = 100  # determined based on bimodality of count distribution
gotu_totals = pd.read_csv(f'{wgs_directory}/feature_counts.csv', index_col=0)
gotu_totals.index = list(map(str, gotu_totals.index))
sane_otus = set(gotu_totals.loc[gotu_totals['0'] > threshold].index)
mapping = mapping.loc[sane_otus & set(mapping.index)]

# Donor recipient matching

In [5]:
import biom
from biom.util import biom_open
table = biom.load_table('Kang2017/deblur/all.biom')

asd_donor_pairing = asd_donors.dropna().groupby('bbt_donor_id').first().reset_index()
donor_idx = md['host_subject_id'].apply(lambda x: x in set(asd_donor_pairing['bbt_donor_id']))
host_idx = md['host_subject_id'].apply(lambda x: x in set(asd_donor_pairing['host_subject_id']))
md_donor = md.loc[donor_idx]
md_host = md.loc[host_idx]
md_host0 = md_host.loc[md_host['week'] == 0]
md_donor['donor_matching'] = md_donor['host_subject_id']
md_host0['donor_matching'] = md_host0['bbt_donor_id']
md_donor = pd.concat((md_host0, md_donor), axis=0)

# prepare donor biom table
filter_f = lambda v, i, m: i in set(md_donor.index)
table_ = table.filter(filter_f, axis='sample', inplace=False)
filter_f = lambda v, i, m: np.sum(v) > 0
table_.filter(filter_f, axis='observation')
md_donor = md_donor[~md_donor.index.duplicated(keep='first')]
with biom_open('Kang2017/donor.biom', 'w') as f:
    table_.to_hdf5(f, 'kang_week0_donor')
    
md_donor.to_csv('Kang2017/donor_metadata.txt', sep='\t')

# Save separate OGU table
t = amplicon_to_ogu(table_, mapping)
with biom_open('Kang2017/donor_ogu.biom', 'w') as f:
    t.to_hdf5(f, 'kang_week0_donor')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  md_donor['donor_matching'] = md_donor['host_subject_id']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  md_host0['donor_matching'] = md_host0['bbt_donor_id']


# Age sex matching table

In [6]:
md = pd.read_table('Kang2017/sample_metadata.txt')
md2 = pd.read_excel('Kang2017/41598_2019_42183_MOESM3_ESM.xlsx', engine='openpyxl', skiprows=2)

# obtain age/sex information for the controls
md2 = md2[['Code', 'age', 'gender']]
def g(x):
    if x == 'M':
        return 'male'
    else:
        return 'female'
md2['Sex'] = md2.gender.apply(g)
md2['Age'] = md2['age']
md2 = md2.set_index('Code')[['Age', 'Sex']]
md = pd.merge(md, md2, left_on='host_subject_id', right_on='Code')
md['Sex'] = md['Sex_y']
md['Age'] = md['Age_y']
md['week'] = md['weeks-since-experiment-start']
# age sex matching
age_sex_md = md.dropna(subset=['Age', 'Sex', 'Status'])
age_sex_md_0 = age_sex_md.loc[age_sex_md['week'] == 0]
match_ids = _matchmaker(age_sex_md_0, 'Status', ['Age', 'Sex'], [False, True])
match_ids = match_ids.dropna().astype(np.int64).apply(lambda x: f'Kang_{x}')
age_sex_md_0['Match_IDs'] = match_ids
age_sex_md_0 = age_sex_md_0.dropna(subset=['Match_IDs'])
age_sex_md_0 = age_sex_md_0.set_index('Run')
# merge metadata
combined_md = pd.read_table('Combined/age_sex_match_metadata.txt', index_col=0)
combined_md0 = pd.concat((age_sex_md_0[combined_md.columns], combined_md), axis=0)
# match biom tables
table = biom.load_table('Combined/age_sex_matched.biom')
kang = biom.load_table('Kang2017/deblur/all.biom')
filter_0 = lambda v, i, m: i in combined_md0.index
kang_0 = kang.filter(filter_0, inplace=False)
# filter out low abundance microbes
filter_obs = lambda v, i, m: np.sum(v > 0) > 3
kang_0.filter(filter_obs, axis='observation')
combined_0 = table.merge(kang_0)
# save biom table and metadata
combined_md0.to_csv('Kang2017/combined_sample_metadata_0.txt', sep='\t')
with biom_open('Kang2017/age_sex_match_week0.biom', 'w') as f:
    combined_0.to_hdf5(f, 'combined_kang_week0')
    
# save biom table with ogu ids
t = amplicon_to_ogu(combined_0, mapping)
with biom_open('Kang2017/age_sex_match_week0_ogu.biom', 'w') as f:
    t.to_hdf5(f, 'combined_kang_week0')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  age_sex_md_0['Match_IDs'] = match_ids


# Before and after perturbation

## Week 0 vs Week 10

In [7]:
end = 10
md = pd.read_table('Kang2017/sample_metadata.txt')
md = md.set_index('Run')
asd_md = md.loc[md.Status == 'ASD']
asd_md = asd_md[asd_md['collection-method'] == 'stool']
asd_md['week'] = asd_md['weeks-since-experiment-start'] 
week0 = asd_md['weeks-since-experiment-start'] == 0
weeklr = asd_md['weeks-since-experiment-start'] == end
asd_md = asd_md.loc[np.logical_or(week0, weeklr)]
asd_md = asd_md.groupby('host_subject_id').filter(lambda x: len(x) == 2)
filter_asd = lambda v, i, m: i in set(asd_md.index)
kang_asd = kang.filter(filter_asd, inplace=False)
filter_obs = lambda v, i, m: np.sum(v > 0) > 0
#kang_asd.filter(filter_obs, axis='observation')
with biom_open(f'Kang2017/week0_week{end}.biom', 'w') as f:
    kang_asd.to_hdf5(f, f'kang_week0_{end}')
# save OGU table
t = amplicon_to_ogu(kang_asd, mapping)
with biom_open(f'Kang2017/week0_week{end}_ogu.biom', 'w') as f:
    t.to_hdf5(f, f'kang_week0_{end}')
    
asd_md['week'] = asd_md['week'].astype(np.int64)
asd_md['week_time'] = asd_md['week'].apply(lambda x: f'A{x}')
asd_md.loc[kang_asd.ids()].to_csv(f'Kang2017/asd_metadata_w{end}.txt', sep='\t')

## Week 0 vs Week 18

In [8]:
end = 18
md = pd.read_table('Kang2017/sample_metadata.txt')
md = md.set_index('Run')
asd_md = md.loc[md.Status == 'ASD']
asd_md = asd_md[asd_md['collection-method'] == 'stool']
asd_md['week'] = asd_md['weeks-since-experiment-start'] 
week0 = asd_md['weeks-since-experiment-start'] == 0
weeklr = asd_md['weeks-since-experiment-start'] == end
asd_md = asd_md.loc[np.logical_or(week0, weeklr)]
asd_md = asd_md.groupby('host_subject_id').filter(lambda x: len(x) == 2)
filter_asd = lambda v, i, m: i in set(asd_md.index)
kang_asd = kang.filter(filter_asd, inplace=False)
filter_obs = lambda v, i, m: np.sum(v > 0) > 0
#kang_asd.filter(filter_obs, axis='observation')
with biom_open(f'Kang2017/week0_week{end}.biom', 'w') as f:
    kang_asd.to_hdf5(f, f'kang_week0_{end}')
# save OGU table
t = amplicon_to_ogu(kang_asd, mapping)
with biom_open(f'Kang2017/week0_week{end}_ogu.biom', 'w') as f:
    t.to_hdf5(f, f'kang_week0_{end}')
    
asd_md['week'] = asd_md['week'].astype(np.int64)
asd_md['week_time'] = asd_md['week'].apply(lambda x: f'A{x}')
asd_md.loc[kang_asd.ids()].to_csv(f'Kang2017/asd_metadata_w{end}.txt', sep='\t')

## Week 0 vs week 100

In [9]:
end = 100
md = pd.read_table('Kang2017/sample_metadata.txt')
md = md.set_index('Run')
asd_md = md.loc[md.Status == 'ASD']
asd_md = asd_md[asd_md['collection-method'] == 'stool']
asd_md['week'] = asd_md['weeks-since-experiment-start'] 
week0 = asd_md['weeks-since-experiment-start'] == 0
weeklr = asd_md['weeks-since-experiment-start'] == end
asd_md = asd_md.loc[np.logical_or(week0, weeklr)]
asd_md = asd_md.groupby('host_subject_id').filter(lambda x: len(x) == 2)
filter_asd = lambda v, i, m: i in set(asd_md.index)
kang_asd = kang.filter(filter_asd, inplace=False)
filter_obs = lambda v, i, m: np.sum(v > 0) > 0
#kang_asd.filter(filter_obs, axis='observation')
with biom_open(f'Kang2017/week0_week{end}.biom', 'w') as f:
    kang_asd.to_hdf5(f, f'kang_week0_{end}')
# save OGU table
t = amplicon_to_ogu(kang_asd, mapping)
with biom_open(f'Kang2017/week0_week{end}_ogu.biom', 'w') as f:
    t.to_hdf5(f, f'kang_week0_{end}')
    
asd_md['week'] = asd_md['week'].astype(np.int64)
asd_md['week_time'] = asd_md['week'].apply(lambda x: f'A{x}')
asd_md.loc[kang_asd.ids()].to_csv(f'Kang2017/asd_metadata_w{end}.txt', sep='\t')

# Week 10 vs Week 18

In [10]:
start = 10
end = 100
md = pd.read_table('Kang2017/sample_metadata.txt')
md = md.set_index('Run')
asd_md = md.loc[md.Status == 'ASD']
asd_md = asd_md[asd_md['collection-method'] == 'stool']
asd_md['week'] = asd_md['weeks-since-experiment-start'] 
week0 = asd_md['weeks-since-experiment-start'] == start
weeklr = asd_md['weeks-since-experiment-start'] == end
asd_md = asd_md.loc[np.logical_or(week0, weeklr)]
asd_md = asd_md.groupby('host_subject_id').filter(lambda x: len(x) == 2)
filter_asd = lambda v, i, m: i in set(asd_md.index)
kang_asd = kang.filter(filter_asd, inplace=False)
filter_obs = lambda v, i, m: np.sum(v > 0) > 0
#kang_asd.filter(filter_obs, axis='observation')
with biom_open(f'Kang2017/week{start}_week{end}.biom', 'w') as f:
    kang_asd.to_hdf5(f, f'kang_week{start}_{end}')
# save OGU table
t = amplicon_to_ogu(kang_asd, mapping)
with biom_open(f'Kang2017/week0_week{end}_ogu.biom', 'w') as f:
    t.to_hdf5(f, f'kang_week0_{end}')
    
asd_md['week'] = asd_md['week'].astype(np.int64)
asd_md['week_time'] = asd_md['week'].apply(lambda x: f'A{x}')
asd_md.loc[kang_asd.ids()].to_csv(f'Kang2017/asd_metadata_w_{start}w{end}.txt', sep='\t')

## week 18 vs week 100

In [11]:
start = 18
end = 100
md = pd.read_table('Kang2017/sample_metadata.txt')
md = md.set_index('Run')
asd_md = md.loc[md.Status == 'ASD']
asd_md = asd_md[asd_md['collection-method'] == 'stool']
asd_md['week'] = asd_md['weeks-since-experiment-start'] 
week0 = asd_md['weeks-since-experiment-start'] == start
weeklr = asd_md['weeks-since-experiment-start'] == end
asd_md = asd_md.loc[np.logical_or(week0, weeklr)]
asd_md = asd_md.groupby('host_subject_id').filter(lambda x: len(x) == 2)
filter_asd = lambda v, i, m: i in set(asd_md.index)
kang_asd = kang.filter(filter_asd, inplace=False)
filter_obs = lambda v, i, m: np.sum(v > 0) > 0
#kang_asd.filter(filter_obs, axis='observation')
with biom_open(f'Kang2017/week{start}_week{end}.biom', 'w') as f:
    kang_asd.to_hdf5(f, f'kang_week{start}_{end}')
# save OGU table
t = amplicon_to_ogu(kang_asd, mapping)
with biom_open(f'Kang2017/week{start}_week{end}_ogu.biom', 'w') as f:
    t.to_hdf5(f, f'kang_week{start}_{end}')
    
asd_md['week'] = asd_md['week'].astype(np.int64)
asd_md['week_time'] = asd_md['week'].apply(lambda x: f'A{x}')
asd_md.loc[kang_asd.ids()].to_csv(f'Kang2017/asd_metadata_w_{start}w{end}.txt', sep='\t')

In [12]:
md.groupby('GROUP').mean()

Unnamed: 0_level_0,weeks-since-experiment-start,Age,cars
GROUP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
autism,13.692015,11.086312,35.924242
donor-initial,,,
donor-maintenance,,,
neurotypical,8.223464,,
neurotypical_mom,0.0,,


# Below is scratch work

In [13]:
def filter_f(x):
    return (0 not in x['week']) and (18 not in x['week'])

md_ = md.dropna(subset=['week', 'Age', 'Sex'])

KeyError: ['week']

# Standard Kang dataset against other datasets

In [None]:
def filter_f(x):
    return (0 not in x['week']) and (18 not in x['week'])
# select only week 0 and filter out subjects that didn't collect at least 2 time points
md0['week'] = md0['week'].astype(np.int64)
md0 = md0.groupby(['host_subject_id']).filter(lambda x: len(x) >= 2)
# merge metadata
md = pd.merge(md0, md2[['Code', 'age', 'gender']], left_on='host_subject_id', right_on='Code')
md = md.set_index('sampleid')
md = md[['gender', 'age', 'week', 'host_subject_id', 'experimental_group']]

# perform matching
md_week0 = md.loc[md.week == 0.0]
match_ids = _matchmaker(md_week0, 'experimental_group', ['age', 'gender'], [False, True])
match_ids = match_ids.dropna().astype(np.int64).apply(lambda x: f'Kang_{x}')
md_week0['match_ids'] = match_ids

md = pd.merge(md.reset_index(), md_week0[['host_subject_id', 'match_ids']], 
              left_on='host_subject_id', right_on='host_subject_id')

md = md.dropna()

md = md.rename(columns={
    'gender': 'Sex', 'age': 'Age',
    'experimental_group': 'Status'
})
# sync
def f(x):
    if x == 'autism':
        return 'ASD'
    else:
        return 'Control'
    

md['Status'] = md.Status.apply(f)

md = md.set_index('sampleid')


md['Sex'] = md.Sex.apply(g)

# drop all samples that weren't collected at 0 or 18
idx = np.logical_or(md.week == 0.0, md.week == 18.0)
sub_md = md.loc[idx]
sub_md['Match_IDs'] = sub_md.apply(lambda x: x['match_ids'] + '_' + str(int(x['week'])), axis=1)
# add other information
sub_md['Cohort'] = 'Kang2017'
sub_md['Control_Type'] = 'Intervention'
sub_md['Subjects_Location'] = 'USA'
sub_md['Variable_Region'] = 'V4'
sub_md.to_csv('Kang2017/sample_metadata_matched.txt', sep='\t')

In [None]:
md0.loc[md0['experimental_group'] == 'neurotypical']['week'].value_counts()

# Merge Kang et al with combined metadata table

In [None]:
combined_md = pd.read_table('Combined/age_sex_match_metadata.txt', index_col=0)
combined_md0 = pd.concat((sub_md.loc[sub_md.week == 0, combined_md.columns], combined_md), axis=0)
combined_md18 = pd.concat((sub_md.loc[sub_md.week == 18, combined_md.columns], combined_md), axis=0)
combined_md0.to_csv('Kang2017/combined_sample_metadata_0.txt', sep='\t')
combined_md18.to_csv('Kang2017/combined_sample_metadata_18.txt', sep='\t')

# Synchronize biom tables

In [None]:
import biom

table = biom.load_table('Combined/age_sex_matched.biom')
kang = biom.load_table('Kang2017/deblur/qiita.biom')

filter_0 = lambda v, i, m: i in combined_md0.index
kang_0 = kang.filter(filter_0, inplace=False)

filter_18 = lambda v, i, m: i in combined_md18.index
kang_18 = kang.filter(filter_18, inplace=False)

# filter out low abundance microbes
filter_obs = lambda v, i, m: np.sum(v > 0) > 3
kang_0.filter(filter_obs, axis='observation')
kang_18.filter(filter_obs, axis='observation')

combined_0 = table.merge(kang_0)
combined_18 = table.merge(kang_18)

In [None]:
from biom.util import biom_open

with biom_open('Kang2017/age_sex_match_week0.biom', 'w') as f:
    combined_0.to_hdf5(f, 'combined_kang_week0')
    
with biom_open('Kang2017/age_sex_match_week18.biom', 'w') as f:
    combined_18.to_hdf5(f, 'combined_kang_week18')

# Split by time

We need to compute diff abundance wrt time. 

In [None]:
asd_md = sub_md.loc[sub_md.Status == 'ASD']

filter_asd = lambda v, i, m: i in asd_md.index
kang_asd = kang.filter(filter_asd, inplace=False)

filter_obs = lambda v, i, m: np.sum(v > 0) > 3
kang_asd.filter(filter_obs, axis='observation')

with biom_open('Kang2017/week0_week18.biom', 'w') as f:
    kang_asd.to_hdf5(f, 'kang_week0_18')
    
asd_md.to_csv('Kang2017/asd_metadata.txt', sep='\t')

# Split by donor

In [None]:
md = pd.read_table('Kang2017/sample_metadata.txt')
md = md.set_index('Run')

table = biom.load_table('Kang2017/deblur/all.biom')

In [None]:
table.ids()

In [None]:
md

In [None]:
full_md = pd.merge(md0, md2[['Code', 'age', 'gender']], left_on='host_subject_id', right_on='Code')
full_md = full_md.set_index('sampleid')

In [None]:
full_md.columns

In [None]:
md0['mom_kid'].value_counts()

In [None]:
full_md.host_subject_id.sort_values()

In [None]:
full_md.bbt_donor_id

In [None]:
full_md.maintenance_bbt_donor_id

In [None]:
md2