In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from skbio.stats.distance import permanova
from skbio.stats.distance import DistanceMatrix
from scipy import stats
from scikit_posthocs import posthoc_dunn as dunn
from statsmodels.stats.multitest import multipletests
from itertools import combinations

%matplotlib inline

!mkdir Results/Figures

mkdir: cannot create directory ‘Results/Figures’: File exists


In [None]:
!pip install scikit-posthocs
!pip install scikit-bio

# Alpha diversity

In [1]:
import pandas as pd
def alpha_div(qza):
  """Read the alpha-diversity table stored inside a QIIME 2 ``.qza`` archive.

  The archive is a zip file holding ``<root>/data/alpha-diversity.tsv``; the
  member is read directly from the archive, so nothing is extracted into the
  working directory and no shell output has to be parsed.

  Parameters
  ----------
  qza : str or path-like
    Path to the ``.qza`` artifact.

  Returns
  -------
  pandas.DataFrame
    Tab-separated table indexed by its first column.

  Raises
  ------
  FileNotFoundError
    If the archive has no ``<root>/data/alpha-diversity.tsv`` member.
  """
  import zipfile

  target = 'data/alpha-diversity.tsv'
  with zipfile.ZipFile(qza) as archive:
    # member names look like '<root>/data/alpha-diversity.tsv'
    members = [m for m in archive.namelist() if m.split('/', 1)[-1] == target]
    if not members:
      raise FileNotFoundError('%s not found in %s' % (target, qza))
    with archive.open(members[0]) as handle:
      data = pd.read_csv(handle, sep='\t', index_col=0)
  return data

# Start from the sample metadata; each alpha-diversity vector is appended as a column.
alpha = pd.read_csv('metadata.tsv', sep='\t', index_col='#SampleID')
diversity = {'observed_features':'Observed ASVs',
             'shannon':'Shannon\'s entropy',
             'faith_pd':'Faith\'s PD',
             'evenness':'Pielou\'s evenness'}

for div in diversity:
  vector = 'Results/Core-metrics/%s_vector.qza' % div
  # For two metrics the column name in the table differs from the file stem.
  metric = div
  if div == 'shannon':
    metric = 'shannon_entropy'
  if div == 'evenness':
    metric = 'pielou_evenness'
  alpha_df = alpha_div(vector)
  alpha = pd.concat([alpha, alpha_df[metric]], axis=1)
alpha.index.name = '#SampleID'
# Keep only samples that have a Faith's PD value. The .copy() makes the result an
# independent frame so the column assignment below does not write into a slice
# (avoids pandas' SettingWithCopyWarning).
alpha = alpha[alpha['faith_pd'].notna()].copy()
alpha['Day_Trt'] = alpha.Day+'_'+alpha.Treatment
alpha.to_csv('Results/Core-metrics/alpha.tsv',sep='\t')

def kw_dunn(df,col,metric,pairwise):
  """Kruskal-Wallis test across the levels of ``col``, optionally followed by Dunn's test.

  Parameters
  ----------
  df : pandas.DataFrame
    Table holding the grouping column and the metric column.
  col : str
    Name of the grouping column.
  metric : str
    Name of the numeric column to compare between groups.
  pairwise : bool
    When true and the Kruskal-Wallis p-value is below 0.05, run Dunn's
    post-hoc test for every pair of groups.

  Returns
  -------
  kw : result of ``scipy.stats.kruskal`` (``kw[0]`` statistic, ``kw[1]`` p-value)
  phoc : pandas.DataFrame
    Columns ``Group1, Group2, p, q`` (raw and Benjamini-Hochberg adjusted
    Dunn p-values); empty when no post-hoc test was run.
  """
  phoc = pd.DataFrame(columns=['Group1','Group2','p','q'])
  kw = stats.kruskal(*[g[metric].values for _,g in df.groupby(col)])
  if kw[1] < 0.05 and pairwise:
    dunn_p = dunn(df,val_col=metric,group_col=col)
    dunn_q = dunn(df,val_col=metric,group_col=col,p_adjust='fdr_bh')
    # Order of first appearance keeps the pair order reproducible between runs
    # (a set of strings iterates in hash order); NaN is not a group for groupby.
    levels = df[col].dropna().unique().tolist()
    for g1,g2 in combinations(levels,2):
      phoc.loc[len(phoc)] = [g1,g2,dunn_p.loc[g1,g2],dunn_q.loc[g1,g2]]
  return kw,phoc

def color_df(df):
  dfcol = df.style.applymap(lambda x: "background-color: red" if x<0.05 \
  else "background-color: grey",subset=pd.IndexSlice[:, [c for c in df.columns if c in 'pq']])
  return dfcol

# Display labels keyed by the metric column names used in alpha.tsv.
diversity = {
  'observed_features': "Observed ASVs",
  'shannon_entropy': "Shannon's entropy",
  'faith_pd': "Faith's PD",
  'pielou_evenness': "Pielou's evenness",
}
# Reload the merged metadata + alpha-diversity table written above.
alpha = pd.read_csv('Results/Core-metrics/alpha.tsv', sep='\t', index_col='#SampleID')

### Kruskal-Wallis overall test

In [None]:
diversity = {'faith_pd':'Faith\'s PD','shannon_entropy':'Shannon\'s entropy'}
cols = ['BS','Day_hour','Treatment','Source','rstc_run']

alpha = alpha.sort_values(['Day_num','BS'],ascending=[True,True])
groups = alpha.Day.unique().tolist()+alpha.BS_Day.unique().tolist()

!mkdir Results/Alpha_comp
summary,i = pd.DataFrame(),0
for div in diversity:
  for g in groups:
    for col in cols:
      data = alpha.loc[alpha.BS_Day==g].copy()
      if col == 'BS' and g in alpha.Day.unique():
        data = alpha.loc[alpha.Day==g].copy()
      df = data.loc[~data[col].str.contains('not_appl')].copy()
      if len(df[col].unique()) < 2: continue
      kw,phoc = kw_dunn(df,col,div,pairwise=False)
      summary.loc[i,'Metric'] = div
      summary.loc[i,'Group'] = g
      summary.loc[i,'Column'] = col
      summary.loc[i,'KWstats'] = round(kw[0],5)
      summary.loc[i,'p'] = round(kw[1],5)
      if kw[1] > 0.05:
        summary.loc[i,'Comment'] = 'no_pairwise'
      if kw[1] <= 0.1:
        summary.loc[i,'Comment'] = 'trend'
      if kw[1] <= 0.05:
        summary.loc[i,'Comment'] = 'pairwise'
      if kw[1] <= 0.05 and len(df[col].unique()) == 2:
        summary.loc[i,'Comment'] = 'p = %s'%round(kw[1],5)
      i += 1
summary.to_csv('Results/Alpha_comp/Summary_KW_all.tsv',sep='\t')
disp = summary.copy().drop(['KWstats','Comment'],axis=1).set_index('Metric')
display(disp.loc[disp.p<=0.1])

### Plot pairwise sample types within days

In [None]:
from statannot import add_stat_annotation
# Box plots of alpha diversity per sample type (BS), one panel column per day and
# one panel row per metric, annotated with Kruskal-Wallis / Dunn results.
sns.set_style("darkgrid",{"grid.linewidth":0.005})

!mkdir Results/Figures/alpha_diversity
alpha = pd.read_csv('Results/Core-metrics/alpha.tsv',sep='\t',index_col='#SampleID')
alpha = alpha.sort_values(['Day_num','BS'],ascending=[True,True])
groups = alpha.Day.unique()
# panel widths are proportional to the number of sample types present on each day
ratios = [len(alpha.loc[alpha.Day==g].BS.unique()) for g in groups]
cdict = {'RF':'yellow','RSP':'grey','E':'red','FL':'green','FR':'blue','SAM':'orange'}
alphas = {'faith_pd':'Faith\'s PD','shannon_entropy':'Shannon entropy'}

fig, axes = plt.subplots(len(alphas),len(groups),dpi=600,figsize=(1,2),sharey='row',sharex='col',\
    gridspec_kw={'hspace':0.05,'wspace':0.08,'width_ratios':ratios})
# flat list of [row, column] panel positions, consumed in order via the counter i
axs,i = [[n,m] for n in range(len(alphas)) for m in range(len(groups))],0

for alp in alphas:
  for g in groups:
    data = alpha.loc[(alpha.Day==g)].copy()
    ax, i = axes[axs[i][0]][axs[i][1]], i+1
    sns.boxplot(x='BS',y=alp,data=data,ax=ax,linewidth=0.25,fliersize=0.5,palette=cdict)
    ax.tick_params(axis='both', labelsize=3, length=1, pad=1, width=0.5, direction='inout')
    ax.tick_params(axis='x',labelsize=3,labelrotation = 90)
    ax.set_ylabel('')
    ax.set_xlabel('')
    # metric name only on the first column, day label only under the Shannon row
    if g == groups[0]: ax.set_ylabel(alphas[alp],fontsize=4,labelpad=1)
    if alp == 'shannon_entropy': ax.set_xlabel(g.replace('_',' '),fontsize=3.5,labelpad=1)
    ax.set_ylim(auto=True)
    # Statistics
    # qs maps (Group1, Group2) -> adjusted p-value for pairs with q <= 0.05
    qs = {}
    kw,phoc = kw_dunn(data.copy(),'BS',alp,pairwise=True)
    ax.text(0.5,.95,'p '+str(round(kw[1],3)),size=2,transform=ax.transAxes,horizontalalignment='center')
    if kw[1] <= 0.05:
      for j,q in enumerate(phoc['q']):
        if q <= 0.05:
          qs.update({(phoc.iloc[j]['Group1'],phoc.iloc[j]['Group2']):q})
      # NOTE(review): Results/Alpha_comp is created by the Kruskal-Wallis summary cell — run that first
      phoc.to_csv('Results/Alpha_comp/Dunn_pairs_%s-%s_byDays.tsv'%(g,alp),sep='\t')
      print(g,alp)
      display(color_df(phoc))
    # nothing to annotate on this panel
    if len(qs) == 0: continue
    x,y = 'BS',alp
    order = data.BS.unique().tolist()
    # draw significance brackets from the precomputed q-values (no test is run by statannot)
    add_stat_annotation(ax,x=x,y=y,box_pairs=list(qs.keys()),pvalues=list(qs.values()),line_height=0.01,\
                        data=data,fontsize=3,loc='inside',perform_stat_test=False,order=order,\
                        text_offset=-0.8,line_offset=0,line_offset_to_box=0.01,text_format='star',linewidth=0.3)
fig.align_labels()
plt.savefig('Results/Figures/alpha_diversity/Shannon-faithpd-byDays.png', bbox_inches='tight')

### Plot pairwise treatments within sample types

In [None]:
from statannot import add_stat_annotation
# Box plots of alpha diversity per treatment, one panel column per sample-type/day
# group (BS_Day) and one panel row per metric, annotated with Kruskal-Wallis / Dunn results.
sns.set_style("darkgrid",{"grid.linewidth":0.005})

!mkdir Results/Figures/alpha_diversity
alpha = pd.read_csv('Results/Core-metrics/alpha.tsv',sep='\t',index_col='#SampleID')
alpha = alpha.sort_values(['Day_num','BS'],ascending=[True,True])
groups = alpha.BS_Day.unique()
# panel widths are proportional to the number of treatments present in each group
ratios = [len(alpha.loc[alpha.BS_Day==g].Treatment.unique()) for g in groups]
cdict = {'Trt1':'grey','Trt2':'yellow','Trt3':'red','Trt4':'orange',\
         'Trt5':'green','Trt6':'blue','not_appl':'whitesmoke'}
alphas = {'faith_pd':'Faith\'s PD','shannon_entropy':'Shannon entropy'}

fig, axes = plt.subplots(len(alphas),len(groups),dpi=600,figsize=(2.5,2),sharey='row',sharex='col',\
    gridspec_kw={'hspace':0.02,'wspace':0.02,'width_ratios':ratios})
# flat list of [row, column] panel positions, consumed in order via the counter i
axs,i = [[n,m] for n in range(len(alphas)) for m in range(len(groups))],0

for alp in alphas:
  for g in groups:
    data = alpha.loc[(alpha.BS_Day==g)].copy()
    ax, i = axes[axs[i][0]][axs[i][1]], i+1
    sns.boxplot(x='Treatment',y=alp,data=data,ax=ax,linewidth=0.25,fliersize=0.5,palette=cdict)
    ax.tick_params(axis='both', labelsize=3, length=1, pad=1, width=0.5, direction='inout')
    ax.tick_params(axis='x',labelsize=3,labelrotation = 90)
    ax.set_ylabel('')
    ax.set_xlabel('')
    # metric name only on the first column, group label only under the Shannon row
    if g == groups[0]: ax.set_ylabel(alphas[alp],fontsize=4,labelpad=1)
    if alp == 'shannon_entropy': ax.set_xlabel(g.replace('_',' '),fontsize=3.5,labelpad=1)
    # replace the 'not_appl' tick label with the sample-type prefix of the group name
    labels = [item.get_text() for item in ax.get_xticklabels()]
    labels = [item if 'not_appl' not in item else g.split('_')[0] for item in labels]
    ax.set_xticklabels(labels)
    ax.set_ylim(auto=True)
    # Statistics
    # The first two groups are plotted without an x-label and without statistics.
    # NOTE(review): presumably these groups have no treatment levels to compare — confirm.
    if g in groups[:2]: 
      ax.set_xlabel('')
      continue
    # qs maps (Group1, Group2) -> adjusted p-value for pairs with q <= 0.05
    qs = {}
    kw,phoc = kw_dunn(data.copy(),'Treatment',alp,pairwise=True)
    ax.text(0.5,.95,'p '+str(round(kw[1],3)),size=2,transform=ax.transAxes,horizontalalignment='center')
    if kw[1] <= 0.05:
      for j,q in enumerate(phoc['q']):
        if q <= 0.05:
          qs.update({(phoc.iloc[j]['Group1'],phoc.iloc[j]['Group2']):q})
      # NOTE(review): Results/Alpha_comp is created by the Kruskal-Wallis summary cell — run that first
      phoc.to_csv('Results/Alpha_comp/Dunn_pairs_%s-%s_byBS.tsv'%(g,alp),sep='\t')
      print(g,alp)
      display(color_df(phoc))
    # nothing to annotate on this panel
    if len(qs) == 0: continue
    x,y = 'Treatment',alp
    # draw significance brackets from the precomputed q-values (no test is run by statannot)
    add_stat_annotation(ax,x=x,y=y,box_pairs=list(qs.keys()),pvalues=list(qs.values()),line_height=0.01,\
                        data=data,fontsize=3,loc='inside',perform_stat_test=False,order=list(cdict.keys()),\
                        text_offset=-0.8,line_offset=0,line_offset_to_box=0.01,text_format='star',linewidth=0.3)
fig.align_labels()
plt.savefig('Results/Figures/alpha_diversity/Shannon-faithpd-byBS.png', bbox_inches='tight')

### Plot sample types within treatments

In [None]:
from statannot import add_stat_annotation
# Box plots of alpha diversity per sample type (BS), one panel column per
# day/treatment group (Day_Trt, day 'd0' excluded) and one panel row per metric.
sns.set_style("darkgrid",{"grid.linewidth":0.005})

!mkdir Results/Figures/alpha_diversity
alpha = pd.read_csv('Results/Core-metrics/alpha.tsv',sep='\t',index_col='#SampleID')
alpha = alpha.sort_values(['Day_num','Treatment','BS'],ascending=[True,True,True])
no_d0 = alpha.loc[alpha.Day != 'd0']
groups = no_d0.Day_Trt.unique()
# panel widths are proportional to the number of sample types present in each group
ratios = [len(no_d0.loc[no_d0.Day_Trt==g].BS.unique()) for g in groups]
cdict = {'FR':'grey','E':'red','SAM':'green','FL':'blue','not_appl':'whitesmoke'}
alphas = {'faith_pd':'Faith\'s PD','shannon_entropy':'Shannon entropy'}

fig, axes = plt.subplots(len(alphas),len(groups),dpi=600,figsize=(2.5,2),sharey='row',sharex='col',\
    gridspec_kw={'hspace':0.02,'wspace':0.1,'width_ratios':ratios})
# flat list of [row, column] panel positions, consumed in order via the counter i
axs,i = [[n,m] for n in range(len(alphas)) for m in range(len(groups))],0

for alp in alphas:
  for g in groups:
    data = no_d0.loc[(no_d0.Day_Trt==g)].copy()
    ax, i = axes[axs[i][0]][axs[i][1]], i+1
    sns.boxplot(x='BS',y=alp,data=data,ax=ax,linewidth=0.25,fliersize=0.5,palette=cdict)
    ax.tick_params(axis='both', labelsize=3, length=1, pad=1, width=0.5, direction='inout')
    ax.tick_params(axis='x',labelsize=3,labelrotation = 90)
    ax.set_ylabel('')
    ax.set_xlabel('')
    # metric name only on the first column, group label only under the Shannon row
    if g == groups[0]: ax.set_ylabel(alphas[alp],fontsize=4,labelpad=1)
    if alp == 'shannon_entropy': ax.set_xlabel(g.replace('_',' '),fontsize=3,labelpad=1,rotation=45)
    ax.set_ylim(auto=True)
    ax.set_xlim(auto=True)    
    # Statistics
    # qs maps (Group1, Group2) -> adjusted p-value for pairs with q <= 0.05
    qs = {}
    kw,phoc = kw_dunn(data.copy(),'BS',alp,pairwise=True)
    ax.text(0.5,.95,'p '+str(round(kw[1],3)),size=2,transform=ax.transAxes,horizontalalignment='center')
    if kw[1] <= 0.05:
      for j,q in enumerate(phoc['q']):
        if q <= 0.05:
          qs.update({(phoc.iloc[j]['Group1'],phoc.iloc[j]['Group2']):q})
      # NOTE(review): Results/Alpha_comp is created by the Kruskal-Wallis summary cell — run that first
      phoc.to_csv('Results/Alpha_comp/Dunn_pairs_%s-%s_byTrt.tsv'%(g,alp),sep='\t')
      print(g,alp)
      display(color_df(phoc))
    # nothing to annotate on this panel
    if len(qs) == 0: continue
    x,y = 'BS',alp
    # draw significance brackets from the precomputed q-values (no test is run by statannot)
    add_stat_annotation(ax,x=x,y=y,box_pairs=list(qs.keys()),pvalues=list(qs.values()),line_height=0.01,\
                        data=data,fontsize=3,loc='inside',perform_stat_test=False,\
                        text_offset=-0.8,line_offset=0,line_offset_to_box=0.01,text_format='star',linewidth=0.3)
    ax.set_ylim(auto=True)
fig.align_labels()
plt.savefig('Results/Figures/alpha_diversity/Shannon-faithpd-byTrt.png', bbox_inches='tight')

In [None]:
from statannot import add_stat_annotation
# Same layout as the by-treatment figure above, restricted to day 'd13'.
sns.set_style("darkgrid",{"grid.linewidth":0.005})

!mkdir Results/Figures/alpha_diversity
alpha = pd.read_csv('Results/Core-metrics/alpha.tsv',sep='\t',index_col='#SampleID')
alpha = alpha.sort_values(['Day_num','Treatment','BS'],ascending=[True,True,True])
# NOTE: despite its name, no_d0 holds only the 'd13' samples in this cell
no_d0 = alpha.loc[alpha.Day == 'd13']
groups = no_d0.Day_Trt.unique()
# panel widths are proportional to the number of sample types present in each group
ratios = [len(no_d0.loc[no_d0.Day_Trt==g].BS.unique()) for g in groups]
cdict = {'FR':'grey','E':'red','SAM':'green','FL':'blue','not_appl':'whitesmoke'}
alphas = {'faith_pd':'Faith\'s PD','shannon_entropy':'Shannon entropy'}

fig, axes = plt.subplots(len(alphas),len(groups),dpi=600,figsize=(1.5,2),sharey='row',sharex='col',\
    gridspec_kw={'hspace':0.02,'wspace':0.1,'width_ratios':ratios})
# flat list of [row, column] panel positions, consumed in order via the counter i
axs,i = [[n,m] for n in range(len(alphas)) for m in range(len(groups))],0

for alp in alphas:
  for g in groups:
    data = no_d0.loc[(no_d0.Day_Trt==g)].copy()
    ax, i = axes[axs[i][0]][axs[i][1]], i+1
    sns.boxplot(x='BS',y=alp,data=data,ax=ax,linewidth=0.25,fliersize=0.5,palette=cdict)
    ax.tick_params(axis='both', labelsize=3, length=1, pad=1, width=0.5, direction='inout')
    ax.tick_params(axis='x',labelsize=3,labelrotation = 90)
    ax.set_ylabel('')
    ax.set_xlabel('')
    # metric name only on the first column, group label only under the Shannon row
    if g == groups[0]: ax.set_ylabel(alphas[alp],fontsize=4,labelpad=1)
    if alp == 'shannon_entropy': ax.set_xlabel(g.replace('_',' '),fontsize=3,labelpad=1)
    ax.set_ylim(auto=True)
    ax.set_xlim(auto=True)    
    # Statistics
    # qs maps (Group1, Group2) -> adjusted p-value for pairs with q <= 0.05
    qs = {}
    kw,phoc = kw_dunn(data.copy(),'BS',alp,pairwise=True)
    ax.text(0.5,.95,'p '+str(round(kw[1],3)),size=2,transform=ax.transAxes,horizontalalignment='center')
    if kw[1] <= 0.05:
      for j,q in enumerate(phoc['q']):
        if q <= 0.05:
          qs.update({(phoc.iloc[j]['Group1'],phoc.iloc[j]['Group2']):q})
      # NOTE(review): Results/Alpha_comp is created by the Kruskal-Wallis summary cell — run that first
      phoc.to_csv('Results/Alpha_comp/Dunn_pairs_%s-%s_byTrt_d13.tsv'%(g,alp),sep='\t')
      print(g,alp)
      display(color_df(phoc))
    # nothing to annotate on this panel
    if len(qs) == 0: continue
    x,y = 'BS',alp
    # draw significance brackets from the precomputed q-values (no test is run by statannot)
    add_stat_annotation(ax,x=x,y=y,box_pairs=list(qs.keys()),pvalues=list(qs.values()),line_height=0.01,\
                        data=data,fontsize=3,loc='inside',perform_stat_test=False,\
                        text_offset=-0.8,line_offset=0,line_offset_to_box=0.01,text_format='star',linewidth=0.3)
    ax.set_ylim(auto=True)
fig.align_labels()
plt.savefig('Results/Figures/alpha_diversity/Shannon-faithpd-byTrt_d13.png', bbox_inches='tight')

In [None]:
from statannot import add_stat_annotation
sns.set_style("darkgrid",{"grid.linewidth":0.01})
alpha = pd.read_csv('Results/Core-metrics/alpha.tsv',sep='\t',index_col='#SampleID')
diversity = {'faith_pd':'Faith\'s PD','shannon_entropy':'Shannon\'s entropy'}
fig, axes = plt.subplots(1,2,dpi=600,figsize=(1,0.8),gridspec_kw={'wspace':0.3})

for i,alp in enumerate(diversity):
  for d in ['d13']:
    data = alpha.loc[(alpha.Day==d)&(alpha.BS=='FR')].copy()
    data['hour'] = data['Day_hour'].str[-3:]
    data = data.sort_values('hour')
    ax = axes[i]
    sns.boxplot(x='hour',y=metric,data=data,ax=ax,linewidth=0.25,fliersize=0.5,color='whitesmoke')
    ax.tick_params(axis='both', labelsize=3, length=1, pad=1, width=0.5, direction='inout')
    ax.tick_params(axis='x',labelsize=3,labelrotation = 90)
    ax.set_ylabel('')
    ax.set_xlabel(diversity[alp],fontsize=3.5,labelpad=2)

    # Statistics
    div = metric
    kw,phoc1 = kw_dunn(data,'hour',alp,pairwise=True)
    if kw[1] < 0.05:
      #add stats annotations from Permanova analysis
      qs = {}
      phoc1['G1'] = phoc1['Group1'].str[-3:]
      phoc1['G2'] = phoc1['Group2'].str[-3:]
      for j,q in enumerate(phoc1['q']):
        print(q)
        if q < 0.05:
          qs.update({(phoc1.iloc[j]['G1'],phoc1.iloc[j]['G2']):q})
      if len(qs) > 0:
          x, y = 'hour',metric
          add_stat_annotation(ax,data=data,x=x,y=y,box_pairs=list(qs.keys()),pvalues=list(qs.values()),linewidth=0.3,\
                              line_height=0.01,fontsize=3,text_format='star',loc='inside',perform_stat_test=False,\
                              text_offset=0,line_offset=0,line_offset_to_box=0.01)

fig.align_labels()
!mkdir Results/Figures/alpha_diversity
plt.savefig('Results/Figures/alpha_diversity/Shannon-faithpd-FRhour.png', bbox_inches='tight')

# Beta diversity

In [19]:
!conda install -y -c bioconda git
!git clone https://github.com/Auerilas/ecopy.git ecopy_source
!pip install ecopy_source/
!rm -f ecopy_source

In [2]:
import numpy as np
import pandas as pd
import ecopy as ep

def beta_div(qza):
  """Read the distance matrix stored inside a QIIME 2 ``.qza`` archive.

  The archive is a zip file holding ``<root>/data/distance-matrix.tsv``; the
  member is read directly from the archive, so nothing is extracted into the
  working directory and no shell output has to be parsed.

  Parameters
  ----------
  qza : str or path-like
    Path to the ``.qza`` artifact.

  Returns
  -------
  pandas.DataFrame
    Tab-separated table indexed by its first column.

  Raises
  ------
  FileNotFoundError
    If the archive has no ``<root>/data/distance-matrix.tsv`` member.
  """
  import zipfile

  target = 'data/distance-matrix.tsv'
  with zipfile.ZipFile(qza) as archive:
    # member names look like '<root>/data/distance-matrix.tsv'
    members = [m for m in archive.namelist() if m.split('/', 1)[-1] == target]
    if not members:
      raise FileNotFoundError('%s not found in %s' % (target, qza))
    with archive.open(members[0]) as handle:
      data = pd.read_csv(handle, sep='\t', index_col=0)
  return data

def ecopy_nmds(dist,col):
  sns.set_style("ticks")
  !mkdir Results/Biplots/Filtered
  MDS = ep.MDS(dist,naxes=2,transform='monotone')
  stress=round(MDS.stress,3)
  scores_df=pd.DataFrame(MDS.scores,index=dist.columns)
  scores_df.columns = ['NMDS1','NMDS2']
  met = pd.read_csv('metadata.tsv', sep='\t', index_col='#SampleID')
  scores_df['BS']=[met.loc[n,'BS'] for n in dist.index]
  scores_df['Day']=[met.loc[n,'Day_num'] for n in dist.index]
  scores_df['Trt']=[met.loc[n,'Treatment'] for n in dist.index]
  scores_df['HP']=[met.loc[n,'Source'] for n in dist.index]
  mdict = {'E':'^', 'FL':'o', 'FR':'X', 'RF':'d', 'RSP':'s', 'SAM':'*'}
  hue,s = 'Trt','Day'
  cdict = {'Trt1':'grey','Trt2':'yellow','Trt3':'red','Trt4':'orange',\
           'Trt5':'green','Trt6':'blue','not_appl':'brown'}
  if 'Src' in col:
    hue = 'HP'
    cdict = {'HP1':'red','HP2':'blue','not_appl':'white'}   
  elif 'Dh' in col or 'Day_hour' in col: 
    s = 'Hour'
    scores_df['Hour']=[int(met.loc[n,'Day_hour'][-2:]) for n in dist.index]
  fig,ax=plt.subplots(figsize=(2.5,2.5),dpi=300)
  sns.scatterplot(x='NMDS1',y='NMDS2',data=scores_df,ax=ax,size=s,style='BS',\
                  markers=mdict,hue=hue,palette=cdict)
  ax.legend(bbox_to_anchor=(1,1),loc=2,fontsize=6,title=None,frameon=False,markerscale=0.7,\
           borderpad=0,handletextpad=0.1)
  ax.tick_params(axis='both', length=0, pad=0)       
  ax.set_xticklabels([])
  ax.set_yticklabels([])
  ax.set_ylabel('NMDS2',fontdict={'fontsize': 6})
  ax.set_xlabel('NMDS1',fontdict={'fontsize': 6})
  ax.text(0.01,0.01,'Stress '+str(stress),size=4,transform=ax.transAxes)
  
def to_matrix(df1,df2):
  """Subset a square distance table to the samples listed in ``df2``.

  Parameters
  ----------
  df1 : pandas.DataFrame
    Square distance table with sample IDs as both index and columns.
  df2 : pandas.DataFrame
    Table whose index lists the samples to keep (e.g. filtered metadata).

  Returns
  -------
  matrix : DistanceMatrix
    Distance matrix over the retained samples, sorted by ID.
  df1 : pandas.DataFrame
    The same subset as a data frame (rows and columns in the same sorted order).

  IDs of ``df2`` that are absent from ``df1`` are ignored instead of raising a
  ``KeyError``, and the subset is built with a single ``.loc`` selection rather
  than an in-place sort on a slice.
  """
  wanted = set(df2.index)
  ids = sorted(i for i in df1.index if i in wanted and i in df1.columns)
  df1 = df1.loc[ids,ids]
  matrix = DistanceMatrix(df1,ids=df1.index)
  return matrix, df1

def permanova_pairwise(df,col,met_d,pairwise):
  """Overall PERMANOVA on ``col`` with optional pairwise follow-up tests.

  Parameters
  ----------
  df : pandas.DataFrame
    Square distance table (sample IDs as index and columns).
  col : str
    Metadata column that defines the groups.
  met_d : pandas.DataFrame
    Metadata for the samples to include; its index selects the samples.
  pairwise : bool
    When true and the overall p-value is below 0.05, run a PERMANOVA for
    every pair of groups and adjust the p-values (Benjamini-Hochberg).

  Returns
  -------
  perm : pandas.Series
    Result of the overall test (999 permutations).
  perm_pair : pandas.DataFrame
    Columns ``Group1, Group2, p`` plus ``q`` when pairwise tests were run;
    empty otherwise.
  df1 : pandas.DataFrame
    The distance table subset that was tested.
  """
  matrix, df1 = to_matrix(df.copy(),met_d)
  perm = permanova(matrix,met_d,column=col,permutations=999)
  perm_pair = pd.DataFrame(columns=['Group1','Group2','p'])
  # Access the p-value by label: positional perm[5] on a label-indexed Series is deprecated.
  if perm['p-value'] < 0.05 and pairwise:
    # order of first appearance keeps the pair order reproducible between runs
    levels = met_d[col].dropna().unique().tolist()
    for g1,g2 in combinations(levels,2):
      metapairs = met_d.loc[met_d[col].isin([g1,g2])].copy()
      matrix2 = to_matrix(df.copy(),metapairs)[0]
      pp = permanova(matrix2,metapairs,column=col,permutations=999)['p-value']
      perm_pair.loc[len(perm_pair)] = [g1,g2,pp]
    perm_pair['q'] = multipletests(perm_pair.p,method='fdr_bh')[1]
  return perm, perm_pair, df1

def mk_biplot(distances,group,div):
  distances.to_csv('Results/Biplots/Filtered/%s_%s_distances.tsv'%(div,group),sep='\t')
  dist = 'Results/Biplots/Filtered/%s_%s_distances.tsv'%(div,group)
  matr = 'Results/Biplots/Filtered/%s_%s_distances.qza'%(div,group)
  pcoa = 'Results/Biplots/Filtered/%s_%s_pcoa.qza'     %(div,group)
  relt = 'Data/Relative_tables/%s_%s_rel_table.qza'    %(div,group)
  bipl = 'Results/Biplots/Filtered/%s_%s_biplot.qza'   %(div,group)
  bipv = 'Results/Biplots/Filtered/%s_%s_biplot.qzv'   %(div,group)
          
  !qiime tools import \
    --input-path $dist \
    --output-path $matr \
    --type DistanceMatrix
  !qiime diversity pcoa \
    --i-distance-matrix $matr \
    --o-pcoa $pcoa
          
  distances.index.name = '#SampleID'
  distances.to_csv('Results/Biplots/Filtered/%s_%s_distances.tsv'%(div,group),sep='\t')
          
  !qiime feature-table filter-samples \
    --i-table Data/Relative_tables/full-relative_table.qza \
    --m-metadata-file $dist \
    --o-filtered-table $relt
  !qiime diversity pcoa-biplot \
    --i-pcoa $pcoa \
    --i-features $relt \
    --o-biplot $bipl
  !qiime emperor biplot \
    --i-biplot $bipl \
    --m-sample-metadata-file metadata.tsv \
    --p-number-of-features 5 \
    --o-visualization $bipv
  
def color_df(df):
  dfcol = df.style.applymap(lambda x: "background-color: red" if x<=0.05 \
  else "background-color: grey",subset=pd.IndexSlice[:, [c for c in df.columns if c in 'pq']])
  return dfcol

def tovector(df,col,meta):
  """Convert a square distance table to long format with group labels.

  Parameters
  ----------
  df : pandas.DataFrame
    Square distance table with unnamed index and columns (sample IDs).
  col : str
    Column of ``meta`` holding the group of each sample.
  meta : pandas.DataFrame
    Metadata indexed by sample ID; the index must be unique.

  Returns
  -------
  pandas.DataFrame
    One row per (Sample1, Sample2) pair with columns
    ``Sample1, Sample2, Distance, Group1, Group2``. Samples missing from
    ``meta`` get NaN as their group.
  """
  pairs = df.stack().reset_index().rename(columns={'level_0':'Sample1','level_1':'Sample2', 0:'Distance'}).copy()
  # one vectorised lookup per column instead of a masked assignment per sample
  lookup = meta[col]
  pairs['Group1'] = pairs['Sample1'].map(lookup)
  pairs['Group2'] = pairs['Sample2'].map(lookup)
  return(pairs)

meta = pd.read_csv('Results/Core-metrics/alpha.tsv', sep='\t', index_col='#SampleID')
diversity = {'jaccard':'Jaccard','bray_curtis':'Bray-Curtis'}
alpha = pd.read_csv('Results/Core-metrics/alpha.tsv',sep='\t',index_col='#SampleID')
!mkdir Results/Biplots/Filtered

### Permanova overall test

In [None]:
cols = ['BS','Day_hour','Treatment','Source','rstc_run']

!mkdir Results/Beta_comp
meta = meta.sort_values(['Day_num','BS'],ascending=[True,True])
groups = meta.Day.unique().tolist()+meta.BS_Day.unique().tolist()
summary,i = pd.DataFrame(),0
def summarize():
  summary.loc[i,'Metric'] = div
  summary.loc[i,'Group'] = g
  summary.loc[i,'Column'] = col
  summary.loc[i,'Perm.'] = str(perm[6])
  summary.loc[i,'PERMstats'] = round(perm[4],5)
  summary.loc[i,'p'] = round(perm[5],5)
  if perm[5] > 0.05:
    summary.loc[i,'Comment'] = 'no_pairwise'
  if perm[5] <= 0.1:
    summary.loc[i,'Comment'] = 'trend'
  if perm[5] <= 0.05:
    summary.loc[i,'Comment'] = 'pairwise'
  if perm[5] <= 0.05 and len(met_d[col].unique()) == 2:
    summary.loc[i,'Comment'] = 'p = %s'%round(perm[5],5)
  
for div in diversity:
  data = beta_div('Results/Core-metrics/%s_distance_matrix.qza'%div)
  col,g,met_d = 'Day','All',meta.copy()
  perm,perm_pair,distances = permanova_pairwise(data.copy(),col,meta,pairwise=False)
  summarize()
  i += 1
  for g in groups:
    for col in cols:
      met_d = meta.loc[((meta.BS_Day==g)&(~meta[col].str.contains('not_appl')))].copy()
      if col == 'BS' and g in alpha.Day.unique():
        met_d = meta.loc[meta.Day==g].copy()
      if len(met_d[col].unique()) < 2: continue
      perm,perm_pair,distances = permanova_pairwise(data.copy(),col,met_d,pairwise=False)
      summarize()
      i += 1
summary.to_csv('Results/Beta_comp/Summary_Permanova_all.tsv',sep='\t')
display(color_df(summary))

### NMDS by sample-types

In [None]:
for div in diversity:
  print(div)
  data = beta_div('Results/Core-metrics/%s_distance_matrix.qza'%div)
  met_d = meta.copy()
  perm,perm_pair,distances = permanova_pairwise(data.copy(),'BS_Day',met_d,pairwise=True)
  display(perm)
  display(color_df(perm_pair))
  ecopy_nmds(distances,'BS')
  plt.savefig('Results/Figures/NMDS/All_%s_%s.png'%(div,'BS'), bbox_inches='tight') 
  plt.show()

In [None]:
cols = ['Day_hour','BS_Trt']
days = ['d0','d7','d13']
!mkdir Results/Figures/NMDS Results/Biplots/Filtered 

for div in diversity:
  data = beta_div('Results/Core-metrics/%s_distance_matrix.qza'%div)
  for d in days:
    for col in cols:
      met_d = meta.loc[((meta.Day==d)&(~meta[col].str.contains('not_appl')))].copy()
      if len(met_d[col].unique()) < 2: continue
      perm,perm_pair,distances = permanova_pairwise(data.copy(),col,met_d,pairwise=False)
      if col == 'BS_Trt':
        mk_biplot(distances,d,div,col)
        ecopy_nmds(distances,col)
        plt.savefig('Results/Figures/NMDS/%s_%s_%s.png'%(d,div,col), bbox_inches='tight') 
        plt.show()
      if d == 'd13' and col=='Day_hour':
        mk_biplot(distances,d,div,col)
        ecopy_nmds(distances,col)
        plt.savefig('Results/Figures/NMDS/%s_%s_%s.png'%(d,div,col), bbox_inches='tight') 
        plt.show()

### Pairwise differences between sample-types by days

In [None]:
# Overall and pairwise PERMANOVA between sample types (BS) within each day.
meta = meta.sort_values(['Day_num','BS'],ascending=[True,True])
groups = meta.Day.unique().tolist()

for bet in diversity:
  beta = beta_div('Results/Core-metrics/%s_distance_matrix.qza'%bet)
  for g in groups:
    data = alpha.loc[(alpha.Day==g)].copy()
    perm,perm_pair,distances = permanova_pairwise(beta,'BS',data,pairwise=True)
    perm.to_csv('Results/Beta_comp/Perm_%s-%s_BSbyDays.tsv'%(g,bet),sep='\t')
    perm_pair.to_csv('Results/Beta_comp/Perm_pairs_%s-%s_BSbyDays.tsv'%(g,bet),sep='\t')
    print(g,bet,'******************************************')
    display(perm)
    # read the p-value by label; positional perm[5] on a label-indexed Series is deprecated
    if perm['p-value'] <= 0.05:
      display(color_df(perm_pair))

### Pairwise treatments within sample types

In [None]:
# Overall and pairwise PERMANOVA between treatments within each sample-type/day group.
meta = meta.sort_values(['Day_num','BS'],ascending=[True,True])
groups = meta.BS_Day.unique().tolist()

for bet in diversity:
  beta = beta_div('Results/Core-metrics/%s_distance_matrix.qza'%bet)
  for g in groups:
    # The first two groups are skipped.
    # NOTE(review): presumably they have no treatment levels to compare — confirm.
    if g in groups[:2]:
      continue
    data = alpha.loc[(alpha.BS_Day==g)].copy()
    perm,perm_pair,distances = permanova_pairwise(beta,'Treatment',data,pairwise=True)
    perm.to_csv('Results/Beta_comp/Perm_%s-%s_byBs.tsv'%(g,bet),sep='\t')
    perm_pair.to_csv('Results/Beta_comp/Perm_pairs_%s-%s_byBs.tsv'%(g,bet),sep='\t')
    print(g,bet,'******************************************')
    display(perm)
    # read the p-value by label; positional perm[5] on a label-indexed Series is deprecated
    if perm['p-value'] <= 0.05:
      display(color_df(perm_pair))

In [None]:
groups = meta.BS_Day.unique().tolist()

for bet in diversity:
  beta = beta_div('Results/Core-metrics/%s_distance_matrix.qza'%bet)
  for g in groups:
    data = meta.loc[(meta.BS_Day==g)].copy()
    if 'd0' in  g: continue
    perm,perm_pair,distances = permanova_pairwise(beta,'Treatment',data,pairwise=True)
    mk_biplot(distances,g,bet)
    
    dist = 'Results/Biplots/Filtered/%s_%s_distances.qza'%(bet,g)
    vizn = 'Results/Beta_comp/%s_%s_Trt_permanova.qzv'%(bet,g)
    !qiime diversity beta-group-significance \
      --i-distance-matrix $dist \
      --m-metadata-file metadata.tsv \
      --m-metadata-column Treatment \
      --p-method 'permanova' \
      --p-pairwise \
      --o-visualization $vizn
    
    if g == 'FR_d13':
      vizn = 'Results/Beta_comp/%s_%s_Hours_permanova.qzv'%(bet,g)
      !qiime diversity beta-group-significance \
        --i-distance-matrix $dist \
        --m-metadata-file metadata.tsv \
        --m-metadata-column Day_hour \
        --p-method 'permanova' \
        --p-pairwise \
        --o-visualization $vizn

### Sample types within treatments

In [None]:
# Overall and pairwise PERMANOVA between sample types (BS) within each day/treatment group.
meta = meta.sort_values(['Day_num','BS'],ascending=[True,True])
groups = meta.Day_Trt.unique().tolist()

for bet in diversity:
  beta = beta_div('Results/Core-metrics/%s_distance_matrix.qza'%bet)
  for g in groups:
    # The first two groups are skipped.
    # NOTE(review): the matching alpha-diversity figure excludes day 'd0' by name — confirm this selects the same groups.
    if g in groups[:2]:
      continue
    data = alpha.loc[(alpha.Day_Trt==g)].copy()
    perm,perm_pair,distances = permanova_pairwise(beta,'BS',data,pairwise=True)
    perm.to_csv('Results/Beta_comp/Perm_%s-%s_byTrt.tsv'%(g,bet),sep='\t')
    perm_pair.to_csv('Results/Beta_comp/Perm_pairs_%s-%s_byTrt.tsv'%(g,bet),sep='\t')
    print(g,bet,'******************************************')
    display(perm)
    # read the p-value by label; positional perm[5] on a label-indexed Series is deprecated
    if perm['p-value'] <= 0.05:
      display(color_df(perm_pair))