In [1]:
import os

import pandas as pd
import qiime2 as q2
from qiime2 import Visualization
from scipy.stats import shapiro

%matplotlib inline

In [2]:
# Root data directory (relative path); the other data_* paths in this notebook live beneath it.
data = 'Data'

In [3]:
# Directory holding the filtered feature table (table-filtered.qza / .qzv).
# Derived from the `data` root so the base folder is configured in one place only.
data_classified = f'{data}/classified'

In [4]:
# Directory holding the raw inputs, e.g. the sample metadata TSV.
# Derived from the `data` root so the base folder is configured in one place only.
data_raw = f'{data}/raw'

In [5]:
# Output directory for the diversity analyses (rarefaction, core metrics, significance tests).
# Derived from the `data` root so the base folder is configured in one place only.
data_alpha = f'{data}/diversity'

In [23]:
! qiime diversity alpha-rarefaction \
    --i-table $data_classified/table-filtered.qza \
    --p-max-depth 10000 \
    --p-iterations 10 \
    --m-metadata-file $data_raw/20250913_metadata_ITS.tsv \
    --o-visualization $data_alpha/alpha-rarefaction.qzv

  import pkg_resources
[32mSaved Visualization to: Data/diversity/alpha-rarefaction.qzv[0m
[0m[?25h

In [6]:
# Load the rarefaction visualization written by the alpha-rarefaction command;
# as the last expression of the cell it becomes the cell's displayed result.
Visualization.load(f"{data_alpha}/alpha-rarefaction.qzv")

In [30]:
# Load the visualization of the filtered feature table. It is not created in
# the cells visible here - presumably a feature-table summary generated in an
# earlier step and used to judge sample retention per depth (TODO confirm).
Visualization.load(f"{data_classified}/table-filtered.qzv")

### We chose a sampling depth of 3000 because it retains 87% of the samples.


***Core metrics creation***

In [19]:
! qiime diversity core-metrics \
  --i-table $data_classified/table-filtered.qza \
  --m-metadata-file $data_raw/20250913_metadata_ITS.tsv \
  --p-sampling-depth 3000 \
  --output-dir $data_alpha/core-metrics-results

  import pkg_resources
[32mSaved FeatureTable[Frequency] to: Data/diversity/core-metrics-results/rarefied_table.qza[0m
[32mSaved SampleData[AlphaDiversity] to: Data/diversity/core-metrics-results/observed_features_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: Data/diversity/core-metrics-results/shannon_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: Data/diversity/core-metrics-results/evenness_vector.qza[0m
[32mSaved DistanceMatrix to: Data/diversity/core-metrics-results/jaccard_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: Data/diversity/core-metrics-results/bray_curtis_distance_matrix.qza[0m
[32mSaved PCoAResults to: Data/diversity/core-metrics-results/jaccard_pcoa_results.qza[0m
[32mSaved PCoAResults to: Data/diversity/core-metrics-results/bray_curtis_pcoa_results.qza[0m
[32mSaved Visualization to: Data/diversity/core-metrics-results/jaccard_emperor.qzv[0m
[32mSaved Visualization to: Data/diversity/core-metrics-results/bray_curtis_emperor.qzv

***Checking the assumptions for parametric tests: 1) Are the data normally distributed?***

In [7]:
# Shapiro-Wilk test: is the Shannon diversity approximately normally distributed?
# (`shapiro` is imported in the notebook's import cell.)
shannon_path = f"{data_alpha}/core-metrics-results/shannon_vector.qza"
shannon = q2.Artifact.load(shannon_path).view(pd.Series)

# Drop missing values before testing: shapiro() does not ignore NaN by default,
# so a single missing value would invalidate the statistic and the verdict below.
stat, p = shapiro(shannon.dropna())
print('Statistics=%.3f, p=%.3f' % (stat, p))
if p > 0.05:  # conventional 5 % significance level
    print("Daten wahrscheinlich normalverteilt")
else:
    print("Daten wahrscheinlich nicht normalverteilt")

  import pkg_resources


Statistics=0.934, p=0.000
Daten wahrscheinlich nicht normalverteilt


  df[cols] = df[cols].apply(pd.to_numeric, errors='ignore')


***Kruskal-Wallis: non-parametric testing, because the data are not normally distributed***

In [20]:
! qiime diversity alpha-group-significance \
  --i-alpha-diversity $data_alpha/core-metrics-results/shannon_vector.qza \
  --m-metadata-file $data_raw/20250913_metadata_ITS.tsv \
  --o-visualization $data_alpha/core-metrics-results/shannon-group-significance.qzv

  import pkg_resources
[32mSaved Visualization to: Data/diversity/core-metrics-results/shannon-group-significance.qzv[0m
[0m[?25h

In [8]:
# Load the group-significance visualization written by the
# alpha-group-significance command so it is shown as the cell's result.
Visualization.load(f"{data_alpha}/core-metrics-results/shannon-group-significance.qzv")

In [None]:
! qiime diversity alpha-correlation \
  --i-alpha-diversity $data_alpha/core-metrics-results/shannon_vector.qza \
  --m-metadata-file $data_raw/20250913_metadata_ITS.tsv \
  --o-visualization $data_alpha/core-metrics-results/shannon-group-significance-numeric.qzv

In [9]:
# Load the visualization written by the alpha-correlation command
# (stored under the "shannon-group-significance-numeric" file name).
Visualization.load(f"{data_alpha}/core-metrics-results/shannon-group-significance-numeric.qzv")