In [1]:
import os
import pandas as pd
import qiime2 as q2
from qiime2 import Visualization

%matplotlib inline

In [2]:
data = 'Data'

In [3]:
data_classified = 'Data/classified'

In [4]:
data_raw = 'Data/raw'

In [5]:
data_alpha = 'Data/diversity'

### 1) Alpha rarefaction: choose the sampling depth ###

In [23]:
! qiime diversity alpha-rarefaction \
    --i-table $data_classified/table-filtered.qza \
    --p-max-depth 10000 \
    --p-iterations 10 \
    --m-metadata-file $data_raw/20250913_metadata_ITS.tsv \
    --o-visualization $data_alpha/alpha-rarefaction.qzv

  import pkg_resources
[32mSaved Visualization to: Data/diversity/alpha-rarefaction.qzv[0m
[0m[?25h

In [9]:
Visualization.load(f"{data_alpha}/alpha-rarefaction.qzv")

In [30]:
Visualization.load(f"{data_classified}/table-filtered.qzv")

### 2) we chose a sampling depth of 3000 because it can retain 87% of the samples


### 3) Core metrics creation with sampling depth 3000 ###

In [19]:
! qiime diversity core-metrics \
  --i-table $data_classified/table-filtered.qza \
  --m-metadata-file $data_raw/20250913_metadata_ITS.tsv \
  --p-sampling-depth 3000 \
  --output-dir $data_alpha/core-metrics-results

  import pkg_resources
[32mSaved FeatureTable[Frequency] to: Data/diversity/core-metrics-results/rarefied_table.qza[0m
[32mSaved SampleData[AlphaDiversity] to: Data/diversity/core-metrics-results/observed_features_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: Data/diversity/core-metrics-results/shannon_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: Data/diversity/core-metrics-results/evenness_vector.qza[0m
[32mSaved DistanceMatrix to: Data/diversity/core-metrics-results/jaccard_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: Data/diversity/core-metrics-results/bray_curtis_distance_matrix.qza[0m
[32mSaved PCoAResults to: Data/diversity/core-metrics-results/jaccard_pcoa_results.qza[0m
[32mSaved PCoAResults to: Data/diversity/core-metrics-results/bray_curtis_pcoa_results.qza[0m
[32mSaved Visualization to: Data/diversity/core-metrics-results/jaccard_emperor.qzv[0m
[32mSaved Visualization to: Data/diversity/core-metrics-results/bray_curtis_emperor.qzv

### 4) Test data for normal distribution (parametric testing) ###

In [7]:
#hier stehen geblieben!! Testen, ob normally distributed!
from scipy.stats import shapiro

shannon_path = f"{data_alpha}/core-metrics-results/shannon_vector.qza"
shannon = q2.Artifact.load(shannon_path).view(pd.Series)

stat, p = shapiro(shannon)
print('Statistics=%.3f, p=%.3f' % (stat, p))
if p > 0.05:
    print("Daten wahrscheinlich normalverteilt")
else:
    print("Daten wahrscheinlich nicht normalverteilt")

  import pkg_resources


Statistics=0.934, p=0.000
Daten wahrscheinlich nicht normalverteilt


  df[cols] = df[cols].apply(pd.to_numeric, errors='ignore')


### 5) Kruskal-Wallis: Non-parametric testing ###

### 5.1) Kruskal-Wallis for the metadata ITS: ###

In [20]:
! qiime diversity alpha-group-significance \
  --i-alpha-diversity $data_alpha/core-metrics-results/shannon_vector.qza \
  --m-metadata-file $data_raw/20250913_metadata_ITS.tsv \
  --o-visualization $data_alpha/core-metrics-results/shannon-group-significance.qzv

  import pkg_resources
[32mSaved Visualization to: Data/diversity/core-metrics-results/shannon-group-significance.qzv[0m
[0m[?25h

In [10]:
Visualization.load(f"{data_alpha}/core-metrics-results/shannon-group-significance.qzv")

### 5.2) Kruskal-Wallis for the metadata personal environment: ###

**But first, merge the two metadata files**

In [11]:
# ITS-Metadaten (Hauptquelle für IDs)
its = pd.read_csv("Data/raw/20250913_metadata_ITS.tsv", sep="\t")

# Sensory-Metadaten
sensory = pd.read_csv("Data/raw/20250914_metadata_personal_environmental_sensory_details.tsv", sep="\t")

# Merge: alle ITS-Samples behalten, Sensory optional
merged = its.merge(
    sensory,
    left_on='person-id',
    right_on='person-id',
    how='left'
)
# Speichern
merged.to_csv("Data/raw/merged_output.tsv", sep="\t", index=False)


left_on='person-id' → Wir nehmen die IDs aus der ITS-Tabelle.
right_on='sample-id' → Wir vergleichen sie mit den IDs aus der Sensory-Tabelle.
how='left' → Wir behalten alle ITS-Samples, auch wenn es keinen passenden Eintrag in Sensory gibt.
Ergebnis: Eine neue Tabelle (merged), die alle ITS-Samples enthält, und Sensory-Daten dort hinzufügt, wo sie verfügbar sind.

Wichtiger Punkt:

Wenn ein Sample keine Sensory-Daten hat, sind die entsprechenden Spalten NaN (leer).

In [68]:
! qiime diversity alpha-group-significance \
  --i-alpha-diversity $data_alpha/core-metrics-results/shannon_vector.qza \
  --m-metadata-file $data_raw/merged_output.tsv \
  --o-visualization $data_alpha/core-metrics-results/shannon-group-significance-environment.qzv

  import pkg_resources
[32mSaved Visualization to: Data/diversity/core-metrics-results/shannon-group-significance-environment.qzv[0m
[0m[?25h

In [6]:
Visualization.load(f"{data_alpha}/core-metrics-results/shannon-group-significance-environment.qzv")

### 6) Alpha Correlation ###

### 6.1) Alpha Correlation for the metadata ITS ###

In [None]:
! qiime diversity alpha-correlation \
  --i-alpha-diversity $data_alpha/core-metrics-results/shannon_vector.qza \
  --m-metadata-file $data_raw/20250913_metadata_ITS.tsv \
  --o-visualization $data_alpha/core-metrics-results/shannon-group-significance-numeric.qzv

In [6]:
Visualization.load(f"{data_alpha}/core-metrics-results/shannon-group-significance-numeric.qzv")

### ???????
2)nochmals eine neue merged version machen, die gefiltert ist und nur numerische drin lassen un dann nochmals probieren.***
1) schauen, ob der shannon vector auch 550 zeilen hat?
oder alles direkt in python das machen oder ***


### 6.1) Alpha Correlation for the metadata personal environment: ###

In [7]:
! qiime diversity alpha-correlation \
  --i-alpha-diversity $data_alpha/core-metrics-results/shannon_vector.qza \
  --m-metadata-file $data_raw/merged_output.tsv \
  --o-visualization $data_alpha/core-metrics-results/shannon-group-significance-numeric-environment.qzv

  import pkg_resources
[31m[1mPlugin error from diversity:

  [Errno 2] No such file or directory: '/tmp/qiime2-temp-v03wym7k/column-SILAGE/FERMENTED%20GRASS_D28.jsonp'

Debug info has been saved to /tmp/qiime2-q2cli-err-7c272kp1.log[0m
[0m[?25h

In [8]:
! qiime diversity alpha-correlation --help


Usage: [94mqiime diversity alpha-correlation[0m [OPTIONS]

  Determine whether numeric sample metadata columns are correlated with alpha
  diversity.

[1mInputs[0m:
  [94m[4m--i-alpha-diversity[0m ARTIFACT [32mSampleData[AlphaDiversity][0m
                         Vector of alpha diversity values by sample.
                                                                    [35m[required][0m
[1mParameters[0m:
  [94m[4m--m-metadata-file[0m METADATA...
    (multiple            The sample metadata.
     arguments will be   
     merged)                                                        [35m[required][0m
  [94m--p-method[0m TEXT [32mChoices('spearman', 'pearson')[0m
                         The correlation test to be applied.
                                                         [35m[default: 'spearman'][0m
  [94m--p-intersect-ids[0m / [94m--p-no-intersect-ids[0m
                         If supplied, IDs that are not found in both the
                   

In [73]:
Visualization.load(f"{data_alpha}/core-metrics-results/shannon-group-significance-numeric-environment.qzv")

ValueError: Data/diversity/core-metrics-results/shannon-group-significance-numeric-environment.qzv does not exist.

***to do: look at csv between pariwise groups of Kruskal-Wallis in pandas***