In [13]:
import os
import pandas as pd
import qiime2 as q2
from qiime2 import Visualization

%matplotlib inline

In [14]:
# Top-level data folder; the other path variables below point at sub-folders of it.
data = "Data"

In [15]:
# Folder the filtered feature table (table-filtered.qza / .qzv) is read from.
data_classified = "Data/classified"

In [16]:
# Folder holding the metadata TSV files used throughout this notebook.
data_raw = "Data/raw"

In [55]:
# Output folder for all diversity results (rarefaction, core metrics, significance tests).
data_alpha = "Data/diversity"

### 1) Alpha rarefaction: choose the sampling depth ###

In [23]:
# Alpha-rarefaction on the filtered table up to a depth of 10000 (--p-iterations 10);
# the resulting visualization is used to pick the sampling depth.
!qiime diversity alpha-rarefaction \
    --i-table {data_classified}/table-filtered.qza \
    --m-metadata-file {data_raw}/20250913_metadata_ITS.tsv \
    --p-max-depth 10000 \
    --p-iterations 10 \
    --o-visualization {data_alpha}/alpha-rarefaction.qzv

  import pkg_resources
[32mSaved Visualization to: Data/diversity/alpha-rarefaction.qzv[0m
[0m[?25h

In [9]:
# Load the alpha-rarefaction visualization; as the cell's last expression it is shown as the cell output.
Visualization.load(data_alpha + "/alpha-rarefaction.qzv")

In [30]:
# Load table-filtered.qzv from the classified-data folder for inspection next to the rarefaction curves.
Visualization.load(data_classified + "/table-filtered.qzv")

### 2) We chose a sampling depth of 3000 because it retains 87% of the samples


### 3) Core metrics creation with sampling depth 3000 ###

In [19]:
# Core diversity metrics on the filtered table at the chosen sampling depth of 3000.
# All artifacts and visualizations are written into core-metrics-results.
# NOTE(review): QIIME 2 presumably refuses to write into an already existing --output-dir,
# so the folder may need to be removed before re-running this cell — confirm.
!qiime diversity core-metrics \
    --i-table {data_classified}/table-filtered.qza \
    --p-sampling-depth 3000 \
    --m-metadata-file {data_raw}/20250913_metadata_ITS.tsv \
    --output-dir {data_alpha}/core-metrics-results

  import pkg_resources
[32mSaved FeatureTable[Frequency] to: Data/diversity/core-metrics-results/rarefied_table.qza[0m
[32mSaved SampleData[AlphaDiversity] to: Data/diversity/core-metrics-results/observed_features_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: Data/diversity/core-metrics-results/shannon_vector.qza[0m
[32mSaved SampleData[AlphaDiversity] to: Data/diversity/core-metrics-results/evenness_vector.qza[0m
[32mSaved DistanceMatrix to: Data/diversity/core-metrics-results/jaccard_distance_matrix.qza[0m
[32mSaved DistanceMatrix to: Data/diversity/core-metrics-results/bray_curtis_distance_matrix.qza[0m
[32mSaved PCoAResults to: Data/diversity/core-metrics-results/jaccard_pcoa_results.qza[0m
[32mSaved PCoAResults to: Data/diversity/core-metrics-results/bray_curtis_pcoa_results.qza[0m
[32mSaved Visualization to: Data/diversity/core-metrics-results/jaccard_emperor.qzv[0m
[32mSaved Visualization to: Data/diversity/core-metrics-results/bray_curtis_emperor.qzv

### 4) Test the data for normality (prerequisite for parametric testing) ###

In [7]:
# Shapiro-Wilk normality test on the Shannon diversity values produced by core-metrics.
# The outcome decides between parametric and non-parametric group tests further down.
from scipy.stats import shapiro

# Significance level below which normality is rejected.
NORMALITY_ALPHA = 0.05

shannon_path = f"{data_alpha}/core-metrics-results/shannon_vector.qza"
# Drop missing values before testing: a NaN in the input would propagate into the
# test result, and the comparison below would then report "not normally distributed"
# without the test ever having been meaningful.
shannon = q2.Artifact.load(shannon_path).view(pd.Series).dropna()

stat, p = shapiro(shannon)
# %.3g keeps very small p-values readable (e.g. 1.2e-07) instead of rounding them to 0.000.
print('Statistics=%.3f, p=%.3g' % (stat, p))
if p > NORMALITY_ALPHA:
    print("Daten wahrscheinlich normalverteilt")
else:
    print("Daten wahrscheinlich nicht normalverteilt")

  import pkg_resources


Statistics=0.934, p=0.000
Daten wahrscheinlich nicht normalverteilt


  df[cols] = df[cols].apply(pd.to_numeric, errors='ignore')


### 5) Kruskal-Wallis: Non-parametric testing ###

### 5.1) Kruskal-Wallis for the metadata ITS: ###

In [20]:
# Group-significance test of the Shannon diversity vector against the ITS metadata.
!qiime diversity alpha-group-significance \
    --i-alpha-diversity {data_alpha}/core-metrics-results/shannon_vector.qza \
    --m-metadata-file {data_raw}/20250913_metadata_ITS.tsv \
    --o-visualization {data_alpha}/core-metrics-results/shannon-group-significance.qzv

  import pkg_resources
[32mSaved Visualization to: Data/diversity/core-metrics-results/shannon-group-significance.qzv[0m
[0m[?25h

In [10]:
# Load the group-significance result for the ITS metadata so it is shown as the cell output.
Visualization.load(data_alpha + "/core-metrics-results/shannon-group-significance.qzv")

### 5.2) Kruskal-Wallis for the metadata personal environment: ###

**But first, merge the two metadata files**

In [67]:
# Build one QIIME-readable metadata file from the ITS metadata and the
# personal/environmental/sensory metadata.

# ITS metadata: primary source of the sample IDs.
its = pd.read_csv(f"{data_raw}/20250913_metadata_ITS.tsv", sep="\t")

# Sensory metadata, joined via its 'sample-id' column.
sensory = pd.read_csv(
    f"{data_raw}/20250914_metadata_personal_environmental_sensory_details.tsv",
    sep="\t",
)

# Left join: keep every ITS sample; sensory columns stay NaN where no match exists.
# validate='many_to_one' makes pandas raise if a join key occurs more than once in the
# sensory table, which would otherwise silently duplicate ITS rows (and sample IDs).
merged = its.merge(
    sensory,
    left_on='person-id',
    right_on='sample-id',
    how='left',
    validate='many_to_one',
)

# Move 'sample ID' to the front, drop both join-key columns, and rename the
# ID column to the '#SampleID' header used for QIIME.
cols = ['sample ID'] + [c for c in merged.columns if c not in ['sample ID', 'person-id', 'sample-id']]
merged = merged[cols].rename(columns={'sample ID': '#SampleID'})

# Sanity checks: the merge must neither add/drop samples nor create duplicate IDs.
assert len(merged) == len(its), "merge changed the number of ITS samples"
assert merged['#SampleID'].is_unique, "duplicate sample IDs in merged metadata"

# Save next to the raw metadata; later cells read it from data_raw.
merged.to_csv(f"{data_raw}/merged_output.tsv", sep="\t", index=False)


left_on='person-id' → Wir nehmen die IDs aus der ITS-Tabelle.
right_on='sample-id' → Wir vergleichen sie mit den IDs aus der Sensory-Tabelle.
how='left' → Wir behalten alle ITS-Samples, auch wenn es keinen passenden Eintrag in Sensory gibt.
Ergebnis: Eine neue Tabelle (merged), die alle ITS-Samples enthält, und Sensory-Daten dort hinzufügt, wo sie verfügbar sind.

Wichtiger Punkt:

Wenn ein Sample keine Sensory-Daten hat, sind die entsprechenden Spalten NaN (leer).

In [68]:
# Group-significance test of the Shannon diversity vector against the merged
# (ITS + personal/environmental) metadata written to merged_output.tsv.
!qiime diversity alpha-group-significance \
    --i-alpha-diversity {data_alpha}/core-metrics-results/shannon_vector.qza \
    --m-metadata-file {data_raw}/merged_output.tsv \
    --o-visualization {data_alpha}/core-metrics-results/shannon-group-significance-environment.qzv

  import pkg_resources
[32mSaved Visualization to: Data/diversity/core-metrics-results/shannon-group-significance-environment.qzv[0m
[0m[?25h

In [69]:
# Load the group-significance result for the merged metadata so it is shown as the cell output.
Visualization.load(data_alpha + "/core-metrics-results/shannon-group-significance-environment.qzv")

### 6) Alpha Correlation ###

### 6.1) Alpha Correlation for the metadata ITS ###

In [None]:
# Alpha-correlation of the Shannon diversity vector with the ITS metadata.
# NOTE(review): the output keeps the "group-significance-numeric" file name although this is
# an alpha-correlation result; the following load cell relies on exactly this name.
!qiime diversity alpha-correlation \
    --i-alpha-diversity {data_alpha}/core-metrics-results/shannon_vector.qza \
    --m-metadata-file {data_raw}/20250913_metadata_ITS.tsv \
    --o-visualization {data_alpha}/core-metrics-results/shannon-group-significance-numeric.qzv

In [61]:
# Load the alpha-correlation result for the ITS metadata so it is shown as the cell output.
Visualization.load(data_alpha + "/core-metrics-results/shannon-group-significance-numeric.qzv")

### ???????

### 6.2) Alpha Correlation for the metadata personal environment: ###

In [75]:
# Alpha-correlation of the Shannon diversity vector with the merged metadata.
# NOTE(review): the recorded run of this cell failed with "The following IDs are not present
# in the metadata", although alpha-group-significance had succeeded on merged_output.tsv earlier.
# Presumably the file was regenerated with different sample IDs in between — re-run the merge
# cell and confirm that '#SampleID' matches the IDs in shannon_vector.qza before running this.
!qiime diversity alpha-correlation \
    --i-alpha-diversity {data_alpha}/core-metrics-results/shannon_vector.qza \
    --m-metadata-file {data_raw}/merged_output.tsv \
    --o-visualization {data_alpha}/core-metrics-results/shannon-group-significance-numeric-environment.qzv

  import pkg_resources
[31m[1mPlugin error from diversity:

  The following IDs are not present in the metadata: '366291_001-LP4-ITS-0001', '366291_002-LP4-ITS-0002', '366291_003-LP4-ITS-0003', '366291_004-LP4-ITS-0004', '366291_005-LP4-ITS-0005', '366291_006-LP4-ITS-0006', '366291_007-LP4-ITS-0007', '366291_008-LP4-ITS-0008', '366291_009-LP4-ITS-0009', '366291_010-LP4-ITS-0010', '366291_011-LP4-ITS-0011', '366291_012-LP4-ITS-0012', '366291_013-LP4-ITS-0013', '366291_014-LP4-ITS-0014', '366291_015-LP4-ITS-0015', '366291_016-LP4-ITS-0016', '366291_017-LP4-ITS-0017', '366291_018-LP4-ITS-0018', '366291_019-LP4-ITS-0019', '366291_020-LP4-ITS-0020', '366291_021-LP4-ITS-0021', '366291_022-LP4-ITS-0022', '366291_023-LP4-ITS-0023', '366291_024-LP4-ITS-0024', '366291_025-LP4-ITS-0025', '366291_026-LP4-ITS-0026', '366291_027-LP4-ITS-0027', '366291_028-LP4-ITS-0028', '366291_029-LP4-ITS-0029', '366291_030-LP4-ITS-0030', '366291_031-LP4-ITS-0031', '366291_032-LP4-ITS-0032', '366291_033-LP4-ITS-0

In [73]:
# Load the alpha-correlation result for the merged metadata.
# In the recorded run this raised ValueError because the .qzv file did not exist.
Visualization.load(data_alpha + "/core-metrics-results/shannon-group-significance-numeric-environment.qzv")

ValueError: Data/diversity/core-metrics-results/shannon-group-significance-numeric-environment.qzv does not exist.

***To do: inspect the CSV of pairwise Kruskal-Wallis group comparisons in pandas***