# Import libraries 

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import spearmanr

# Data

In [2]:
# Load both input tables from CSV files in the current working directory.
# No index column is specified, so each frame gets pandas' default positional index.
ASD_data = pd.read_csv(filepath_or_buffer="ASD_meta_abundance.csv")
OTU_data = pd.read_csv(
    filepath_or_buffer="GSE113690_Autism_16S_rRNA_OTU_assignment_and_abundance.csv"
)

Data cleaning: impute missing values in the numeric columns with each column's median. Non-numeric columns are excluded from the median calculation and are left unchanged.

In [3]:
def _median_imputed_numeric(frame):
    """Return the numeric columns of ``frame`` with NaNs replaced by column medians.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table that may mix numeric and non-numeric columns.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the numeric columns of ``frame``; each missing
        value is replaced by the median of its column. ``frame`` is not modified.
    """
    numeric = frame.select_dtypes(include=[np.number])
    # fillna returns a new object; the caller must keep the returned frame.
    return numeric.fillna(numeric.median())


# Rebind the *_numeric names to the imputed frames. These are the frames used by
# the later alignment/correlation cells, so they must not keep the original NaNs
# (previously only ASD_data / OTU_data received the filled values).
ASD_data_numeric = _median_imputed_numeric(ASD_data)
ASD_data[ASD_data_numeric.columns] = ASD_data_numeric

OTU_data_numeric = _median_imputed_numeric(OTU_data)
OTU_data[OTU_data_numeric.columns] = OTU_data_numeric

Align the two tables on their common row indices so that both contain the same rows. We only want to analyze the samples that are present in both tables.

In [4]:
# Keep only the row labels that occur in both numeric tables, then subset each
# table to those labels so the two frames line up row for row.
# NOTE(review): the CSVs are read without an index column, so these labels are
# positional row numbers rather than sample IDs — confirm that matching rows by
# position is what is intended.
common_indices = ASD_data_numeric.index.intersection(OTU_data_numeric.index)
ASD_data_aligned, OTU_data_aligned = (
    frame.loc[common_indices] for frame in (ASD_data_numeric, OTU_data_numeric)
)

# Correlation Analysis between OTU abundance and clinical features

We want to study the relationship between OTU abundance and the clinical traits. We compute the Spearman rank correlation, which measures the monotonic association between two variables and is especially useful when the relationship is not linear.

In [5]:
# Spearman correlation for every (OTU-table column, ASD-table column) pair.
# Each entry is a tuple: (otu column, feature column, correlation, p-value).
# NOTE(review): the recorded output lists labels such as A1 / A9 / B59 under both
# 'OTU' and 'Clinical Feature'; these look like sample IDs, so the tables may need
# transposing before this step — confirm against the CSV layout.
correlation_results = [
    (otu, feature, *spearmanr(OTU_data_aligned[otu], ASD_data_aligned[feature]))
    for otu in OTU_data_aligned.columns
    for feature in ASD_data_aligned.columns
]

Convert the list of results into a DataFrame with one row per (OTU, clinical feature) pair.

In [6]:
# One row per tested pair; column order matches the tuples built in the loop above.
correlation_df = pd.DataFrame(
    data=correlation_results,
    columns=[
        'OTU',
        'Clinical Feature',
        'Spearman Correlation',
        'P-value',
    ],
)

Filter the significant correlations (e.g., p-value < 0.05). Correlations with a p-value below 0.05 are considered significant; this shows which OTUs are significantly correlated with the clinical features.

In [7]:
# Keep the rows whose p-value is strictly below 0.05 (NaN p-values compare False
# and are therefore excluded), then print the resulting table.
# NOTE(review): raw p-values are thresholded here; no multiple-comparison
# correction is applied in this cell.
is_significant = correlation_df['P-value'].lt(0.05)
significant_correlations = correlation_df.loc[is_significant]
print("Significant Correlations between OTU abundance and Clinical Features:")
print(significant_correlations)

Significant Correlations between OTU abundance and Clinical Features:
       OTU Clinical Feature  Spearman Correlation   P-value
3       A1               A9             -0.070453  0.010396
30      A1               B1             -0.055813  0.042460
32      A1               B3             -0.059806  0.029674
35      A1               B7             -0.054412  0.047932
41      A1              B36             -0.054955  0.045745
...    ...              ...                   ...       ...
15030  B59               B1             -0.054938  0.045812
15047  B59             B111             -0.065270  0.017622
15048  B59             B114             -0.055588  0.043300
15056  B59             B152             -0.056130  0.041297
15107   B6             B111             -0.056955  0.038400

[1914 rows x 4 columns]
