**UMAP**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Ellipse
import umap

# Seed for the UMAP embedding. UMAP is stochastic, so without a fixed seed
# the 2-D coordinates differ on every run.
RANDOM_SEED = 42

# Input data
train_data_path = 'input_data.csv'
df_train = pd.read_csv(train_data_path)

# Assuming the first column is the labels and the rest are features
y_train = df_train.iloc[:, 0]   # Labels
X_train = df_train.iloc[:, 1:]  # Features

# Perform UMAP on the input data, reducing to 2 dimensions for visualization.
# random_state pins the embedding so the result is repeatable after a
# kernel restart.
umap_model = umap.UMAP(n_components=2, random_state=RANDOM_SEED)
X_umap_train = umap_model.fit_transform(X_train)

**LDA, NPMANOVA & MANOVA**

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import numpy as np
from matplotlib.patches import Ellipse
from skbio.stats.distance import DistanceMatrix, permanova
from scipy.spatial import distance
from statsmodels.multivariate.manova import MANOVA
import matplotlib.cm as cm
from scipy.spatial.distance import pdist

# Seed for the permutation test so the reported p-value repeats across runs
RANDOM_SEED = 42

# Load the input data
train_data_path = 'input_data.csv'
df_train = pd.read_csv(train_data_path)

# Assuming the first column is the labels and the rest are features
X_train = df_train.iloc[:, 1:]  # Features
y_train = df_train.iloc[:, 0]   # Labels

# Get unique class labels in order of first appearance.
# A plain set() has no stable order for strings between kernel restarts,
# which would reshuffle both the numeric codes and the class colors.
unique_labels = list(pd.unique(y_train))

# Convert string labels to numeric values
label_map = {label: i for i, label in enumerate(unique_labels)}
y_train_numeric = [label_map[label] for label in y_train]

# Perform Linear Discriminant Analysis (LDA) on the training data.
# Reduce to 2 dimensions for visualization; scikit-learn rejects
# n_components larger than min(n_features, n_classes - 1), so this needs
# at least three classes and two features.
lda = LinearDiscriminantAnalysis(n_components=2)
X_lda_train = lda.fit_transform(X_train, y_train_numeric)

# Get pairwise distances between samples in the transformed space
pairwise_distances = distance.squareform(distance.pdist(X_lda_train))

# Create unique IDs for each sample
unique_ids = [f'Sample_{i}' for i in range(len(y_train))]

# Convert pairwise distances to DistanceMatrix object
dist_matrix = DistanceMatrix(pairwise_distances, ids=unique_ids)

# Define a colormap (any registered colormap name can be used here).
# plt.get_cmap replaces matplotlib.cm.get_cmap, which newer Matplotlib
# releases no longer provide.
cmap = plt.get_cmap('tab10')

# Create a dictionary to store class colors; cycle through the palette so
# classes beyond the palette size do not all collapse onto the last color.
class_colors = {label: cmap(i % cmap.N) for i, label in enumerate(unique_labels)}

# Perform NPMANOVA
# NOTE(review): seeding NumPy's global RNG assumes the installed scikit-bio
# draws its permutations from that global state - confirm for your version.
np.random.seed(RANDOM_SEED)
permanova_results = permanova(dist_matrix, grouping=y_train_numeric)
print('NPMANOVA results:')
print(permanova_results)

# Extract p-value from PERMANOVA results
p_value = permanova_results['p-value']

# Perform Bonferroni correction, capped at 1.0 because a p-value cannot
# exceed 1.
# NOTE(review): only one omnibus test is run above, yet the correction
# factor is the number of classes - confirm this is the intended count.
num_comparisons = len(unique_labels)  # Number of comparisons
adjusted_p_value = min(1.0, p_value * num_comparisons)

# Print adjusted p-value
print('Adjusted p-value:', adjusted_p_value)

# Perform MANOVA on the two discriminant axes.
# The variables are placed in an explicit frame so the formula does not
# depend on notebook-level names being resolved implicitly.
# NOTE(review): the LDA axes were fitted using these same labels, so group
# separation in this space is optimistic - interpret the tests with care.
manova_df = pd.DataFrame(X_lda_train, columns=['LD1', 'LD2'])
manova_df['label'] = y_train.to_numpy()
manova = MANOVA.from_formula('LD1 + LD2 ~ C(label)', data=manova_df)

# Print MANOVA results
print('MANOVA results:')
print(manova.mv_test())

**SPEARMAN'S CORRELATION**

In [None]:
import pandas as pd
from scipy.stats import spearmanr

# Selection thresholds and column layout, kept in one place for easy tuning
MIN_ABS_CORRELATION = 0.3  # minimum |rho| for a pair to be reported
ALPHA = 0.05               # significance level applied to each test
FIRST_DL_COLUMN = 25       # positional index where the DL variables start

# Input data
data = pd.read_csv('input_data.csv')

# Extract PC and DL variables (column 0 is skipped)
PC_variables = data.iloc[:, 1:FIRST_DL_COLUMN]
DL_variables = data.iloc[:, FIRST_DL_COLUMN:]

# Dictionary to store significant correlations, keyed by (PC column, DL column)
significant_correlations = {}

# Test every PC variable against every DL variable
for pc_col in PC_variables.columns:
    for dl_col in DL_variables.columns:
        # Calculate Spearman correlation coefficient and its p-value
        correlation, p_value = spearmanr(data[pc_col], data[dl_col])
        # Keep pairs whose absolute correlation reaches the threshold and
        # whose p-value is below ALPHA. A NaN result fails both comparisons
        # and is therefore skipped.
        if abs(correlation) >= MIN_ABS_CORRELATION and p_value < ALPHA:
            significant_correlations[(pc_col, dl_col)] = correlation

# Print summary. The message states the criterion actually applied:
# absolute correlation, so negative correlations are included.
print("Summary:")
print(
    f"Number of significant correlations "
    f"(|correlation| >= {MIN_ABS_CORRELATION}, p < {ALPHA}): "
    f"{len(significant_correlations)}"
)
print("Significant Correlations:")
for (pc_col, dl_col), correlation in significant_correlations.items():
    print(f"{pc_col} with {dl_col}: Correlation = {correlation}")