# Comprehensive Analysis of Datasets with Various Metrics, Norms, and Methods

In [1]:
import sys
sys.path.append('../')

import numpy as np
from AnalysisTools import Ana

# Initialize the analysis object.
# NOTE(review): cacheStoragePath is an absolute, machine-specific path;
# adjust it when running this notebook on another machine.
analysis = Ana(showPlots=True, execution_mode='cpu', cacheStoragePath='/home/diego/disks/ANACACHE')

# Single source of truth for the datasets: (label, data folder, trajectory file).
# Each label is used both to register the dataset and to process its
# trajectories, and it must match the label passed to the analysis calls in
# the cells below ("IMR90OPT", "IMR90OPT56", "IMR90SIM").
DATASETS = (
    ("IMR90SIM", "data/IMR90SIM", "traj_chr_IMR90OPT_0.cndb"),
    ("IMR90OPT", "data/IMR90OPT", "traj_0.cndb"),
    ("IMR90OPT56", "data/IMR90OPT56", "traj_0.cndb"),
)

# Add datasets
for dataset_label, dataset_folder, _traj_name in DATASETS:
    analysis.add_dataset(label=dataset_label, folder=dataset_folder)

# Process trajectories for each dataset.
# folder_pattern presumably selects sub-folders iteration_1 .. iteration_20
# -- confirm against Ana.process_trajectories.
for dataset_label, _dataset_folder, traj_name in DATASETS:
    analysis.process_trajectories(label=dataset_label, filename=traj_name, folder_pattern=['iteration_', [1, 20]])

  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'cuml'

## Generate and Cache Distance Matrices

In [None]:
# Parameter grid shared by this cell and the analysis cells that follow.
norms = [
    'ice',
    'kr',
    'log_transform',
    'vc',
]
metrics = [
    'euclidean',
    'pearsons',
    'spearman',
    'contact',
    'log2_contact',
]
methods = [
    'single',
    'complete',
    'average',
    'weighted',
]

# Call calc_XZ once per (norm, metric, method) combination, with norm varying
# slowest and method fastest. Per the section heading this is what generates
# and caches the distance matrices.
labels = ("IMR90OPT", "IMR90OPT56", "IMR90SIM")
grid = ((norm, metric, method) for norm in norms for metric in metrics for method in methods)
for norm, metric, method in grid:
    analysis.calc_XZ(*labels, metric=metric, norm=norm, method=method)

## Dimensionality Reduction Techniques

### PCA Analysis

In [None]:
# PCA for every (norm, metric, method) combination; norm varies slowest and
# method fastest.
# n_components=-1 presumably lets Ana choose the component count -- confirm.
labels = ("IMR90OPT", "IMR90OPT56", "IMR90SIM")
grid = ((norm, metric, method) for norm in norms for metric in metrics for method in methods)
for norm, metric, method in grid:
    analysis.pca(
        *labels,
        metric=metric,
        n_components=-1,
        norm=norm,
        method=method,
    )

### UMAP Analysis

In [None]:
# UMAP for every (norm, metric, method) combination; norm varies slowest and
# method fastest.
# num_clusters=-1 presumably lets Ana choose the cluster count -- confirm.
labels = ("IMR90OPT", "IMR90OPT56", "IMR90SIM")
grid = ((norm, metric, method) for norm in norms for metric in metrics for method in methods)
for norm, metric, method in grid:
    analysis.umap(
        *labels,
        metric=metric,
        num_clusters=-1,
        norm=norm,
        method=method,
    )

### t-SNE Analysis

In [None]:
# t-SNE for every (norm, metric, method) combination; norm varies slowest and
# method fastest.
# num_clusters=-1 presumably lets Ana choose the cluster count -- confirm.
labels = ("IMR90OPT", "IMR90OPT56", "IMR90SIM")
grid = ((norm, metric, method) for norm in norms for metric in metrics for method in methods)
for norm, metric, method in grid:
    analysis.tsne(
        *labels,
        metric=metric,
        num_clusters=-1,
        norm=norm,
        method=method,
    )

### MDS Analysis

In [None]:
# MDS for every (norm, metric, method) combination; norm varies slowest and
# method fastest.
# n_components=-1 presumably lets Ana choose the component count -- confirm.
labels = ("IMR90OPT", "IMR90OPT56", "IMR90SIM")
grid = ((norm, metric, method) for norm in norms for metric in metrics for method in methods)
for norm, metric, method in grid:
    analysis.mds(
        *labels,
        metric=metric,
        n_components=-1,
        norm=norm,
        method=method,
    )

### SVD Analysis

In [None]:
# SVD for every (norm, metric, method) combination; norm varies slowest and
# method fastest.
# n_components=-1 presumably lets Ana choose the component count -- confirm.
labels = ("IMR90OPT", "IMR90OPT56", "IMR90SIM")
grid = ((norm, metric, method) for norm in norms for metric in metrics for method in methods)
for norm, metric, method in grid:
    analysis.svd(
        *labels,
        metric=metric,
        n_components=-1,
        norm=norm,
        method=method,
    )

## Clustering Techniques

### K-means Clustering

In [None]:
# K-means with a fixed n_clusters=5 for every (norm, metric, method)
# combination; norm varies slowest and method fastest.
labels = ("IMR90OPT", "IMR90OPT56", "IMR90SIM")
grid = ((norm, metric, method) for norm in norms for metric in metrics for method in methods)
for norm, metric, method in grid:
    analysis.kmeans_clustering(
        *labels,
        n_clusters=5,
        metric=metric,
        norm=norm,
        method=method,
    )

### DBSCAN Clustering

In [None]:
# DBSCAN with eps=0.5 and min_samples=5 for every (norm, metric, method)
# combination; norm varies slowest and method fastest.
labels = ("IMR90OPT", "IMR90OPT56", "IMR90SIM")
grid = ((norm, metric, method) for norm in norms for metric in metrics for method in methods)
for norm, metric, method in grid:
    analysis.dbscan_clustering(
        *labels,
        eps=0.5,
        min_samples=5,
        metric=metric,
        norm=norm,
        method=method,
    )

### Hierarchical Clustering

In [None]:
# Hierarchical clustering with a fixed n_clusters=5 for every
# (norm, metric, method) combination; norm varies slowest and method fastest.
labels = ("IMR90OPT", "IMR90OPT56", "IMR90SIM")
grid = ((norm, metric, method) for norm in norms for metric in metrics for method in methods)
for norm, metric, method in grid:
    analysis.hierarchical_clustering(
        *labels,
        n_clusters=5,
        metric=metric,
        norm=norm,
        method=method,
    )

### Spectral Clustering

In [None]:
# Spectral clustering for every (norm, metric, method) combination; norm
# varies slowest and method fastest.
# num_clusters=-1 presumably lets Ana choose the cluster count -- confirm.
labels = ("IMR90OPT", "IMR90OPT56", "IMR90SIM")
grid = ((norm, metric, method) for norm in norms for metric in metrics for method in methods)
for norm, metric, method in grid:
    analysis.spectral_clustering(
        *labels,
        num_clusters=-1,
        metric=metric,
        norm=norm,
        method=method,
    )

### OPTICS Clustering

In [None]:
# OPTICS with min_samples=5, xi=0.05 and min_cluster_size=0.05 for every
# (norm, metric, method) combination; norm varies slowest and method fastest.
labels = ("IMR90OPT", "IMR90OPT56", "IMR90SIM")
grid = ((norm, metric, method) for norm in norms for metric in metrics for method in methods)
for norm, metric, method in grid:
    analysis.optics_clustering(
        *labels,
        min_samples=5,
        xi=0.05,
        min_cluster_size=0.05,
        metric=metric,
        norm=norm,
        method=method,
    )