In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import sys
import os
from pathlib import Path

# Add project root to path
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.append(project_root)

# Local imports
from api.utils.duckdb_utils import DuckDBHelper
from src.helpers import initialize_logging
from src.get_prototypes import run_clustering_pipeline, Reducer

# Configure plotting
plt.style.use('default')
sns.set_palette("husl")
%matplotlib inline

# Initialize logging
logger = initialize_logging("training_input_analysis")

print("✅ Libraries imported successfully")
print(f"📁 Project root: {project_root}")


2025-08-19 10:20:40,110 - src.helpers - INFO - Logging initialized for /Users/taishajoseph/Documents/Projects/MDC-Challenge-2025/logs/duckdb_utils.log
2025-08-19 10:20:40,110 - src.helpers - INFO - Logging initialized for /Users/taishajoseph/Documents/Projects/MDC-Challenge-2025/logs/duckdb_utils.log
2025-08-19 10:20:49,414 - src.helpers - INFO - Logging initialized for /Users/taishajoseph/Documents/Projects/MDC-Challenge-2025/logs/get_prototypes.log
2025-08-19 10:20:49,414 - src.helpers - INFO - Logging initialized for /Users/taishajoseph/Documents/Projects/MDC-Challenge-2025/logs/get_prototypes.log
2025-08-19 10:20:49,414 - src.helpers - INFO - Logging initialized for /Users/taishajoseph/Documents/Projects/MDC-Challenge-2025/logs/get_prototypes.log
2025-08-19 10:20:50,300 - src.helpers - INFO - Logging initialized for /Users/taishajoseph/Documents/Projects/MDC-Challenge-2025/logs/training_input_analysis.log
2025-08-19 10:20:50,300 - src.helpers - INFO - Logging initialized for /Users

✅ Libraries imported successfully
📁 Project root: /Users/taishajoseph/Documents/Projects/MDC-Challenge-2025


In [2]:
# Default paths and configurations
DEFAULT_DUCKDB_PATH = "../artifacts/mdc_challenge.db"
DEFAULT_CHROMA_CONFIG = "configs/chunking.yaml"
DEFAULT_COLLECTION_NAME = "dataset-aggregates-train"
DEFAULT_FEATURE_CLUSTERS_PATH = "reports/clustering/dataset_clusters.json"

# UMAP parameters
DEFAULT_N_NEIGHBORS = 15
DEFAULT_MIN_DIST = 0.1
DEFAULT_N_COMPONENTS = 2
DEFAULT_RANDOM_SEED = 42

# Clustering parameters
DEFAULT_K_NEIGHBORS = 10
DEFAULT_SIMILARITY_THRESHOLD = None
DEFAULT_THRESHOLD_METHOD = "degree_target"
DEFAULT_RESOLUTION = 1.5
DEFAULT_MIN_CLUSTER_SIZE = 5
DEFAULT_MAX_CLUSTER_SIZE = 9999
DEFAULT_SPLIT_FACTOR = 1.3
DEFAULT_RANDOM_SEED = 42
DEFAULT_TARGET_N = 100
DEFAULT_TOL = 2

In [3]:
# Step 1: Load embeddings from ChromaDB
print("🔄 Loading embeddings from ChromaDB...")

# Initialize DuckDB helper
from chromadb import PersistentClient
client = PersistentClient(path=os.path.join(project_root, "local_chroma"))
print(client.list_collections())


🔄 Loading embeddings from ChromaDB...


2025-08-19 10:20:56,912 - chromadb.telemetry.product.posthog - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2025-08-19 10:20:56,912 - chromadb.telemetry.product.posthog - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2025-08-19 10:20:56,912 - chromadb.telemetry.product.posthog - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2025-08-19 10:20:56,912 - chromadb.telemetry.product.posthog - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


[Collection(name=mdc_training_data), Collection(name=dataset-aggregates-train)]


In [4]:
collection = client.get_collection(DEFAULT_COLLECTION_NAME)
dat = collection.get(include=["embeddings"])
dat

{'ids': ['https://doi.org/10.5061/dryad.4dj6042',
  'https://doi.org/10.5061/dryad.r7sqv9s8n',
  'https://doi.org/10.11583/dtu.20555586',
  'https://doi.org/10.11583/dtu.20555586.v3',
  'https://doi.org/10.5061/dryad.8153g',
  'https://doi.org/10.5281/zenodo.8014149',
  'https://doi.org/10.5281/zenodo.8014150',
  'ENSOARG00000003950',
  'ENSOARG00000012128',
  'ENSOARG00000012835',
  'ENSOARG00000013782',
  'ENSOARG00000013966',
  'ENSOARG00000014129',
  'IPR000264',
  'IPR002172',
  'IPR014760',
  'IPR020857',
  'IPR020858',
  'IPR021177',
  'IPR023415',
  'ENSBTAG00000011038',
  'ENSBTAG00000013718',
  'ENSBTAG00000017121',
  'ENSBTAG00000017131',
  'ENSBTAG00000021275',
  'ENSBTAG00000047833',
  'NM_001078656',
  'https://doi.org/10.15468/dl.354f8k',
  'https://doi.org/10.15468/dl.nbku3v',
  'https://doi.org/10.15468/dl.pdjqte',
  'https://doi.org/10.15468/dl.uejpg6',
  'https://doi.org/10.17862/cranfield.rd.19146182',
  'https://doi.org/10.17862/cranfield.rd.19146182.v1',
  '3.10.1

In [5]:
# Convert to DataFrame
df = pd.DataFrame(dat["embeddings"])
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,374,375,376,377,378,379,380,381,382,383
0,-0.017331,-0.003289,0.007024,-0.003306,-0.015948,0.029291,0.036715,0.035468,-0.001300,-0.046639,...,0.001059,0.043030,-0.053122,-0.065123,-0.001324,0.056817,-0.013290,-0.043261,0.097732,0.100580
1,-0.043640,-0.000003,0.034032,0.023671,0.092774,-0.053444,-0.056502,0.008318,-0.013348,0.008250,...,0.004956,-0.022488,-0.000035,-0.050601,-0.009106,0.036639,-0.024997,-0.035046,0.015330,-0.036408
2,-0.008052,0.095518,0.013060,-0.051372,0.059878,0.036095,-0.028284,0.019578,-0.041390,0.037182,...,-0.031841,0.058538,-0.017772,0.042047,0.015369,-0.025719,0.020821,-0.032335,0.059899,0.068338
3,-0.008052,0.095518,0.013060,-0.051372,0.059878,0.036095,-0.028284,0.019578,-0.041390,0.037182,...,-0.031841,0.058538,-0.017772,0.042047,0.015369,-0.025719,0.020821,-0.032335,0.059899,0.068338
4,-0.048955,0.002556,-0.031699,0.001135,0.054505,0.035323,-0.017979,0.003956,-0.019208,0.023924,...,0.003554,0.009065,-0.043255,-0.020902,-0.032191,0.020013,-0.020206,-0.039565,0.081096,0.006233
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
687,-0.065858,-0.054596,0.001030,0.017521,0.025454,0.020686,0.022257,0.005512,0.024903,-0.014037,...,0.025834,-0.005543,-0.040120,0.014040,-0.015782,-0.025307,0.003425,0.014606,0.090468,-0.025789
688,-0.087525,-0.028583,-0.024390,-0.053067,0.009462,-0.001504,0.024789,0.024796,-0.033924,-0.023509,...,0.003505,0.004181,0.009770,-0.014121,-0.047272,-0.000210,0.055508,-0.008958,0.104666,-0.014266
689,-0.087525,-0.028583,-0.024390,-0.053067,0.009462,-0.001504,0.024789,0.024796,-0.033924,-0.023509,...,0.003505,0.004181,0.009770,-0.014121,-0.047272,-0.000210,0.055508,-0.008958,0.104666,-0.014266
690,-0.087525,-0.028583,-0.024390,-0.053067,0.009462,-0.001504,0.024789,0.024796,-0.033924,-0.023509,...,0.003505,0.004181,0.009770,-0.014121,-0.047272,-0.000210,0.055508,-0.008958,0.104666,-0.014266


In [6]:
df.to_numpy

<bound method DataFrame.to_numpy of           0         1         2         3         4         5         6    \
0   -0.017331 -0.003289  0.007024 -0.003306 -0.015948  0.029291  0.036715   
1   -0.043640 -0.000003  0.034032  0.023671  0.092774 -0.053444 -0.056502   
2   -0.008052  0.095518  0.013060 -0.051372  0.059878  0.036095 -0.028284   
3   -0.008052  0.095518  0.013060 -0.051372  0.059878  0.036095 -0.028284   
4   -0.048955  0.002556 -0.031699  0.001135  0.054505  0.035323 -0.017979   
..        ...       ...       ...       ...       ...       ...       ...   
687 -0.065858 -0.054596  0.001030  0.017521  0.025454  0.020686  0.022257   
688 -0.087525 -0.028583 -0.024390 -0.053067  0.009462 -0.001504  0.024789   
689 -0.087525 -0.028583 -0.024390 -0.053067  0.009462 -0.001504  0.024789   
690 -0.087525 -0.028583 -0.024390 -0.053067  0.009462 -0.001504  0.024789   
691  0.025712  0.039187 -0.002140  0.048915  0.027948  0.040256  0.031754   

          7         8         9    ... 

In [7]:
dataset_cluster_map = run_clustering_pipeline(
    dataset_embeddings=df.to_numpy(),
    feature_names=dat["ids"],
    k_neighbors=DEFAULT_K_NEIGHBORS,
    similarity_threshold=DEFAULT_SIMILARITY_THRESHOLD,
    threshold_method=DEFAULT_THRESHOLD_METHOD,
    # resolution=DEFAULT_RESOLUTION,
    min_cluster_size=DEFAULT_MIN_CLUSTER_SIZE,
    max_cluster_size=DEFAULT_MAX_CLUSTER_SIZE,
    split_factor=DEFAULT_SPLIT_FACTOR,
    random_seed=DEFAULT_RANDOM_SEED,
    target_n=DEFAULT_TARGET_N,
    tol=DEFAULT_TOL,
    output_dir="reports/clustering"
)
dataset_cluster_map


2025-08-19 10:20:58,238 - src.helpers - INFO - Running feature clustering pipeline on (692, 384) matrix
2025-08-19 10:20:58,238 - src.helpers - INFO - Running feature clustering pipeline on (692, 384) matrix
2025-08-19 10:20:58,238 - src.helpers - INFO - Running feature clustering pipeline on (692, 384) matrix
2025-08-19 10:20:58,238 - src.helpers - INFO - Running feature clustering pipeline on (692, 384) matrix
2025-08-19 10:20:58,246 - src.helpers - INFO - Building k-NN graph for 692 features
2025-08-19 10:20:58,246 - src.helpers - INFO - Building k-NN graph for 692 features
2025-08-19 10:20:58,246 - src.helpers - INFO - Building k-NN graph for 692 features
2025-08-19 10:20:58,246 - src.helpers - INFO - Building k-NN graph for 692 features
2025-08-19 10:20:58,256 - src.helpers - INFO - Computing 10-NN with sklearn
2025-08-19 10:20:58,256 - src.helpers - INFO - Computing 10-NN with sklearn
2025-08-19 10:20:58,256 - src.helpers - INFO - Computing 10-NN with sklearn
2025-08-19 10:20:58,

Executing run_clustering_pipeline...
Executing build_knn_similarity_graph...
Executing determine_similarity_threshold...
Function determine_similarity_threshold took 0.0118 seconds to complete.
Function build_knn_similarity_graph took 0.1408 seconds to complete.
Executing find_resolution_for_target...
Executing run_leiden_clustering...


2025-08-19 10:20:58,438 - src.helpers - INFO - Final: 33 clusters (min=9 │ median=18 │ max=42)
2025-08-19 10:20:58,438 - src.helpers - INFO - Final: 33 clusters (min=9 │ median=18 │ max=42)
2025-08-19 10:20:58,438 - src.helpers - INFO - Final: 33 clusters (min=9 │ median=18 │ max=42)
2025-08-19 10:20:58,446 - src.helpers - INFO - [res search] step 00  γ=4.250 → 33 clusters
2025-08-19 10:20:58,446 - src.helpers - INFO - [res search] step 00  γ=4.250 → 33 clusters
2025-08-19 10:20:58,446 - src.helpers - INFO - [res search] step 00  γ=4.250 → 33 clusters
2025-08-19 10:20:58,446 - src.helpers - INFO - [res search] step 00  γ=4.250 → 33 clusters
2025-08-19 10:20:58,477 - src.helpers - INFO - Initial Leiden: 37 clusters; modularity=0.8228
2025-08-19 10:20:58,477 - src.helpers - INFO - Initial Leiden: 37 clusters; modularity=0.8228
2025-08-19 10:20:58,477 - src.helpers - INFO - Initial Leiden: 37 clusters; modularity=0.8228
2025-08-19 10:20:58,477 - src.helpers - INFO - Initial Leiden: 37 clu

Function run_leiden_clustering took 0.0585 seconds to complete.
Executing run_leiden_clustering...
Function run_leiden_clustering took 0.0583 seconds to complete.
Executing run_leiden_clustering...
Function run_leiden_clustering took 0.0607 seconds to complete.
Executing run_leiden_clustering...
Function run_leiden_clustering took 0.0483 seconds to complete.
Executing run_leiden_clustering...


2025-08-19 10:20:58,671 - src.helpers - INFO - Initial Leiden: 42 clusters; modularity=0.8063
2025-08-19 10:20:58,671 - src.helpers - INFO - Initial Leiden: 42 clusters; modularity=0.8063
2025-08-19 10:20:58,671 - src.helpers - INFO - Initial Leiden: 42 clusters; modularity=0.8063
2025-08-19 10:20:58,671 - src.helpers - INFO - Initial Leiden: 42 clusters; modularity=0.8063
2025-08-19 10:20:58,683 - src.helpers - INFO - Cycle detected; stopping at iteration 2
2025-08-19 10:20:58,683 - src.helpers - INFO - Cycle detected; stopping at iteration 2
2025-08-19 10:20:58,683 - src.helpers - INFO - Cycle detected; stopping at iteration 2
2025-08-19 10:20:58,683 - src.helpers - INFO - Cycle detected; stopping at iteration 2
2025-08-19 10:20:58,750 - src.helpers - INFO - Final: 42 clusters (min=7 │ median=15 │ max=31)
2025-08-19 10:20:58,750 - src.helpers - INFO - Final: 42 clusters (min=7 │ median=15 │ max=31)
2025-08-19 10:20:58,750 - src.helpers - INFO - Final: 42 clusters (min=7 │ median=15 │

Function run_leiden_clustering took 0.1234 seconds to complete.
Executing run_leiden_clustering...
Function run_leiden_clustering took 0.1431 seconds to complete.
Executing run_leiden_clustering...


2025-08-19 10:20:58,970 - src.helpers - INFO - Initial Leiden: 43 clusters; modularity=0.8024
2025-08-19 10:20:58,978 - src.helpers - INFO - Cycle detected; stopping at iteration 2
2025-08-19 10:20:58,978 - src.helpers - INFO - Cycle detected; stopping at iteration 2
2025-08-19 10:20:58,978 - src.helpers - INFO - Cycle detected; stopping at iteration 2
2025-08-19 10:20:58,978 - src.helpers - INFO - Cycle detected; stopping at iteration 2
2025-08-19 10:20:58,992 - src.helpers - INFO - Final: 43 clusters (min=7 │ median=15 │ max=31)
2025-08-19 10:20:58,992 - src.helpers - INFO - Final: 43 clusters (min=7 │ median=15 │ max=31)
2025-08-19 10:20:58,992 - src.helpers - INFO - Final: 43 clusters (min=7 │ median=15 │ max=31)
2025-08-19 10:20:58,992 - src.helpers - INFO - Final: 43 clusters (min=7 │ median=15 │ max=31)
2025-08-19 10:20:59,002 - src.helpers - INFO - [res search] step 06  γ=7.941 → 43 clusters
2025-08-19 10:20:59,002 - src.helpers - INFO - [res search] step 06  γ=7.941 → 43 clust

Function run_leiden_clustering took 0.0566 seconds to complete.
Executing run_leiden_clustering...
Function run_leiden_clustering took 0.0624 seconds to complete.
Executing run_leiden_clustering...
Function run_leiden_clustering took 0.1039 seconds to complete.
Executing run_leiden_clustering...


2025-08-19 10:20:59,232 - src.helpers - INFO - Initial Leiden: 44 clusters; modularity=0.8012
2025-08-19 10:20:59,232 - src.helpers - INFO - Initial Leiden: 44 clusters; modularity=0.8012
2025-08-19 10:20:59,232 - src.helpers - INFO - Initial Leiden: 44 clusters; modularity=0.8012
2025-08-19 10:20:59,232 - src.helpers - INFO - Initial Leiden: 44 clusters; modularity=0.8012
2025-08-19 10:20:59,247 - src.helpers - INFO - Cycle detected; stopping at iteration 2
2025-08-19 10:20:59,247 - src.helpers - INFO - Cycle detected; stopping at iteration 2
2025-08-19 10:20:59,247 - src.helpers - INFO - Cycle detected; stopping at iteration 2
2025-08-19 10:20:59,247 - src.helpers - INFO - Cycle detected; stopping at iteration 2
2025-08-19 10:20:59,265 - src.helpers - INFO - Final: 44 clusters (min=5 │ median=15 │ max=31)
2025-08-19 10:20:59,265 - src.helpers - INFO - Final: 44 clusters (min=5 │ median=15 │ max=31)
2025-08-19 10:20:59,265 - src.helpers - INFO - Final: 44 clusters (min=5 │ median=15 │

Function run_leiden_clustering took 0.0766 seconds to complete.
Function find_resolution_for_target took 0.9203 seconds to complete.
Executing export_feature_clusters...
Function export_feature_clusters took 0.0329 seconds to complete.
Executing export_clustering_report...
Function export_clustering_report took 0.0132 seconds to complete.
Function run_clustering_pipeline took 1.1405 seconds to complete.


{'https://doi.org/10.5061/dryad.4dj6042': 'cluster_3',
 'https://doi.org/10.5061/dryad.r7sqv9s8n': 'cluster_33',
 'https://doi.org/10.11583/dtu.20555586': 'cluster_7',
 'https://doi.org/10.11583/dtu.20555586.v3': 'cluster_7',
 'https://doi.org/10.5061/dryad.8153g': 'cluster_15',
 'https://doi.org/10.5281/zenodo.8014149': 'cluster_29',
 'https://doi.org/10.5281/zenodo.8014150': 'cluster_7',
 'ENSOARG00000003950': 'cluster_29',
 'ENSOARG00000012128': 'cluster_21',
 'ENSOARG00000012835': 'cluster_29',
 'ENSOARG00000013782': 'cluster_27',
 'ENSOARG00000013966': 'cluster_27',
 'ENSOARG00000014129': 'cluster_27',
 'IPR000264': 'cluster_27',
 'IPR002172': 'cluster_27',
 'IPR014760': 'cluster_27',
 'IPR020857': 'cluster_10',
 'IPR020858': 'cluster_27',
 'IPR021177': 'cluster_10',
 'IPR023415': 'cluster_10',
 'ENSBTAG00000011038': 'cluster_27',
 'ENSBTAG00000013718': 'cluster_27',
 'ENSBTAG00000017121': 'cluster_27',
 'ENSBTAG00000017131': 'cluster_27',
 'ENSBTAG00000021275': 'cluster_27',
 'EN

In [8]:
# initialize reducer
pca = Reducer(
        collection_name=DEFAULT_COLLECTION_NAME,
        cfg_path=DEFAULT_CHROMA_CONFIG,
        db_path=DEFAULT_DUCKDB_PATH,
        n_neighbors=DEFAULT_N_NEIGHBORS,
        min_dist=DEFAULT_MIN_DIST,
        n_components=DEFAULT_N_COMPONENTS,
        random_seed=DEFAULT_RANDOM_SEED
)

2025-08-17 12:36:48,109 - api.database.duckdb_schema - INFO - Starting DuckDB schema creation...
2025-08-17 12:36:48,109 - api.database.duckdb_schema - INFO - Starting DuckDB schema creation...
2025-08-17 12:36:48,109 - api.database.duckdb_schema - INFO - Starting DuckDB schema creation...
2025-08-17 12:36:48,109 - api.database.duckdb_schema - INFO - Starting DuckDB schema creation...
2025-08-17 12:36:48,149 - api.database.duckdb_schema - INFO - Creating documents table...
2025-08-17 12:36:48,149 - api.database.duckdb_schema - INFO - Creating documents table...
2025-08-17 12:36:48,149 - api.database.duckdb_schema - INFO - Creating documents table...
2025-08-17 12:36:48,149 - api.database.duckdb_schema - INFO - Creating documents table...
2025-08-17 12:36:48,158 - api.database.duckdb_schema - INFO - Documents table created successfully
2025-08-17 12:36:48,158 - api.database.duckdb_schema - INFO - Documents table created successfully
2025-08-17 12:36:48,158 - api.database.duckdb_schema -

In [9]:
# dat["ids"]

In [10]:
results = pca.run_per_cluster_pca(df.to_numpy(), dataset_cluster_map, dat["ids"])
results

2025-08-17 12:36:48,416 - src.helpers - INFO - 🔄 Running feature-cluster PCA...
2025-08-17 12:36:48,416 - src.helpers - INFO - 🔄 Running feature-cluster PCA...
2025-08-17 12:36:48,416 - src.helpers - INFO - 🔄 Running feature-cluster PCA...
2025-08-17 12:36:48,416 - src.helpers - INFO - 🔄 Running feature-cluster PCA...
2025-08-17 12:36:48,425 - src.helpers - INFO - Loading datasets from DuckDB...
2025-08-17 12:36:48,425 - src.helpers - INFO - Loading datasets from DuckDB...
2025-08-17 12:36:48,425 - src.helpers - INFO - Loading datasets from DuckDB...
2025-08-17 12:36:48,425 - src.helpers - INFO - Loading datasets from DuckDB...
2025-08-17 12:36:48,546 - src.helpers - INFO - Retrieved 692 datasets from database
2025-08-17 12:36:48,546 - src.helpers - INFO - Retrieved 692 datasets from database
2025-08-17 12:36:48,546 - src.helpers - INFO - Retrieved 692 datasets from database
2025-08-17 12:36:48,546 - src.helpers - INFO - Retrieved 692 datasets from database
2025-08-17 12:36:48,551 - sr

Executing run_per_cluster_pca...


2025-08-17 12:36:48,606 - src.helpers - INFO - cluster_66: PC1 var ratio = 0.9546
2025-08-17 12:36:48,600 - src.helpers - INFO - cluster_26: PC1 var ratio = 0.7884
2025-08-17 12:36:48,587 - src.helpers - INFO - cluster_21: PC1 var ratio = 0.7994
2025-08-17 12:36:48,594 - src.helpers - INFO - cluster_56: PC1 var ratio = 0.8289
2025-08-17 12:36:48,594 - src.helpers - INFO - cluster_56: PC1 var ratio = 0.8289
2025-08-17 12:36:48,614 - src.helpers - INFO - cluster_67: PC1 var ratio = 1.0000
2025-08-17 12:36:48,606 - src.helpers - INFO - cluster_66: PC1 var ratio = 0.9546
2025-08-17 12:36:48,600 - src.helpers - INFO - cluster_26: PC1 var ratio = 0.7884
2025-08-17 12:36:48,606 - src.helpers - INFO - cluster_66: PC1 var ratio = 0.9546
2025-08-17 12:36:48,600 - src.helpers - INFO - cluster_26: PC1 var ratio = 0.7884
2025-08-17 12:36:48,614 - src.helpers - INFO - cluster_67: PC1 var ratio = 1.0000
2025-08-17 12:36:48,614 - src.helpers - INFO - cluster_46: PC1 var ratio = 1.0000
2025-08-17 12:36

Function run_per_cluster_pca took 1.2333 seconds to complete.


True

In [12]:
import pickle

with open("../artifacts/models_20250814_1652/prototypes.pkl", "rb") as f:
    prototypes = pickle.load(f)

prototypes

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,374,375,376,377,378,379,380,381,382,383
cluster_0,-0.181621,-0.232658,-0.296897,-0.029748,0.224288,0.133283,0.423142,0.090797,-0.002589,-0.052888,...,-0.069480,0.231679,0.036610,-0.201909,0.087907,0.503794,0.074356,-0.173186,0.173881,0.174883
cluster_1,-0.227442,-0.172776,-0.213857,-0.045653,0.187073,-0.321099,-0.144781,0.201479,-0.091904,0.033552,...,-0.092440,0.063449,-0.212436,-0.187928,-0.126429,0.121726,-0.101457,-0.006310,0.256800,0.108787
cluster_10,0.034032,-0.130928,0.071096,-0.163160,0.490804,0.043438,-0.157107,0.024211,0.195754,0.110804,...,-0.031022,0.261265,-0.055955,-0.217523,-0.234507,0.137448,-0.216561,-0.088510,0.085535,0.021107
cluster_11,-0.283475,-0.100257,-0.182065,-0.235532,0.071826,-0.029336,0.345193,0.055378,-0.035396,-0.166141,...,0.064649,0.106232,-0.232219,-0.053746,0.009778,-0.050773,-0.050919,-0.227342,0.155494,0.015169
cluster_12,-0.341272,-0.192031,-0.141555,0.038234,-0.158584,-0.069578,0.234931,0.108887,-0.075545,-0.199041,...,-0.041863,0.247351,-0.388903,-0.020788,-0.110720,0.046505,0.144547,-0.415289,0.405223,0.246274
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
cluster_75,-0.133517,-0.060325,-0.028585,-0.037454,0.045671,-0.051599,-0.038659,0.058219,-0.083000,0.036892,...,0.032675,-0.023107,-0.076571,0.004528,-0.019251,0.016620,-0.024382,-0.109960,0.192415,0.014495
cluster_76,-0.065351,-0.028557,0.023636,0.012569,0.130888,0.003051,-0.004010,0.059031,-0.045567,0.013411,...,0.101525,-0.042894,-0.021473,-0.019469,-0.017947,0.039516,-0.053599,-0.061169,0.010300,0.088806
cluster_77,0.011106,-0.049103,-0.066851,0.001985,0.098039,-0.059022,0.001528,0.122797,0.050994,-0.074510,...,-0.017738,-0.040451,0.130467,-0.070377,-0.058316,0.025985,0.013508,-0.119283,0.147537,0.009916
cluster_8,-0.095542,-0.058941,0.056315,0.090701,0.040170,0.127071,0.115694,0.139067,-0.020962,-0.027743,...,-0.036184,0.024980,-0.080174,-0.033710,-0.109864,0.106272,-0.065363,-0.183911,0.258223,0.147318
