In [1]:
import os
import scipy.io as sio
import numpy as np
import pandas as pd

data_dir = "./"   # folder with all .mat files

PATIENT_IDS = range(1, 13)  # Patients 1-12
N_CLUSTERS = 16             # cells read from each patient's (1, 16) cell array
SKIPPED_CLUSTER = 11        # 1-based cluster number that is not exported


def _unwrap_cell(value):
    """Return the plain Python value held by one loaded MATLAB cell element.

    ``scipy.io.loadmat`` hands back each cell element wrapped in its own
    ndarray (e.g. a 1x1 numeric array or a one-element string array).
    Single-element arrays are reduced to their scalar, empty arrays become
    ``None`` (written as an empty CSV field), and anything else is returned
    unchanged.
    """
    if isinstance(value, np.ndarray):
        if value.size == 0:
            return None
        if value.size == 1:
            # .item() may itself yield a wrapped array (nested cell), so recurse.
            return _unwrap_cell(value.item())
    return value


def _cell_to_frame(cell):
    """Build a DataFrame from one cluster cell, unwrapping every element.

    Without the unwrapping step the CSV would contain the ndarray reprs
    (``['GENE']``, ``[[1.5]]``) instead of the bare values.
    """
    cells = np.array(cell, dtype=object)
    unwrapped = np.empty(cells.size, dtype=object)
    for pos, value in enumerate(cells.ravel()):
        unwrapped[pos] = _unwrap_cell(value)
    return pd.DataFrame(unwrapped.reshape(cells.shape))


def new_cluster_index(old_cluster, skipped=SKIPPED_CLUSTER):
    """Map a 1-based cluster number to its number after dropping ``skipped``.

    Clusters above the skipped one shift down by one so the exported
    numbering stays contiguous.
    """
    return old_cluster - 1 if old_cluster > skipped else old_cluster


def convert_patient_file(mat_file, out_dir):
    """Export the clusters stored in ``mat_file`` as CSV files in ``out_dir``.

    The first variable in the .mat file whose name does not start with
    ``__`` is taken to be the cluster cell array. Each of its first
    ``N_CLUSTERS`` cells, except ``SKIPPED_CLUSTER``, is written to
    ``cluster<new index>.csv`` without header or index column.

    Returns:
        The list of CSV paths written, in cluster order.

    Raises:
        ValueError: if the .mat file contains no data variable.
    """
    data = sio.loadmat(mat_file)

    # Identify the cell array variable (ignore __header__, __version__, ...)
    cell_key = next((key for key in data if not key.startswith("__")), None)
    if cell_key is None:
        raise ValueError(f"No data variable found in {mat_file}")
    clusters = data[cell_key]  # expected to be a (1, 16) cell array

    os.makedirs(out_dir, exist_ok=True)

    written = []
    for idx in range(N_CLUSTERS):
        old_cluster = idx + 1
        if old_cluster == SKIPPED_CLUSTER:
            continue

        df = _cell_to_frame(clusters[0, idx])

        out_path = os.path.join(
            out_dir, f"cluster{new_cluster_index(old_cluster)}.csv"
        )
        df.to_csv(out_path, index=False, header=False)
        written.append(out_path)

        print(f"Saved: {out_path} (old cluster {old_cluster})")

    return written


def main():
    """Convert every patient's .mat file found in ``data_dir`` to CSV folders."""
    for patient_id in PATIENT_IDS:
        mat_file = os.path.join(data_dir, f"cluster_dataPatients{patient_id}.mat")
        print(f"Loading: {mat_file}")
        convert_patient_file(mat_file, f"Patient{patient_id}")

    print("MATLAB data to CSV: Done!")


if __name__ == "__main__":
    main()


Loading: ./cluster_dataPatients1.mat
Saved: Patient1/cluster1.csv (old cluster 1)
Saved: Patient1/cluster2.csv (old cluster 2)
Saved: Patient1/cluster3.csv (old cluster 3)
Saved: Patient1/cluster4.csv (old cluster 4)
Saved: Patient1/cluster5.csv (old cluster 5)
Saved: Patient1/cluster6.csv (old cluster 6)
Saved: Patient1/cluster7.csv (old cluster 7)
Saved: Patient1/cluster8.csv (old cluster 8)
Saved: Patient1/cluster9.csv (old cluster 9)
Saved: Patient1/cluster10.csv (old cluster 10)
Saved: Patient1/cluster11.csv (old cluster 12)
Saved: Patient1/cluster12.csv (old cluster 13)
Saved: Patient1/cluster13.csv (old cluster 14)
Saved: Patient1/cluster14.csv (old cluster 15)
Saved: Patient1/cluster15.csv (old cluster 16)
Loading: ./cluster_dataPatients2.mat
Saved: Patient2/cluster1.csv (old cluster 1)
Saved: Patient2/cluster2.csv (old cluster 2)
Saved: Patient2/cluster3.csv (old cluster 3)
Saved: Patient2/cluster4.csv (old cluster 4)
Saved: Patient2/cluster5.csv (old cluster 5)
Saved: Patient