In [1]:
from sklearn.cluster import SpectralClustering
import numpy as np
import os

In [2]:
# Label names in a fixed order. Later cells pair entry i of this list with the
# i-th cluster label, so the order is assumed to match the row order of the
# clustered matrix — TODO confirm against the CSV that is loaded below.
class_names = [ 'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'Pneumonia',
                'Pneumothorax', 'Consolidation', 'Edema', 'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia', 'Normal']

from collections import defaultdict

def group_classes(category_array, names=None):
    """
    Print the member indices of every category and return the labels as a list.

    For each distinct value in ``category_array`` one line of the form
    ``Category_<value>: [<indices>]`` is printed, in order of first appearance.

    :param category_array: Sequence of category values, one per class
        (for example the ``labels_`` array of a fitted clustering estimator)
    :param names: Optional sequence that ``category_array`` must match in length.
        When omitted, the module-level ``class_names`` list is used.
    :return: List holding the category values in their original order
    :raises ValueError: If ``category_array`` and the reference sequence differ in length
    """
    # The module-level list is only consulted when the caller supplies nothing,
    # which keeps existing one-argument calls working unchanged.
    reference = class_names if names is None else names
    if len(reference) != len(category_array):
        raise ValueError("class_names and category_array must have the same length")

    groups = defaultdict(list)
    for idx, category in enumerate(category_array):
        groups[f"Category_{category}"].append(idx)

    for category, attributes in groups.items():
        print(f"{category}: {attributes}")

    # Callers get a plain list back, element for element the same as the input.
    return list(category_array)
           

def group_classes_by_category(class_names, category_array):
    """
    Bucket class names by the category value assigned to each of them.

    The i-th name is paired with the i-th category value. Buckets appear in the
    order their category is first encountered, and names keep their input order
    inside each bucket.

    :param class_names: Sequence of class names
    :param category_array: Sequence of category values, aligned with class_names
    :return: Plain dict mapping "Category_<value>" to the list of names in that category
    :raises ValueError: If the two inputs differ in length
    """
    same_length = len(class_names) == len(category_array)
    if not same_length:
        raise ValueError("class_names and category_array must have the same length")

    buckets = {}
    for name, label in zip(class_names, category_array):
        key = f"Category_{label}"
        # setdefault creates the bucket the first time a category shows up.
        buckets.setdefault(key, []).append(name)

    return buckets


In [10]:
# NOTE(review): absolute, machine-specific location; the notebook only runs
# where this directory exists.
dirname = "/data/mariammaa/nih_multi_label/nih_results_manipulate_saliencymap/"
filename = "rho_score_nihfull_full_resnext-lr:0.01-wd:0.0_generous-sea-6.csv"

# Read the comma-separated table into a float array. genfromtxt turns fields it
# cannot parse into NaN instead of raising, so a malformed file loads silently.
rho_csv_path = os.path.join(dirname, filename)
X = np.genfromtxt(rho_csv_path, delimiter=',')



In [11]:
# Split the rows of X into 2 spectral clusters with a fixed random_state.
# NOTE(review): affinity is left at the estimator default, so X is used as
# feature rows rather than as a precomputed similarity matrix — confirm intended.
clustering = SpectralClustering(
    n_clusters=2,
    assign_labels='discretize',
    random_state=0,
)
clustering.fit(X)
print(clustering.labels_)

# group_classes prints the member indices per cluster and returns the labels as a list.
category_array = cat_array = group_classes(clustering.labels_)

# Same grouping again, reported with class names instead of indices.
class_groups = group_classes_by_category(class_names, category_array)
for category, attributes in class_groups.items():
    print(f"{category}: {attributes}")


[0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
Category_0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
Category_1: [14]
Category_0: ['Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass', 'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema', 'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia']
Category_1: ['Normal']




In [12]:
# Split the rows of X into 3 spectral clusters with a fixed random_state.
# NOTE(review): affinity is left at the estimator default, so X is used as
# feature rows rather than as a precomputed similarity matrix — confirm intended.
clustering = SpectralClustering(
    n_clusters=3,
    assign_labels='discretize',
    random_state=0,
)
clustering.fit(X)
print(clustering.labels_)

# group_classes prints the member indices per cluster and returns the labels as a list.
category_array = cat_array = group_classes(clustering.labels_)

# Same grouping again, reported with class names instead of indices.
class_groups = group_classes_by_category(class_names, category_array)
for category, attributes in class_groups.items():
    print(f"{category}: {attributes}")


[0 0 0 0 2 2 0 2 0 0 2 2 2 2 1]
Category_0: [0, 1, 2, 3, 6, 8, 9]
Category_2: [4, 5, 7, 10, 11, 12, 13]
Category_1: [14]
Category_0: ['Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Pneumonia', 'Consolidation', 'Edema']
Category_2: ['Mass', 'Nodule', 'Pneumothorax', 'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia']
Category_1: ['Normal']




In [13]:
# Split the rows of X into 4 spectral clusters with a fixed random_state.
# NOTE(review): affinity is left at the estimator default, so X is used as
# feature rows rather than as a precomputed similarity matrix — confirm intended.
clustering = SpectralClustering(
    n_clusters=4,
    assign_labels='discretize',
    random_state=0,
)
clustering.fit(X)
print(clustering.labels_)

# group_classes prints the member indices per cluster and returns the labels as a list.
category_array = cat_array = group_classes(clustering.labels_)

# Same grouping again, reported with class names instead of indices.
class_groups = group_classes_by_category(class_names, category_array)
for category, attributes in class_groups.items():
    print(f"{category}: {attributes}")


[0 0 0 0 2 2 0 3 2 0 3 3 2 3 1]
Category_0: [0, 1, 2, 3, 6, 9]
Category_2: [4, 5, 8, 12]
Category_3: [7, 10, 11, 13]
Category_1: [14]
Category_0: ['Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Pneumonia', 'Edema']
Category_2: ['Mass', 'Nodule', 'Consolidation', 'Pleural_Thickening']
Category_3: ['Pneumothorax', 'Emphysema', 'Fibrosis', 'Hernia']
Category_1: ['Normal']




In [14]:
# Split the rows of X into 5 spectral clusters with a fixed random_state.
# NOTE(review): affinity is left at the estimator default, so X is used as
# feature rows rather than as a precomputed similarity matrix — confirm intended.
clustering = SpectralClustering(
    n_clusters=5,
    assign_labels='discretize',
    random_state=0,
)
clustering.fit(X)
print(clustering.labels_)

# group_classes prints the member indices per cluster and returns the labels as a list.
category_array = cat_array = group_classes(clustering.labels_)

# Same grouping again, reported with class names instead of indices.
class_groups = group_classes_by_category(class_names, category_array)
for category, attributes in class_groups.items():
    print(f"{category}: {attributes}")


[2 0 2 2 3 3 0 4 2 0 4 4 3 4 1]
Category_2: [0, 2, 3, 8]
Category_0: [1, 6, 9]
Category_3: [4, 5, 12]
Category_4: [7, 10, 11, 13]
Category_1: [14]
Category_2: ['Atelectasis', 'Effusion', 'Infiltration', 'Consolidation']
Category_0: ['Cardiomegaly', 'Pneumonia', 'Edema']
Category_3: ['Mass', 'Nodule', 'Pleural_Thickening']
Category_4: ['Pneumothorax', 'Emphysema', 'Fibrosis', 'Hernia']
Category_1: ['Normal']


