In [1]:
import importlib
import os

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.cluster import KMeans

import functions_for_plotting
import prediction_strength
import training_set_split
from asymmetric_laplacian_distribution import get_index_per_class, get_labels, labels_to_layout_mapping
from spectral_clustering import spectral_clustering



# Load Data and True Labels

In [2]:
# ------------------------------------------------ DATA ------------------------------------------------
# Two toy datasets are prepared side by side: a "clear" one and an "ambig" (ambiguous) one.
# Every quantity below exists once per dataset, prefixed with clear_ / ambig_.
data_dir = "data/"

clear_data = np.load(data_dir + "clearly_separated_data_F_signal_noise.npy")
ambig_data = np.load(data_dir + "ambiguous_data_tau_amplitude_F_signal_noise.npy")

# Amplitude conditions of each dataset.
clear_amplitude_conditions = ["S", "M", "L"]
ambig_amplitude_conditions = ["S", "S/M", "M", "M/L", "L"]

# Time-constant conditions of each dataset.
clear_time_constant_conditions = [
    "equal_sharp",
    "equal_wide",
    "wide_sharp_negative_skew",
    "sharp_wide_positive_skew",
]
ambig_time_constant_conditions = [
    "equal_sharp",
    "equal_medium",
    "equal_wide",
    "wide_sharp_negative_skew",
    "wide_medium_negative_skew",
    "medium_sharp_negative_skew",
    "sharp_wide_positive_skew",
    "medium_wide_positive_skew",
    "sharp_medium_positive_skew",
]

# Conditions flagged as ambiguous; only the ambiguous dataset passes them on below.
ambiguous_conditions = [
    "S/M",
    "M/L",
    "equal_medium",
    "wide_medium_negative_skew",
    "medium_sharp_negative_skew",
    "medium_wide_positive_skew",
    "sharp_medium_positive_skew",
]

# Sample counts handed to get_index_per_class.
samples_per_condition = 1000
samples_per_ambiguous_condition = 400

# Per-class index lookup for each dataset. The clear dataset passes an empty list of
# ambiguous conditions.
# NOTE(review): the exact structure of the returned object lives in
# asymmetric_laplacian_distribution.py — only len() is taken of it in this cell.
ambig_cluster_dict = get_index_per_class(
    ambig_amplitude_conditions,
    ambig_time_constant_conditions,
    ambiguous_conditions,
    samples_per_condition,
    samples_per_ambiguous_condition,
)
clear_cluster_dict = get_index_per_class(
    clear_amplitude_conditions,
    clear_time_constant_conditions,
    [],
    samples_per_condition,
    samples_per_ambiguous_condition,
)

# True label per sample, derived from the lookups above.
clear_true_labels = get_labels(clear_data, clear_cluster_dict)
ambig_true_labels = get_labels(ambig_data, ambig_cluster_dict)

# Label ids 0 .. len(cluster_dict) (inclusive) and their mapping onto a plotting layout.
# NOTE(review): the meaning of the numeric layout arguments (4, (1,4)) and (9, (2,5)) is
# defined by labels_to_layout_mapping — confirm there before changing them.
clear_clusters_ordered = list(range(len(clear_cluster_dict) + 1))
clear_layout_label_mapping = labels_to_layout_mapping(clear_clusters_ordered, 4, (1, 4))

ambig_clusters_ordered = list(range(len(ambig_cluster_dict) + 1))
ambig_layout_label_mapping = labels_to_layout_mapping(ambig_clusters_ordered, 9, (2, 5))

## Cluster Balanced Split

In [3]:
# Ask for two cluster-balanced folds per dataset. The second return value of
# get_training_folds is not needed in this notebook and is discarded.
clear_train_fold_indices, _ = training_set_split.get_training_folds(
    clear_data, clear_cluster_dict, cluster_split="balanced", folds=2
)
ambig_train_fold_indices, _ = training_set_split.get_training_folds(
    ambig_data, ambig_cluster_dict, cluster_split="balanced", folds=2
)

# Fold 0 serves as the training half, fold 1 as the validation half.

# --- clear dataset ---
clear_training_set, clear_validation_set = (
    clear_data[clear_train_fold_indices[fold]] for fold in (0, 1)
)
clear_true_labels_training, clear_true_labels_validation = (
    clear_true_labels[clear_train_fold_indices[fold]] for fold in (0, 1)
)

# --- ambiguous dataset ---
ambig_training_set, ambig_validation_set = (
    ambig_data[ambig_train_fold_indices[fold]] for fold in (0, 1)
)
ambig_true_labels_training, ambig_true_labels_validation = (
    ambig_true_labels[ambig_train_fold_indices[fold]] for fold in (0, 1)
)

## Spectral Clustering Configuration

In [37]:
# Root folder for everything the regularisation sweep writes (labels/ and eigenvalues/ below it).
ambig_prediction_strength_dir = "Toy_data/Ambiguous/Ambiguous_Tau_Amplitude/Prediction_Strength/"

# Value passed as `k=` to spectral_clustering in the sweep cell.
k = 10

# Regularisation strengths to sweep over; each one is passed as `reg_lambda`.
regs = [
    0.02,
    0.03,
    0.04,
    0.05,
    0.06,
    0.07,
    0.08,
    0.09,
]

In [38]:
# Sweep over the regularisation strengths: for each value, cluster the training and the
# validation half of the ambiguous dataset independently and store labels + eigenvalues.

# np.save does not create missing directories. Creating the output folders up front avoids
# a FileNotFoundError that would otherwise only appear after the first (expensive)
# clustering run has already finished.
for output_subdir in ("labels", "eigenvalues"):
    os.makedirs(ambig_prediction_strength_dir + output_subdir, exist_ok=True)

# Keyword settings shared by every spectral_clustering call of the sweep.
spectral_clustering_settings = dict(
    k=k,
    mutual=False,
    weighting="distance",
    normalize=True,
    save_laplacian=False,
    save_eigenvalues_and_vectors=False,
)

for reg in regs:
    # Common part of all file names written for this regularisation value.
    file_tag = "k=%d_reg=%s" % (k, str(reg))

    # Training half.
    labels_train, eigvec_train, eigval_train = spectral_clustering(
        ambig_training_set, "euclidean", range(1, 50), reg_lambda=reg, **spectral_clustering_settings
    )
    np.save(ambig_prediction_strength_dir + "labels/labels_%s_training" % file_tag, labels_train)
    np.save(ambig_prediction_strength_dir + "eigenvalues/eigval_%s_training" % file_tag, eigval_train)

    # Validation half, clustered with exactly the same settings.
    labels_valid, eigvec_valid, eigval_valid = spectral_clustering(
        ambig_validation_set, "euclidean", range(1, 50), reg_lambda=reg, **spectral_clustering_settings
    )
    np.save(ambig_prediction_strength_dir + "labels/labels_%s_validation" % file_tag, labels_valid)
    np.save(ambig_prediction_strength_dir + "eigenvalues/eigval_%s_validation" % file_tag, eigval_valid)
    

Calculate Normalized Laplacians
Apply regularization!
lamda = 0.0200
Calculate Eigenvalues and Vectors of Laplacian
Calculate Normalized Laplacians
Apply regularization!
lamda = 0.0200
Calculate Eigenvalues and Vectors of Laplacian
Calculate Normalized Laplacians
Apply regularization!
lamda = 0.0300
Calculate Eigenvalues and Vectors of Laplacian
Calculate Normalized Laplacians
Apply regularization!
lamda = 0.0300
Calculate Eigenvalues and Vectors of Laplacian
Calculate Normalized Laplacians
Apply regularization!
lamda = 0.0400
Calculate Eigenvalues and Vectors of Laplacian
Calculate Normalized Laplacians
Apply regularization!
lamda = 0.0400
Calculate Eigenvalues and Vectors of Laplacian
Calculate Normalized Laplacians
Apply regularization!
lamda = 0.0500
Calculate Eigenvalues and Vectors of Laplacian
Calculate Normalized Laplacians
Apply regularization!
lamda = 0.0500
Calculate Eigenvalues and Vectors of Laplacian
Calculate Normalized Laplacians
Apply regularization!
lamda = 0.0600
Cal

In [9]:
# Count how many samples carry each true label in the ambiguous dataset.
clusters_from_ambig_dataset, counts = np.unique(ambig_true_labels, return_counts=True)

# Keep the labels whose sample count differs from the ambiguous-condition sample count.
# The threshold is taken from samples_per_ambiguous_condition (set in the data cell) rather
# than a literal 400, so this selection stays correct if that setting is ever changed.
clear_clusters_from_ambig = clusters_from_ambig_dataset[counts != samples_per_ambiguous_condition]

In [11]:
# Display the selected cluster labels as a list (cell output).
list(clear_clusters_from_ambig)

[0.0, 2.0, 3.0, 6.0, 18.0, 20.0, 21.0, 24.0, 36.0, 38.0, 39.0, 42.0]

# Graph Analysis

In [12]:
from spectral_clustering import calculate_dist_matrix
from spectral_clustering import construct_knn_graph

In [28]:
# Distances over the full ambiguous dataset, with "euclidean" passed as the metric argument.
# The helper returns two objects, named here as the distance matrix and a sorted variant.
# NOTE(review): the exact contents/shape of both are defined in spectral_clustering.py — confirm there.
dist_matrix, sorted_dist_matrix = calculate_dist_matrix(ambig_data, "euclidean")

In [29]:
# Build the graph matrix A with the neighbour count set to (number of samples - 1),
# non-mutual neighbourhoods and "distance" weighting.
# NOTE(review): with k = len(ambig_data) - 1 this presumably connects every sample to all
# others — confirm against construct_knn_graph.
A = construct_knn_graph(
    dist_matrix,
    sorted_dist_matrix,
    k=len(ambig_data) - 1,
    mutual=False,
    weighting="distance",
)

In [30]:
# Mean over all entries of A (displayed as the cell output).
np.mean(A)

115.34996749952657

In [None]:
# igraph is used for all graph plotting below.
# NOTE(review): cairocffi is imported but never referenced in the visible cells —
# presumably it is only needed as igraph's drawing backend; confirm before removing.
import igraph as ig
import cairocffi as cairo
# Plot igraph's built-in "petersen" example graph.
# NOTE(review): this looks like a check that plotting works at all; `g` is overwritten
# by the data graph in a later cell.
g = ig.Graph.Famous("petersen")
ig.plot(g)

In [None]:
# Colour lookup keyed by integer id: ids 0-9 map to the colour-cycle names "C0".."C9",
# ids 10 and 11 get two fixed extra colours.
color_dict = {cycle_id: "C%d" % cycle_id for cycle_id in range(10)}
color_dict.update({10: "black", 11: "white"})

In [None]:
# Build an igraph graph from the boolean pattern of A: an edge exists wherever A > 0.
g = ig.Graph.Adjacency((A > 0).tolist())

# Attach the non-zero entries of A as the 'weight' attribute of the edges.
# NOTE(review): this relies on igraph creating the edges in the same order in which
# A.nonzero() enumerates the entries, and on A having no negative entries (otherwise
# A > 0 and A.nonzero() select different cells) — confirm both.
g.es['weight'] = A[A.nonzero()]
# Disabled vertex colouring, kept for reference.
# NOTE(review): it indexes clear_true_labels although A was built from ambig_data —
# check which labels are intended before re-enabling.
#g.vs['color'] = [color_dict[i] for i in clear_true_labels]  # or a.index/a.columns

In [None]:
# Compute vertex positions once with igraph's layout_lgl and keep them in `layout`,
# so the plotting cell can reuse them.
layout = g.layout_lgl()

# Disabled alternative: let ig.plot compute the layout itself from the layout name.
#ig.plot(g,layout = "layout_lgl")

In [None]:
# Draw the data graph using the precomputed layout.
ig.plot(g, layout = layout)