In [1]:
import numpy as np
from spectral_clustering import spectral_clustering
import functions_for_plotting
from asymmetric_laplacian_distribution import get_index_per_class, get_labels, labels_to_layout_mapping
from sklearn.cluster import KMeans
import training_set_split
import seaborn as sns
import prediction_strength
import importlib
import matplotlib.pyplot as plt



# Load Data and True Labels

In [2]:
#----------------------------------------------- DATA ------------------------------------------------------------------
# Directory (relative to the notebook) that holds the pre-generated .npy data sets.
data_dir = "data/"

# Two data sets are loaded side by side; every name below comes in a
# "clear_" and an "ambig_" variant that must be kept in sync.
clear_data = np.load(data_dir + "clearly_separated_data_F_signal_noise.npy")
ambig_data = np.load(data_dir + "ambiguous_data_tau_amplitude_F_signal_noise.npy")
#np.load(data_dir + "ambiguous_data_tau_amplitude_F_signal_noise.npy") #np.load(data_dir + "clearly_separated_data_F_signal_noise.npy")

# Amplitude conditions per data set (3 for clear, 5 for ambiguous).
clear_amplitude_conditions = ["S", "M", "L"]  #["S", "S/M", "M", "M/L", "L"] #["S", "M", "L"]
ambig_amplitude_conditions = ["S", "S/M", "M", "M/L", "L"]

# Time-constant conditions per data set (4 for clear, 9 for ambiguous).
clear_time_constant_conditions = ["equal_sharp", "equal_wide", "wide_sharp_negative_skew", "sharp_wide_positive_skew"]
ambig_time_constant_conditions = ["equal_sharp", "equal_medium", "equal_wide", "wide_sharp_negative_skew", "wide_medium_negative_skew","medium_sharp_negative_skew","sharp_wide_positive_skew", "medium_wide_positive_skew" ,"sharp_medium_positive_skew"]

#["equal_sharp", "equal_medium", "equal_wide", "wide_sharp_negative_skew", "wide_medium_negative_skew","medium_sharp_negative_skew","sharp_wide_positive_skew", "medium_wide_positive_skew" ,"sharp_medium_positive_skew"]
#["equal_sharp", "equal_wide", "wide_sharp_negative_skew", "sharp_wide_positive_skew"]

# Conditions (amplitude and time-constant names mixed) that are passed to
# get_index_per_class as "ambiguous"; only the ambiguous data set uses them.
ambiguous_conditions = ["S/M", "M/L", "equal_medium", "wide_medium_negative_skew", "medium_sharp_negative_skew", "medium_wide_positive_skew", "sharp_medium_positive_skew"]

# Sample counts handed to get_index_per_class.
# presumably: 1000 samples for a regular class and 400 for a class involving an
# ambiguous condition (matches the label counts printed further down) - confirm.
samples_per_condition = 1000
samples_per_ambiguous_condition = 400

# Per-class index dictionaries; the clear data set has no ambiguous conditions ([]).
ambig_cluster_dict = get_index_per_class(ambig_amplitude_conditions,ambig_time_constant_conditions, ambiguous_conditions, samples_per_condition, samples_per_ambiguous_condition)
clear_cluster_dict = get_index_per_class(clear_amplitude_conditions,clear_time_constant_conditions, [], samples_per_condition, samples_per_ambiguous_condition)


# Ground-truth label vector for every sample of each data set.
clear_true_labels = get_labels(clear_data, clear_cluster_dict)
ambig_true_labels = get_labels(ambig_data, ambig_cluster_dict)

# NOTE(review): range(0, len(dict) + 1) yields one more entry than the dict has
# classes - confirm that labels_to_layout_mapping expects this extra entry.
clear_clusters_ordered = list(range(0,len(clear_cluster_dict)+1))
clear_layout_label_mapping = labels_to_layout_mapping(clear_clusters_ordered, 4, (1,4)) #labels_to_layout_mapping(clusters_ordered, 4, (1,4)) #labels_to_layout_mapping(clusters_ordered, 9, (2,5))

ambig_clusters_ordered = list(range(0,len(ambig_cluster_dict)+1))
ambig_layout_label_mapping = labels_to_layout_mapping(ambig_clusters_ordered, 9, (2,5))

## Cluster Balanced Split

In [3]:
# Balanced split: only the first return value of get_training_folds is used;
# it is indexed with [0] for the training fold and [1] for the validation fold.
clear_train_fold_indices, _ = training_set_split.get_training_folds(
    clear_data,
    clear_cluster_dict,
    cluster_split="balanced",
    folds=2,
)
ambig_train_fold_indices, _ = training_set_split.get_training_folds(
    ambig_data,
    ambig_cluster_dict,
    cluster_split="balanced",
    folds=2,
)

# Data of both folds, clear data set first, then the ambiguous one.
clear_training_set, clear_validation_set = (
    clear_data[clear_train_fold_indices[0]],
    clear_data[clear_train_fold_indices[1]],
)
ambig_training_set, ambig_validation_set = (
    ambig_data[ambig_train_fold_indices[0]],
    ambig_data[ambig_train_fold_indices[1]],
)

# Ground-truth labels of the same folds.
clear_true_labels_training, clear_true_labels_validation = (
    clear_true_labels[clear_train_fold_indices[0]],
    clear_true_labels[clear_train_fold_indices[1]],
)
ambig_true_labels_training, ambig_true_labels_validation = (
    ambig_true_labels[ambig_train_fold_indices[0]],
    ambig_true_labels[ambig_train_fold_indices[1]],
)

## Cluster Unbalanced Split

In [3]:
# Fix the global NumPy RNG so the random choice of training clusters is reproducible.
# np.random.seed() returns None, so the seed value itself is stored in `seed`
# instead of the call's return value.
seed = 42
np.random.seed(seed)

# Cluster ids of both data sets: 12 = 3 amplitude x 4 time-constant conditions,
# 45 = 5 amplitude x 9 time-constant conditions.
clear_clusters = np.arange(12)
ambig_clusters = np.arange(45)

# Shuffle in place; the order of the two calls matters for reproducibility
# because both draw from the same global RNG stream.
np.random.shuffle(clear_clusters)
np.random.shuffle(ambig_clusters)

# The first half of each shuffled id list (6 of 12, 23 of 45) goes to the training split.
clear_training_clusters = clear_clusters[0:6]
ambig_training_clusters = ambig_clusters[0:23]

In [4]:
# Unbalanced split: whole clusters are assigned to either the training or the
# validation side. Unlike the balanced cell, both return values of
# get_training_folds are used directly as index sets (no [0] / [1] indexing).
clear_train_fold_indices, clear_valid_fold_indices = training_set_split.get_training_folds(clear_data, clear_cluster_dict,cluster_split="unbalanced",training_clusters = clear_training_clusters,folds = 2)
ambig_train_fold_indices, ambig_valid_fold_indices = training_set_split.get_training_folds(ambig_data, ambig_cluster_dict,cluster_split="unbalanced",training_clusters = ambig_training_clusters, folds = 2)


clear_training_set = clear_data[clear_train_fold_indices]
clear_validation_set = clear_data[clear_valid_fold_indices]

ambig_training_set = ambig_data[ambig_train_fold_indices]
# Fixed: this used ambig_train_fold_indices, which made the ambiguous validation
# set a copy of the training set while its labels came from the validation indices.
ambig_validation_set = ambig_data[ambig_valid_fold_indices]

# Ground-truth labels, indexed with the same index sets as the data above.
clear_true_labels_training = clear_true_labels[clear_train_fold_indices]
clear_true_labels_validation = clear_true_labels[clear_valid_fold_indices]

ambig_true_labels_training = ambig_true_labels[ambig_train_fold_indices]
ambig_true_labels_validation = ambig_true_labels[ambig_valid_fold_indices]

In [69]:
# Sanity check: which true labels ended up in each split, and how many samples each has.
for split_true_labels in (
    clear_true_labels_training,
    clear_true_labels_validation,
    ambig_true_labels_training,
    ambig_true_labels_validation,
):
    print(np.unique(split_true_labels, return_counts=True))

(array([ 0.,  2.,  5.,  8.,  9., 10.]), array([1000, 1000, 1000, 1000, 1000, 1000]))
(array([ 1.,  3.,  4.,  6.,  7., 11.]), array([1000, 1000, 1000, 1000, 1000, 1000]))
(array([ 0.,  3.,  4.,  5.,  6.,  7.,  8., 10., 12., 13., 17., 18., 19.,
       22., 25., 30., 31., 34., 35., 36., 38., 42., 44.]), array([1000, 1000,  400,  400, 1000,  400,  400,  400,  400,  400,  400,
       1000,  400,  400,  400,  400,  400,  400,  400, 1000, 1000, 1000,
        400]))
(array([ 1.,  2.,  9., 11., 14., 15., 16., 20., 21., 23., 24., 26., 27.,
       28., 29., 32., 33., 37., 39., 40., 41., 43.]), array([ 400, 1000,  400,  400,  400,  400,  400, 1000, 1000,  400, 1000,
        400,  400,  400,  400,  400,  400,  400, 1000,  400,  400,  400]))


In [6]:
#k = 10
#reg = None

In [7]:
#labels, eigvec, eigval = spectral_clustering(clear_training_set, "euclidean", range(1,50),  k=k, mutual = False, weighting = "distance", normalize = True, reg_lambda = reg, save_laplacian = False, save_eigenvalues_and_vectors = False)
#np.save("labels_k=%d_reg=%s_clear_training_unbalanced" % (k, str(reg)),labels)

Calculate Normalized Laplacians
Calculate Eigenvalues and Vectors of Laplacian


In [8]:
#labels, eigvec, eigval = spectral_clustering(clear_validation_set, "euclidean", range(1,50),  k=k, mutual = False, weighting = "distance", normalize = True, reg_lambda = reg, save_laplacian = False, save_eigenvalues_and_vectors = False)
#np.save("labels_k=%d_reg=%s_clear_validation_unbalanced" % (k, str(reg)),labels)

Calculate Normalized Laplacians
Calculate Eigenvalues and Vectors of Laplacian


In [23]:
#save_file_clusters = "true_clusters_clear_unbalanced_validation_mean.pdf" 

In [24]:
#functions_for_plotting.plot_clusters(clear_validation_set, clear_true_labels_validation,clear_true_labels_validation, 3,4, clear_layout_label_mapping,figsize=(20,20),n_bursts = 100,y_lim = (0,16),save_file=save_file_clusters ,subplot_adjustments= [0.05,0.95,0.03,0.9,0.4, 0.15], plot_mean=True, title= "Validation Set Clusters (Mean)")

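## Prediction strength vs. F1 score
For a pair of validation samples, "training" refers to the cluster assignment predicted from the training-set centroids and "validation" to the clustering of the validation set itself:
- True positives: same cluster in training, same cluster in validation
- False positives: different cluster in training, same cluster in validation
- True negatives: different cluster in training, different cluster in validation
- False negatives: same cluster in training, different cluster in validation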

In [4]:
from prediction_strength import calculate_centroids_and_predict_validation_data
from prediction_strength import get_confusion_matrix
from prediction_strength import get_recall
from prediction_strength import get_precision
from prediction_strength import get_F1_score
from math import factorial
from itertools import permutations
from itertools import product

In [5]:
def _f1_from_label_assignments(valid_labels_k, labels_centroids_based, unique_valid_labels, unique_centroid_labels):
    """Return the F1 score of centroid-based labels against a validation clustering.

    Chains get_confusion_matrix -> get_precision / get_recall -> get_F1_score.
    The true-negative count is not needed for F1 and is discarded.
    """
    true_positives, false_positives, _, false_negatives = get_confusion_matrix(valid_labels_k, labels_centroids_based, unique_valid_labels, unique_centroid_labels)
    precision = get_precision(true_positives, false_positives)
    recall = get_recall(true_positives, false_negatives)
    return get_F1_score(recall, precision)


def get_F1_score_per_k(data, train_indices, valid_indices, train_labels, valid_labels,combination_type = "full", true_train_labels = None, own_combinations=None):
    """Compute F1 scores between validation clusterings and training-centroid predictions.

    The validation samples are labelled via the centroids of a training
    labelling (calculate_centroids_and_predict_validation_data) and those
    labels are compared with the clustering of the validation set itself.

    Parameters
    ----------
    data : array indexed with train_indices / valid_indices to obtain the two sets.
    train_indices, valid_indices : index sets selecting training / validation samples.
    train_labels : dict mapping a cluster count k to the training-set labels for k clusters.
    valid_labels : dict mapping a cluster count k to the validation-set labels for k clusters;
        its keys define which k are evaluated.
    combination_type : one of
        "true"  - centroids come from `true_train_labels`; result keyed by validation k.
        "equal" - training and validation use the same k; result keyed by k.
        "full"  - every (k_train, k_valid) pair of the available k; result keyed by the pair.
        "own"   - only the pairs listed in `own_combinations`; result keyed by the pair.
    true_train_labels : true labels of the training set, required for "true".
    own_combinations : iterable of (k_train, k_valid) pairs, required for "own".

    Returns
    -------
    dict mapping k (or a (k_train, k_valid) pair) to the F1 score. For "true"
    without `true_train_labels` a message is printed and None is returned.

    Raises
    ------
    TypeError : `combination_type` is "own" but `own_combinations` is None.
    ValueError : `combination_type` is not one of the four supported values.
    """
    k_clusters = list(valid_labels.keys())
    training_set = data[train_indices]
    validation_set = data[valid_indices]

    # The centroid-based prediction depends only on the training labelling, not on
    # the validation clustering it is compared with, so it is computed once per
    # training labelling and reused.
    # NOTE(review): this assumes calculate_centroids_and_predict_validation_data is
    # deterministic and get_confusion_matrix does not modify its inputs - confirm.
    prediction_cache = {}

    def predict_validation_labels(cache_key, train_labels_k, has_multiple_clusters):
        if cache_key not in prediction_cache:
            if has_multiple_clusters:
                _, predicted = calculate_centroids_and_predict_validation_data(training_set, train_labels_k, validation_set)
            else:
                # A single training cluster puts every validation sample into cluster 0.
                predicted = np.zeros(len(validation_set))
            prediction_cache[cache_key] = predicted
        return prediction_cache[cache_key]

    def score_combination(k_train, k_valid):
        train_labels_k = train_labels[k_train]
        valid_labels_k = valid_labels[k_valid]
        labels_centroids_based = predict_validation_labels(k_train, train_labels_k, k_train > 1)
        return _f1_from_label_assignments(valid_labels_k, labels_centroids_based, list(range(k_valid)), list(range(k_train)))

    if combination_type == "true":
        print("Calculate F1 score based on true training centroids!")
        if true_train_labels is None:
            print("True labels for the training set not found..Please provide true labels!!")
            return None

        unique_centroid_labels = list(np.unique(true_train_labels))
        has_multiple_clusters = len(unique_centroid_labels) > 1

        true_F1_score_per_k = {}
        for k in k_clusters:
            labels_centroids_based = predict_validation_labels("true", true_train_labels, has_multiple_clusters)
            # A fresh copy of the label list is passed on every call, as before.
            true_F1_score_per_k[k] = _f1_from_label_assignments(valid_labels[k], labels_centroids_based, list(range(k)), list(unique_centroid_labels))
        return true_F1_score_per_k

    if combination_type == "equal":
        print("Calculate F1 score based on training centroids assuming same number of clusters in both sets!")
        return {k: score_combination(k, k) for k in k_clusters}

    if combination_type == "full":
        print("Calculate F1 score based on training centroids with full permutation of possible clusters in both sets!")
        F1_score_per_k_combination = {}
        for counter, k1k2 in enumerate(product(k_clusters, repeat=2), start=1):
            if counter % 50 == 0:
                print("Step:%d" % counter)
            F1_score_per_k_combination[k1k2] = score_combination(k1k2[0], k1k2[1])
        return F1_score_per_k_combination

    if combination_type == "own":
        print("Calculate F1 score based on training centroids with provided combination of possible clusters in both sets!")
        if own_combinations is None:
            # Same exception type as iterating over None raised before, with a usable message.
            raise TypeError("combination_type='own' requires own_combinations (an iterable of (k_train, k_valid) pairs)")
        return {k1k2: score_combination(k1k2[0], k1k2[1]) for k1k2 in own_combinations}

    # Previously an unknown combination_type fell through and returned None silently.
    raise ValueError("Unknown combination_type %r; expected 'true', 'equal', 'full' or 'own'" % (combination_type,))

## Spectral Clustering Configuration 

In [108]:
# Configuration identifying which saved label files are loaded below:
# `k` and `reg` are formatted into the file names ("labels_k=%d_reg=%s_...").
# NOTE(review): the commented-out spectral_clustering calls pass them as k=k and
# reg_lambda=reg, so presumably k is the neighbourhood size and reg the
# regularisation - confirm.
k = 10
reg = None

# Directories containing the "Labels/" sub-folder for each data set.
clear_prediction_strength_dir = "Toy_data/Clearly_Separated/Prediction_Strength/"
ambig_prediction_strength_dir = "Toy_data/Ambiguous/Ambiguous_Tau_Amplitude/Prediction_Strength/"

## Load labels
### balanced

In [109]:
# Saved cluster labels of the balanced split, one file per data set and fold.
clear_training_set_labels = np.load(clear_prediction_strength_dir + "Labels/labels_k=%d_reg=%s_training.npy" % (k, str(reg)))
clear_validation_set_labels = np.load(clear_prediction_strength_dir + "Labels/labels_k=%d_reg=%s_validation.npy" % (k, str(reg)))

ambig_training_set_labels = np.load(ambig_prediction_strength_dir + "Labels/labels_k=%d_reg=%s_training.npy" % (k, str(reg)))
ambig_validation_set_labels = np.load(ambig_prediction_strength_dir + "Labels/labels_k=%d_reg=%s_validation.npy" % (k, str(reg)))


# Re-key the label arrays as dictionaries: entry i is stored under key i + 1.
# The validation dictionaries take their key range from the TRAINING arrays.
clear_train_labels = dict(enumerate(clear_training_set_labels, start=1))
clear_valid_labels = {
    position + 1: clear_validation_set_labels[position]
    for position in range(len(clear_training_set_labels))
}

ambig_train_labels = dict(enumerate(ambig_training_set_labels, start=1))
ambig_valid_labels = {
    position + 1: ambig_validation_set_labels[position]
    for position in range(len(ambig_training_set_labels))
}

In [58]:
# Inspect entry 12 of the raw validation label array; because of the i + 1 key
# offset this is the entry stored as clear_valid_labels[13].
clear_validation_set_labels[12]

array([0, 0, 0, ..., 4, 4, 4], dtype=int32)

### unbalanced

In [27]:
# Saved cluster labels of the unbalanced split (clear data only); the files are
# read from the current working directory.
clear_training_set_labels = np.load("labels_k=%d_reg=%s_clear_training_unbalanced.npy" % (k, str(reg)))
clear_validation_set_labels = np.load("labels_k=%d_reg=%s_clear_validation_unbalanced.npy" % (k, str(reg)))

#ambig_training_set_labels = np.load("labels_k=%d_reg=%s_ambig_training_unbalanced.npy" % (k, str(reg)))
#ambig_validation_set_labels = np.load("labels_k=%d_reg=%s_ambig_validation_unbalanced.npy" % (k, str(reg)))

# Re-key the label arrays as dictionaries: entry i is stored under key i + 1.
# The validation dictionary takes its key range from the TRAINING array.
clear_train_labels = dict(enumerate(clear_training_set_labels, start=1))
clear_valid_labels = {
    position + 1: clear_validation_set_labels[position]
    for position in range(len(clear_training_set_labels))
}

#ambig_train_labels = {}
#ambig_valid_labels = {}
#for i, labels in enumerate(ambig_training_set_labels):
#    ambig_train_labels[i+1] = labels
#    ambig_valid_labels[i+1] = ambig_validation_set_labels[i]

## F1 Score & Prediction Strength

### Clear Data

In [100]:
# F1 score of every validation clustering of the clear data, using centroids
# built from the TRUE training labels (combination_type="true").
# NOTE(review): the [0] / [1] indexing matches what the balanced split cell
# stores in clear_train_fold_indices; the unbalanced split cell rebinds that
# name to a single index set - confirm the balanced cell was the last one run.
clear_F1_score_per_k = get_F1_score_per_k(clear_data, clear_train_fold_indices[0], clear_train_fold_indices[1], clear_train_labels, clear_valid_labels, combination_type = "true" ,true_train_labels = clear_true_labels_training)

Calculate F1 score based on true training centroids!


In [101]:
# Prediction strengths for the clear data, computed on the same folds and label
# dictionaries as the F1 scores; the second return value is discarded.
# NOTE(review): relies on clear_train_fold_indices holding the balanced folds
# ([0] = training, [1] = validation) - confirm which split cell ran last.
clear_prediction_strengths_per_k,_ = prediction_strength.get_prediction_strength_per_k(clear_data, clear_train_fold_indices[0], clear_train_fold_indices[1], clear_train_labels, clear_valid_labels, per_sample = False, true_train_labels = clear_true_labels_training)

Calculate Predictions Strength per Cluster for each Clustering!


### Ambiguous Data

In [110]:
# F1 score of every validation clustering of the ambiguous data, using centroids
# built from the TRUE training labels (combination_type="true").
# NOTE(review): the [0] / [1] indexing matches what the balanced split cell
# stores in ambig_train_fold_indices; the unbalanced split cell rebinds that
# name to a single index set - confirm the balanced cell was the last one run.
ambig_F1_score_per_k = get_F1_score_per_k(ambig_data, ambig_train_fold_indices[0], ambig_train_fold_indices[1], ambig_train_labels, ambig_valid_labels,combination_type = "true" ,true_train_labels = ambig_true_labels_training)

Calculate F1 score based on true training centroids!


In [111]:
# Prediction strengths for the ambiguous data, computed on the same folds and
# label dictionaries as the F1 scores; the second return value is discarded.
# NOTE(review): relies on ambig_train_fold_indices holding the balanced folds
# ([0] = training, [1] = validation) - confirm which split cell ran last.
ambig_prediction_strengths_per_k,_ = prediction_strength.get_prediction_strength_per_k(ambig_data, ambig_train_fold_indices[0], ambig_train_fold_indices[1], ambig_train_labels, ambig_valid_labels, per_sample = False, true_train_labels = ambig_true_labels_training)

Calculate Predictions Strength per Cluster for each Clustering!


## Plot Mean Prediction Strength vs. F1 Score

In [112]:
# Settings for the "prediction strength vs. F1" figure.
figsize=(20,10)
# Subplot margins, read by the plotting cell in the order [left, bottom, right, top].
plot_adjustments = [0.05,0.08,0.95, 0.91]
# Shown as the second title line; built from the current k / reg configuration.
configuration = "k=%d - reg=%s" % (k,str(reg))
# The data-set / split part of the file name ("ambig_balanced_true") is
# hard-coded and has to be changed by hand when other scores are plotted.
save_file = "PS_vs_F1_k=%d_reg=%s_ambig_balanced_true.pdf" % (k,str(reg))

In [113]:
# Choose which results the plotting cell uses (here: the ambiguous data set).
# Keep this in sync with the hard-coded data-set name in `save_file`.
prediction_strengths_per_k = ambig_prediction_strengths_per_k
F1_score_per_k = ambig_F1_score_per_k

In [114]:
# Plot F1 score, mean prediction strength and minimum prediction strength
# against the number of clusters and annotate the best value of each curve.
fig, ax = plt.subplots(figsize=figsize)

k_clusters = list(prediction_strengths_per_k.keys())

mean_prediction_strengths = []
err_prediction_strengths = []
min_prediction_strengths = []
F1_scores = []

# The loop variable is deliberately not called `k`: `k` is the notebook-wide
# clustering parameter used in file names and titles, and a `for k in ...`
# loop here would silently overwrite it for every cell run afterwards.
for n_clusters in k_clusters:
    strengths = prediction_strengths_per_k[n_clusters]
    mean_prediction_strengths.append(np.mean(strengths))
    err_prediction_strengths.append(np.std(strengths))
    min_prediction_strengths.append(np.amin(strengths))
    F1_scores.append(F1_score_per_k[n_clusters])


# Asymmetric error bars (upper bar clipped so that mean + error never exceeds 1).
# They are computed but not drawn by this cell.
upper_err = np.asarray(err_prediction_strengths) - np.maximum(0,(np.asarray(err_prediction_strengths)+np.asarray(mean_prediction_strengths)-1))
lower_err = np.asarray(err_prediction_strengths)
err = np.stack((lower_err,upper_err), axis=0)

ax.plot(k_clusters, F1_scores, "o-", label="F1-Scores",color = "C03",linewidth=3)
ax.plot(k_clusters, mean_prediction_strengths, "o-",label="Mean PS",color = "C0",linewidth=3)
ax.plot(k_clusters, min_prediction_strengths, "o-", color = "C01", label="Min PS",linewidth=3)

# Best position of each curve; the first entry is skipped ([1:]) and the index
# is shifted back by one so it refers to the full lists again.
argmax_f1 = np.argmax(F1_scores[1:]) + 1
argmax_mean_ps = np.argmax(mean_prediction_strengths[1:]) +1
argmax_min_ps = np.argmax(min_prediction_strengths[1:])+1


# When the F1 and mean-PS maxima are fewer than 8 positions apart their text
# labels could overlap, so the label of the higher value is raised a bit more.
if np.abs(argmax_mean_ps-argmax_f1) <8:
    if F1_scores[argmax_f1] <= mean_prediction_strengths[argmax_mean_ps]:
        ps_shift = 0.03
        f1_shift = 0
    else:
        ps_shift = 0
        f1_shift = 0.03
else:
    ps_shift = 0
    f1_shift = 0

# The number of clusters in each label is read from k_clusters instead of being
# reconstructed as index + 1, so it stays correct if the keys do not start at 1.
ax.annotate("#%d|Score=%.3f" % (k_clusters[argmax_mean_ps], mean_prediction_strengths[argmax_mean_ps]), (k_clusters[argmax_mean_ps] - 1, mean_prediction_strengths[argmax_mean_ps] + 0.03 + ps_shift),fontsize = 16, color = "C0")
ax.annotate("#%d|Score=%.3f" % (k_clusters[argmax_f1], F1_scores[argmax_f1]), (k_clusters[argmax_f1] - 1, F1_scores[argmax_f1] + 0.03 + f1_shift), fontsize=16, color = "C03")
ax.annotate("#%d|Score=%.3f" % (k_clusters[argmax_min_ps], min_prediction_strengths[argmax_min_ps]), (k_clusters[argmax_min_ps] - 1, min_prediction_strengths[argmax_min_ps] + 0.03), fontsize=16, color = "C01")

title = "Prediction Strength vs. F1-Score for Clustering with k Clusters \n" + configuration 

ax.set_title(title, fontsize=22, pad=20)
ax.set_xticks(k_clusters)
ax.set_xlabel("# Number of clusters", fontsize=18, labelpad=10)
ax.set_ylabel("Score", fontsize=18, labelpad=10)
ax.set_ylim((0, 1.1))
ax.tick_params(axis='y',labelsize=14)
ax.tick_params(axis='x',labelsize=14)

ax.set_yticks(np.arange(0, 1.1,0.1))
left = plot_adjustments[0]
bottom = plot_adjustments[1]
right = plot_adjustments[2]
top = plot_adjustments[3]

# Keyword arguments make the margin order explicit.
plt.subplots_adjust(left=left, bottom=bottom, right=right, top=top)

ax.legend(fontsize = 14, loc="lower right")

# The figure is only written to disk; close this specific figure afterwards.
plt.savefig(save_file)
plt.close(fig)

In [105]:
# Settings for the cluster plot of the clear validation set.
# NOTE(review): this cell only rebinds k / reg for the title and file name; it
# does not reload the label files, so clear_valid_labels still holds whatever
# configuration was loaded last (the config cell above uses reg = None) - confirm
# the labels really belong to reg = 10.
k = 10
reg = 10
# Here k_clusters is a single cluster count (the plotting cell above uses the
# same name for the list of all cluster counts).
k_clusters = 35
save_file_clusters = "PS_vs_F1_clusters_k=%d_reg=%s_clear_balanced_true_kclusters=%d_validation.pdf" % (k,str(reg),k_clusters)

In [106]:
# Plot the clear validation set with the clustering into `k_clusters` clusters
# next to the true labels. "\\lambda" is escaped explicitly: the previous
# "\lambda" relied on an invalid escape sequence, which Python reports with a
# warning. The title text passed to plot_clusters is unchanged.
functions_for_plotting.plot_clusters(
    clear_validation_set,
    clear_true_labels[clear_train_fold_indices[1]],
    clear_valid_labels[k_clusters],
    3,
    4,
    clear_layout_label_mapping,
    figsize=(20, 20),
    n_bursts=100,
    y_lim=(0, 16),
    save_file=save_file_clusters,
    subplot_adjustments=[0.05, 0.95, 0.03, 0.9, 0.4, 0.15],
    plot_mean=False,
    title="Validation Set Clusters \n k=%d, $\\lambda$=%s" % (k, str(reg)),
)

  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])


In [46]:
# Settings for the cluster plot in the next cell.
# NOTE(review): the file name says "clear_balanced", but the next cell plots the
# ambiguous validation set - confirm the intended data-set name before saving.
k = 10
reg = None
# Single cluster count selecting which validation clustering is plotted.
k_clusters = 49
save_file_clusters = "PS_vs_F1_clusters_k=%d_reg=%s_clear_balanced_true_kclusters=%d.pdf" % (k,str(reg),k_clusters)

In [45]:
# Plot the ambiguous validation set with the clustering into `k_clusters`
# clusters next to the true labels. "\\lambda" is escaped explicitly: the
# previous "\lambda" relied on an invalid escape sequence, which Python reports
# with a warning. The title text passed to plot_clusters is unchanged.
functions_for_plotting.plot_clusters(
    ambig_validation_set,
    ambig_true_labels[ambig_train_fold_indices[1]],
    ambig_valid_labels[k_clusters],
    10,
    5,
    ambig_layout_label_mapping,
    figsize=(40, 30),
    n_bursts=100,
    y_lim=(0, 16),
    save_file=save_file_clusters,
    subplot_adjustments=[0.05, 0.93, 0.02, 0.92, 0.9, 0.2],
    plot_mean=False,
    title="Validation Set Clusters \n k=%d, $\\lambda$=%s" % (k, str(reg)),
)

  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_start:row_end, corresponding_column])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_start:row_end, corresponding_column])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_start:row_end, corresponding_column])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_start:row_end, corresponding_column])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_start:row_end, corresponding_column])
  ax = fig.add_subplot(outer_grid[row_i:(row

  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_start:row_end, corresponding_column])
  ax = fig.add_subplot(outer_grid[row_start:row_end, corresponding_column])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_start:row_end, corresponding_column])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_start:row_end, corresponding_column])
  ax = fig.add_subplot(outer_grid[row_start:row_end, corresponding_column])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])
  ax = fig.add_subplot(outer_grid[row_start:row_end, corresponding_column])
  ax = fig.add_subplot(outer_grid[row_i:(row_i + 1), column_i])


In [95]:
# All ordered (k_train, k_valid) pairs with both cluster counts between 8 and 14.
candidate_cluster_counts = range(8, 15)
own_combinations = list(product(candidate_cluster_counts, repeat=2))

In [96]:
# F1 scores for the hand-picked (k_train, k_valid) pairs in own_combinations.
# NOTE(review): `data`, `train_fold_indices`, `train_labels` and `valid_labels`
# are not assigned in any cell visible in this notebook (the other cells use the
# clear_* / ambig_* names), so this call depends on leftover kernel state -
# confirm which data set was meant and use its names.
F1_score_per_k = get_F1_score_per_k(data, train_fold_indices[0], train_fold_indices[1], train_labels, valid_labels,combination_type = "own" ,own_combinations = own_combinations)

In [101]:
# Display the F1 score of every (k_train, k_valid) combination.
F1_score_per_k

{(8, 8): 0.6398728741897797,
 (8, 9): 0.6523157422484107,
 (8, 10): 0.747866441051456,
 (8, 11): 0.7676779124301703,
 (8, 12): 0.7643628278290526,
 (8, 13): 0.7561196701895473,
 (8, 14): 0.7517187506416196,
 (9, 8): 0.6206285457021343,
 (9, 9): 0.6348035795466058,
 (9, 10): 0.54128442367322,
 (9, 11): 0.561106675340261,
 (9, 12): 0.5566062570062832,
 (9, 13): 0.5430555708250097,
 (9, 14): 0.5369686305500886,
 (10, 8): 0.6586809967770928,
 (10, 9): 0.6740591614866355,
 (10, 10): 0.8230800217314856,
 (10, 11): 0.8504598894390333,
 (10, 12): 0.8465166780851942,
 (10, 13): 0.8306834166226942,
 (10, 14): 0.825293948204031,
 (11, 8): 0.6611515311436493,
 (11, 9): 0.6769921408954339,
 (11, 10): 0.8286083600749147,
 (11, 11): 0.856079256417438,
 (11, 12): 0.8542812373582425,
 (11, 13): 0.8382913727952211,
 (11, 14): 0.8328617080469184,
 (12, 8): 0.5921517504884093,
 (12, 9): 0.607038716831298,
 (12, 10): 0.7748125031675737,
 (12, 11): 0.8052822141267895,
 (12, 12): 0.8032479813523717,
 (12, 13

In [110]:
# Position (in dictionary order) of the highest F1 score; for ties np.argmax
# returns the first one.
argmax = np.argmax(list(F1_score_per_k.values()))

In [113]:
# Key and value at the best position found above.
# NOTE(review): the recorded output of `combination` is a plain number (47), so
# F1_score_per_k was keyed by single cluster counts when this ran, not by the
# (k_train, k_valid) pairs displayed earlier - confirm which dictionary is meant.
combination = np.asarray(list(F1_score_per_k.keys()))[argmax]
f1_score = np.asarray(list(F1_score_per_k.values()))[argmax]

In [114]:
# Display the key with the highest F1 score.
combination

47

In [120]:
# F1 score stored under key 12; this lookup only works while F1_score_per_k is
# keyed by single cluster counts rather than (k_train, k_valid) tuples.
F1_score_per_k[12]

0.31849359707172603

In [115]:
# Display the highest F1 score.
f1_score

0.6195245612672132