In [None]:
import os
import numpy as np
from spectral_clustering import spectral_clustering
import seaborn as sns
import functions_for_plotting
from asymmetric_laplacian_distribution import get_index_per_class, get_labels, labels_to_layout_mapping
from sklearn.cluster import KMeans
import training_set_split
import seaborn as sns
import prediction_strength
import importlib
import matplotlib.pyplot as plt
from prediction_strength import get_F1_score_per_k
from matplotlib.legend import Legend
from training_set_split import get_training_folds
import wagenaar_dataset

# Toy Dataset

## Tiny-burst-overloaded data inspired by the Wagenaar dataset

In [None]:
# Location of the day-20 recording used as the template for the toy data.
data_dir = "data/raw_data/daily_spontanous_dense/day20/"

# The array is transposed after loading; the cells below take the per-row
# maximum (axis=1) to classify bursts and use shape[1] as the time extent.
day20_bursts_by_time = np.load(data_dir + "data_burst_by_time_day_20.npy")
day20_data = day20_bursts_by_time.T

### Parameters

In [None]:
# --- Time axis -------------------------------------------------------------
# Both values are derived from the second dimension of the real recording.
n_time_steps = day20_data.shape[1]
TIME_RANGE = [0, n_time_steps]
MU = n_time_steps/2  # midpoint of the time range

# --- Noise -----------------------------------------------------------------
NOISE_TYPE = "gaussian"
USE_EQUAL_NOISE = True
EQUAL_NOISE = [0, 0.2]

# --- Conditions ------------------------------------------------------------
# 3 amplitude levels x 4 time-constant shapes; no ambiguous conditions.
AMPLITUDE_CONDITIONS = ["S", "M", "L"]
TIME_CONSTANT_CONDITIONS = [
    "equal_sharp",
    "equal_wide",
    "wide_sharp_negative_skew",
    "sharp_wide_positive_skew",
]
AMBIGUOUS_CONDITIONS = []
SAMPLES_PER_AMBIGUOUS_CONDITION = 0

In [None]:
# A burst (row) counts as "tiny" when its peak value does not exceed 1.
is_tiny_burst = day20_data.max(axis=1) <= 1
n_tiny_bursts_in_real_data = np.count_nonzero(is_tiny_burst)

# Every remaining row is a non-tiny burst.
n_non_tiny_bursts_in_real_data = day20_data.shape[0] - n_tiny_bursts_in_real_data

In [None]:
# Report how many tiny / non-tiny bursts the real recording contains.
for _label, _count in (
    ("Tiny Bursts: ", n_tiny_bursts_in_real_data),
    ("Non-Tiny Bursts: ", n_non_tiny_bursts_in_real_data),
):
    print(_label, _count)

### Uniformly sample the number of bursts per condition (tiny and non-tiny)

In [None]:
# Fix the global NumPy seed so the per-condition counts are reproducible.
np.random.seed(42)

# Equal probability for each condition within a group: 4 conditions receive
# the tiny bursts and 8 receive the non-tiny ones (4 + 8 = 12 = 3 amplitudes
# x 4 time-constant shapes).
# NOTE(review): presumably the 4 tiny conditions are the "S" amplitude ones -- confirm.
uniform_over_tiny = [0.25] * 4
uniform_over_non_tiny = [0.125] * 8

# size=1 yields a single row of counts; unpack that one row directly.
(tiny_burst_conditions,) = np.random.multinomial(
    n_tiny_bursts_in_real_data, uniform_over_tiny, size=1
)
(non_tiny_burst_conditions,) = np.random.multinomial(
    n_non_tiny_bursts_in_real_data, uniform_over_non_tiny, size=1
)

In [None]:
# Show the sampled number of bursts assigned to each condition.
for _label, _counts in (
    ("Tiny burst #/condition:", tiny_burst_conditions),
    ("Non-Tiny burst #/condition:", non_tiny_burst_conditions),
):
    print(_label, _counts)

In [None]:
# Per-condition sample counts: the tiny-burst conditions first, followed by
# the non-tiny ones, as one flat list.
SAMPLES_PER_CONDITION = list(tiny_burst_conditions)
SAMPLES_PER_CONDITION.extend(non_tiny_burst_conditions)
print(SAMPLES_PER_CONDITION)

In [None]:
# Generate the synthetic dataset from the condition lists, the per-condition
# sample counts, MU, and the noise settings defined in the cells above.
# The four return values are unpacked as F_signal, F_signal_noise, noises and
# param_data (presumably clean signals, noisy signals, the noise itself and a
# parameter table -- inferred from the names; confirm against generate_ALF_data).
#
# NOTE(review): neither `generate_ALF_data` nor `X` is imported or defined in
# any cell visible above, so this cell raises NameError on a fresh kernel
# (Restart & Run All). Presumably `generate_ALF_data` comes from
# asymmetric_laplacian_distribution and `X` is the time axis spanned by
# TIME_RANGE -- TODO confirm and define both explicitly.
# NOTE(review): EQUAL_NOISE is set in the parameter cell but is not passed
# here -- confirm that is intended.
F_signal, F_signal_noise, noises, param_data = generate_ALF_data(X, AMPLITUDE_CONDITIONS, 
                                                                 TIME_CONSTANT_CONDITIONS,
                                                                 AMBIGUOUS_CONDITIONS, 
                                                                 SAMPLES_PER_CONDITION,
                                                                 SAMPLES_PER_AMBIGUOUS_CONDITION,MU, 
                                                                 noise_type = NOISE_TYPE, 
                                                                 equal_noise = USE_EQUAL_NOISE)

In [None]:
#param_data.to_csv( "clear_data_equal_noise=[0,0.2]_tiny_burst_overload_non_tiny_upsampled" + "_parameter" + ".csv",index=False)
#np.save("clear_data_equal_noise=[0,0.2]_tiny_burst_overload_non_tiny_upsampled" + "_F_signal",F_signal)
#np.save("clear_data_equal_noise=[0,0.2]_tiny_burst_overload_non_tiny_upsampled" + "_F_signal_noise", F_signal_noise)

In [None]:
# Load the noisy signals from a previously saved file instead of using the
# freshly generated F_signal_noise.
# NOTE(review): this filename lacks the "_non_tiny_upsampled" part used in the
# commented-out save cell above -- confirm this is the intended file and that
# it was generated with the same SAMPLES_PER_CONDITION.
noisy_signal_file = "clear_data_equal_noise=[0,0.2]_tiny_burst_overload_F_signal_noise.npy"
data = np.load(noisy_signal_file)

# Build the per-class index lookup from the condition lists and sample counts,
# then derive the ground-truth label of every sample in `data`.
class_dict = get_index_per_class(
    AMPLITUDE_CONDITIONS,
    TIME_CONSTANT_CONDITIONS,
    AMBIGUOUS_CONDITIONS,
    SAMPLES_PER_CONDITION,
    SAMPLES_PER_AMBIGUOUS_CONDITION,
)
true_labels = get_labels(data, class_dict)

# Plot examples

In [None]:
# Labels 0 .. len(class_dict) inclusive, in plotting order.
# NOTE(review): this yields one more label than there are classes -- confirm
# whether the extra entry is intentional (e.g. 0- vs 1-based labels).
clear_clusters_ordered = list(range(len(class_dict) + 1))

# Map each label to its position in the plotting grid.
layout_mapping = labels_to_layout_mapping(clear_clusters_ordered, 4, (1, 4))

In [None]:
# Grid of the example figure: 3 x 4 panels, matching the 3 amplitude levels
# and 4 time-constant shapes defined in the parameter cell.
rows, columns = 3, 4
figsize = (20, 20)

# Spacing values forwarded to plot_clusters as `subplot_adjustments`.
subplot_adjustments = [0.05, 0.95, 0.03, 0.9, 0.4, 0.15]

# No overall title; the figure is written to this file.
title = ""
save_file_clusters = "test.pdf"

In [None]:
# Plot example bursts for every class. The true labels are passed twice (as
# ground truth and as the "clustered" labels), so the figure is grouped by
# true class rather than by a clustering result.
functions_for_plotting.plot_clusters(
    data,                                     # the dataset
    true_labels,                              # the true labels for the dataset
    true_labels,                              # the clustered labels (here: the true labels again)
    rows,                                     # number of rows in the grid
    columns,                                  # number of columns in the grid
    layout_mapping,                           # label -> grid position mapping
    figsize=figsize,                          # overall figure size
    n_bursts=100,                             # number of bursts to plot for each cluster
    y_lim=(0, 16),                            # y-axis limits
    save_file=save_file_clusters,             # file the plot is saved to
    subplot_adjustments=subplot_adjustments,  # subplot and overall spacing adjustments (tricky)
    plot_mean=True,                           # also plot the mean of each cluster
    title=title,                              # title of the plots
)