### Imports

In [1]:
import load_data as dataloader
import InceptionModule_keras
import utils
import os
import sys
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

import random
seed = 18
random.seed(seed)

### Load data

In [2]:
from pyts.datasets import ucr_dataset_list

# During development only choose a subset, to speed up load
DATASET_NAMES = ucr_dataset_list()[0:20]
print(DATASET_NAMES)

datasets_dict = {}
meta_data_dict = {'name': [], 'train_size': [], 'test_size':[] ,'length':[], 'test_proportion':[], "num_classes":[]}     # pandas dataframe with metadata about UCR datasets

CACHED_DATA_FOLDER = os.path.dirname(os.path.dirname(os.getcwd())) + "/Data"
for dataset_name in DATASET_NAMES:
    cache_path = os.path.join(CACHED_DATA_FOLDER, dataset_name)
    dataset_obj = dataloader.fetch_ucr_dataset(dataset=dataset_name, use_cache=True, data_home=cache_path)
    datasets_dict[dataset_name] = dataset_obj

    # Filter the datasets depending on number of classes
    num_classes = len(np.unique(dataset_obj['target_train']))
    #if num_clases < 3:
    
    data_length = dataset_obj['data_train'].shape[1]
    train_size = dataset_obj['data_train'].shape[0]
    test_size = dataset_obj['data_test'].shape[0]
    (labels,counts) = np.unique(dataset_obj['target_test'],return_counts=True)
    test_proportion = counts[0]/(counts[0]+counts[1])

    meta_data_dict['length'].append(data_length)
    meta_data_dict['train_size'].append(train_size)
    meta_data_dict['test_size'].append(test_size)
    meta_data_dict['name'].append(dataset_name)
    meta_data_dict['test_proportion'].append(test_proportion)
    meta_data_dict['num_classes'].append(num_classes)


meta_df = pd.DataFrame(data=meta_data_dict)


['ACSF1', 'Adiac', 'AllGestureWiimoteX', 'AllGestureWiimoteY', 'AllGestureWiimoteZ', 'ArrowHead', 'BME', 'Beef', 'BeetleFly', 'BirdChicken', 'CBF', 'Car', 'Chinatown', 'ChlorineConcentration', 'CinCECGtorso', 'Coffee', 'Computers', 'CricketX', 'CricketY', 'CricketZ']


### Filter data and split into three

In [3]:


reasonable_datasets = meta_df.loc[(meta_df['train_size']>200)&(meta_df['train_size']<2000)& (meta_df['test_size']<2*meta_df['train_size']) & (meta_df['length']<1000)]
print(reasonable_datasets.shape[0])
split = int(reasonable_datasets.shape[0]/3)
print("sofia:\n", reasonable_datasets['name'][0:split].to_string(index=False).split())
print("ayman:\n", reasonable_datasets['name'][split:2*split].to_string(index=False).split())
print("nils:\n", reasonable_datasets['name'][2*split:].to_string(index=False).split())


5
sofia:
 ['Adiac']
ayman:
 ['Computers']
nils:
 ['CricketX', 'CricketY', 'CricketZ']


### Preprocess data

In [4]:
from sklearn.model_selection import train_test_split

processed_data_dict = {}
for name, dataset_object in datasets_dict.items():
    X = dataset_obj['data_train']
    Y = dataset_obj['target_train']
    x_train, x_val, y_train, y_val = train_test_split(X, Y, test_size=0.2, stratify=Y)
    x_test = dataset_obj['data_test']
    y_test = dataset_obj['target_test']

    num_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

    y_train, y_val, y_test = utils.transform_labels(y_train, y_val, y_test)

    # save orignal y because later we will use binary
    y_test = y_test.astype(np.int64)
    y_train = y_train.astype(np.int64)
    y_val = y_val.astype(np.int64)

    # transform the labels from integers to one hot vectors
    enc = OneHotEncoder()
    enc.fit(np.concatenate((y_train, y_val, y_test), axis=0).reshape(-1, 1))
    y_train_onehot = enc.transform(y_train.reshape(-1, 1)).toarray()
    y_val_onehot = enc.transform(y_val.reshape(-1, 1)).toarray()
    y_test_onehot = enc.transform(y_test.reshape(-1, 1)).toarray()



    if len(x_train.shape) == 2:  # if univariate
        # add a dimension to make it multivariate with one dimension
        x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], 1))
        x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], 1))

    processed_data_dict[name] = {"x_train":x_train, "y_train":y_train, "y_train_onehot":y_train_onehot, 
                                "x_test":x_test, "y_test":y_test, "y_test_onehot":y_test_onehot,
                                "x_val":x_val, "y_val":y_val, "y_val_onehot":y_val_onehot,
                                "num_classes":num_classes}



### Train an InceptionTime Network

In [9]:


N_EPOCHS = 25
N_ENSEMBLE = 1


#for name, dataset_object in processed_data_dict.items():
for name in ["Computers"]:
   dataset_object = processed_data_dict[name]
   x_train = dataset_object['x_train']
   y_train = dataset_object['y_train']
   x_val = dataset_object['x_val']
   y_val = dataset_object['y_val']
   y_val_onehot = dataset_object['y_val_onehot']
   y_train_onehot = dataset_object['y_train_onehot']
   x_test = dataset_object['x_test']
   y_test = dataset_object['y_test']
   y_test_onehot = dataset_object['y_test_onehot']
   num_classes = dataset_object['num_classes']
   input_shape = x_train.shape[1:]


   verbose = True
   all_predictions = np.zeros_like(y_test_onehot)

   val_metrics = {"loss":[], "recall":[], "precision":[], "accuracy":[], "f1_score":[]}
   train_metrics = {"loss":[], "recall":[], "precision":[], "accuracy":[], "f1_score":[]}

   for i in range(N_ENSEMBLE):
      print(i)
      checkpoints_path = ""
      inception_net = InceptionModule_keras.Classifier_INCEPTION(checkpoints_path, input_shape, num_classes, save_weights=False, verbose=verbose, nb_epochs=N_EPOCHS)
      history = inception_net.fit(x_train, y_train_onehot, x_val, y_val_onehot, plot_test_acc=True)

      # Save metrics
      train_metrics['loss'].append(history.history['loss'])
      train_metrics['accuracy'].append(history.history['accuracy'])
      train_metrics['precision'].append(history.history['Precision'])
      train_metrics['recall'].append(history.history['Recall'])
      train_metrics['f1_score'].append(history['F1_score'])
      val_metrics['loss'].append(history.history['val_loss'])
      val_metrics['accuracy'].append(history.history['val_accuracy'])
      val_metrics['precision'].append(history.history['val_Precision'])
      val_metrics['recall'].append(history.history['val_Recall'])
      val_metrics['f1_score'].append(history['val_F1_score'])


      single_prediction = inception_net.predict(x_test, False)
      print("subnet {}, metrics: {}".format(i, utils.calculate_metrics(y_test, np.argmax(single_prediction, axis=1), 0.0)))
      all_predictions += single_prediction/N_ENSEMBLE
   
   y_pred_final = np.argmax(all_predictions, axis=1)

   metrics = utils.calculate_metrics(y_test, y_pred_final, 0.0)
   print(metrics)



0
Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_4 (InputLayer)        [(None, 300, 1)]             0         []                            
                                                                                                  
 max_pooling1d_18 (MaxPooli  (None, 300, 1)               0         ['input_4[0][0]']             
 ng1D)                                                                                            
                                                                                                  
 conv1d_93 (Conv1D)          (None, 300, 32)              1280      ['input_4[0][0]']             
                                                                                                  
 conv1d_94 (Conv1D)          (None, 300, 32)              640       ['input_4[0][0]']     

### Plot learning curves

In [6]:
from matplotlib import pyplot as plt


plt.plot([i for i in range(N_EPOCHS)], train_metrics['loss'] , label="train loss")
plt.plot([i for i in range(N_EPOCHS)], val_metrics['loss'], label="val loss")
plt.plot([i for i in range(N_EPOCHS)], val_metrics['accuracies'], label="val acc")
plt.legend()
plt.show()



TypeError: list indices must be integers or slices, not tuple