In [1]:
%matplotlib inline
!pip install seaborn

[33mYou are using pip version 9.0.3, however version 19.0.3 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [2]:
# Model architecture
from keras.layers import Dense, Dropout, Conv1D, MaxPool1D, Flatten, SpatialDropout1D
from keras.models import Sequential
from keras import optimizers
from keras.callbacks import Callback

# General packages
import numpy as np
import os
import h5py
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Data preparation and validation packages
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn import preprocessing
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Jupyter interactive plotting
from IPython.display import clear_output

from components.dataToolkit import dataToolkit, LABEL_MAP

Using TensorFlow backend.


In [3]:
# Callback functions
# Real-time plotting of training metrics
class PlotLosses(Callback):
    """Keras callback that redraws the training curves after every epoch.

    After each epoch the training/validation loss and accuracy reported in
    ``logs`` are appended to per-metric lists and all four curves are
    re-plotted, replacing the previous cell output.

    Attributes set in ``on_train_begin``:
        i: number of epochs recorded so far (used as the x coordinate).
        x: list of x coordinates, one per recorded epoch.
        losses, accuracies, val_losses, val_accuracies: recorded metric
            values; an entry is ``None`` when the metric is absent from
            ``logs``.
        fig: the figure created when training starts.
        logs: list with a copy of every ``logs`` dict received.
    """

    def on_train_begin(self, logs=None):
        """Reset all recorded history and open a new figure."""
        self.i = 0
        self.x = []
        self.losses = []
        self.accuracies = []
        self.val_losses = []
        self.val_accuracies = []

        self.fig = plt.figure()

        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        """Record this epoch's metrics and redraw the curves.

        Args:
            epoch: epoch index passed by Keras (unused; ``self.i`` is the
                x coordinate).
            logs: dict of metric values for the epoch; ``None`` is treated
                as an empty dict.
        """
        logs = logs or {}

        # Store a copy so later mutation of the caller's dict cannot alter history.
        self.logs.append(dict(logs))
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        # Older Keras reports 'acc'/'val_acc'; Keras >= 2.3 reports
        # 'accuracy'/'val_accuracy'. Accept either spelling.
        self.accuracies.append(logs.get('acc', logs.get('accuracy')))
        self.val_losses.append(logs.get('val_loss'))
        self.val_accuracies.append(logs.get('val_acc', logs.get('val_accuracy')))
        self.i += 1

        # wait=True defers clearing until the new plot is ready, which avoids flicker.
        clear_output(wait=True)
        plt.plot(self.x, self.losses, label="loss")
        plt.plot(self.x, self.accuracies, label="accuracy")
        plt.plot(self.x, self.val_losses, label="val_loss")
        plt.plot(self.x, self.val_accuracies, label="val_accuracy")
        plt.xlabel("epoch")
        plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        plt.show()


plot_losses = PlotLosses()

In [4]:
# Load every time series through the project's dataToolkit helper.
dtk = dataToolkit()
try:
    data = dtk.getAllTimeSeries()
finally:
    # Always release the toolkit, even if loading fails part-way.
    dtk.close()

In [5]:
# Pre-processing: min-max scale each row of `data` to [0, 1] independently
# (presumably one time series per row -- confirm against dataToolkit), then
# append a trailing axis of length 1.
rescaled_data = preprocessing.minmax_scale(data, axis=1)
n_rows, n_cols = rescaled_data.shape
rescaled_data = rescaled_data.reshape(n_rows, n_cols, 1)

In [6]:
# Load the label column from the CSV list and keep only the rows whose label
# is one of the classes named in LABEL_MAP.
list_filename = "gspy-db-20180813_O1_filtered_t1126400691-1205493119_snr7.5_tr_gspy.csv"
# The CSV lives in a "data" directory next to the notebook's working directory.
csv_data_dir = os.path.join(os.path.dirname(os.getcwd()), "data")
gl_df = pd.read_csv(os.path.join(csv_data_dir, list_filename))
# Index directly so a missing 'label' column fails here with a KeyError
# instead of yielding None and failing later.
Y = gl_df['label']

classes_in_use = list(LABEL_MAP)

print(classes_in_use)
print(len(Y))

# Vectorised filter; the original index labels of the kept rows are preserved.
Y = Y[Y.isin(classes_in_use)]

print(len(Y))

['Low_Frequency_Lines', 'None_of_the_Above', 'Blip', 'Tomte', '1400Ripples', 'Paired_Doves', 'Air_Compressor', 'Whistle', 'Extremely_Loud', 'Koi_Fish', 'Low_Frequency_Burst', 'Light_Modulation', 'Repeating_Blips', 'Scattered_Light', 'No_Glitch', 'Chirp', 'Scratchy', 'Power_Line']
6667
6235


In [None]:
# No explicit shuffle is done here: train_test_split shuffles by default.
#
# Encode the filtered class names in `Y` as integers, then one-hot encode them.
# NOTE(review): `labels` was not defined by any earlier cell; it is derived
# from `Y` here. Confirm that the rows of `Y` line up with `rescaled_data`.
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(np.asarray(Y))
onehot_encoder = OneHotEncoder(sparse=False)
# OneHotEncoder expects a 2-D input of shape (n_samples, n_features).
labels = labels.reshape(labels.shape[0], 1)
targets = onehot_encoder.fit_transform(labels)

In [None]:
# Split the data for training (70 %) and validation (30 %).
# A fixed seed makes the split identical on every Restart & Run All.
SPLIT_SEED = 42
x_train, x_val, y_train, y_val = train_test_split(
    rescaled_data, targets, test_size=0.3, random_state=SPLIT_SEED
)