In [14]:
'''Train a recurrent convolutional network on the IMDB sentiment
classification task.
GPU command:
    THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python kepler_cnn_lstm.py
'''

from __future__ import print_function
import numpy as np
import pandas as pd
#np.random.seed(1337)  # for reproducibility

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM, GRU, SimpleRNN
from keras.layers.convolutional import Convolution1D, MaxPooling1D
from keras.datasets import imdb


import matplotlib.pyplot as plt
import random




numSamples = 100 # Length of each sample
minPeriod = 5
maxPeriod = 30
noiseAmplitude = 1 #Noise amplitude as a percent of signal amplitude in the pulses

numTrue = 2500
numFalse =2500
X_True = []
X_False = []

testSetSize = 0.2 #Size of test size as a fraction of total dataset size

for i in range(numTrue) :
    
    
    period = random.randint(minPeriod , maxPeriod)  # period
    width = random.randint( int(minPeriod/2) , int(maxPeriod/2))   # width of pulse

    signal = np.arange(numSamples) % period < width
    signal = signal.astype(int)
    signal = signal * (-1)
    signal = signal * 100

    noise = noiseAmplitude * ( np.random.normal(0, 1, numSamples) )

    finalSignal = signal + noise 
    X_True.append(finalSignal.tolist())

    #plt.plot(finalSignal)
    #plt.ylim(-200, 200)
    #plt.xlim(0, 300)
    #plt.show()
    
y_True = np.ones(numTrue).tolist()
print("True dataset made")


for i in range(numFalse) :
    blankNoise = 50 * ( np.random.normal(0, 1, numSamples) )  #Adjust amplitude of blank noise
    
    X_False.append(blankNoise.tolist())
    
    #plt.plot(blankNoise)
    #plt.ylim(-200, 200)
    #plt.xlim(0, 300)
    #plt.show()

y_False = np.zeros(numFalse).tolist()
print("False dataset made")

X = X_True + X_False
y = y_True + y_False


testIdx = random.sample(range(0, len(X)), int(testSetSize * len(X) ))
trainIdx = list( set(range(0, len(X))) - set(testIdx) )
random.shuffle(trainIdx)
X_test = [X[i] for i in testIdx]
X_train = [X[i] for i in trainIdx]
y_test = [y[i] for i in testIdx]
y_train = [y[i] for i in trainIdx]
print("Test and Train sets made")



# Embedding: Turn positive integers (indexes) into dense vectors of fixed size
max_features = 50000
maxlen = 100
embedding_size = 128

# Convolution
filter_length = 3 #The extension (spatial or temporal) of each filter
nb_filter = 64 #Number of convolution kernels to use (dimensionality of the output)
pool_length = 2 # factor by which to downscale. 2 will halve the input.

# LSTM
lstm_output_size = 70

# Training
batch_size = 16 # # of samples used to compute the state, input at one time.
nb_epoch = 5

#print('Loading data...')
#data_file1 = "x-3d4hr_0210_training_nor.csv"
#data_file2 = "x-3d4hr_0210_testing_nor.csv"
#data_file3 = "y-3d4hr_0210_training.csv"
#data_file4 = "y-3d4hr_0210_testing.csv"

# data loading
#X_train = pd.read_csv(data_file1, delimiter=',', error_bad_lines=False, header=None)
#X_train = X_train.as_matrix()
X_train = np.array(X_train)

#y_train = pd.read_csv(data_file3, delimiter=',', error_bad_lines=False, header=None)
#y_train = y_train.as_matrix()
y_train = np.array(y_train)

#X_test = pd.read_csv(data_file2, delimiter=',', error_bad_lines=False, header=None)
#X_test = X_test.as_matrix()
X_test = np.array(X_test)

#y_test = pd.read_csv(data_file4, delimiter=',', error_bad_lines=False, header=None)
#y_test = y_test.as_matrix()
y_test = np.array(y_test)

#(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features, test_split=0.2)

#print(raw_input('123...'))

print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

#print('Pad sequences (samples x time)')
#X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
#X_test = sequence.pad_sequences(X_test, maxlen=maxlen)

#X_train = X_train[0:500]
#y_train = y_train[0:500]
#X_test  = X_test[0:100]
#y_test  = y_test[0:100]



#print('X_train shape:', X_train.shape)
#print('X_test shape:', X_test.shape)
#print(X_train)
#print(y_train)
#print(raw_input('123...'))


print('Build model...')

model = Sequential()
model.add(Embedding(max_features, embedding_size, input_length=maxlen))
model.add(Dropout(0.25))
model.add(Convolution1D(nb_filter=nb_filter,
                        filter_length=filter_length,
                        border_mode='valid',
                        activation='relu',
                        subsample_length=1))
model.add(MaxPooling1D(pool_length=pool_length)) #Max pooling operation for temporal data
model.add(LSTM(lstm_output_size))
model.add(Dense(1)) #regular fully connected NN layer, the output dimension is one
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',  # configure the learning process after the model is built well.
              optimizer='adam',
              class_mode='binary')

print('Train...')
model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
          validation_data=(X_test, y_test), show_accuracy=True)
score, acc = model.evaluate(X_test, y_test, batch_size=batch_size,
                            show_accuracy=True)
print('Test score:', score)
print('Test accuracy:', acc)

True dataset made
False dataset made
Test and Train sets made
Loading data...
4000 train sequences
1000 test sequences
Build model...


MemoryError: ('Error allocating 25600000 bytes of device memory (out of memory).', "you might consider using 'theano.shared(..., borrow=True)'")

In [15]:
y_train

array([ 1.,  1.,  1., ...,  0.,  1.,  1.])

In [17]:
plt.plot(X_train[1])
plt.show()