In [11]:
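'''Train a 1D convolutional network (Embedding -> 2 x [Convolution1D + MaxPooling1D]
-> Dense) as a binary classifier on the "3d4hr_0210" train/test CSV files.

The script appears to be adapted from the Keras recurrent convolutional network
example for the IMDB sentiment classification task; here the LSTM layer is
commented out and the IMDB dataset is imported but never loaded.
GPU command:
    THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python kepler_cnn_lstm.py
'''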

from __future__ import print_function
import numpy as np
import pandas as pd
#np.random.seed(1337)  # for reproducibility

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM, GRU, SimpleRNN
from keras.layers.convolutional import Convolution1D, MaxPooling1D
from keras.datasets import imdb


import matplotlib.pyplot as plt
import random






# --- Embedding -------------------------------------------------------------
# Vocabulary size handed to the Embedding layer (its first argument).
max_features = 5000
# Sequence length the Embedding layer is told to expect (input_length).
maxlen = 100
# Width of each embedding vector (Embedding's second argument).
embedding_size = 128

# --- Convolution -----------------------------------------------------------
# Number of kernels per Convolution1D layer, i.e. its output dimensionality.
nb_filter = 64
# Temporal extent of each convolution kernel.
filter_length = 3
# Max-pooling downscale factor; 2 halves the sequence length.
pool_length = 2

# --- LSTM ------------------------------------------------------------------
# Only referenced by the LSTM layer that is commented out in the model below.
lstm_output_size = 70

# --- Training --------------------------------------------------------------
# Mini-batch size passed to both model.fit and model.evaluate.
batch_size = 16
# Number of training epochs passed to model.fit.
nb_epoch = 5

print('Loading data...')
data_file1 = "x-3d4hr_0210_training_nor.csv"
data_file2 = "x-3d4hr_0210_testing_nor.csv"
data_file3 = "y-3d4hr_0210_training.csv"
data_file4 = "y-3d4hr_0210_testing.csv"


def _load_csv_matrix(path):
    """Read a header-less, comma-separated file and return its contents as a NumPy array.

    Parameters
    ----------
    path : str
        Location of the CSV file to read.

    Returns
    -------
    numpy.ndarray
        The parsed table, one array row per CSV line that pandas accepted.

    Notes
    -----
    * ``DataFrame.values`` is used instead of ``DataFrame.as_matrix()``: the
      latter was deprecated in pandas 0.23 and removed in pandas 1.0, while
      ``.values`` is available in every pandas release.
    * ``error_bad_lines=False`` makes pandas skip malformed lines instead of
      raising, so a damaged file can silently yield fewer rows than expected
      (and the X and y files could then fall out of step). The flag is kept
      for compatibility with the old pandas this script targets; pandas >= 1.3
      spells it ``on_bad_lines='skip'``.
    """
    frame = pd.read_csv(path, delimiter=',', error_bad_lines=False, header=None)
    return frame.values


# Features and labels for the training and testing splits.
X_train = _load_csv_matrix(data_file1)
y_train = _load_csv_matrix(data_file3)
X_test = _load_csv_matrix(data_file2)
y_test = _load_csv_matrix(data_file4)



# Report how many rows were loaded for each split (before subsetting).
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

# The sequence.pad_sequences(..., maxlen=maxlen) step is disabled; the arrays
# are used as loaded.

# Keep only the leading 5000 training rows and 1000 testing rows.
X_train, y_train = X_train[:5000], y_train[:5000]
X_test, y_test = X_test[:1000], y_test[:1000]

# Multiply every feature value by 100 (labels are left untouched).
X_train, X_test = X_train * 100, X_test * 100



#print('X_train shape:', X_train.shape)
#print('X_test shape:', X_test.shape)
#print(X_train)
#print(y_train)
#print(raw_input('123...'))


print('Build model...')

model = Sequential()

# Input stage: embedding lookup followed by dropout.
model.add(Embedding(max_features, embedding_size, input_length=maxlen))
model.add(Dropout(0.25))

# Two identical feature-extraction stages: a 'valid' (unpadded) stride-1
# ReLU convolution, then temporal max pooling.
for _stage in range(2):
    model.add(Convolution1D(nb_filter=nb_filter,
                            filter_length=filter_length,
                            border_mode='valid',
                            activation='relu',
                            subsample_length=1))
    model.add(MaxPooling1D(pool_length=pool_length))

# The recurrent layer, LSTM(lstm_output_size), is disabled; the pooled feature
# maps are flattened and fed to fully connected layers instead.
model.add(Flatten())
model.add(Dense(32))  # no activation argument is passed here
model.add(Dense(1))   # single output unit ...
model.add(Activation('sigmoid'))  # ... passed through a sigmoid

# Configure the learning process once the architecture is complete.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              class_mode='binary')

print('Train...')
# Fit on the training subset. NOTE(review): the validation data passed here is
# the same (X_test, y_test) pair used for the final evaluation below.
model.fit(
    X_train, y_train,
    batch_size=batch_size,
    nb_epoch=nb_epoch,
    validation_data=(X_test, y_test),
    show_accuracy=True,
)

# The result of evaluate() is unpacked into the loss ("score") and the accuracy.
score, acc = model.evaluate(
    X_test, y_test,
    batch_size=batch_size,
    show_accuracy=True,
)
print('Test score:', score)
print('Test accuracy:', acc)

Loading data...
50000 train sequences
10000 test sequences
Build model...
Train...
Train on 5000 samples, validate on 1000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test score: 0.280155418366
Test accuracy: 0.884


In [15]:
# Display the training labels currently held in the kernel.
y_train

array([ 1.,  1.,  1., ...,  0.,  1.,  1.])

In [17]:
# Plot the values of the training row at index 1 against their position.
fig, ax = plt.subplots()
ax.plot(X_train[1])
plt.show()

In [6]:
# Display the test row at index 5.
# NOTE(review): this cell's execution count (In [6]) is lower than that of the
# cells above it, so its output may come from an earlier kernel state -- re-run
# the notebook top to bottom to confirm.
X_test[5]

array([  99.99320232,  100.00196666,  100.00573503,   99.99939731,
        100.0462451 ,  100.05370573,  100.0386227 ,  100.0063631 ,
         99.99611425,  100.02329225,  100.01448985,   99.9960286 ,
        100.01307195,  100.02241677,   99.95535676,   99.96413061,
        100.05481912,  100.03181868,   99.99127055,  100.08344357,
        100.0009294 ,  100.05869217,   99.98541815,   99.96022901,
         99.99853134,   99.97863316,   99.97153414,   99.97604478,
         99.99384942,  100.01518453,   99.99999259,  100.05416251,
        100.00839954,   99.9190433 ,   99.97488381,  100.06595296,
         99.94797226,   99.9632361 ,   99.99342119,   99.90008721,
        100.00780003,   99.96549142,   99.95748837,   99.97722478,
         99.99229829,  100.02770773,   99.94730613,   99.96500609,
        100.05354396,  100.0114447 ,  100.01637404,  100.07567842,
        100.01047406,   99.94806742,  100.00701019,   99.9572124 ,
         99.95241628,   99.96233207,   99.97904235,   99.93944