In [14]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow.contrib.keras as keras
from sklearn.ensemble import IsolationForest
#from keras.layers import Dense
#from keras.models import Sequential
#from keras.optimizers import Adam

class TFModel:
    def _create_layer(self, input_layer, layer_weights, layer_bias, activation_func):
        layer = tf.add(tf.matmul(input_layer, layer_weights), layer_bias)
        return activation_func(layer)
    
    def __init__(self, num_inputs, num_outputs, hidden_layer_sizes):
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        
        layer_sizes = [self.num_inputs] + hidden_layer_sizes + [self.num_outputs]
        num_layers = len(layer_sizes)
        
        weights = list(
            map(
                lambda input_size, output_size: tf.Variable(
                    tf.random_normal(shape=[input_size, output_size])
                ),
                layer_sizes,
                layer_sizes[1:],
            )
        )
        
        biases = list(
            map(
                lambda layer_size: tf.Variable(tf.random_normal(shape=[layer_size])),
                layer_sizes[1:],
            )
        )
        
        activation_funcs = list(
            map(
                lambda layer_index: tf.identity
                if layer_index == num_layers - 2
                else tf.nn.relu,
                range(num_layers - 1),
            )
        )
        
        self._input = tf.placeholder(shape=[None, self.num_inputs], dtype=tf.float32)
        layer = self._input
        for layer_weights, layer_bias, activation_func in zip(
            weights, biases, activation_funcs
        ):
            layer = self._create_layer(
                layer, layer_weights, layer_bias, activation_func
            )
        
        self._output = layer
        
        self._target_output = tf.placeholder(
            shape=[None, self.num_outputs], dtype=tf.float32
        )
        loss = tf.losses.mean_squared_error(self._target_output, self._output)
        self._optimizer = tf.train.AdamOptimizer().minimize(loss)
        
        self.init = tf.global_variables_initializer()
    
    def predict(self, sess, data_input):
        return sess.run(self._output, feed_dict={self._input: data_input})

    def predict_batch(self, sess, inputs):
        return sess.run(self._output, feed_dict={self._input: inputs})

    def train_batch(self, sess, inputs, outputs):
        sess.run(
            self._optimizer,
            feed_dict={self._input: inputs, self._target_output: outputs},
        )
        
def compile_keras_model(data_input, output, number_of_nodes_in_hidden_layer=3, number_of_hidden_layers=2, hidden_layer_activation='relu', output_layer_activation='sigmoid', loss='mean_squared_error'):
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(number_of_nodes_in_hidden_layer, activation=hidden_layer_activation, input_shape=(len(data_input.columns),)))
    for _ in range(number_of_hidden_layers - 1):  
        model.add(keras.layers.Dense(number_of_nodes_in_hidden_layer, activation=hidden_layer_activation))
    model.add(keras.layers.Dense(1, activation=output_layer_activation))
    
    adam = keras.optimizers.Adam()
    model.compile(loss=loss, optimizer=adam, metrics=['acc'])
    
    return model

In [15]:
# find outliers using Isolation Forest
def find_outliers_isolation_forest(dataframe):
    isolation_forest = IsolationForest(contamination='auto', behaviour='new')
    isolation_forest.fit(dataframe)
    return isolation_forest.predict(dataframe)

In [16]:
data = pd.read_csv("dataset/yes_date_set.csv")
data = data.drop('date', axis=1)

# find outliers using Isolation Forest
outliers_isolation = find_outliers_isolation_forest(data)
data.insert(0, "outliers_isolation", outliers_isolation, True)

# drop outliers based on Isolation Forest method
data = data.drop(data[data.outliers_isolation == -1.0].index)
data = data.drop('outliers_isolation', axis=1)

X = data.drop(['trip_count', 'Holiday_None'], axis=1)
Y = data['trip_count']

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

# normalize input
standard_scaler = StandardScaler()
X_train = standard_scaler.fit_transform(X_train)
X_test = standard_scaler.transform(X_test)

print("training X")
print(X_train)
print("training Y")
print(y_train)
print("test X")
print(X_test)
print("test Y")
print(y_test)


ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [6]:
model = compile_keras_model(X, Y)
model.fit(X_train, y_train, epochs=150, batch_size=32, validation_data=(X_test, y_test), verbose=1)

Train on 3846 samples, validate on 1649 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150


Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150
Epoch 81/150
Epoch 82/150
Epoch 83/150
Epoch 84/150
Epoch 85/150
Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150
Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150


Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150


<tensorflow.python.keras.callbacks.History at 0x2d70d654860>

In [7]:
model = TFModel(len(X.columns), 1, [3, 3])
with tf.Session() as sess:
    sess.run(model.init)
    
    model.train_batch(sess, X, Y)
    predictions = model.predict_batch(sess, X)
    print(predictions)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


ValueError: Cannot feed value of shape (5495,) for Tensor 'Placeholder_1:0', which has shape '(?, 1)'