In [1]:
# univariate multi-step encoder-decoder cnn-lstm
from math import sqrt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense, Flatten, LSTM, RepeatVector, TimeDistributed, Dropout, Embedding

from keras.layers.convolutional import Conv1D, MaxPooling1D

from keras import backend as K
from keras.regularizers import l1_l2, l1, l2
from keras.callbacks import EarlyStopping

In [0]:
def split_dataset(data, split=0.8):
    """Split ``data`` row-wise into a leading train part and a trailing test part.

    Parameters
    ----------
    data : pandas.DataFrame or array-like
        Observations ordered along the first axis.
    split : float, default 0.8
        Fraction of rows assigned to the train part; the resulting row count
        is rounded up.

    Returns
    -------
    tuple
        ``(train, test)`` where ``train`` holds the first
        ``ceil(n_rows * split)`` rows and ``test`` holds the remaining rows.
        A DataFrame input yields DataFrames, anything else yields NumPy arrays.
    """
    is_frame = hasattr(data, 'iloc')
    if not is_frame:
        data = np.asarray(data)

    n_rows = data.shape[0]
    # np.ceil is used because only ``sqrt`` is imported from ``math`` in this file.
    n_train = int(np.ceil(n_rows * split))

    # Positional slicing instead of label-based ``.loc``: it works for NumPy
    # arrays as well as DataFrames, does not depend on the index labels, and
    # (unlike the end-inclusive ``.loc`` slice) keeps exactly ``n_train`` rows
    # in the train part.
    if is_frame:
        train, test = data.iloc[:n_train], data.iloc[n_train:]
    else:
        train, test = data[:n_train], data[n_train:]
    print(train.shape)
    print(test.shape)

    return train, test

In [0]:
# evaluate one or more weekly forecasts against expected values
def evaluate_forecasts(actual, predicted):
    """Score forecasts against the expected values with RMSE.

    Parameters
    ----------
    actual : array-like, shape (n_samples, n_steps)
        Expected values.
    predicted : array-like, shape (n_samples, n_steps) or (n_samples, n_steps, 1)
        Forecast values. A trailing axis of length 1 is dropped so that
        stacked per-step model outputs can be passed directly.

    Returns
    -------
    score : float
        RMSE over all samples and steps.
    scores : list of float
        RMSE of each step (column), in column order.

    Raises
    ------
    ValueError
        If ``actual`` is not 2-D or the shapes do not match.
    """
    # Cast to float first: subtracting unsigned integer arrays would wrap around.
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    if predicted.ndim == actual.ndim + 1 and predicted.shape[-1] == 1:
        predicted = predicted[..., 0]
    if actual.ndim != 2:
        raise ValueError('actual must be 2-D, got shape %s' % (actual.shape,))
    if actual.shape != predicted.shape:
        raise ValueError('shape mismatch: actual %s vs predicted %s'
                         % (actual.shape, predicted.shape))

    squared_error = (actual - predicted) ** 2
    # one RMSE per forecast step
    scores = [float(value) for value in np.sqrt(squared_error.mean(axis=0))]
    # overall RMSE across every sample and step
    score = float(np.sqrt(squared_error.mean()))
    return score, scores

In [0]:
# summarize scores
def summarize_scores(name, score, scores):
    """Print one summary line: the name, the overall score, then each score.

    The overall ``score`` is shown with three decimals inside square brackets;
    every entry of ``scores`` is shown with one decimal, comma-separated.
    """
    formatted = []
    for value in scores:
        formatted.append('%.1f' % value)
    joined = ', '.join(formatted)
    line = '%s: [%.3f] %s' % (name, score, joined)
    print(line)

In [0]:
# convert history into inputs and outputs
def to_supervised(train, n_input, n_out=200, target_col=-3):
  """Turn a history into sliding-window (input, output) samples of one column.

  Parameters
  ----------
  train : array-like
      Either 3-D ``(n_blocks, block_len, n_features)``, which is flattened to
      ``(n_blocks * block_len, n_features)``, or already flat 2-D
      ``(n_rows, n_features)``.
  n_input : int
      Number of consecutive time steps in each input window.
  n_out : int, default 200
      Number of time steps that follow the input window and form the target.
  target_col : int, default -3
      Index of the feature column used for both inputs and targets.

  Returns
  -------
  X : numpy.ndarray, shape (n_windows, n_input, 1)
  y : numpy.ndarray, shape (n_windows, n_out)
      One window per start position, advancing one time step at a time.
      When the history is too short for a single window, both arrays have
      zero rows but keep the remaining dimensions.

  Raises
  ------
  ValueError
      If ``train`` is neither 2-D nor 3-D.
  """
  data = np.asarray(train)
  if data.ndim == 3:
    # flatten data
    data = data.reshape((data.shape[0] * data.shape[1], data.shape[2]))
  elif data.ndim != 2:
    raise ValueError('train must be 2-D or 3-D, got %d-D' % data.ndim)

  series = data[:, target_col]
  # Only these start positions leave room for a full input + output window.
  n_windows = len(series) - n_input - n_out + 1
  if n_windows <= 0:
    return (np.empty((0, n_input, 1), dtype=series.dtype),
            np.empty((0, n_out), dtype=series.dtype))

  X = np.array([series[start:start + n_input] for start in range(n_windows)])
  y = np.array([series[start + n_input:start + n_input + n_out]
                for start in range(n_windows)])
  # add the trailing feature axis expected by the models: [samples, timesteps, 1]
  return X.reshape((n_windows, n_input, 1)), y

In [0]:
def build_model_2(train, n_input):
  """Build and fit a regularised CNN encoder / LSTM decoder on ``train``.

  Parameters
  ----------
  train : array-like
      History handed to ``to_supervised`` to create the training windows.
  n_input : int
      Number of time steps in each input window.

  Returns
  -------
  keras.models.Sequential
      The fitted model. It emits one sigmoid output per forecast step and is
      trained with binary cross-entropy.
  """
  # prepare data
  train_x, train_y = to_supervised(train, n_input)
  # define parameters (verbose=1 keeps the per-epoch progress output)
  verbose, epochs, batch_size = 1, 50, 200
  n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
  # reshape output into [samples, timesteps, features], with a single feature
  train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

  # define model
  # The convolutional encoder is applied to the 3-D windows directly.
  # TimeDistributed wrappers would require a 4-D input
  # [samples, subsequences, timesteps, features], and RepeatVector needs the
  # 2-D output of a plain Flatten, so the encoder layers are not wrapped.
  # input_shape goes on the first layer of the model so it can be built.
  model = Sequential()
  model.add(Conv1D(filters=64, kernel_size=3, activation='relu',
                   kernel_regularizer=l2(0.001), bias_regularizer=l2(0.001),
                   input_shape=(n_timesteps, n_features)))
  model.add(Conv1D(filters=64, kernel_size=3, activation='relu',
                   kernel_regularizer=l2(0.001), bias_regularizer=l2(0.001)))
  model.add(Dropout(0.5))
  model.add(MaxPooling1D(pool_size=2))
  model.add(Flatten())
  # repeat the encoded vector once per forecast step for the decoder
  model.add(RepeatVector(n_outputs))
  model.add(LSTM(200, activation='relu', return_sequences=True))
  model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.0001), bias_regularizer=l2(0.0001)))
  model.add(Dropout(0.5))
  model.add(Dense(1, activation='sigmoid'))
  model.compile(loss='binary_crossentropy', optimizer='nadam')

  # fit network
  model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)
  return model

In [0]:

# train the model
def build_model(train, n_input):
  """Build and fit the CNN encoder / LSTM decoder forecaster on ``train``.

  Parameters
  ----------
  train : array-like
      History handed to ``to_supervised`` to create the training windows.
  n_input : int
      Number of time steps in each input window.

  Returns
  -------
  keras.models.Sequential
      The fitted model. For an input of shape ``[samples, n_input, 1]`` it
      emits one sigmoid output per forecast step.
  """
  # prepare data
  train_x, train_y = to_supervised(train, n_input)
  # define parameters
  verbose, epochs, batch_size = 1, 2, 200
  n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
  # reshape output into [samples, timesteps, features]
  train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))
  # define model
  model = Sequential()
  model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features)))
  model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
  model.add(MaxPooling1D(pool_size=2))
  model.add(Flatten())
  # repeat the encoded vector once per forecast step for the decoder
  model.add(RepeatVector(n_outputs))

  model.add(LSTM(200, activation='relu', return_sequences=True))

  model.add(TimeDistributed(Dense(100, activation='relu')))
  # binary_crossentropy expects probabilities, so the output layer needs a
  # sigmoid; with a linear output the loss is computed on clipped raw values.
  model.add(TimeDistributed(Dense(1, activation='sigmoid')))
  model.compile(loss='binary_crossentropy', optimizer='nadam')
  # fit network
  model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)
  return model


In [0]:
  # make a forecast
def forecast(model, history, n_input):
  """Predict the next output sequence from the latest ``n_input`` observations.

  ``history`` is stacked into a 3-D array and flattened to 2-D; the last
  ``n_input`` values of column -3 are fed to ``model.predict`` as a single
  sample shaped ``[1, n_input, 1]``. The first (only) prediction is printed
  and returned.
  """
  stacked = np.array(history)
  n_rows = stacked.shape[0] * stacked.shape[1]
  flat = stacked.reshape((n_rows, stacked.shape[2]))
  # most recent observations of the target column
  recent = flat[-n_input:, -3]
  batch = recent.reshape((1, len(recent), 1))
  # keep only the forecast of the single sample in the batch
  yhat = model.predict(batch, verbose=0)[0]
  print(yhat)
  return yhat

In [0]:
# evaluate a single model
def evaluate_model(train, test, n_input):
    """Fit a model on ``train`` and score it on ``test`` by walk-forward validation.

    Parameters
    ----------
    train, test : numpy.ndarray
        3-D arrays indexed as ``[window, step, feature]``.
    n_input : int
        Number of past time steps fed to the model for each forecast.

    Returns
    -------
    tuple
        ``(score, scores)`` as returned by ``evaluate_forecasts``.
    """
    # fit model
    model = build_model(train, n_input)
    # history starts as the training windows and grows with each test window
    history = list(train)
    # walk-forward validation over each test window
    predictions = []
    for i in range(len(test)):
        # forecast from the history seen so far
        predictions.append(forecast(model, history, n_input))
        # reveal the real observation so the next forecast can use it
        history.append(test[i, :])
    predictions = np.array(predictions)
    # each forecast is shaped (n_outputs, 1); drop the singleton feature axis
    if predictions.ndim == 3 and predictions.shape[-1] == 1:
        predictions = predictions[:, :, 0]
    # score against column -3, the same column the model is trained on and
    # forecasts from (to_supervised / forecast), rather than column 0
    score, scores = evaluate_forecasts(test[:, :, -3], predictions)
    return score, scores

In [0]:
# Fix NumPy's global random seed for this session.
np.random.seed(42)

# Read the capture table and list the columns it contains.
df = pd.read_csv('../output/df_general.csv')
print(df.columns)

# Discard the columns that are not used further on, in a single call.
df = df.drop(columns=[
    'No', 'Protocol', 'tcp_flags_fin', 'tcp_flags_syn',
    'tcp_flags_push', 'tcp_flags_cwr',
    'tcp_flags_ecn', 'tcp_flags_urg', 'prebuffering', 'Time',
])

# Store the listed columns as compact unsigned integers.
df = df.astype(dtype={
    'pkt_length': np.uint16,
    'ip_len': np.uint16,
    'tcp_window_size_value': np.uint16,
    'stalling_event': np.uint8,
    'measure': np.uint8,
    'capture': np.uint8,
})

In [0]:
# Only the last expression of a cell is rendered, so the shape is printed
# explicitly instead of being silently discarded.
# Author's note on row ranges: 1-232183 / 0-75 -> test, 232184-739039 -> train
print(df.shape)
df.values

In [0]:
# load the new file
# (leftover from a different dataset; the data used here is the `df` loaded earlier)
#dataset = read_csv('household_power_consumption_days.csv', header=0, infer_datetime_format=True, parse_dates=['datetime'], index_col=['datetime'])
# split into train and test; df.values hands split_dataset a plain NumPy array
train, test = split_dataset(df.values)
# window length passed as n_input to the model-building calls further down
n_input = 200
# evaluate model and get scores (currently disabled)
#score, scores = evaluate_model(train, test, n_input)
# summarize scores (currently disabled)
#summarize_scores('lstm', score, scores)

In [0]:
# Show the training split as plain text.
print(train)

In [0]:
# Copy the training split into an ndarray to inspect its dimensions.
jeje = np.array(train)
jeje.shape  # author's recorded result: (232182, 1, 6) -> 232182 samples, 1 window, 6 features

In [0]:
# Build supervised windows with 200 input steps and the function's default n_out.
train_x, train_y = to_supervised(train, n_input=200)

In [0]:
# Rebuild the windows with 200 input steps and 100 output steps; this rebinds train_x / train_y.
train_x, train_y = to_supervised(train, 200, n_out=100)

In [0]:
# Shapes of the supervised windows.
print(train_x.shape) # author's note: 200 input steps, 100 output steps
print(train_y.shape)

# Container types of the windows and of the history list built from train.
print(type(train_x))
print(type(train_y))
history=[x for x in train]
print(type(history))
# Stack the history and flatten its first two axes into rows.
data= np.array(history)
data = data.reshape((data.shape[0]*data.shape[1], data.shape[2]))
# retrieve last observations for input data: column -3 of the last 100000 rows
input_x = data[-100000:, -3]
print(input_x)
#print(data.shape)
#print(sum(data==0)
# reshape into a single sample: [1, n_steps, 1]
input_x = input_x.reshape((1, len(input_x), 1))
print(input_x)

In [0]:
# Training configuration actually used by fit() below.
verbose, epochs, batch_size = 1, 20, 100
n_timesteps, n_features, n_outputs = train_x.shape[1], train_x.shape[2], train_y.shape[1]
# reshape output into [samples, timesteps, features], with a single feature
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

# define model
# The convolutional encoder consumes the 3-D windows directly. TimeDistributed
# wrappers would need a 4-D input, and RepeatVector needs the 2-D output of a
# plain Flatten, so the encoder layers are not wrapped.
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu',
                 input_shape=(n_timesteps, n_features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu',
                 kernel_regularizer=l2(0.001), bias_regularizer=l2(0.001)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
# repeat the encoded vector once per forecast step for the decoder
model.add(RepeatVector(n_outputs))
# LSTM is used because CuDNNLSTM is never imported in this notebook (and it
# only runs on a GPU); swap it back in, after importing it, on a GPU runtime.
model.add(LSTM(32, kernel_regularizer=l2(0.0001), bias_regularizer=l2(0.0001),
               recurrent_regularizer=l2(0.0001), activity_regularizer=l2(0.0001),
               return_sequences=True))
model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.0001), bias_regularizer=l2(0.0001)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='nadam', metrics=['accuracy'])

# fit network
model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)

In [0]:
# Train via build_model; the returned model is not assigned to a name, so it is only shown as the cell output.
build_model(train, n_input)

W0831 16:00:54.685841 140382719793024 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:66: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0831 16:00:54.725475 140382719793024 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:541: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0831 16:00:54.732731 140382719793024 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4432: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0831 16:00:54.804379 140382719793024 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4267: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0831 16:00:55.177259 140382719793024 deprecation_wrapp

Epoch 1/2
 49000/231783 [=====>........................] - ETA: 29:06 - loss: 0.7164