In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import cross_validation
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.normalization import local_response_normalization
from tflearn.layers.estimator import regression



hdf5 is not supported on this machine (please install/reinstall h5py for optimal experience)


# Runtime environment: GPU selection and memory limit

In [2]:
# Restrict CUDA device visibility for this process to the device with index 1.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1"})

# Session configuration reused by the training cell: ask TensorFlow to claim
# at most 40% of the GPU memory for this process.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.4)
sess_config = tf.ConfigProto(gpu_options=gpu_options)

# Class labels

In [3]:
# Class index (as a string key) -> message category name, 14 classes in total.
labels = {
    '0': 'file',
    '1': 'network',
    '2': 'service',
    '3': 'database',
    '4': 'communication',
    '5': 'memory',
    '6': 'driver',
    '7': 'system',
    '8': 'application',
    '9': 'io',
    '10': 'others',
    '11': 'security',
    '12': 'disk',
    '13': 'processor',
}

# Same mapping without key '8' ('application').
# NOTE(review): presumably 'application' is not counted as a fault category --
# confirm the intent with whoever consumes fault_label.
fault_label = {key: name for key, name in labels.items() if key != '8'}

# load data

In [4]:
def one_hot(y, n_values=None):
    """One-hot encode a vector of 0-based integer class labels.

    Args:
        y: array-like of labels with shape (n,) or (n, 1). Values are cast
            to int32 before encoding.
        n_values: number of classes, i.e. the width of the returned matrix.
            When None (the default) it is inferred as ``max(y) + 1``, which
            is only correct if the highest class actually occurs in ``y``.

    Returns:
        A float64 array of shape (n, n_values) with exactly one 1.0 per row.

    Raises:
        ValueError: if ``y`` is empty and ``n_values`` is not given, or if a
            label lies outside ``[0, n_values)``.
    """
    y = np.asarray(y)
    # Same flattening rule as before: exactly one label per sample.
    indices = y.reshape(len(y)).astype(np.int32)

    if n_values is None:
        if indices.size == 0:
            raise ValueError(
                "cannot infer the number of classes from an empty label "
                "array; pass n_values explicitly")
        n_values = int(indices.max()) + 1

    # Negative labels would otherwise wrap around through numpy's negative
    # indexing and be silently encoded as one of the last classes.
    if indices.size and (indices.min() < 0 or indices.max() >= n_values):
        raise ValueError(
            "labels must lie in [0, %d), got range [%d, %d]"
            % (n_values, indices.min(), indices.max()))

    return np.eye(n_values)[indices]  # rows of the identity -> floats


def load_X(X_path):
    """Load a tab-separated matrix of floats from a text file.

    Each line of the file is one sample; its tab-separated fields are parsed
    as float32 values.

    Args:
        X_path: path of the text file to read.

    Returns:
        A float32 array of shape (n_lines, n_fields, 1): the parsed matrix
        with a trailing singleton axis.

    Raises:
        ValueError: if a field cannot be parsed as a float.
    """
    # The context manager closes the file even when a malformed line makes
    # the parsing below raise.
    with open(X_path, 'r') as handle:
        rows = [np.array(line.strip().split('\t'), dtype=np.float32)
                for line in handle]
    # Wrap in an outer list -> (1, n_lines, n_fields), then move that axis
    # to the end -> (n_lines, n_fields, 1).
    return np.transpose(np.array([rows]), (1, 2, 0))


# Load "y" (the neural network's training and testing outputs)
def load_y(y_path):
    """Load integer class labels from a text file and one-hot encode them.

    The file is read line by line; each line is split on tabs and parsed as
    int32. The labels are shifted down by one before encoding, so the file is
    expected to hold 1-based class ids, one per line.

    Args:
        y_path: path of the label file to read.

    Returns:
        The result of ``one_hot`` applied to the 0-based labels.
    """
    # The context manager closes the file even if parsing raises.
    with open(y_path, 'r') as handle:
        y_ = np.array([line.strip().split('\t') for line in handle],
                      dtype=np.int32)
    # Subtract 1 from each output class for friendly 0-based indexing
    return one_hot(y_ - 1)


# Input files: the feature matrix and the matching class labels.
dataset_path = "data_msg_type/"
X_path = os.path.join(dataset_path, "semantic_sim.txt")
y_path = os.path.join(dataset_path, "semantic_label_index.txt")

X = load_X(X_path)
y = load_y(y_path)

# Lay every sample out as a 10 x 14 grid with a single channel, which is the
# input shape the convolutional network is built for.
x = np.reshape(X, (X.shape[0], 10, 14, 1))

# Hold out 20% of the samples; the fixed seed keeps the split repeatable.
print("Separating data into 80% training set & 20% test set...")
train_x, test_x, train_y, test_y = cross_validation.train_test_split(
    x, y,
    random_state=33,
    test_size=0.2)
print("Dataset separated.\n")
print(train_x.shape, train_y.shape, test_y.shape)

Separating data into 80% training set & 20% test set...
Dataset separated.

((80000, 10, 14, 1), (80000, 14), (20000, 14))


# train and test

In [5]:
# Build a small convolutional classifier with tflearn, train it on the
# training split, then report accuracy on the held-out split and save the
# trained model to disk.
with tf.Session(config=sess_config) as session:
    # NOTE(review): this runs before any layer has been defined, so there are
    # presumably no variables to initialize yet -- confirm it is still needed.
    tf.global_variables_initializer().run()

    # Input: batches of 10 x 14 single-channel samples.
    network = input_data(shape=[None, 10, 14, 1])
    # Stage 1: conv (4 filters, size 5, stride 1, tanh) -> max-pool (size 2,
    # stride 1) -> local response normalization.
    network = conv_2d(network, 4, 5, strides=1, activation='tanh')
    network = max_pool_2d(network, 2, strides=1)
    network = local_response_normalization(network)    
    # Stage 2: same pattern with 6 filters and a conv stride of 4.
    network = conv_2d(network, 6, 5, strides=4, activation='tanh')
    network = max_pool_2d(network, 2, strides=1)
    network = local_response_normalization(network)        
    # Classifier head: 64 tanh units, dropout called with 0.5, then a
    # 14-way softmax (one output per entry of `labels`).
    network = fully_connected(network, 64, activation='tanh')
    network = dropout(network, 0.5)
    network = fully_connected(network, 14, activation='softmax')
    # Training objective: categorical cross-entropy minimized with Adagrad
    # at a learning rate of 0.01.
    network = regression(network, optimizer='Adagrad',
          loss='categorical_crossentropy', learning_rate=0.01)
    
    # train test
    # Make sure the output directories for checkpoints and the final model
    # exist before tflearn writes into them.
    if not os.path.isdir('checkpoints'):
        os.makedirs('checkpoints')
    if not os.path.isdir('model'):
        os.makedirs('model')
    model = tflearn.DNN(network, checkpoint_path='checkpoints/lenet',
                    max_checkpoints=1, tensorboard_verbose=0)
    # 200 epochs with batches of 128 shuffled samples; snapshots are requested
    # every 200 steps and at the end of each epoch.
    # NOTE(review): the held-out split serves as the validation set here and
    # is also the data the final accuracy below is computed on.
    model.fit(train_x, train_y, n_epoch=200, validation_set=(test_x, test_y), shuffle=True,
                  show_metric=True, batch_size=128, snapshot_step=200,
                  snapshot_epoch=True, run_id='lenet')
    # Predicted class index per held-out sample (arg-max over the outputs).
    z = np.argmax(model.predict(test_x), axis=1)
    # Accuracy: fraction of samples whose predicted index equals the index of
    # the 1 in the one-hot target.
    acc = np.mean(np.argmax(test_y,axis=1) == z)
    model.save('model/model_retrained_by_lenet')
    print("accuracy: ", acc)

Training Step: 124999  | total loss: 0.30370 | time: 7.657s
| AdaGrad | epoch: 200 | loss: 0.30370 - acc: 0.9086 -- iter: 79872/80000
Training Step: 125000  | total loss: 0.28314 | time: 8.852s
| AdaGrad | epoch: 200 | loss: 0.28314 - acc: 0.9162 | val_loss: 0.24249 - val_acc: 0.9418 -- iter: 80000/80000
--
('accuracy: ', 0.94179999999999997)
