In [None]:
import tensorflow as tf
import time
import pandas as pd
import numpy as np

### Balance, encode and decode classes

In [None]:
# Balance the number of samples per class
def balance(dataset, opt='Y', random_state=None):
    """Down-sample every class to the size of the smallest class.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Table that contains the column named by ``opt``.
    opt : str, default 'Y'
        Name of the column whose distinct values define the classes.
    random_state : int or numpy.random.RandomState, optional
        Seed or generator used for the row sampling. ``None`` (default)
        keeps pandas' default behaviour (global NumPy random state).

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns, an equal number of randomly
        chosen rows per class (classes in sorted order) and a fresh
        0..n-1 index. The input frame is not modified.
    """
    if dataset.empty:
        return dataset.reset_index(drop=True)

    groups = dataset.groupby(opt)
    sizes = groups.size()
    if sizes.empty:
        # Every key was NaN, so there is no class to sample from.
        return dataset.iloc[0:0].reset_index(drop=True)

    # The target size is the same for every group, so compute it only once.
    smallest = int(sizes.min())

    # Share one generator across groups so that a seed gives reproducible
    # but not identical draws for each class.
    rng = random_state
    if rng is not None and not isinstance(rng, np.random.RandomState):
        rng = np.random.RandomState(rng)

    # Sampling each group explicitly keeps the grouping column in the result
    # regardless of how the installed pandas treats it in groupby.apply.
    sampled = [group.sample(n=smallest, random_state=rng) for _, group in groups]

    return pd.concat(sampled).reset_index(drop=True)

# Class labels are not contiguous (they have gaps), so let's do an encoder-function ...
def encode(dataset):
    """Shift the class labels in column 'Y' downwards, in place.

    Mapping applied to every label:

    * ``7 < label < 53``  ->  ``label - 1``
    * ``52 < label < 62`` ->  ``label - 3``
    * ``61 < label``      ->  ``label - 4``
    * anything else is left unchanged

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame with a numeric column 'Y'. It is modified in place; any index
        is accepted.

    Returns
    -------
    None
    """
    labels = np.asarray(dataset['Y'])

    # All conditions are evaluated on the original labels, so a value is
    # shifted at most once (the ranges do not overlap).
    shift = np.select(
        [(labels > 7) & (labels < 53),
         (labels > 52) & (labels < 62),
         labels > 61],
        [1, 3, 4],
        default=0,
    )

    dataset['Y'] = labels - shift
            
# ... and a decoder-function that undoes the shift
def decode(dataset):
    """Shift the class labels in column 'Y' upwards, in place.

    Mapping applied to every label:

    * ``8 <= label < 52``  ->  ``label + 1``
    * ``52 <= label < 59`` ->  ``label + 3``
    * ``59 <= label``      ->  ``label + 4``
    * anything else is left unchanged

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame with a numeric column 'Y'. It is modified in place; any index
        is accepted.

    Returns
    -------
    None
    """
    labels = np.asarray(dataset['Y'])

    # All conditions are evaluated on the incoming labels, so a value is
    # shifted at most once (the ranges do not overlap).
    shift = np.select(
        [(labels >= 8) & (labels < 52),
         (labels >= 52) & (labels < 59),
         labels >= 59],
        [1, 3, 4],
        default=0,
    )

    dataset['Y'] = labels + shift

### Read dataset from file and process it

In [None]:
# Load the raw training table
data_train = pd.read_csv('.\\dataset_train.csv')

# Remap the class labels of the training set in place
encode(data_train)

class_labels = data_train['Y'].drop_duplicates()

print('After map max class label is %d' % class_labels.max())
print('After map min class label is %d' % class_labels.min())

# Discard every row whose Feat1 or Feat2 value exceeds 0.1
data_train = data_train.loc[~(data_train['Feat1'] > 0.1)]
data_train = data_train.loc[~(data_train['Feat2'] > 0.1)]

# Equalise the number of rows per class and renumber the rows
data_train = balance(data_train).reset_index(drop=True)

### Create net configuration for classifying task

In [None]:
def net(size_input, size_output):
    """Build a small fully connected softmax classifier in the current default graph.

    Layers: input -> tanh (width k) -> ReLU (width k) -> linear logits -> softmax,
    with k = 10 and all tensors in float64.

    Parameters
    ----------
    size_input : int
        Number of input features per sample.
    size_output : int
        Number of classes (width of the one-hot label vector).

    Returns
    -------
    tuple
        ``(x, labels, out, loss, accuracy, right)``: the feature placeholder,
        the one-hot label placeholder, the softmax output, the scalar loss,
        the batch accuracy and the arg-max of the labels.
    """

    # The names "x" (input) and "y" (softmax output) are looked up by run_net
    # through get_tensor_by_name, so they must not change.
    x      = tf.placeholder(tf.float64, (None, size_input), name="x")
    labels = tf.placeholder(tf.float64, (None, size_output))

    # sigma: stddev of the weight initialiser, bias: initial bias value,
    # k: width of both hidden layers
    sigma, bias, k = 0.1, 0.5, 10

    w1 = tf.Variable(tf.truncated_normal((size_input,      k     ), stddev=sigma, dtype=tf.float64))
    w2 = tf.Variable(tf.truncated_normal((k,               k     ), stddev=sigma, dtype=tf.float64))
    w3 = tf.Variable(tf.truncated_normal((k,          size_output), stddev=sigma, dtype=tf.float64))
    
    b1 = tf.Variable(bias * tf.ones((1,      k     ), dtype=tf.float64))
    b2 = tf.Variable(bias * tf.ones((1,      k     ), dtype=tf.float64))
    b3 = tf.Variable(bias * tf.ones((1, size_output), dtype=tf.float64))

    y1 = tf.tanh   (tf.matmul(x,   w1) + b1)
    y2 = tf.nn.relu(tf.matmul(y1,  w2) + b2)
    # y3 holds the raw logits; softmax is applied separately below
    y3 =            tf.matmul(y2,  w3) + b3
    
    out = tf.nn.softmax(y3, name="y") 
            
    # Mean cross-entropy over the batch, computed from the logits
    loss  = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=y3))
        
    # NOTE(review): nothing in this function attaches a regularizer to the
    # variables, so unless other code fills the REGULARIZATION_LOSSES
    # collection this term is presumably zero - confirm whether weight decay
    # was intended.
    reg   = tf.cast(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES), tf.float64)
    loss += 0.1 * tf.reduce_sum(reg)
    
    # Fraction of samples whose predicted class equals the labelled class
    correct_prediction = tf.equal(tf.argmax(out, axis=1), tf.argmax(labels, axis=1))
    accuracy           = tf.reduce_mean(tf.cast(correct_prediction, tf.float64))
    
    # Integer class index recovered from the one-hot labels
    right = tf.argmax(labels, axis=1)
    
    return x, labels, out, loss, accuracy, right

###  Functions to train net

In [None]:
def normalization(datasеt):
    """Standardise every row of a 2-D array to zero mean and unit variance.

    Each row has its own mean subtracted and is divided by its own standard
    deviation (plus 1e-12 to avoid division by zero); Gaussian noise with
    standard deviation 1e-6 is then added to every element.

    Parameters
    ----------
    datasеt : array-like, 2-D
        Values to normalise. The input is not modified; integer input is
        converted to float64.
        NOTE(review): the parameter name appears to be spelled with a
        non-Latin look-alike letter; it is kept unchanged so keyword calls
        keep working - confirm and consider renaming.

    Returns
    -------
    numpy.ndarray
        New float64 array of the same shape.
    """
    # Work on a float copy: in-place arithmetic on the caller's array would
    # silently change its data and fails outright for integer dtypes.
    values = np.array(datasеt, dtype=np.float64)
    rows, cols = values.shape

    # keepdims=True yields (rows, 1) columns that broadcast across each row,
    # which removes the need for numpy.matlib.repmat.
    values -= values.mean(axis=1, keepdims=True)
    values /= values.std(axis=1, keepdims=True) + 1e-12
    values += np.random.normal(0, 1e-6, (rows, cols))

    return values

    
def train_net(dataset, num_class, size_input, size_batch, num_steps, rate_learn, init_rate, file_for_save):
    """Train the classifier built by ``net`` and save it as a checkpoint.

    Every step draws ``size_batch`` random rows from ``dataset``, feeds the
    raw (un-normalised) feature columns together with one-hot labels and runs
    one Adagrad update. Predictions, accuracy, loss and progress are printed
    on every step.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain the columns 'Feat1', 'Feat2', 'Feat4' and 'Y'; the
        values of 'Y' are used as class indices in ``[0, num_class)``.
    num_class : int
        Number of classes (width of the one-hot labels).
    size_input : int
        Number of input features expected by the net.
    size_batch : int
        Rows sampled per training step.
    num_steps : int
        Number of training steps.
    rate_learn : float
        Learning rate passed to the Adagrad optimizer.
    init_rate : float
        ``initial_accumulator_value`` passed to the Adagrad optimizer.
    file_for_save : str
        Checkpoint path without extension; ".ckpt" is appended.

    Returns
    -------
    None
    """
    columns = ['Feat1', 'Feat2', 'Feat4', 'Y']

    def make_batch_labels(size_batch, num_class, class_labels):
        """Return a (size_batch, num_class) one-hot matrix for the given class indices."""
        labels = np.zeros((size_batch, num_class))

        for i, label in enumerate(class_labels):
            labels[i, int(label)] = 1

        return labels

    # Build into a fresh graph. With the global default graph, re-running this
    # function in the same kernel would add a second network ("x_1", "y_1")
    # while the saved tensors "x:0" / "y:0" - the ones the inference code
    # looks up - would still belong to the first, re-initialised network.
    with tf.Graph().as_default():

        x, labels, out, loss, accuracy, _ = net(size_input, num_class)

        # Gradient optimization
        train_step = tf.train.AdagradOptimizer(rate_learn, initial_accumulator_value=init_rate).minimize(loss)

        # Predicted class index for every sample of the batch
        res = tf.argmax(out, axis=1)

        # To save model and count time of training
        saver, start = tf.train.Saver(), time.time()

        # Open session with tensor math for net training
        with tf.Session() as sess:

            sess.run(tf.global_variables_initializer())

            for i in range(num_steps):

                # .values replaces DataFrame.as_matrix, which newer pandas no longer provides
                data = dataset.sample(n=size_batch)[columns].values

                batch_labels = make_batch_labels(size_batch, num_class, data[:, -1])

                # Raw features: every column except the trailing label column
                batch_train = data[:, :-1]

                precision, error, results, _ = sess.run([accuracy,
                                                         loss,
                                                         res,
                                                         train_step],
                                                        feed_dict={x:      batch_train,
                                                                   labels: batch_labels})

                print(results)
                print("Accuracy: %.2f %%; loss: %.4f; progress: %.2f %%" % (precision*100, error, 100*i/num_steps))

            finish = time.time()

            # Save model as graph
            save_path = saver.save(sess, file_for_save + ".ckpt", meta_graph_suffix='meta', write_meta_graph=True)
            print("Net model is saved to file: %s" % save_path)

    print("Training is finished by %.2f s" % (finish - start))

In [None]:
# Train the network and store the resulting checkpoint
train_net(
    data_train,
    num_class=60,
    size_input=3,
    size_batch=100,
    num_steps=10000,
    rate_learn=0.5,
    init_rate=0.1,
    file_for_save=".\\net_1",
)

### Net processing

In [None]:
def run_net(dataset, size_batch, file_name_net, normalize=False):
    """Classify every row of ``dataset`` with a network restored from a checkpoint.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain the feature columns 'Feat1', 'Feat2' and 'Feat4'.
    size_batch : int
        Number of rows fed to the network per step.
    file_name_net : str
        Checkpoint path without extension; ".ckpt" / ".ckpt.meta" are appended.
    normalize : bool, default False
        When True, each batch is passed through ``normalization`` first.
        The default feeds raw features because ``train_net`` trains on raw
        features; normalising only at inference time would present the
        network with inputs unlike those it was trained on.

    Returns
    -------
    numpy.ndarray
        1-D float array with one predicted class index per row, in row order.
    """
    feature_columns = ['Feat1', 'Feat2', 'Feat4']

    rows      = dataset.shape[0]
    num_batch = int(np.ceil(rows/size_batch))

    predict = np.zeros(rows)

    # Recover net model from graph, so create this recovering graph ...
    graph = tf.Graph()

    # ... and fill graph to recover
    with graph.as_default():

        saver = tf.train.import_meta_graph(file_name_net + ".ckpt.meta")

        # Open session for tensor math
        with tf.Session() as sess:

            # Input placeholder and softmax output, identified by their saved names
            x = sess.graph.get_tensor_by_name("x:0")
            y = sess.graph.get_tensor_by_name("y:0")

            # Convert the class scores to an integer class label
            out = tf.argmax(y, axis=1)

            saver.restore(sess, file_name_net + ".ckpt")

            start = time.time()

            for i in range(num_batch):

                ind1, ind2 = int(i*size_batch), int((i+1)*size_batch)

                # Positional row slice; .values replaces DataFrame.as_matrix,
                # which newer pandas no longer provides
                features = dataset.iloc[ind1:ind2][feature_columns].values

                batch = normalization(features) if normalize else features

                predict[ind1:ind2] = sess.run(out, feed_dict={x: batch})

                print("Net progress ... %.1f %%" % (100*i/num_batch))

        finish = time.time()
        print("Net processing is finished in %.2f s" % (finish - start))

    return predict

In [None]:
# Classify every row of data_train with the saved network
predict = run_net(
    data_train,
    size_batch=100,
    file_name_net='.\\net_1',
)

In [None]:
# Sort the predictions by Node and save the classification results to a file

def sort_by(dataset, by):
    """Return a copy of ``dataset`` ordered by column ``by`` with a fresh 0..n-1 index."""
    return dataset.sort_values(by=[by]).reset_index(drop=True)

file_name_to_save = 'results.csv'

# Pair every node with its predicted label and order the rows by node
results = sort_by(pd.DataFrame({'Node': data_train['Node'], 'Y': predict}), by='Node')

# Map the predicted labels back to the original label values (in place)
decode(results)

print(results)

results.to_csv('.\\' + file_name_to_save, index=False)
