# CNN Model Test

### Imports

In [None]:
from skimage import io
from skimage.color import rgb2gray
from skimage.viewer import ImageViewer
import matplotlib.pyplot as plt
from scipy import misc
import cv2
import os
import numpy as np

### Load data

In [None]:
# Show the working directory; every data path below is built from it
print(os.getcwd())

train_img_dir = os.getcwd() + '/Capstone data/train/'
test_img_dir = os.getcwd() + '/Capstone data/test/'

# List each directory once, then split the names by suffix:
#   '_c.png' -> spectrogram of current
#   '_v.png' -> spectrogram of voltage
train_listing = os.listdir(train_img_dir)
test_listing = os.listdir(test_img_dir)

train_img_names_c = [name for name in train_listing if name.endswith('_c.png')]
train_img_names_v = [name for name in train_listing if name.endswith('_v.png')]

test_img_names_c = [name for name in test_listing if name.endswith('_c.png')]
test_img_names_v = [name for name in test_listing if name.endswith('_v.png')]

# NOTE(review): os.listdir returns names in arbitrary order, while the labels
# are read from a CSV later on - confirm the two orders actually match.
print(len(train_img_names_c))
print(len(train_img_names_v))

print(len(test_img_names_c))
print(len(test_img_names_v))

In [None]:
print(len(train_img_names_c))


def _load_gray_images(img_dir, img_names, shape=(128, 118)):
    """Load the named images as grayscale into one (n_images, rows, cols) array.

    Args:
        img_dir: Directory that contains the image files.
        img_names: File names (relative to `img_dir`) to load, in order.
        shape: Expected (rows, cols) of every image.

    Returns:
        A float array of shape ``(len(img_names),) + shape``.

    Raises:
        IOError: If OpenCV cannot read one of the files.
    """
    images = np.ones((len(img_names),) + tuple(shape))
    for idx, img_name in enumerate(img_names):
        img_path = os.path.join(img_dir, img_name)
        # Load the (possibly color) image as a single grayscale channel
        gray = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if gray is None:
            # cv2.imread signals failure with None instead of raising
            raise IOError("Could not read image file: {0}".format(img_path))
        images[idx] = gray
    return images


# Only the current ('_c') spectrograms are used as model input
all_train_images = _load_gray_images(train_img_dir, train_img_names_c)
all_test_images = _load_gray_images(test_img_dir, test_img_names_c)


print("all_train_images.shape {0}".format(all_train_images.shape))
print("all_test_images.shape {0}".format(all_test_images.shape))

In [None]:
# To apply a classifier on this data, we need to flatten each image, turning
# the data into a (samples, features) matrix. The feature count is inferred
# with -1 so it always matches the image size instead of a hard-coded 15104.

print("\n**************************")
n_samples = len(all_train_images)

print("all_train_images.shape {0}".format(all_train_images.shape))
data_train = all_train_images.reshape((n_samples, -1))

print("data_train shape {0}".format(data_train.shape))
print(data_train[:10])

print("**************************\n")


print("\n**************************")
n_samples = len(all_test_images)

print("all_test_images.shape {0}".format(all_test_images.shape))
data_test = all_test_images.reshape((n_samples, -1))

print("data_test shape {0}".format(data_test.shape))
print(data_test[:10])

print("**************************\n")

In [None]:
from numpy import genfromtxt


# The label CSV is expected in the current working directory
train_labels_dir = os.getcwd() + '/'
train_labels_path = train_labels_dir + 'train_labels.csv'

# Parse the comma-separated file into a numeric array, skipping the header row
train_labels = genfromtxt(train_labels_path, delimiter=',', skip_header=1)

print(len(train_labels))
print(train_labels.shape)

### Split the data and prepare the text files

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the labelled samples; the fixed random_state makes the
# split reproducible between runs.
split_parts = train_test_split(data_train, train_labels, test_size=0.3, random_state=54)
x_train, x_test, y_train, y_test = split_parts

print('train_labels shape is :', train_labels.shape, '\n')
print('x_train shape is: ', x_train.shape, '\n', 'y_train shape is: ', y_train.shape, '\n')

print('x_test shape is: ', x_test.shape, '\n', 'y_test shape is: ', y_test.shape)

In [None]:
# Save the data files into a format compatible with CNTK text reader
def savetxt(filename, data, hasLabels=True, labels=0, num_classes=11):
    """Write `data` to `filename`, one sample per line, in CNTK text format.

    A line is ``|labels <one-hot> |features <values>`` when `hasLabels` is
    true and ``|features <values>`` otherwise.

    Args:
        filename: Destination path. Missing parent directories are created.
        data: Iterable of 1-D NumPy arrays (e.g. a 2-D array), one per sample.
        hasLabels: Whether to write a ``|labels`` field on each line.
        labels: Class indices parallel to `data`; only read when `hasLabels`
            is true.
        num_classes: Width of the one-hot label encoding.
    """
    out_dir = os.path.dirname(filename)

    # dirname is '' for a bare file name, and os.makedirs('') would raise
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    print("Saving", filename)
    # Pre-render every possible one-hot label row as a space-separated string
    labels_ohe = [' '.join(one_hot)
                  for one_hot in np.eye(num_classes, dtype=np.uint).astype(str)]
    with open(filename, 'w') as f:
        print("opened....")
        for index, row in enumerate(data):
            feature_str = ' '.join(row.astype(str))
            if hasLabels:
                label_str = labels_ohe[int(labels[index])]
                f.write('|labels {} |features {}\n'.format(label_str, feature_str))
            else:
                f.write('|features {}\n'.format(feature_str))

In [None]:
# Column 1 of each label row is the ground-truth class
train_labels_GT = y_train[:,1]
test_labels_GT = y_test[:,1]

print ('Writing train text file...')

data_dir = os.path.join(os.getcwd(), "data/Out")

# Write the training split first, then the held-out split, both with labels
for out_name, split_features, split_labels in (
        ("train.txt", x_train, train_labels_GT),
        ("test.txt", x_test, test_labels_GT)):
    savetxt(os.path.join(data_dir, out_name), split_features, True, split_labels)


print("Done")

### Prepare the CNN Model

In [None]:
from __future__ import print_function # Use a function definition from future version (say 3.x from 2.7 interpreter)
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import os
import sys
import time

import cntk as C

%matplotlib inline



# Fix the NumPy and CNTK random seeds and force deterministic algorithms so
# that repeated runs are reproducible
np.random.seed(0)
C.cntk_py.set_fixed_random_seed(1)
C.cntk_py.force_deterministic_algorithms()

# Define the data dimensions
input_dim_model = (1, 128, 118)             # 1 channel (gray), images are 128 x 118
input_dim = int(np.prod(input_dim_model))   # flattened size (15104), used by readers to treat input data as a vector
num_output_classes = 11


# Network input (image tensor) and target (one-hot label vector)
x = C.input_variable(input_dim_model)
y = C.input_variable(num_output_classes)

In [None]:
# function to build model

def create_model(features):
    """Build the CNN on top of `features` and return its output node.

    The network is two 5x5, stride-2, padded convolutions (8 then 16 filters,
    ReLU) followed by a dense layer with `num_output_classes` outputs.

    Args:
        features: CNTK input (or expression) holding the image tensor.

    Returns:
        The output of the dense layer named 'classify'.
    """
    with C.layers.default_options(init=C.glorot_uniform(), activation=C.relu):
        h = features
        h = C.layers.Convolution2D(filter_shape=(5,5),
                                   num_filters=8,
                                   strides=(2,2),
                                   pad=True, name='first_conv')(h)
        h = C.layers.Convolution2D(filter_shape=(5,5),
                                   num_filters=16,
                                   strides=(2,2),
                                   pad=True, name='second_conv')(h)
        # The output layer must stay linear: its scores are passed to
        # cross_entropy_with_softmax / softmax, and a ReLU here would clamp
        # every negative score to zero. activation=None overrides the
        # ReLU default set by default_options above.
        r = C.layers.Dense(num_output_classes, activation=None, name='classify')(h)
        return r
        
# Instantiate the network on the input variable `x`
z = create_model(x)

# Inspect the freshly built model: output shape of the first convolution and
# the current bias vector of the final dense layer
first_conv_shape = z.first_conv.shape
classify_bias = z.classify.b.value
print("Output Shape of the first convolution layer:", first_conv_shape)
print("Bias value of the last dense layer:", classify_bias)

In [None]:
# Log the number of parameters in the network `z`
C.logging.log_number_of_parameters(z)

In [None]:
def create_criterion_function(model, labels):
    """Build the training criterion for `model` against `labels`.

    Returns:
        A ``(loss, error)`` pair: the softmax cross-entropy loss and the
        classification error metric, both CNTK functions.
    """
    return (
        C.cross_entropy_with_softmax(model, labels),
        C.classification_error(model, labels),
    )

# Utility that smooths a sequence with a trailing moving average.
# A more efficient implementation is possible with np.cumsum().
def moving_average(a, w=5):
    """Return `a` smoothed with a trailing window of `w` values.

    The first `w` entries are passed through unchanged; every later entry is
    replaced by the mean of the `w` values that precede it. Sequences shorter
    than `w` are returned as a full slice ``a[:]``.
    """
    if len(a) < w:
        return a[:]

    smoothed = []
    for pos, current in enumerate(a):
        if pos < w:
            smoothed.append(current)
        else:
            window = a[(pos - w):pos]
            smoothed.append(sum(window) / w)
    return smoothed


# Utility that reports the training progress every `frequency` minibatches
def print_training_progress(trainer, mb, frequency, verbose=1):
    """Report the loss and error of minibatch `mb` when it is due.

    Args:
        trainer: Object exposing `previous_minibatch_loss_average` and
            `previous_minibatch_evaluation_average`.
        mb: Index of the minibatch that was just trained.
        frequency: Report only when `mb` is a multiple of this value.
        verbose: When truthy, also print the values.

    Returns:
        ``(mb, loss, error)``; loss and error are the string "NA" for
        minibatches that are not reported.
    """
    if mb % frequency != 0:
        return mb, "NA", "NA"

    loss_avg = trainer.previous_minibatch_loss_average
    error_avg = trainer.previous_minibatch_evaluation_average
    if verbose:
        print ("Minibatch: {0}, Loss: {1:.4f}, Error: {2:.2f}%".format(mb, loss_avg, error_avg*100))

    return mb, loss_avg, error_avg

### Train_test function

In [None]:
def train_test(train_reader, test_reader, model_func, num_sweeps_to_train_with=10,
               num_samples_per_sweep=691, num_test_samples=297,
               minibatch_size=1, test_minibatch_size=1):
    """Train the model with SGD, then print its average error on the test reader.

    Args:
        train_reader: Minibatch source with `labels` and `features` streams
            used for training.
        test_reader: Minibatch source with the same streams used for testing.
        model_func: Callable applied to the scaled input `x/255` to obtain
            the model to train.
        num_sweeps_to_train_with: Number of passes over the training data.
        num_samples_per_sweep: Number of training samples in one pass.
        num_test_samples: Number of samples to read from `test_reader`.
        minibatch_size: Samples per training minibatch.
        test_minibatch_size: Samples per test minibatch.
    """
    # Instantiate the model function; x is the input (feature) variable
    # We will scale the input image pixels within 0-1 range by dividing all input value by 255.
    model = model_func(x/255)

    # Instantiate the loss and error function
    loss, label_error = create_criterion_function(model, y)

    # Instantiate the trainer object to drive the model training.
    # The learner is bound to the parameters of the model that the loss is
    # built on, rather than to whatever the global `z` currently refers to.
    learning_rate = 0.2
    lr_schedule = C.learning_rate_schedule(learning_rate, C.UnitType.minibatch)
    learner = C.sgd(model.parameters, lr_schedule)
    trainer = C.Trainer(model, (loss, label_error), [learner])

    # Total number of minibatches over all sweeps
    num_minibatches_to_train = (num_samples_per_sweep * num_sweeps_to_train_with) // minibatch_size

    # Map the data streams to the input and labels.
    input_map = {
        y: train_reader.streams.labels,
        x: train_reader.streams.features
    }

    # Report progress every this many minibatches
    training_progress_output_freq = 10

    # Start a timer
    start = time.time()

    for mb_idx in range(num_minibatches_to_train):
        # Read a mini batch from the training data file
        data = train_reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        print_training_progress(trainer, mb_idx, training_progress_output_freq, verbose=1)

    # Print training time
    print("Training took {:.1f} sec".format(time.time() - start))

    # Test the model
    test_input_map = {
        y: test_reader.streams.labels,
        x: test_reader.streams.features
    }

    num_minibatches_to_test = num_test_samples // test_minibatch_size

    test_result = 0.0

    for _ in range(num_minibatches_to_test):
        # Load the test data in batches of test_minibatch_size samples and
        # accumulate the evaluation error reported for each batch.
        data = test_reader.next_minibatch(test_minibatch_size, input_map=test_input_map)
        eval_error = trainer.test_minibatch(data)
        test_result = test_result + eval_error

    # Average of evaluation errors of all test minibatches
    print("Average test error: {0:.2f}%".format(test_result*100 / num_minibatches_to_test))

In [None]:

def do_train_test():
    """Rebuild the global model `z`, create both readers and run train_test."""
    global z
    z = create_model(x)
    # NOTE(review): create_reader, train_file and test_file are not defined
    # in the visible part of this notebook - confirm where they come from.
    train_test(
        create_reader(train_file, True, input_dim, num_output_classes),
        create_reader(test_file, False, input_dim, num_output_classes),
        z,
    )
    
# Train and evaluate; this call rebinds the global model `z`
do_train_test()


# Print the bias of the final dense layer again, now after training
print("Bias value of the last dense layer:", z.classify.b.value)

### Prepare for model evaluation

In [None]:
# Turn the model scores into class probabilities for inference. Training
# feeds the network x/255 (see train_test), so the same scaling is applied
# here; evaluating z on raw 0-255 pixels would not match the trained inputs.
out = C.softmax(z(x / 255))

### Modify the `savetxt` function to avoid the malformed input file error

In [None]:
# Save the data files into a format compatible with CNTK text reader
def savetxt(filename, data, hasLabels=True, labels=0, num_classes=11):
    """Write `data` to `filename`, one sample per line, in CNTK text format.

    Every line is ``|labels <one-hot> |features <values>``. When `hasLabels`
    is false a fixed placeholder label (class 1) is written so that the file
    still contains both fields on every line.

    Args:
        filename: Destination path. Missing parent directories are created.
        data: Iterable of 1-D NumPy arrays (e.g. a 2-D array), one per sample.
        hasLabels: Whether `labels` holds the real class of each sample.
        labels: Class indices parallel to `data`; only read when `hasLabels`
            is true.
        num_classes: Width of the one-hot label encoding (at least 2).
    """
    out_dir = os.path.dirname(filename)

    # dirname is '' for a bare file name, and os.makedirs('') would raise
    if out_dir and not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    print("Saving", filename)
    # Pre-render every possible one-hot label row as a space-separated string
    labels_ohe = [' '.join(one_hot)
                  for one_hot in np.eye(num_classes, dtype=np.uint).astype(str)]
    with open(filename, 'w') as f:
        print("opened....")
        for index, row in enumerate(data):
            feature_str = ' '.join(row.astype(str))
            if hasLabels:
                label_str = labels_ohe[int(labels[index])]
            else:
                # Placeholder label for unlabelled data
                label_str = labels_ohe[1]
            f.write('|labels {} |features {}\n'.format(label_str, feature_str))

### Create the model evaluation file

In [None]:
# This cell writes the unlabelled evaluation set, not the training set
print('Writing eval text file...')

data_dir = os.path.join(os.getcwd(), "data/Out")


# hasLabels=False: savetxt writes a placeholder label for every sample
savetxt(os.path.join(data_dir, "test_eval.txt"), data_test, False)


print("Done")

In [None]:
# Read the data for evaluation. The path is built the same way as where the
# file was written (<cwd>/data/Out/test_eval.txt) instead of a machine-specific
# absolute path.
eval_file = os.path.join(os.getcwd(), "data/Out", "test_eval.txt")
reader_eval = create_reader(eval_file, False, input_dim, num_output_classes)

# Read every evaluation sample in one minibatch so that a prediction exists
# for each test id when the results are saved.
# NOTE(review): the original read only the first 25 samples - confirm that
# predicting the full test set is what is wanted.
eval_minibatch_size = len(data_test)
eval_input_map = {x: reader_eval.streams.features, y: reader_eval.streams.labels}

data = reader_eval.next_minibatch(eval_minibatch_size, input_map=eval_input_map)

img_label = data[y].asarray()
img_data = data[x].asarray()

### Make Predictions

In [None]:
# Reshape img_data to M x 1 x 128 x 118 to be compatible with the model.
# M is inferred (-1) so the reshape also works when the reader returned fewer
# samples than were requested.
img_data = np.reshape(img_data, (-1, 1, 128, 118))

# Evaluate the softmax output one sample at a time
predicted_label_prob = [out.eval(sample) for sample in img_data]


# Find the index with the maximum value for both predicted as well as the ground truth
pred = [np.argmax(prob) for prob in predicted_label_prob]
# NOTE: the evaluation file is written with a fixed placeholder label, so
# these "ground truth" values are not real labels.
gtlabel = [np.argmax(label) for label in img_label]


print("Label    :", gtlabel[:25])
print("Predicted:", pred)

### Optionally modify the model

In [None]:
'''from cntk.layers.layers import AveragePooling

def create_model(features):
    with C.layers.default_options(init = C.glorot_uniform(), activation = C.relu):
            h = features
            
            h = C.layers.Convolution2D(filter_shape=(5,5), 
                                       num_filters=8, 
                                       strides=(1,1), 
                                       pad=True, name='first_conv')(h)
            
            p =  AveragePooling((3,3), strides=1)
            
            ph = p(h)
            
            h = C.layers.Convolution2D(filter_shape=(5,5), 
                                       num_filters=16, 
                                       strides=(1,1), 
                                       pad=True, name='second_conv')(h)
            
            p =  AveragePooling((3,3), strides=1)
            
            ph = p(h)
            
            r = C.layers.Dense(num_output_classes, activation = None, name='classify')(h)
            return r'''

### Optionally train the model on the whole data

In [None]:
'''# Save the data files into a format compatible with CNTK text reader
def savetxt(filename, data, hasLabels=True, labels=0):
    dir = os.path.dirname(filename)

    if not os.path.exists(dir):
        os.makedirs(dir)
    
    print("Saving", filename )
    with open(filename, 'w') as f:
        print("opened....")
        labels_ohe = list(map(' '.join, np.eye(11, dtype=np.uint).astype(str))) #for one hot encoding
        index = 0
        for row in data:            
            row_str = row.astype(str)
            if hasLabels:                               
                label_str = labels_ohe[int(labels[index])]               
            
            feature_str = ' '.join(row_str)
            
            if hasLabels:
                f.write('|labels {} |features {}\n'.format(label_str, feature_str))
            else:
                f.write('|labels {} |features {}\n'.format(labels_ohe[1], feature_str))

            
            index = index + 1



train_labels_GT = train_labels[:,1] #Get Ground truth


print ('Writing train text file...')

data_dir = os.path.join(os.getcwd(), "data/Out")


savetxt(os.path.join(data_dir, "train.txt"), data_train, True, train_labels_GT)
savetxt(os.path.join(data_dir, "test.txt"), data_test, False)


print("Done")

do_train_test()'''

### Optionally make predictions again from the new model

In [None]:
'''# reshape img_data to: M x 1 x 128 x 118 to be compatible with model
img_data = np.reshape(img_data, (eval_minibatch_size, 1, 128, 118))

predicted_label_prob = [out.eval(img_data[i]) for i in range(len(img_data))]


# Find the index with the maximum value for both predicted as well as the ground truth
pred = [np.argmax(predicted_label_prob[i]) for i in range(len(predicted_label_prob))]
gtlabel = [np.argmax(img_label[i]) for i in range(len(img_label))]


print("Label    :", gtlabel[:25])
print("Predicted:", pred)'''

### Save predictions

In [None]:
import pandas as pd

# The test ids are read from the 'names' column of test_labels.csv
id_labels = pd.read_csv('test_labels.csv')

# One row per predicted class, then attach the id (aligned on the row index)
predictions = pd.DataFrame(data=pred, columns=['appliance'])
predictions['id'] = id_labels['names']

# Put the id column first
predictions = predictions.loc[:, ['id', 'appliance']]


# Save as CSV without the index column
predictions.to_csv('Caps_Preds.csv', index=None)