In [1]:
import cntk as C
import cntk.tests.test_utils
from cntk.device import try_set_default_device, gpu
# Request GPU 0 as the default compute device.
# NOTE(review): the value returned by try_set_default_device is discarded, so
# this cell does not report whether the request took effect -- confirm.
try_set_default_device(gpu(0))
cntk.tests.test_utils.set_device_from_pytest_env() # (only needed for our build system)
C.cntk_py.set_fixed_random_seed(1) # fix the random seed so that LR examples are repeatable

In [2]:
import numpy as np
import sys
import os
import math
import cntk
# Module-level state shared by the cells below. No `global` declarations are
# needed here: at module scope every assignment already creates a global name.

# Flag read by the k-fold loop in process1(), which stops early once it exceeds 1.
index_model = 1

# Append-mode handle on the secondary log file; it is left open for later cells.
f = open("log2.txt", 'a')

def create_reader(path, is_training, input_dim, num_label_classes):
    """Build a CNTK minibatch source over a CTF-formatted text file.

    Args:
        path: Location of the CTF file.
        is_training: When truthy the source is randomized and repeats
            indefinitely; otherwise it is not randomized and is limited to
            a single sweep.
        input_dim: Shape declared for the dense ``features`` field.
        num_label_classes: Shape declared for the dense ``label`` field.

    Returns:
        A ``cntk.io.MinibatchSource`` exposing the streams ``labels`` and
        ``features``.
    """
    stream_defs = cntk.io.StreamDefs(
        labels=cntk.io.StreamDef(field='label', shape=num_label_classes, is_sparse=False),
        features=cntk.io.StreamDef(field='features', shape=input_dim, is_sparse=False),
    )
    ctf_deserializer = cntk.io.CTFDeserializer(path, stream_defs)

    if is_training:
        sweep_limit = cntk.io.INFINITELY_REPEAT
    else:
        sweep_limit = 1

    return cntk.io.MinibatchSource(ctf_deserializer, randomize=is_training, max_sweeps=sweep_limit)

# Locate the train/test files of the first k-fold split and fail early when
# they are missing. Only the directories listed below are searched.
# The module-level names `train_file` and `test_file` set here are also read
# by train_test() when it counts the samples in each file.
candidate_data_dirs = ["../training_data/input/k-fold/1"]
data_found = False

for data_dir in candidate_data_dirs:
    train_file = os.path.join(data_dir, "train.txt")
    test_file = os.path.join(data_dir, "test.txt")
    if os.path.isfile(train_file) and os.path.isfile(test_file):
        data_found = True
        break

if not data_found:
    # The original message was cut off mid-word; name the searched locations instead.
    raise ValueError(
        "Your data files are not available. Expected train.txt and test.txt in one of: "
        + ", ".join(candidate_data_dirs))

print("Data directory is {0}".format(data_dir))
print("Train-data path is " + train_file)
print("Test-data path is " + test_file)

Data directory is ../training_data/input/k-fold/1
Train-data path is ../training_data/input/k-fold/1/train.txt
Test-data path is ../training_data/input/k-fold/1/test.txt


In [3]:
def create_criterion_function(model, labels):
    """Build the training criterion for ``model``.

    Returns:
        A ``(loss, error)`` pair: the softmax cross-entropy of ``model``
        against ``labels`` and the classification error of ``model``
        against ``labels``.
    """
    return (
        cntk.cross_entropy_with_softmax(model, labels),
        cntk.classification_error(model, labels),
    )

In [4]:
def moving_average(a, w=5):
    """Smooth ``a`` with a trailing window of ``w`` elements.

    A sequence shorter than ``w`` is returned as a slice copy (``a[:]``).
    Otherwise a new list is built: the first ``w`` entries are copied
    unchanged, and every later entry at position ``idx`` becomes the mean of
    ``a[idx-w:idx]`` -- the ``w`` entries just before it, not including the
    entry at ``idx`` itself.

    A more efficient implementation is possible with np.cumsum().
    """
    if len(a) < w:
        return a[:]    # slice so the caller gets a copy, not the same object

    smoothed = []
    for idx, val in enumerate(a):
        if idx < w:
            smoothed.append(val)
        else:
            window = a[(idx - w):idx]
            smoothed.append(sum(window) / w)
    return smoothed


def print_training_progress(trainer, mb, frequency, verbose=1):
    """Report the trainer's latest minibatch metrics every ``frequency`` minibatches.

    Args:
        trainer: Object exposing ``previous_minibatch_loss_average`` and
            ``previous_minibatch_evaluation_average``.
        mb: Index of the minibatch that was just processed.
        frequency: Metrics are read only when ``mb`` is a multiple of this.
        verbose: When truthy, the metrics that were read are also printed.

    Returns:
        ``(mb, training_loss, eval_error)``. Both metrics are the string
        ``"NA"`` for minibatches that are not a multiple of ``frequency``.
    """
    if mb % frequency != 0:
        return mb, "NA", "NA"

    loss_average = trainer.previous_minibatch_loss_average
    error_average = trainer.previous_minibatch_evaluation_average
    if verbose:
        # The error is a fraction; it is shown as a percentage.
        report = "\t\tMinibatch: {0}, Loss: {1:.4f}, Error: {2:.2f}%".format(mb, loss_average, error_average*100)
        print(report)

    return mb, loss_average, error_average

In [5]:
def set_adaptive_learner(model, base_lr_per_sample, minibatch_size, num_samples_per_sweep):
    """Create a momentum-SGD learner over ``model.parameters``.

    The learning-rate schedule is obtained from
    get_adaptive_learning_schedule() using the last three arguments. Momentum
    is a constant 0.90 schedule, the L2 regularization weight is 0.0001 and
    ``unit_gain`` is enabled. (A time-constant based momentum schedule was
    tried earlier and is disabled.)

    NOTE(review): despite its name, ``base_lr_per_sample`` is forwarded together
    with ``minibatch_size`` to the schedule factory -- confirm which unit the
    rates are meant to be expressed in.

    Returns:
        The learner produced by ``cntk.momentum_sgd``.
    """
    schedule = get_adaptive_learning_schedule(base_lr_per_sample, minibatch_size, num_samples_per_sweep)
    momentum = cntk.momentum_schedule(0.90)

    return cntk.momentum_sgd(
        model.parameters,
        schedule,
        momentum,
        l2_regularization_weight=0.0001,
        unit_gain=True,
    )
    
def get_adaptive_learning_schedule(base_lr_per_sample, minibatch_size, num_samples_per_sweep):
    """Wrap the arguments into a CNTK learning-rate schedule.

    The three arguments are forwarded positionally and unchanged to
    ``cntk.learners.learning_parameter_schedule``, whose result is returned.
    """
    make_schedule = cntk.learners.learning_parameter_schedule
    return make_schedule(base_lr_per_sample, minibatch_size, num_samples_per_sweep)

In [6]:
import time

def _count_lines(path):
    """Return the number of lines in the text file at ``path``, closing it afterwards."""
    with open(path, 'r') as handle:
        return sum(1 for _ in handle)


def train_test(train_reader, test_reader, model_func ,index_model, file_writer, num_sweeps_to_train_with=10,
               train_path=None, test_path=None):
    """Train the network, evaluate it on the test reader, then log and save it.

    Besides its arguments the function relies on module-level names:
    ``input`` and ``label`` (the CNTK input variables), ``z`` (the network
    handed to the learner and Trainer, and the object that is saved) and,
    unless ``train_path`` / ``test_path`` are supplied, ``train_file`` /
    ``test_file`` for the sample counts.

    Args:
        train_reader: Minibatch source for training; its ``streams.labels``
            and ``streams.features`` are mapped to ``label`` and ``input``.
        test_reader: Minibatch source for evaluation, mapped the same way.
        model_func: Callable applied to ``input/255``; the loss and error
            functions are built on its result.
        index_model: Not used by this function; kept so existing callers
            keep working.
        file_writer: Open text file that receives the log lines (each line
            is also printed).
        num_sweeps_to_train_with: Number of passes over the training data.
        train_path: Optional path of the training file used only to count
            samples. Defaults to the module-level ``train_file``.
        test_path: Optional path of the test file used only to count
            samples. Defaults to the module-level ``test_file``.

    Returns:
        The average test error over all full test minibatches, in percent.

    Raises:
        ValueError: If the test file has fewer lines than one test
            minibatch, so no minibatch would be evaluated. This is checked
            before training starts.
    """
    minibatch_size = 64
    test_minibatch_size = 64

    # Sample counts are taken as the number of lines in each file.
    # NOTE(review): when no paths are given these are the module-level
    # train_file/test_file, which need not be the files behind the readers.
    if train_path is None:
        train_path = train_file
    if test_path is None:
        test_path = test_file
    num_samples_per_sweep = _count_lines(train_path)
    num_samples = _count_lines(test_path)

    num_minibatches_to_train = int((num_samples_per_sweep * num_sweeps_to_train_with) / minibatch_size)

    # Only full minibatches are evaluated; any remainder of the test file is skipped.
    num_minibatches_to_test = num_samples // test_minibatch_size
    if num_minibatches_to_test == 0:
        # Fail before training rather than with a ZeroDivisionError afterwards.
        raise ValueError(
            "Test file {0!r} has {1} lines, fewer than one test minibatch of {2}".format(
                test_path, num_samples, test_minibatch_size))

    # Instantiate the model function on the input scaled by 1/255.
    model = model_func(input/255)

    # Instantiate the loss and error function.
    # The criterion is built on `model`, while the learner, the Trainer and the
    # final save below use the module-level `z`.
    # NOTE(review): presumably `model` shares its parameters with `z`, which
    # would mean the saved `z` expects inputs already divided by 255 -- confirm.
    loss, label_error = create_criterion_function(model, label)

    # Learning-rate list: five rates, each repeated 2*k times, where k is the
    # number of full minibatches in one sweep.
    # NOTE(review): the last rate (0.175) breaks the halving pattern of the
    # first four -- confirm it is intended.
    # NOTE(review): the list is handed to the schedule together with
    # num_samples_per_sweep; confirm that each entry then lasts as long as intended.
    k = num_samples_per_sweep // minibatch_size
    learning_rate = [rate for rate in (0.2, 0.1, 0.05, 0.025, 0.175) for _ in range(k * 2)]

    learner = set_adaptive_learner(z, learning_rate, minibatch_size, num_samples_per_sweep)
    trainer = cntk.Trainer(z, (loss, label_error), [learner])

    # Map the data streams to the input and labels.
    input_map = {
        label  : train_reader.streams.labels,
        input  : train_reader.streams.features
    }

    # Progress is printed every this many minibatches.
    training_progress_output_freq = 500

    # Start a timer
    start = time.time()

    for i in range(num_minibatches_to_train):
        # Read a mini batch from the training data file
        data = train_reader.next_minibatch(minibatch_size, input_map=input_map)
        trainer.train_minibatch(data)
        print_training_progress(trainer, i, training_progress_output_freq, verbose=1)

    # Log training time
    string_training_time = "\t\tTraining took {:.1f} sec".format(time.time() - start)
    string_append_and_print(string_training_time, file_writer)

    # Test the model
    test_input_map = {
        label  : test_reader.streams.labels,
        input  : test_reader.streams.features
    }

    test_result = 0.0
    for _ in range(num_minibatches_to_test):
        # Evaluate one minibatch of test data and accumulate its error.
        data = test_reader.next_minibatch(test_minibatch_size, input_map=test_input_map)
        test_result = test_result + trainer.test_minibatch(data)

    # Average of evaluation errors of all test minibatches, in percent
    average_test_error = test_result*100 / num_minibatches_to_test
    string_average_test_error = "\t\tAverage test error: {0:.2f}%".format(average_test_error)
    string_append_and_print(string_average_test_error, file_writer)

    # The model is always saved (an earlier error-threshold check is disabled).
    # The file name carries only the rounded error, so two runs with the same
    # rounded error write to the same path.
    path_model = "../testing_data/model/model{0:.2f}.model".format(average_test_error)
    string_saved_model = "\t\tmodel saved from data set {0:.2f}".format(average_test_error)
    string_append_and_print(string_saved_model, file_writer)

    # Make sure the target directory exists so the save is not lost after training.
    os.makedirs(os.path.dirname(path_model), exist_ok=True)
    z.save(path_model)
    return average_test_error

In [7]:
def do_train_test(train_file, test_file, input_dim, index_model, file_writer, num_output_classes, filter, stride, filters):
    """Build a fresh network and readers for one data split, then train and test it.

    The network is stored in the module-level ``z`` (replacing any previous
    one) and is also passed to train_test() as the model function.

    NOTE(review): train_test() counts samples from the module-level
    ``train_file`` / ``test_file``, not from the paths passed to this
    function -- confirm this is intended when several splits are processed.

    Returns:
        Whatever train_test() returns (the average test error in percent).
    """
    global z
    z = create_model(input, num_output_classes, filter, stride, filters)

    # Training source is randomized/repeating (True); test source is not (False).
    training_source = create_reader(train_file, True, input_dim, num_output_classes)
    evaluation_source = create_reader(test_file, False, input_dim, num_output_classes)

    error_percent = train_test(training_source, evaluation_source, z, index_model, file_writer)
    return error_percent

In [8]:
def create_model(input, out_dims, filter, stride, filters):
    """Stack three padded, strided convolutions followed by two dense layers.

    Args:
        input: CNTK variable or function the first convolution is applied to.
        out_dims: Number of outputs of the final dense layer.
        filter: Side length of the square convolution kernel.
        stride: Stride used in both spatial directions.
        filters: Sequence of filter counts; only entries 0, 1 and 2 are used.

    Returns:
        The CNTK function produced by the last dense layer.
    """
    with C.layers.default_options(init=C.glorot_uniform(), activation=C.relu):
        h = input
        # Three convolutions that differ only in their number of filters.
        for layer_index in range(3):
            convolution = C.layers.Convolution2D(filter_shape=(filter, filter),
                                                 num_filters=filters[layer_index],
                                                 strides=(stride, stride),
                                                 pad=True)
            h = convolution(h)

        # Both dense layers override the default activation with None.
        h = C.layers.Dense(9, activation=None)(h)
        r = C.layers.Dense(out_dims, activation=None)(h)

    return r

In [9]:
# Shape handed to the network's input variable.
# NOTE(review): presumably (channels, height, width) -- confirm against the data.
input_dim_model = ( 3 , 31, 31)
# Flattened size of one sample; forwarded to create_reader() as the shape of the `features` field.
input_dim = 3 * 31 * 31
num_output_classes = 3
# `input` shadows the Python builtin of the same name; other cells refer to it by this name.
input = cntk.input_variable(input_dim_model)  # Note that input_dim_model is used as the parameter here instead of input_dim
label = cntk.input_variable(num_output_classes)

In [10]:
def string_append_and_print(line, file_writer) :
    """Write ``line`` followed by a newline to ``file_writer`` and echo ``line`` to stdout."""
    record = line + "\n"
    file_writer.write(record)
    print(line)

In [11]:
def process1(filter, stride, filters, path_log):
    """Run the 10-fold train/test cycle for one architecture and log the results.

    For each fold ``i`` in 1..10 the files ``train.txt`` and ``test.txt``
    under ``../training_data/input/k-fold/<i>`` are passed to
    do_train_test(). A header line, one line per fold and a closing summary
    (average and minimum error) are appended to ``path_log`` and printed.

    Args:
        filter: Convolution kernel side length (must be an int, it is
            formatted with ``{:d}``).
        stride: Convolution stride (int).
        filters: Sequence of at least three int filter counts.
        path_log: Path of the log file, opened in append mode.
    """
    error_rates = []

    # The context manager closes the log even if training raises.
    with open(path_log, 'a') as file_writer:
        string_filter_details = "filter => {:d}, stride => {:d}, dept => {:d}, {:d} {:d} {:d}".format(
            filter, stride, len(filters), filters[0], filters[1], filters[2])
        # The header goes into the same log as everything else; it used to be
        # written to the unrelated module-level handle `f`.
        string_append_and_print(string_filter_details, file_writer)
        # Push the header to disk before the long-running training starts.
        file_writer.flush()

        # k-fold validation with k = 10
        for i in range(1, 11):
            # Directory holding this fold's train and test files
            data_dir = '../training_data/input/k-fold/' + str(i)
            string_index_data_set = '\tData Set : ' + str(i)
            string_append_and_print(string_index_data_set, file_writer)

            train_file = os.path.join(data_dir, "train.txt")
            test_file = os.path.join(data_dir, "test.txt")

            # Train and test on this fold; keep its error rate for the summary.
            error_rate = do_train_test(train_file, test_file, input_dim, i, file_writer,
                                       num_output_classes, filter, stride, filters)
            error_rates.append(error_rate)

            # `index_model` is the module-level flag, not the loop variable.
            if index_model > 1 :
                break

        # Average over the folds that actually ran (the loop may stop early).
        average_error_rate = sum(error_rates) / len(error_rates)
        # {1} is the minimum; the original template repeated {0} and so printed
        # the average twice.
        string_file_end_line = "\tAverage model error: {0:.2f}%\n\tmin model error: {1:.2f}%\n\t>>>>>>>>>>>>>>>>>>\n>>>>>>>>>>>>>>>>>>".format(average_error_rate, min(error_rates))
        string_append_and_print(string_file_end_line, file_writer)

In [12]:
path_log = "log.txt"
# Truncate the log so this run starts from an empty file, and close the handle
# right away instead of leaving it open while process1() appends to the same file.
with open(path_log,'w') as file:
    pass
process1(5, 2, [128,512,1024], path_log)

filter => 5, stride => 2, dept => 3, 128 512 1024
	Data Set : 1
		Minibatch: 0, Loss: 1.0982, Error: 51.56%


SystemError: <built-in function MinibatchSource_get_next_minibatch> returned a result with an error set

In [None]:
#load model
# from cntk.ops.functions import load_model
# current_model = load_model("output/model/model4.model")

In [None]:
#transform testing data for prediction

# def number_of_line(path):
#     file = open(path, 'r')
#     f = []
#     while True :
#         line = file.readline();
#         if not line :
#             break
#         f.append(line)
#     return len(f)

# def prediction_file(path_directory):
#     arr = []
#     for kk in range(1,11) :
#         if not kk == 3:
#             path_file = "traning_data_features_t" + str(kk)+ ".txt"
#             size_file = number_of_line(path_directory + path_file)

#             testing_file = os.path.join(path_directory, path_file)
#             reader_test = create_reader(testing_file, False, input_dim, num_output_classes)
#             test_input_map = {
#                     input  : reader_test.streams.features,
#                     label : reader_test.streams.labels
#                 }
#             data = reader_test.next_minibatch(size_file, input_map=test_input_map)
#             data_asarray =  data[input].asarray()

          
#             for i in range(0, size_file):
#                 patch = np.reshape(data_asarray[i], (3,31,31))
#                 print(current_model.eval(patch)[0])
#                 arr.append(current_model.eval(patch)[0])
#     print(len(arr))
#     return arr

# arr = prediction_file("output_label/t/")

In [None]:
# #prediction and write answer in file 

# predict_labels = []

# #prediction method => index : position label max
# def predict_label(index) :
#     if index == 0 :
#         return "1 0 0"
#     elif index == 1 :
#         return "0 1 0"
#     else :
#         return "0 0 1"

# path_file_prediction = "output_label/prediction_label.txt"
# file = open(path_file_prediction, 'w')
# file = open(path_file_prediction, 'a')

# for current_arr in arr :
#     file.write(predict_label(np.argmax(current_arr))+"\n")
#     predict_labels.append(predict_label(np.argmax(current_arr)))
# file.close()



In [None]:
# #test set data
# labels = []
# def get_label_from_testing_data(path):    
#     file_test = open(path)
#     while True :
#         line = file_test.readline()
#         if not line :
#             break
#         lines = line.split(" ")
#         current_label = lines[1] + " " + lines[2] + " " + lines[3]
#         labels.append(current_label)

# get_label_from_testing_data("output_label/t/traning_data_features_t1.txt")
# get_label_from_testing_data("output_label/t/traning_data_features_t2.txt")
# get_label_from_testing_data("output_label/t/traning_data_features_t4.txt")
# get_label_from_testing_data("output_label/t/traning_data_features_t5.txt")
# get_label_from_testing_data("output_label/t/traning_data_features_t6.txt")
# get_label_from_testing_data("output_label/t/traning_data_features_t7.txt")
# get_label_from_testing_data("output_label/t/traning_data_features_t8.txt")
# get_label_from_testing_data("output_label/t/traning_data_features_t9.txt")
# get_label_from_testing_data("output_label/t/traning_data_features_t10.txt")


In [None]:
# print(len(labels))

In [None]:
# size_labels = len(labels)
# t_bounary, t_inner, t_outer = 0, 0, 0
# n_bounary_inner, n_bounary_outer = 0, 0
# n_inner_bounary, n_inner_outer = 0, 0
# n_outer_bounary, n_outer_inner = 0, 0,
# for i in range(0, size_labels) :
#     if labels[i] == "0 0 1":
#         if labels[i] == predict_labels[i]:
#             t_bounary += 1 
#         elif predict_labels[i] == "0 1 0" :
#             n_bounary_inner += 1
#         elif predict_labels[i] == "1 0 0" :
#             n_bounary_outer += 1
#     elif labels[i] == "0 1 0":
#         if labels[i] == predict_labels[i]:
#             t_inner += 1 
#         elif predict_labels[i] == "0 0 1" :
#             n_inner_bounary += 1
#         elif predict_labels[i] == "1 0 0" :
#             n_inner_outer += 1
#     elif labels[i] == "1 0 0":
#         if labels[i] == predict_labels[i]:
#             t_outer += 1 
#         elif predict_labels[i] == "0 0 1" :
#             n_outer_bounary += 1
#         elif predict_labels[i] == "0 1 0" :
#             n_outer_inner += 1
# print(str(t_bounary) + " " + str(n_bounary_inner) + " " + str(n_bounary_outer))
# print(str(n_inner_bounary) + " " + str(t_inner) + " " + str(n_inner_outer))
# print(str(n_outer_bounary) + " " + str( n_outer_inner) + " " + str(n_bounary_outer))
    