**Habilitando a GPU**

In [1]:
import tensorflow as tf
# Abort the notebook early unless TensorFlow reports a GPU under the expected
# device name; otherwise announce the device that was found.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print(f'Found GPU at: {device_name}')
    print('HABILITANDO GPU - DONE')
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0
HABILITANDO GPU - DONE


**Verificando o status da GPU (nvidia-smi)**

In [2]:
# Shell escape: print the NVIDIA driver/CUDA versions and the current GPU usage table.
!nvidia-smi

Fri Oct  2 01:04:04 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.23.05    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   41C    P0    23W /  75W |    209MiB /  7611MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

**Habilitando o Google Drive**

In [3]:
from google.colab import drive
# Mount Google Drive at /content/drive; the FILE_* paths used by this notebook
# all live under that mount point.
# NOTE(review): force_remount=True presumably remounts even when the drive is
# already mounted, which would make this cell safe to re-run - confirm in the Colab docs.
drive.mount('/content/drive', force_remount=True)

print('DONE')

Mounted at /content/drive
DONE


**Realizando os IMPORTS**

In [4]:
import csv
import os
import time
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_svmlight_file
from sklearn.datasets import dump_svmlight_file

print('DONE')

DONE


**Métodos**

In [5]:
def get_time():
    """Return the current time as reported by ``time.time()`` (float seconds)."""
    now = time.time()
    return now


def get_time_diff(start_time):
    """Return the seconds elapsed since ``start_time``, rounded to 3 decimals.

    Args:
        start_time: A timestamp on the ``time.time()`` scale, e.g. a value
            previously returned by ``get_time()``.

    Returns:
        float: ``time.time() - start_time`` passed through ``round_float``.
    """
    elapsed = time.time() - start_time
    return round_float(elapsed)


def round_float(value):
    """Round ``value`` to three decimal places and return the result as a float.

    The rounding is done by formatting with the ``.3f`` specifier and parsing
    the text back, so integer inputs also come back as floats.
    """
    three_decimals = format(value, ".3f")
    return float(three_decimals)


def get_num_rows(file):
    """Count the lines of the text file located at ``file``.

    Args:
        file: Path of the file to read (anything accepted by ``open``).

    Returns:
        int: Number of lines; ``0`` for an empty file. A final line without a
        trailing newline still counts as one line.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the handle deterministically; the previous
    # version left the file open until garbage collection.
    with open(file) as handle:
        return sum(1 for _ in handle)


def generate_hash():
    """Return the current ``get_time()`` value formatted with no decimals.

    Despite the name, no hashing is involved: the result is the timestamp
    rendered as a string of whole seconds, so two calls within the same second
    can return the same value.
    """
    timestamp = get_time()
    return format(timestamp, ".0f")


def load_svm_file(input_file):
    """Load an svmlight file and return ``[features, labels]``.

    Args:
        input_file: Path handed to ``load_svmlight_file``.

    Returns:
        list: Two items, the feature matrix followed by the label vector, in
        the form produced by ``load_svmlight_file``.
    """
    features, labels = load_svmlight_file(input_file)
    return [features, labels]


def dump_svm_file(x, y, output_file):
    """Write features ``x`` and labels ``y`` to ``output_file`` in svmlight format.

    Thin wrapper around ``dump_svmlight_file``; returns nothing.
    """
    dump_svmlight_file(X=x, y=y, f=output_file)


def split_data(input_file, holdout_size=0.5, validation_size=0.3, random_state=5):
    """Load an svmlight file and split it into train, validation and test sets.

    The split is done in two steps: first ``holdout_size`` of the samples are
    set aside from the training data, then ``validation_size`` of that holdout
    becomes the validation set and the remainder becomes the test set. With
    the defaults this gives roughly 50% train, 15% validation and 35% test.

    Args:
        input_file: Path of the svmlight file to load.
        holdout_size: Fraction of all samples removed from the training set
            (passed as ``test_size`` to the first ``train_test_split``).
        validation_size: Fraction of the holdout used for validation (passed
            as ``test_size`` to the second ``train_test_split``).
        random_state: Seed used for both shuffles so the split is reproducible.

    Returns:
        list: ``[x_train, x_validation, x_test, y_train, y_validation, y_test]``.
        The three feature matrices are converted with ``.toarray()``; the
        label vectors are returned as produced by ``train_test_split``.
    """
    x, y = load_svm_file(input_file)

    # Step 1: training set versus everything else.
    x_train, x_holdout, y_train, y_holdout = train_test_split(
        x, y, test_size=holdout_size, random_state=random_state)

    # Step 2: the "test_size" share of the holdout is the *validation* set,
    # the remaining share is the test set.
    x_test, x_validation, y_test, y_validation = train_test_split(
        x_holdout, y_holdout, test_size=validation_size, random_state=random_state)

    # Callers use len() on the feature matrices, so hand back dense arrays.
    x_train = x_train.toarray()
    x_validation = x_validation.toarray()
    x_test = x_test.toarray()
    return [x_train, x_validation, x_test, y_train, y_validation, y_test]


def save_splited_data(x, y, output_file):
    """Persist one data split (features ``x``, labels ``y``) to ``output_file``.

    Delegates to ``dump_svm_file``; returns nothing.
    """
    dump_svm_file(x=x, y=y, output_file=output_file)


def get_tabulation(file, header, delimiter=','):
    """Create a CSV file, write its header row and return the open writer.

    Args:
        file: Path of the CSV file to create (an existing file is truncated).
        header: Sequence of column names written as the first row.
        delimiter: Field separator, ``','`` by default.

    Returns:
        list: ``[tabulation_writer, tabulation_file]`` - the ``csv.writer`` and
        the underlying open file object. The caller owns the file and must
        close it once all rows are written.

    Raises:
        OSError: If the file cannot be opened for writing.
    """
    # newline='' is what the csv module requires for files it writes to;
    # without it the writer's '\r\n' terminator becomes '\r\r\n' on Windows.
    tabulation_file = open(file, mode='w', newline='')
    try:
        tabulation_writer = csv.writer(
            tabulation_file, delimiter=delimiter, quotechar='"', quoting=csv.QUOTE_MINIMAL)
        tabulation_writer.writerow(header)
    except Exception:
        # Do not leak the handle when the header cannot be written.
        tabulation_file.close()
        raise
    return [tabulation_writer, tabulation_file]


def save_tabulation_row(tabulation_writer, row):
    """Append ``row`` to the CSV behind ``tabulation_writer``.

    Args:
        tabulation_writer: Object exposing ``writerow`` (e.g. a ``csv.writer``).
        row: Sequence of cell values for one line.
    """
    write_row = tabulation_writer.writerow
    write_row(row)


def get_output_table(configs, experiment_hash):
    """Open the classifier-results CSV for one experiment.

    The file name comes from ``configs["result_classifiers"]`` with every
    ``{timestamp}`` placeholder replaced by ``experiment_hash``. The metrics
    header row is written immediately by ``get_tabulation``.

    Returns:
        dict: ``{'tabulation_writer': <writer>, 'tabulation_file': <open file>}``.
        The caller is responsible for closing ``tabulation_file``.
    """
    columns = ['Classifier', 'F1Score', 'Accuracy', 'Precision', 'Recall', 'Execution Time (s)']
    output_path = configs["result_classifiers"].replace("{timestamp}", experiment_hash)
    writer, handle = get_tabulation(output_path, columns)
    return {
        'tabulation_writer': writer,
        'tabulation_file': handle,
    }


def save_tabulation_conf_mat(path, classifier, result_conf_mat, folder):
    """Write a confusion matrix to ``<path>/<folder>/<classifier>.csv``.

    Args:
        path: Base directory of the results.
        classifier: Classifier identifier; ``str(classifier)`` is the file stem.
        result_conf_mat: Iterable of rows, each row an iterable of cell values.
        folder: Sub-directory of ``path`` that receives the file. It is created
            (including missing parents) when it does not exist yet.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            opened for writing.
    """
    mk_folder = path + "/" + folder
    # exist_ok=True replaces the exists()/mkdir() pair: it creates missing
    # parent directories and has no check-then-create race.
    os.makedirs(mk_folder, exist_ok=True)

    csv_path = mk_folder + "/" + str(classifier) + '.csv'
    # The with-block flushes and closes the file (the handle used to be left
    # open); newline='' is what the csv module requires for output files.
    with open(csv_path, mode='w', newline='') as tabulation_csv_file:
        tabulation_writer = csv.writer(
            tabulation_csv_file, delimiter=';', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        tabulation_writer.writerows(result_conf_mat)


def save_split_results(x_train, x_validation, x_test, y_train, y_validation, y_test):
    """Save the three data splits and a CSV summarising their sizes.

    Each split is written in svmlight format to its ``FILE_SVMLIGHT_*_OUTPUT``
    path, and one ``[Type, Length]`` row per split is appended to the CSV at
    ``FILE_RESULT_SPLIT``.

    Args:
        x_train, x_validation, x_test: Feature matrices; they must support
            ``len()`` because the row count is recorded.
        y_train, y_validation, y_test: Label vectors matching the matrices.
    """
    header = ['Type', 'Length']
    # (CSV label, word used in the log message, features, labels, destination)
    splits = [
        ('Train', 'train', x_train, y_train, FILE_SVMLIGHT_TRAIN_OUTPUT),
        ('Validation', 'validation', x_validation, y_validation, FILE_SVMLIGHT_VALIDATION_OUTPUT),
        # The log line for this split used to say "train" (copy-paste slip).
        ('Test', 'test', x_test, y_test, FILE_SVMLIGHT_TEST_OUTPUT),
    ]
    tabulation_writer, tabulation_file = get_tabulation(FILE_RESULT_SPLIT, header)
    try:
        for label, noun, features, labels, output_file in splits:
            print(f'Saving {noun} file on {output_file}.')
            save_splited_data(features, labels, output_file)
            save_tabulation_row(tabulation_writer, [label, len(features)])
    finally:
        # Close the summary CSV even when one of the saves fails.
        tabulation_file.close()

print('DONE')

DONE


**Iniciando as variáveis**

In [6]:
# Project folder inside the mounted Google Drive (keep the trailing slash:
# the paths below are built by appending to it).
FILE_PATH_DRIVE = '/content/drive/My Drive/Colab Notebooks/'

# Full dataset, in svmlight format, that gets split.
FILE_SVMLIGHT_INPUT = f'{FILE_PATH_DRIVE}data/credit.svmlight'

# Destination of each split produced from the dataset above.
FILE_SVMLIGHT_TRAIN_OUTPUT = f'{FILE_PATH_DRIVE}data/train/credit.svmlight'
FILE_SVMLIGHT_VALIDATION_OUTPUT = f'{FILE_PATH_DRIVE}data/validation/credit.svmlight'
FILE_SVMLIGHT_TEST_OUTPUT = f'{FILE_PATH_DRIVE}data/test/credit.svmlight'

# CSV that records how many rows each split contains.
FILE_RESULT_SPLIT = f'{FILE_PATH_DRIVE}result/split/tabulation.csv'

# Placeholder; overwritten with a real timestamp right before the split runs.
start_time = 0

print('DONE')

DONE


**SPLIT**

In [8]:
# Time only the load + split step; writing the files is not included in the
# reported duration.
start_time = get_time()
# split_data returns dense feature arrays plus the matching label vectors, in
# the order train / validation / test.
x_train, x_validation, x_test, y_train, y_validation, y_test = split_data(FILE_SVMLIGHT_INPUT)
print(f'Split Executed in {get_time_diff(start_time)} seconds.')
# Write each split to its svmlight file and record the split sizes in the
# FILE_RESULT_SPLIT CSV.
save_split_results(x_train, x_validation, x_test, y_train, y_validation, y_test)

print('DONE')

Split Executed in 57.169 seconds.
Saving train file on /content/drive/My Drive/Colab Notebooks/data/train/credit.svmlight.
Saving validation file on /content/drive/My Drive/Colab Notebooks/data/validation/credit.svmlight.
Saving train file on /content/drive/My Drive/Colab Notebooks/data/test/credit.svmlight.
DONE
