In [384]:
import warnings
warnings.filterwarnings("ignore")
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
import glob
import os
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score
from keras.callbacks import ModelCheckpoint
from sklearn.naive_bayes import MultinomialNB
from sklearn.neural_network import MLPClassifier
from sklearn import tree, svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from imblearn.over_sampling import SMOTE
from collections import Counter
from Inputs_HDLAct import *
from Extrasensory_Manipulation import *

In [385]:
def get_filepath(file_dir,uuid):
    """This function gets the uuid of a subject and returns the file path for
    csv file of the subject
    
    Input:
        file_dir[string]: holds the directory of the desired file
        uuid[string]: 32 character string holding uuid
    
    Output:
        filepath[string]: a string of the address of the file that can be read easily
    
    """
    filename = '{}.features_labels.csv'.format(uuid)
    filepath = os.path.join(file_dir, filename)
    return(filepath)

# Leitura dos dados em .csv

In [386]:
def readdata_csv(data_dir):
    """This function gets the directory of the datasets and returns the dataset
    containing information of all 60 users
    
    Input:
        data_dir[string]: holds the directory of all the csv files (60)
        
    Output:
        grand_dataset[dict]: a dictionary of all the users' data. The format is:
            grand_dataset:{'uuid1': dataframe of csv file of the user 1
                           'uuid2': dataframe of csv file of the user 2
                           'uuid3': dataframe of csv file of the user 3
                           ...}
    
    """
    length_uuids = 36 # number of characters for each uuid
    
    __path = os.path.join(os.getcwd(), data_dir, "*.csv")           # Para fins de debug e encontrar a pasta correta ao qual o python estava se referenciando
    print(f"Pasta: {__path}")
    
    data_list = glob.glob("../data/*csv")
    print(f"Numero de arquivos encontrados {len(data_list)}")       # Imprime a quantidade de arquivos encontrada
    
    # grand_dataset is a dict. that holds the uuids and correspondong datast
    grand_dataset = {}
    lengthOFdataset = 0
    #Interação sobre todos os arquivos correspondentes a cada usuário (60 usuários para recuperar o uuid de cada um deles)
    for i in range(len(data_list)):
#    for i in range(5):
        # Lê a UUID de cada usuário para montar um dicionário com todas as UUIDS
        uuid = os.path.basename(data_list[i])[:length_uuids]
        print(f"UUID: {uuid}")
        
        dataset_ith = pd.read_csv(data_list[i])
        print(i,dataset_ith.shape)
        
        lengthOFdataset += len(dataset_ith)
        grand_dataset[uuid] = dataset_ith
        
    print(lengthOFdataset)
    
    return(grand_dataset)


# Leitura da pasta com todos os arquivos de cada usuário

In [387]:
def get_folds_uuids(fold_dir):
    """
    The function gets the directory where the the folds text files are located
    and returns a list of five np.arrays in each of them the uuids of the
    corresponding fold are stored.
    
    Input:
        fold_dir[string]: holds the directory in which folds are
    
    Output:
        folds_uuids[list]: a list of numpy arrays. Each array holds the uuids
                    in that fold. ex.
                    folds_uuids = [('uuid1','uuid2',...,'uuid12'),
                                   ('uuid13','uuid14',...,'uuid24'),
                                   ...,
                                   ('uuid49','uuid50',...,'uuid60')]
    """
    num_folds = 5
    # folds_uuids is gonna be a list of np.arrays. each array is a set of uuids
    folds_uuids = [0,1,2,3,4]
    # This loop reads all 5 test folds (iphone and android) and stores uuids
    for i in range(0,num_folds):
        filename = 'fold_{}_test_android_uuids.txt'.format(i)
        filepath = os.path.join(fold_dir, filename)
        # aux1 is the uuids of ith test fold for "android"
        aux1 = pd.read_csv(filepath,header=None,delimiter='\n')
        aux1 = aux1.values
        
        filename = 'fold_%s_test_iphone_uuids.txt' %i
        filepath = os.path.join(fold_dir, filename)
        # aux2 is the uuids of ith test fold for "iphone"
        aux2 = pd.read_csv(filepath,header=None,delimiter='\n')
        aux2 = aux2.values
        
        # Then we concatenate them
        folds_uuids[i] = np.concatenate((aux1,aux2),axis=0)
        
    return(folds_uuids)

# Determinando o range dos sensores

In [388]:
def sensors():
    """This function sets the ranges of the various sensors"""
    Sensor = {}
    Sensor['Acc'] = list(range(1,27))
    Sensor['Gyro'] = list(range(27,53))
    Sensor['Mag'] = list(range(53,84))
    Sensor['W_acc'] = list(range(84,130))
    Sensor['Compass'] = list(range(130,139))
    Sensor['Loc'] = list(range(139,156))
    Sensor['Aud'] = list(range(156,182))
    Sensor['AP'] = list(range(182,184))
    Sensor['PS'] = list(np.append(range(184,210),range(218,226)))
    return(Sensor)

In [389]:
def activities():
    activity = {}
    activity['label:LYING_DOWN'] = 226
    activity['label:SITTING'] = 227
    activity['label:FIX_walking'] = 228
    activity['label:FIX_running'] = 229
    activity['label:BICYCLING'] = 230
    activity['label:SLEEPING'] = 231
    activity['label:OR_indoors'] = 236
    activity['label:OR_outside'] = 237
    activity['label:IN_A_CAR'] = 238
    activity['label:ON_A_BUS'] = 239
    activity['label:DRIVE_-_I_M_THE_DRIVER'] = 240
    activity['label:DRIVE_-_I_M_A_PASSENGER'] = 241
    activity['label:PHONE_IN_POCKET'] = 244
    activity['label:PHONE_IN_HAND'] = 272
    activity['label:PHONE_IN_BAG'] = 273
    activity['label:PHONE_ON_TABLE'] = 274
    return(activity)

# Função para limpeza dos dados
Essa função foi refatorada para evitar que classes fossem repetidas em uma mesma situação dos sensores e 
caso não haja nenhuma observação dentre os labels utilizados realizar uma limpeza nos dados e retornar o X e o Y de treinamento para cada caso

In [390]:
def data_cleaner(dataset, feature_set_range, parent_labels):
    features = dataset.iloc[:,feature_set_range]  #add fillna after this line
    labels = dataset[parent_labels]
    # Junta os labels com as features para formar o dataset que será utilizado nos modelos
    raw_data = pd.concat([features,labels],axis=1)
    # Retira do dataset as linhas que contenham elementos que não são números
    raw_data.dropna(inplace=True)
    df_labels = raw_data.iloc[:,len(feature_set_range):]

    df_labels_values = df_labels.values
    df_labels_values_my = df_labels_values.astype(int)

    #print('My float: {}'.format(df_labels_values.shape))
    #print('My int: {}'.format(df_labels_values_my.shape))

    row, column = df_labels_values_my.shape
    cont = 0
    list_labels = []
    indices = []
    missing_indexes = [] # lista de indices que não pertencem a nenhuma classe

    for i in range(row):
        indices.append(i)
        for c in range(column):
            value = df_labels_values_my[i][c]
            # debug para saber se quebrou
            if value != 1:
                if value != 0:
                    print("Quebrou")
        
            if value == 1:
                cont += 1
        # Testa se ocorreu um missing indice
        if cont == 0:
            missing_indexes.append(i)
        cont = 0

    # A diferença acontece porque há linhas em que as amostras não pertencem a classe alguma
    #print("Tamanho: {}".format(len(list_labels)))
    #print("Missing: {}".format(len(missing_indexes)))
    #print("Indices: {}".format(len(indices)))

    # Conficurando o novo dataset e deletando as linhas que não pertencem a nenhuma classe existente
    raw_data['indice'] = np.array(indices).reshape(raw_data.shape[0])
    raw_data.set_index('indice', inplace=True)
    raw_data_treated =  raw_data.drop(raw_data.index[missing_indexes])

    df_labels['indice'] = np.array(indices).reshape(raw_data.shape[0])
    df_labels.set_index('indice', inplace=True)
    df_labels_treated = df_labels.drop(df_labels.index[missing_indexes])

    #print("Shape das features tratadas {}".format(raw_data_treated.shape))
    #print("Shape dos labels tratados {}".format(df_labels_treated.shape))

    # Salvando uma coluna para os labels de cada classe
    df_labels_values = df_labels_treated.values
    df_labels_values_my = df_labels_values.astype(int)

    row,column = df_labels_values_my.shape

    print(row)
    print(column)

    for i in range(row):
        for c in range(column):
            value = df_labels_values_my[i][c]
            if value == 1:
                list_labels.append(c)
                break
            
    #print("Lista de Labels: {}".format(len(list_labels)))
            
    #Determinando o X_parent
    X_parent = raw_data_treated.iloc[:,range(0, len(feature_set_range))]
    X_parent = preprocessing.scale(X_parent, axis=0)
    y_parent = np.array(list_labels).reshape(X_parent.shape[0]) 
    
    return X_parent, y_parent       

# Função utilizada para avaliação dos modelos

In [391]:
def return_accuracy(clf, y_test, x_test):
    #y_pred = clf.predict_classes(X_test) #Esta função foi descontinuada
    y_pred = clf.predict(x_test)
    classes_x = np.argmax(y_pred,axis=1)
    #print(classes_x)
    f1_accuracy = f1_score(y_test, classes_x, average='macro')
    BA_accuracy = balanced_accuracy_score(y_test, classes_x)
    accuracy = accuracy_score(y_test, classes_x)
    return accuracy, f1_accuracy, BA_accuracy
 

# Função para treinamento de um classificador utilizando o Keras

In [392]:
def run_dnn(x_train, y_train, n_out = 6):
    clf = Sequential()
    clf.add(Dense(128, input_dim=len(feature_set_range), activation='relu'))
    clf.add(Dropout(drop_out_per))
    clf.add(Dense(64, activation='relu'))
    clf.add(Dense(n_out, activation='softmax'))

    clf.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    clf.fit(x_train, y_train, batch_size=nb_batch,  epochs=nb_epoch, validation_split = val_split, class_weight=None)#, callbacks=[checkpointer])
    
    return clf

# Montando o dataset

In [393]:
from Inputs_HDLAct import *
#reading all data and storing in "dataset" a DF
dataset_uuids = readdata_csv("./data/") 
    
uuids = list(dataset_uuids.keys())
    
#Combining the all users' data to dataset
dataset = dataset_uuids[uuids[0]]
    
for i in range(1,len(uuids)):
    dataset = pd.concat([dataset,dataset_uuids[uuids[i]]],axis=0)
    
sensors_list = sensors()
feature_set_range = []
    
for i in range(len(sensors_to_use)):
    feature_set_range += sensors_list[sensors_to_use[i]]
    


Pasta: /home/pedro/Documentos/WorkspaceProjetos/IA_Projects/Contexto/src/./data/*.csv
Numero de arquivos encontrados 60
UUID: 27E04243-B138-4F40-A164-F40B60165CF3
0 (4927, 278)
UUID: 83CF687B-7CEC-434B-9FE8-00C3D5799BE6
1 (9539, 278)
UUID: 11B5EC4D-4133-4289-B475-4E737182A406
2 (8845, 278)
UUID: D7D20E2E-FC78-405D-B346-DBD3FD8FC92B
3 (6210, 278)
UUID: 5EF64122-B513-46AE-BCF1-E62AAC285D2C
4 (3911, 278)
UUID: CCAF77F0-FABB-4F2F-9E24-D56AD0C5A82F
5 (8472, 278)
UUID: 9759096F-1119-4E19-A0AD-6F16989C7E1C
6 (9959, 278)
UUID: F50235E0-DD67-4F2A-B00B-1F31ADA998B9
7 (2266, 278)
UUID: 78A91A4E-4A51-4065-BDA7-94755F0BB3BB
8 (11996, 278)
UUID: E65577C1-8D5D-4F70-AF23-B3ADB9D3DBA3
9 (3441, 278)
UUID: 3600D531-0C55-44A7-AE95-A7A38519464E
10 (5203, 278)
UUID: 7CE37510-56D0-4120-A1CF-0E23351428D2
11 (9761, 278)
UUID: 99B204C0-DD5C-4BB7-83E8-A37281B8D769
12 (6038, 278)
UUID: 1538C99F-BA1E-4EFB-A949-6C7C47701B20
13 (6549, 278)
UUID: 5119D0F8-FCA8-4184-A4EB-19421A40DE0D
14 (6617, 278)
UUID: 0A986513-7828

# Classificador para 6 classes

In [394]:
# Labels Utilizadas para seis classes
parent_labels = ['label:COOKING','label:SITTING','label:LYING_DOWN','label:FIX_running','label:FIX_walking','label:BICYCLING']

for i in range(len(parent_labels)):
    data = dataset[parent_labels[i]]
    print(parent_labels[i])
    #print(data.value_counts())

label:COOKING
label:SITTING
label:LYING_DOWN
label:FIX_running
label:FIX_walking
label:BICYCLING


## Pre-Processamento dos dados

In [395]:
X_parent, y_parent = data_cleaner(dataset, feature_set_range, parent_labels)
print(X_parent.shape)
print(y_parent.shape)

6065
6
(6065, 217)
(6065,)


## Observação dos dados

In [396]:
y_df = pd.DataFrame(y_parent)
print(y_df.value_counts())

2    2500
1    2372
5     571
4     396
0     177
3      49
dtype: int64


In [397]:
#Separando em treino e teste
seed = np.random.seed(5)
X_train, X_test, y_train, y_test = train_test_split(X_parent, y_parent, test_size=test_split, random_state = seed, stratify=y_parent)

In [398]:
# Definindo a estratégia de oversample
counter = Counter(y_train)
print(counter)
oversample = SMOTE(sampling_strategy={2:2000, 1:1909, 0:200, 5:600, 4:450, 3:50})
#Aplicando a reamostragem nos conjuntos X e Y
X_over, y_over = oversample.fit_resample(X_train, y_train)
print(y_over.shape)

Counter({2: 2000, 1: 1897, 5: 457, 4: 317, 0: 142, 3: 39})
(5209,)


In [399]:
#Rodando o código sem oversample com seis labels
model_6 = run_dnn(X_train, y_train)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [400]:
# Resultados do modelo
print(return_accuracy(model_6, y_test, X_test))

(0.9307502061005771, 0.8596614073772559, 0.8270774615864557)


In [401]:
# Treinamento do modelo com oversampling utilizando SMOTE
model_6_over = run_dnn(X_over, y_over)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [402]:
# Avaliação do modelo com oversampling
print(return_accuracy(model_6_over, y_test, X_test))

(0.9348722176422094, 0.8851271012499219, 0.8662567706185293)


# Classificador para 8 classes

In [403]:
# Treinamento com 8 labels
parent_labels_8 = ['label:COOKING','label:SITTING','label:LYING_DOWN','label:FIX_running','label:FIX_walking','label:BICYCLING', 'label:WATCHING_TV', 'label:SURFING_THE_INTERNET']

for i in range(len(parent_labels_8)):
    data = dataset[parent_labels_8[i]]
    print(parent_labels_8[i])
    #print(data.value_counts())
    
print(len(parent_labels_8))

label:COOKING
label:SITTING
label:LYING_DOWN
label:FIX_running
label:FIX_walking
label:BICYCLING
label:WATCHING_TV
label:SURFING_THE_INTERNET
8


In [404]:
#Separação dos conjuntos para aplicação de acordo com os labels
X_parent_8, y_parent_8 = data_cleaner(dataset, feature_set_range, parent_labels_8)
print(X_parent_8.shape)
print(y_parent_8.shape)

3557
8
(3557, 217)
(3557,)


In [405]:
#Observado o conjunto das classes
y_df_8 = pd.DataFrame(y_parent_8)
print(y_df_8.value_counts())

1    1797
2     800
5     469
4     291
0     132
6      38
3      30
dtype: int64


In [406]:
#Separando em treino e teste para 8 labels
X_train_8, X_test_8, y_train_8, y_test_8 = train_test_split(X_parent_8, y_parent_8, test_size=test_split, random_state = seed, stratify=y_parent_8)

In [408]:
# Definindo a estratégia de oversample para 8 labels
counter = Counter(y_train_8)
print(counter)
oversample = SMOTE(sampling_strategy={1:1437, 2:640, 5:375, 4:233, 0:130, 6:50, 3: 40})
#Aplicando a reamostragem nos conjuntos X e Y
X_over_8, y_over_8 = oversample.fit_resample(X_train_8, y_train_8)
print(y_over_8.shape)

Counter({1: 1437, 2: 640, 5: 375, 4: 233, 0: 106, 6: 30, 3: 24})
(2905,)


In [409]:
#Treinamento do modelo com 8 classes
model8 = run_dnn(X_train_8, y_train_8, 7)
model8_over = run_dnn(X_over_8, y_over_8, 7)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

In [410]:
# Avaliação dos modelos com 8 classes
print(return_accuracy(model8, y_test_8, X_test_8))
print(return_accuracy(model8_over, y_test_8, X_test_8))

(0.9030898876404494, 0.8375975595481103, 0.8243226936917326)
(0.8960674157303371, 0.8062526178215632, 0.7750564309946189)


# Utilização do Modelo com 10 classes

In [411]:
# Treinamento com 10 labels
parent_labels_10 = ['label:COOKING','label:SITTING','label:LYING_DOWN','label:FIX_running','label:FIX_walking','label:BICYCLING', 'label:WATCHING_TV', 'label:SURFING_THE_INTERNET', 'label:COMPUTER_WORK', 'label:LOC_home']

for i in range(len(parent_labels_10)):
    data = dataset[parent_labels_10[i]]
    print(parent_labels_10[i])
    #print(data.value_counts())
    
print(len(parent_labels_10))

label:COOKING
label:SITTING
label:LYING_DOWN
label:FIX_running
label:FIX_walking
label:BICYCLING
label:WATCHING_TV
label:SURFING_THE_INTERNET
label:COMPUTER_WORK
label:LOC_home
10


In [412]:
# Limpeza dos dados e seleção para aplicar ao modelo
X_parent_10, y_parent_10 = data_cleaner(dataset, feature_set_range, parent_labels_10)
print(X_parent_10.shape)
print(y_parent_10.shape)

3138
10
(3138, 217)
(3138,)


In [413]:
# Observação da frequencia das classes
y_df_10 = pd.DataFrame(y_parent_10)
print(y_df_10.value_counts())

1    1321
9     768
5     442
2     244
4     214
0     119
3      30
dtype: int64


In [414]:
#Separando em treino e teste para 10 labels
X_train_10, X_test_10, y_train_10, y_test_10 = train_test_split(X_parent_10, y_parent_10, test_size=test_split, random_state = seed, stratify=y_parent_10)

In [416]:
# Definindo a estratégia de oversample para 8 labels
counter = Counter(y_train_10)
print(counter)
oversample = SMOTE(sampling_strategy={1:1057, 9:614, 5:400, 2:200, 4:200, 0:100, 3: 40})
#Aplicando a reamostragem nos conjuntos X e Y
X_over_10, y_over_10 = oversample.fit_resample(X_train_10, y_train_10)
print(y_over_10.shape)

Counter({1: 1057, 9: 614, 5: 354, 2: 195, 4: 171, 0: 95, 3: 24})
(2611,)


In [418]:
# Treinamento dos modelos
model_10 = run_dnn(X_train_10, y_train_10, 10)
model_10_over = run_dnn(X_over_10, y_over_10, 10)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

In [419]:
# Avaliação dos modelos
print(return_accuracy(model_10, y_test_10, X_test_10))
print(return_accuracy(model_10_over, y_test_10, X_test_10))

(0.856687898089172, 0.8324256460335624, 0.8248332706685816)
(0.8375796178343949, 0.833539391923867, 0.8322397820499386)


# Modelo com 12 Classes

In [420]:
# Treinamento com 12 labels
parent_labels_12 = ['label:COOKING','label:SITTING','label:LYING_DOWN','label:FIX_running','label:FIX_walking','label:BICYCLING', 'label:WATCHING_TV', 'label:SURFING_THE_INTERNET', 'label:COMPUTER_WORK', 'label:LOC_home', 'label:TALKING','label:DRIVE_-_I_M_THE_DRIVER']

for i in range(len(parent_labels_12)):
    data = dataset[parent_labels_12[i]]
    print(parent_labels_12[i])
    #print(data.value_counts())
    
print(len(parent_labels_12))

label:COOKING
label:SITTING
label:LYING_DOWN
label:FIX_running
label:FIX_walking
label:BICYCLING
label:WATCHING_TV
label:SURFING_THE_INTERNET
label:COMPUTER_WORK
label:LOC_home
label:TALKING
label:DRIVE_-_I_M_THE_DRIVER
12


In [421]:
# Limpeza dos dados e seleção para aplicar ao modelo
X_parent_12, y_parent_12 = data_cleaner(dataset, feature_set_range, parent_labels_12)
print(X_parent_12.shape)
print(y_parent_12.shape)

3170
12
(3170, 217)
(3170,)


In [422]:
# Observação da frequencia das classes
y_df_12 = pd.DataFrame(y_parent_12)
print(y_df_12.value_counts())

1     1321
9      768
5      442
2      244
4      214
0      119
10      32
3       30
dtype: int64


In [423]:
#Separando em treino e teste para 10 labels
X_train_12, X_test_12, y_train_12, y_test_12 = train_test_split(X_parent_12, y_parent_12, test_size=test_split, random_state = seed, stratify=y_parent_12)

In [426]:
# Definindo a estratégia de oversample para 12 labels
counter = Counter(y_train_12)
print(counter)
oversample = SMOTE(sampling_strategy={1:1057, 9:614, 5:400, 2:200, 4:200, 0:100, 3: 40, 10:40})
#Aplicando a reamostragem nos conjuntos X e Y
X_over_12, y_over_12 = oversample.fit_resample(X_train_12, y_train_12)
print(y_over_12.shape)

Counter({1: 1057, 9: 614, 5: 354, 2: 195, 4: 171, 0: 95, 10: 26, 3: 24})
(2651,)


In [428]:
# Treinamento dos modelos
model_12 = run_dnn(X_train_12, y_train_12, 12)
model_12_over = run_dnn(X_over_12, y_over_12, 12)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

In [429]:
# Avaliação dos modelos
print(return_accuracy(model_12, y_test_12, X_test_12))
print(return_accuracy(model_12_over, y_test_12, X_test_12))

(0.8564668769716088, 0.8217526497358838, 0.8168690044728251)
(0.8470031545741324, 0.8037060020229683, 0.8188788993398628)
