# Trabalhando os dados

## Carregando o dataset

In [1]:
import pandas as pd
import numpy as np

In [2]:
ds_test = pd.read_csv('dataset/test.csv', header=0)
print(ds_test.shape)
ds_test.head(5)

(3644, 507)


Unnamed: 0,id,p_num,time,bg-5:55,bg-5:50,bg-5:45,bg-5:40,bg-5:35,bg-5:30,bg-5:25,...,activity-0:45,activity-0:40,activity-0:35,activity-0:30,activity-0:25,activity-0:20,activity-0:15,activity-0:10,activity-0:05,activity-0:00
0,p01_8459,p01,06:45:00,,9.2,,,10.2,,,...,,,,,,,,,,
1,p01_8460,p01,11:25:00,,,9.9,,,9.4,,...,,,,,,,,Walk,Walk,Walk
2,p01_8461,p01,14:45:00,,5.5,,,5.5,,,...,,,,,,,,,,
3,p01_8462,p01,04:30:00,,3.4,,,3.9,,,...,,,,,,,,,,
4,p01_8463,p01,04:20:00,,,8.3,,,10.0,,...,,,,,,,,,,


## Preenchendo os nulos com zero (com exceção das colunas de Atividade Física)

In [3]:
for prefix in ['bg-', 'insulin-', 'carbs-', 'hr-', 'steps-', 'cals-']:
    cols_to_fill = [col for col in ds_test.columns if col.startswith(prefix)]

    ds_test.loc[:, cols_to_fill] = ds_test.loc[:, cols_to_fill].fillna(0.0)

ds_test.head(5)

Unnamed: 0,id,p_num,time,bg-5:55,bg-5:50,bg-5:45,bg-5:40,bg-5:35,bg-5:30,bg-5:25,...,activity-0:45,activity-0:40,activity-0:35,activity-0:30,activity-0:25,activity-0:20,activity-0:15,activity-0:10,activity-0:05,activity-0:00
0,p01_8459,p01,06:45:00,0.0,9.2,0.0,0.0,10.2,0.0,0.0,...,,,,,,,,,,
1,p01_8460,p01,11:25:00,0.0,0.0,9.9,0.0,0.0,9.4,0.0,...,,,,,,,,Walk,Walk,Walk
2,p01_8461,p01,14:45:00,0.0,5.5,0.0,0.0,5.5,0.0,0.0,...,,,,,,,,,,
3,p01_8462,p01,04:30:00,0.0,3.4,0.0,0.0,3.9,0.0,0.0,...,,,,,,,,,,
4,p01_8463,p01,04:20:00,0.0,0.0,8.3,0.0,0.0,10.0,0.0,...,,,,,,,,,,


## Preencher com "other" as atividades físicas nulas

In [4]:
cols_to_fill = [col for col in ds_test.columns if col.startswith('activity-')]

ds_test.loc[:, cols_to_fill] = ds_test.loc[:, cols_to_fill].fillna('other')

## Convertendo o time num inteiro crescente

In [5]:
ds_test['minutes_since_start'] = pd.to_datetime(ds_test['time'], format="%H:%M:%S").dt.hour * 60 + pd.to_datetime(ds_test['time'], format="%H:%M:%S").dt.minute
ds_test = ds_test.drop(columns=['time'])
ds_test.head(5)

Unnamed: 0,id,p_num,bg-5:55,bg-5:50,bg-5:45,bg-5:40,bg-5:35,bg-5:30,bg-5:25,bg-5:20,...,activity-0:45,activity-0:40,activity-0:35,activity-0:30,activity-0:25,activity-0:20,activity-0:15,activity-0:10,activity-0:05,activity-0:00
0,p01_8459,p01,0.0,9.2,0.0,0.0,10.2,0.0,0.0,10.3,...,other,other,other,other,other,other,other,other,other,other
1,p01_8460,p01,0.0,0.0,9.9,0.0,0.0,9.4,0.0,0.0,...,other,other,other,other,other,other,other,Walk,Walk,Walk
2,p01_8461,p01,0.0,5.5,0.0,0.0,5.5,0.0,0.0,5.2,...,other,other,other,other,other,other,other,other,other,other
3,p01_8462,p01,0.0,3.4,0.0,0.0,3.9,0.0,0.0,4.7,...,other,other,other,other,other,other,other,other,other,other
4,p01_8463,p01,0.0,0.0,8.3,0.0,0.0,10.0,0.0,0.0,...,other,other,other,other,other,other,other,other,other,other


## Removendo as colunas de identificação

In [6]:
cols_to_pop = ['id', 'p_num']
identificadores = ds_test[cols_to_pop].copy()
ds_test = ds_test.drop(columns=cols_to_pop)
ds_test.head(5)

Unnamed: 0,bg-5:55,bg-5:50,bg-5:45,bg-5:40,bg-5:35,bg-5:30,bg-5:25,bg-5:20,bg-5:15,bg-5:10,...,activity-0:45,activity-0:40,activity-0:35,activity-0:30,activity-0:25,activity-0:20,activity-0:15,activity-0:10,activity-0:05,activity-0:00
0,0.0,9.2,0.0,0.0,10.2,0.0,0.0,10.3,0.0,0.0,...,other,other,other,other,other,other,other,other,other,other
1,0.0,0.0,9.9,0.0,0.0,9.4,0.0,0.0,9.1,0.0,...,other,other,other,other,other,other,other,Walk,Walk,Walk
2,0.0,5.5,0.0,0.0,5.5,0.0,0.0,5.2,0.0,0.0,...,other,other,other,other,other,other,other,other,other,other
3,0.0,3.4,0.0,0.0,3.9,0.0,0.0,4.7,0.0,0.0,...,other,other,other,other,other,other,other,other,other,other
4,0.0,0.0,8.3,0.0,0.0,10.0,0.0,0.0,12.2,0.0,...,other,other,other,other,other,other,other,other,other,other


## Trocando as colunas de atividade por um valor inteiro

In [7]:
# ds['activity-0:00'].unique()
mapeamento = {
    'other': 0, 'Walk': 1, 'Indoor climbing': 2, 'Yoga': 3, 'Zumba': 4, 'HIIT': 5,
    'Dancing': 6, 'Swim': 7, 'Outdoor Bike': 8, 'Aerobic Workout': 9, 'Sport': 10,
    'Walking': 11, 'Running': 12, 'Swimming': 13, 'Run': 14, 'Weights': 15, 'Workout': 16,
    'Tennis': 17, 'Strength training': 18, 'Stairclimber': 19, 'Spinning': 20, 'Hike': 21,
    'Bike':21
}

prefix = 'activity-'
cols_to_map = [col for col in ds_test.columns if col.startswith(prefix)]
ds_test[cols_to_map] = ds_test[cols_to_map].apply(lambda col: col.map(mapeamento))

ds_test.head(5)

Unnamed: 0,bg-5:55,bg-5:50,bg-5:45,bg-5:40,bg-5:35,bg-5:30,bg-5:25,bg-5:20,bg-5:15,bg-5:10,...,activity-0:45,activity-0:40,activity-0:35,activity-0:30,activity-0:25,activity-0:20,activity-0:15,activity-0:10,activity-0:05,activity-0:00
0,0.0,9.2,0.0,0.0,10.2,0.0,0.0,10.3,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0.0
1,0.0,0.0,9.9,0.0,0.0,9.4,0.0,0.0,9.1,0.0,...,0,0,0,0,0,0,0,1,1,1.0
2,0.0,5.5,0.0,0.0,5.5,0.0,0.0,5.2,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0.0
3,0.0,3.4,0.0,0.0,3.9,0.0,0.0,4.7,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0.0
4,0.0,0.0,8.3,0.0,0.0,10.0,0.0,0.0,12.2,0.0,...,0,0,0,0,0,0,0,0,0,0.0


# Carregando o modelo e prevendo a base de teste

In [8]:
import tensorflow as tf
model = tf.keras.models.load_model("brist1d.keras")

2024-10-31 00:05:24.763255: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-31 00:05:24.772249: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1730343924.782545   13058 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1730343924.785991   13058 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-31 00:05:24.798178: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [9]:
y_pred = model.predict(ds_test)
ds_pred = pd.DataFrame(y_pred, columns=['bg+1:00'])

I0000 00:00:1730343931.079547   13119 service.cc:148] XLA service 0x77bd58005100 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730343931.079564   13119 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce GTX 1650, Compute Capability 7.5
2024-10-31 00:05:31.083830: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1730343931.111776   13119 cuda_dnn.cc:529] Loaded cuDNN version 90501


[1m 82/114[0m [32m━━━━━━━━━━━━━━[0m[37m━━━━━━[0m [1m0s[0m 623us/step  

I0000 00:00:1730343931.603319   13119 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m114/114[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


## Juntando com os identificadores

In [11]:
df_result = pd.concat([identificadores, ds_pred], axis=1)
df_result.pop('p_num')
df_result.head(5)

Unnamed: 0,id,bg+1:00
0,p01_8459,8.681263
1,p01_8460,5.701169
2,p01_8461,7.143938
3,p01_8462,10.670377
4,p01_8463,6.942614


## Exportando o resultado para CSV

In [12]:
df_result.to_csv("dataset/predictions.csv", index=False)