In [1]:
# Notebook reference tag; the original author's remark here was "Note to matt".
# NOTE(review): `ref` is not read by any cell visible in this notebook — confirm it is still needed.
ref='nb92-'

import os

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Plot styling shared by every figure in this notebook.
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases removed the old names, so use the new name when it is available and
# fall back to the original name otherwise (older Matplotlib).
_whitegrid_style = next(
    (
        style_name
        for style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
        if style_name in plt.style.available
    ),
    'seaborn-whitegrid',
)
plt.style.use(_whitegrid_style)

# Serif fonts, 16 pt text everywhere, thicker lines and larger markers.
matplotlib.rcParams.update({
    'font.family': 'serif',
    'axes.titlesize':16,
    'axes.labelsize':16,
    'axes.xmargin':0.1,
    'axes.ymargin':0.1,
    'legend.fontsize':16,
    'xtick.labelsize' : 16,
    'ytick.labelsize' : 16,
    'lines.markersize': 10,
    'lines.linewidth' : 3,
    'font.size': 16
})


import pickle 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler 

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
import mlflow.tensorflow


# Importing Data

In [2]:

# Load the pre-processed table. index_col=False tells pandas not to use the
# first CSV column as the row index.
raw_csv_path = "../data/processed/mlp_dataset_raw_xyz.csv"
dataset = pd.read_csv(raw_csv_path, index_col=False)
dataset.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,293,294,295,296,297,298,299,300,301,302
0,-426.0,-413.0,-394.0,-367.0,-340.0,-313.0,-286.0,-261.0,-235.0,-220.0,...,115.0,115.0,107.0,103.0,96.0,98.0,98.0,0.133031,1527700000.0,52-1
1,-362.0,-362.0,-362.0,-352.0,-343.0,-326.0,-324.0,-310.0,-299.0,-292.0,...,77.0,75.0,71.0,64.0,53.0,45.0,43.0,0.328511,1527700000.0,52-1
2,-312.0,-311.0,-307.0,-313.0,-317.0,-317.0,-320.0,-321.0,-330.0,-331.0,...,25.0,22.0,14.0,13.0,28.0,67.0,100.0,0.462072,1527700000.0,52-1
3,-408.0,-420.0,-413.0,-408.0,-410.0,-425.0,-425.0,-413.0,-374.0,-343.0,...,108.0,107.0,108.0,114.0,115.0,108.0,103.0,0.516548,1527700000.0,52-1
4,-343.0,-331.0,-324.0,-312.0,-306.0,-301.0,-302.0,-306.0,-312.0,-312.0,...,55.0,51.0,45.0,44.0,38.0,36.0,34.0,0.591766,1527700000.0,52-1


In [3]:
# Features are the first 300 columns by position; the target is the column at position 300.
# NOTE(review): the rendered head of `dataset` shows an 'Unnamed: 0' label — confirm
# whether a saved row index is present as a data column, because these positional
# slices shift by one if it is.
X = dataset.iloc[:, :300].to_numpy()
y = dataset.iloc[:, 300].to_numpy()

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Hyperparameters

In [4]:


def _make_run(name, batch_size, epochs, nodes, act, dropout):
    """Build one hyperparameter set as a plain dict.

    Each call creates its own StandardScaler and its own metrics list, so no
    mutable object is shared between runs.

    Args:
        name: label of the parameter set.
        batch_size: mini-batch size used for training.
        epochs: number of training epochs.
        nodes: hidden-layer widths, one entry per hidden layer.
        act: activation names, one entry per hidden layer.
        dropout: dropout values, one entry per hidden layer (0 means keep all nodes).

    Returns:
        dict with the keys 'name', 'scaler', 'batch_size', 'epochs', 'nodes',
        'act', 'dropout', 'loss' and 'metrics'.
    """
    return {
        'name': name,
        'scaler': StandardScaler(),  # scaler options noted by the author: standard, minmax, maxabs
        'batch_size': batch_size,
        'epochs': epochs,
        'nodes': nodes,
        'act': act,
        'dropout': dropout,  # dropout 0 means keep all nodes
        'loss': 'mean_squared_error',
        'metrics': ['mean_squared_error'],
    }


# Single hidden layer of 128 units; the three variants differ in batch size / epochs.
run1 = _make_run('param1', batch_size=16, epochs=30, nodes=[128], act=['relu'], dropout=[0])
run2 = _make_run('param2', batch_size=32, epochs=10, nodes=[128], act=['relu'], dropout=[0])
run3 = _make_run('param3', batch_size=32, epochs=20, nodes=[128], act=['relu'], dropout=[0])

# Two hidden layers (256 -> 128), each followed by dropout 0.8.
run4 = _make_run(
    'param4',
    batch_size=16,
    epochs=30,
    nodes=[256, 128],
    act=['relu', 'relu'],
    dropout=[0.8, 0.8],
)


# Only the parameter sets listed here are trained.
grid = [run1, run4]


In [5]:
def define_model(inp_dim, run):
    """Build a fully connected regression network with the Keras functional API.

    Every hidden layer is a Dense layer followed by a Dropout layer; the output
    is a single linear unit. Layers keep the Keras default initializer
    (Xavier, aka glorot_uniform).

    Args:
        inp_dim: number of input features.
        run: hyperparameter dict; reads the per-layer lists run['nodes'],
            run['act'] and run['dropout'], indexed in parallel.

    Returns:
        An uncompiled keras.Model named 'model'.
    """
    inputs = keras.Input(shape=(inp_dim,), name='input')

    # At least one hidden layer is required: with an empty spec the loop still
    # visits index 0, which fails exactly where the first-layer lookup would.
    hidden_count = max(len(run['nodes']), 1)

    x = inputs
    for layer_idx in range(hidden_count):
        x = Dense(run['nodes'][layer_idx], activation=run['act'][layer_idx])(x)
        x = Dropout(run['dropout'][layer_idx])(x)

    outputs = Dense(1, activation='linear')(x)
    return keras.Model(inputs=inputs, outputs=outputs, name='model')

In [6]:
# Train one model per entry in `grid` and record each as a separate MLflow run
# under the "Gait Speed Prediction" experiment.
mlflow.set_experiment("Gait Speed Prediction")

for i,run in enumerate(grid):
    # Fit the scaler on the training split only, then persist it so the same
    # transform can be re-applied at prediction time.
    scaler=run['scaler']
    sX_train=scaler.fit_transform(X_train)
    # The context manager flushes and closes the file before it is uploaded.
    with open('scaler.pkl', 'wb') as scaler_file:
        pickle.dump(scaler, scaler_file)

    # Using the run as a context manager ends it even if training raises, so a
    # failed iteration cannot leave an active run that blocks the next start_run.
    with mlflow.start_run(run_name='raw_unrounded-'+str(i)):
        mlflow.log_artifact('scaler.pkl')
        model=define_model(X.shape[1], run)
        model.compile(loss=run['loss'] , optimizer="adam", metrics=run['metrics'])
        # Kept at its original position in the sequence; it is re-applied on every iteration.
        mlflow.tensorflow.autolog(every_n_iter=2)
        history = model.fit(sX_train,y_train, batch_size=run['batch_size'], epochs=run['epochs'], verbose=1, validation_split=0.1)
        # Testing
        sX_test=scaler.transform(X_test)
        score = model.evaluate(sX_test, y_test,  verbose=1)
        # score[0] is the loss and score[1] the first compiled metric; every run in
        # `grid` uses mean_squared_error there, so its square root is the RMSE.
        mlflow.log_metrics({'test_loss': score[0], 'test_rmse': np.sqrt(score[1])})

2022-01-05 19:24:49.116068: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:925] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-01-05 19:24:49.129001: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory
2022-01-05 19:24:49.129024: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1850] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
2022-01-05 19:24:49.130544: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (o

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


2022-01-05 19:25:42.458498: W tensorflow/python/util/util.cc:368] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


INFO:tensorflow:Assets written to: /tmp/tmpp9pogcv2/model/data/model/assets
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
INFO:tensorflow:Assets written to: /tmp/tmp6hdx4xzt/model/data/model/assets


In [7]:
#Predicting based on selected run

# MLflow run whose logged scaler and model are reloaded below; kept in one
# place so both downloads always refer to the same run.
run_id = '3329b6cce2fd426cad7194f9da526103'

#Retrieve Scaler
client = mlflow.tracking.MlflowClient()
local_dir = "/tmp/artifact_downloads"
# Create the download folder if it is missing (no separate existence check needed).
os.makedirs(local_dir, exist_ok=True)

# An empty artifact path downloads the run's whole artifact tree into local_dir.
local_path = client.download_artifacts(run_id, '', local_dir)
# NOTE: pickle.load can execute arbitrary code from the file — only load
# artifacts that come from a tracking store you trust.
with open(os.path.join(local_dir, 'scaler.pkl'), 'rb') as file:
    scaler=pickle.load(file)

#Retrieve Model
logged_model = 'runs:/' + run_id + '/model'
# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

#Predict
# Apply the scaler that was stored with the run before predicting.
sX_test=scaler.transform(X_test)
loaded_model.predict(sX_test)



array([[0.46681085],
       [0.38615432],
       [0.46681085],
       ...,
       [0.46681085],
       [0.4615082 ],
       [0.46681085]], dtype=float32)