# Note: this notebook is a work in progress!

In [1]:
ref='nb10-' #Note to matt

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import matplotlib
# The bundled seaborn style sheets were renamed to 'seaborn-v0_8-*' in matplotlib 3.6
# and the old names were removed in 3.8, so use whichever name this installation ships.
_whitegrid_style = 'seaborn-v0_8-whitegrid'
if _whitegrid_style not in plt.style.available:
    _whitegrid_style = 'seaborn-whitegrid'
plt.style.use(_whitegrid_style)

# Notebook-wide figure defaults: serif text, size-16 fonts, thick lines, large markers.
matplotlib.rcParams.update({
    'font.family': 'serif',
    'axes.titlesize':16,
    'axes.labelsize':16,
    'axes.xmargin':0.1,
    'axes.ymargin':0.1,
    'legend.fontsize':16,
    'xtick.labelsize' : 16,
    'ytick.labelsize' : 16,
    'lines.markersize': 10,
    'lines.linewidth' : 3,
    'font.size': 16
})


import pickle 
import tempfile
import os


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler 

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Conv1D
import mlflow.tensorflow

RANDOM_STATE=24


# Define CNN Model

In [None]:
# Two fixed convolutional layers followed by a flexible number of dense layers
def make_cnn(run, input_shape=(100, 3)): #Functional API #Default initializer for all layers is Xavier (aka glorot_uniform)
    """Build the (uncompiled) CNN regression model described by one grid entry.

    Parameters
    ----------
    run : dict
        Hyperparameters for this model. Keys read here:
        'filters', 'kernel_size', 'strides', 'act_conv' -- indexed at 0 and 1,
        one entry per convolutional layer;
        'nodes', 'act', 'dropout' -- one entry per dense layer, each indexed
        up to len(run['nodes']) - 1.
    input_shape : tuple, optional
        Shape of a single sample. Defaults to (100, 3), the per-sample shape
        of the arrays built by make_data.

    Returns
    -------
    keras.Model
        Functional model named 'model' ending in a single linear output unit.
    """
    # `shape` excludes the batch axis and must be a flat tuple of ints.
    inputs = keras.Input(shape=input_shape, name='input')

    x = inputs
    for i in range(2):
        x = Conv1D(run['filters'][i], run['kernel_size'][i], strides=run['strides'][i], activation=run['act_conv'][i])(x)

    # Conv1D output is 3-D (batch, steps, filters). Dense only acts on the last
    # axis, so collapse to (batch, features) first; otherwise the model would
    # emit one value per step instead of one value per sample.
    x = keras.layers.Flatten()(x)

    for i in range(len(run['nodes'])):
        x = Dense(run['nodes'][i], activation=run['act'][i])(x)
        x = Dropout(run['dropout'][i])(x)

    outputs = Dense(1, activation='linear')(x)
    model = keras.Model(inputs=inputs, outputs=outputs, name='model')
    return model

# Import and reshape Data

In [2]:
def make_data(data_key):
    """Load one of the processed datasets and return a train/test split.

    The CSV is read with columns 0-299 as features and column 300 as the
    target. The 300 feature columns are regrouped into three blocks of 100
    (0-99, 100-199, 200-299) that become the last axis of a 3-D array.

    Parameters
    ----------
    data_key : str
        Either 'xyz_raw' or 'xyz_interp'.

    Returns
    -------
    X_train, X_test : numpy.ndarray
        Float arrays of shape (n_samples, 100, 3).
    y_train, y_test : numpy.ndarray
        1-D target arrays.
    data_file_path : str
        Path of the CSV file that was read.

    Raises
    ------
    ValueError
        If `data_key` is not one of the known dataset keys.
    """
    data_files = {
        'xyz_raw': "../data/processed/mlp_dataset_raw_xyz.csv",
        'xyz_interp': "../data/processed/mlp_dataset_interp_xyz.csv",
    }
    if data_key not in data_files:
        # Fail with a clear message instead of an UnboundLocalError further down.
        raise ValueError(
            "Unknown data_key %r; expected one of %s" % (data_key, sorted(data_files))
        )
    data_file_path = data_files[data_key]
    dataset = pd.read_csv(data_file_path, index_col=False)

    y = dataset.iloc[:, 300].values
    flat = dataset.iloc[:, 0:300].values

    # Stack the three 100-column blocks along a new last axis -> (n_samples, 100, 3).
    X = np.stack(
        [flat[:, 0:100], flat[:, 100:200], flat[:, 200:300]],
        axis=-1,
    ).astype(np.float64)

    #Split training/testing
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)
    return X_train, X_test, y_train, y_test, data_file_path

# Import Hyperparameters

In [None]:
# Import Hyperparameters
from cnn_hyperparams_10 import grid

# Train and Test

In [None]:
def _log_pickled_artifact(obj, filename):
    """Pickle `obj` into a temporary directory and attach it to the active MLflow run."""
    with tempfile.TemporaryDirectory() as tmpdir:
        artifact_path = os.path.join(tmpdir, filename)
        # Close (and therefore flush) the file before MLflow copies it.
        with open(artifact_path, 'wb') as artifact_file:
            pickle.dump(obj, artifact_file)
        mlflow.log_artifact(artifact_path)


mlflow.set_experiment("Gait Speed Prediction")

# One MLflow run per hyperparameter combination in the grid.
for run in grid:
    with mlflow.start_run(run_name=str(run['id'])):
        mlflow.set_tag('Model', run['model'])
        mlflow.set_tag('Dataset', run['dataset'])
        mlflow.set_tag('Data splitting Random State', RANDOM_STATE)
        X_train, X_test,y_train, y_test, data_file_path=make_data(run['dataset'])
        mlflow.set_tag('Data File Path', data_file_path)
        # mlflow.log_artifact(data_file_path) #Uncomment this if the dataset needs to be saved along with the model

        scaler_X=run['scaler_X']
        # scikit-learn scalers only accept 2-D input. Flatten every sample to one
        # row so each (step, channel) position is scaled as its own feature, then
        # restore the 3-D layout expected by the convolutional layers.
        sX_train=scaler_X.fit_transform(X_train.reshape(len(X_train), -1)).reshape(X_train.shape)
        sX_test=scaler_X.transform(X_test.reshape(len(X_test), -1)).reshape(X_test.shape)
        _log_pickled_artifact(scaler_X, 'scaler_X.pkl')
        mlflow.set_tag('Scaler_X', str(scaler_X.get_params()))

        scaler_y=run['scaler_y']
        if scaler_y == 'none':
            sy_train=y_train
            sy_test=y_test
            mlflow.set_tag('Scaler_y','No scale is applied to traget (i.e. speed)')
        else:
            sy_train=scaler_y.fit_transform(y_train.reshape(-1, 1))
            sy_test=scaler_y.transform(y_test.reshape(-1, 1))
            _log_pickled_artifact(scaler_y, 'scaler_y.pkl')
            # Tag under 'Scaler_y' so the feature scaler's tag is not overwritten.
            mlflow.set_tag('Scaler_y', str(scaler_y.get_params()))

        # make_cnn is the model builder defined in this notebook.
        model=make_cnn(run)
        model.compile(loss=run['loss'] , optimizer="adam", metrics=run['metrics'])
        mlflow.tensorflow.autolog() #This will log params passed in model.fit
        history = model.fit(sX_train,sy_train, batch_size=run['batch_size'], epochs=run['epochs'], verbose=1, validation_split=0.1)

        # Testing
        score = model.evaluate(sX_test, sy_test,  verbose=1)
        # NOTE(review): taking sqrt(score[1]) assumes the first entry of
        # run['metrics'] is a mean squared error -- confirm against the grid.
        # When a target scaler is used, both values are in scaled units.
        mlflow.log_metrics({'test_loss': score[0], 'test_RMSE': np.sqrt(score[1])})
    

In [None]:
# #Predicting based on selected run (to be used in production)

# #Retrieve Scaler
# client = mlflow.tracking.MlflowClient() 
# local_dir = "/tmp/artifact_downloads" 
# if not os.path.exists(local_dir): 
# 	os.mkdir(local_dir) 

# local_path = client.download_artifacts('3329b6cce2fd426cad7194f9da526103', '', local_dir) 
# file = open('/tmp/artifact_downloads/scaler.pkl', 'rb')
# scaler=pickle.load(file)

# #Retrieve Model
# logged_model = 'runs:/3329b6cce2fd426cad7194f9da526103/model'
# # Load model as a PyFuncModel.
# loaded_model = mlflow.pyfunc.load_model(logged_model)

# #Predict 
# sX_test=scaler.transform(X_test)
# loaded_model.predict(sX_test)