<a href="https://colab.research.google.com/github/Bryant-Dental/raptor_functions/blob/main/raptor_functions/examples/supervised_end_to_end.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Install Libraries

In [1]:
# Uncomment and run once if raptor_functions is not installed in this kernel's environment
# (e.g. on a fresh Colab runtime). `%pip` installs into the environment of the running kernel.
# %pip install raptor_functions

In [2]:
# Uncomment and run once if the supporting packages are missing from this kernel's environment.
# `%pip` installs into the environment of the running kernel.
# %pip install mlflow optuna Boruta pycaret awscli boto3 tsfresh

### Configure AWS
- This is required for `train_experiments` to log results and artifacts to the AWS instance.

In [3]:
# Uncomment and run once if AWS credentials have not been configured in this environment.
# Do not paste credentials into the notebook itself.
# !aws configure

### Import Packages

In [2]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import mlflow
from raptor_functions.supervised.prediction import load_model
import xgboost as xgb
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from raptor_functions.supervised.train import train_experiments
from raptor_functions.supervised.datasets import get_data
from raptor_functions.supervised.feature_extraction import get_training_features




### Load Data

In [3]:
# Fetch the 'handheld_data' dataset through the raptor_functions `get_data` helper.
df = get_data('handheld_data')
# Preview the first rows to check the available columns.
df.head()

Unnamed: 0,exp_unique_id,exp_name,timesteps,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,sensor_10,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,sensor_22,sensor_23,sensor_24,Humidity (r.h.),measurement_stage,date_exp,time_elapsed,datetime_exp_start,datetime_exp,filename,result
0,0,Test 24,1,131.938,132.443,177.444,178.157,44.974,44.508,219.374,71.286,159.292,21.965,162.449,36.144,52.439,110.97,124.839,138.676,60.717,273.972,26.905,25.456,189.092,188.708,143.288,142.05,46.07,baseline,21/3/2022,0.0,14:12:54,2022-03-21 14:12:54.000,5 NegTest_3_20220321_14_12.txt,Control
1,0,Test 24,2,133.078,133.972,177.444,178.694,45.131,44.611,219.607,71.927,161.653,21.927,162.29,36.144,52.211,111.181,124.364,141.232,61.096,274.289,27.067,25.536,189.285,188.516,142.874,141.914,46.07,baseline,21/3/2022,0.25,14:12:54,2022-03-21 14:12:54.250,5 NegTest_3_20220321_14_12.txt,Control
2,0,Test 24,3,131.812,132.064,177.622,178.694,45.183,44.663,219.841,72.07,162.13,21.965,162.29,36.19,52.268,111.075,124.364,141.641,61.159,274.289,27.027,25.496,189.285,188.516,143.288,141.914,46.07,baseline,21/3/2022,0.5,14:12:54,2022-03-21 14:12:54.500,5 NegTest_3_20220321_14_12.txt,Control
3,0,Test 24,4,131.059,132.064,177.444,178.694,45.183,44.663,219.607,72.07,161.812,21.927,161.971,36.237,52.211,111.075,124.72,141.914,61.159,274.289,27.067,25.496,189.092,188.516,142.599,141.368,46.07,baseline,21/3/2022,0.75,14:12:54,2022-03-21 14:12:54.750,5 NegTest_3_20220321_14_12.txt,Control
4,0,Test 24,5,131.435,131.938,177.622,178.694,45.131,44.663,219.607,72.286,161.971,22.003,161.971,36.144,52.268,111.075,125.673,142.187,61.286,273.972,26.986,25.456,189.092,188.516,142.187,140.825,46.07,baseline,21/3/2022,1.0,14:12:54,2022-03-21 14:12:55.000,5 NegTest_3_20220321_14_12.txt,Control


### Feature Engineering
- Features are extracted from each sensor array for all cycles of the experiment.
- These include simple statistical features such as mean, median, standard deviation, and variance, as well as transform features, e.g. FFT and wavelet transform.
- There is an option to use only the raw signals, or to also add the offset and gradient signals (the `offset` and `gradient` arguments).

In [4]:
# Tree model handed to get_training_features as `tree_model`.
# A fixed random_state makes the forest deterministic, so the feature-engineering
# step gives the same result on every Restart & Run All.
forest = RandomForestClassifier(random_state=42)


In [None]:
# Build the training features from the loaded frame, with the offset and gradient
# signals enabled and `forest` supplied as the tree model.
# NOTE(review): how `tree_model` is used is not visible here — presumably for feature
# selection; confirm against raptor_functions.supervised.feature_extraction.
# NOTE: this rebinds `df`, so re-running this cell feeds the already-transformed frame
# back in. Re-run the Load Data cell first if this cell needs to be executed again.
df = get_training_features(df, offset=True, gradient=True, tree_model=forest)

In [13]:
# Preview the frame returned by get_training_features.
df.head()

Unnamed: 0,exp_unique_id,exp_name,timesteps,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,sensor_10,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,sensor_22,sensor_23,sensor_24,Humidity (r.h.),measurement_stage,date_exp,time_elapsed,datetime_exp_start,datetime_exp,filename,exp_unique_idoffset,exp_nameoffset,timestepsoffset,sensor_1offset,sensor_2offset,sensor_3offset,sensor_4offset,sensor_5offset,sensor_6offset,sensor_7offset,sensor_8offset,sensor_9offset,sensor_10offset,sensor_11offset,sensor_12offset,sensor_13offset,sensor_14offset,sensor_15offset,sensor_16offset,sensor_17offset,sensor_18offset,sensor_19offset,sensor_20offset,sensor_21offset,sensor_22offset,sensor_23offset,sensor_24offset,Humidity (r.h.)offset,measurement_stageoffset,date_expoffset,time_elapsedoffset,datetime_exp_startoffset,datetime_expoffset,filenameoffset,exp_unique_idgradient,exp_namegradient,timestepsgradient,sensor_1gradient,sensor_2gradient,sensor_3gradient,sensor_4gradient,sensor_5gradient,sensor_6gradient,sensor_7gradient,sensor_8gradient,sensor_9gradient,sensor_10gradient,sensor_11gradient,sensor_12gradient,sensor_13gradient,sensor_14gradient,sensor_15gradient,sensor_16gradient,sensor_17gradient,sensor_18gradient,sensor_19gradient,sensor_20gradient,sensor_21gradient,sensor_22gradient,sensor_23gradient,sensor_24gradient,Humidity (r.h.)gradient,measurement_stagegradient,date_expgradient,time_elapsedgradient,datetime_exp_startgradient,datetime_expgradient,filenamegradient,result
0,0,Test 24,1,131.938,132.443,177.444,178.157,44.974,44.508,219.374,71.286,159.292,21.965,162.449,36.144,52.439,110.97,124.839,138.676,60.717,273.972,26.905,25.456,189.092,188.708,143.288,142.05,46.07,baseline,21/3/2022,0.0,14:12:54,2022-03-21 14:12:54.000,5 NegTest_3_20220321_14_12.txt,0,Test 24,1,-0.28625,-0.366875,-0.089,-0.447375,-0.182875,-0.135625,-0.320875,-0.89275,-2.463875,-0.0095,0.27875,-0.005875,0.149625,-0.118375,-0.134125,-3.004875,-0.474,-0.437,-0.111625,-0.04,-0.04825,0.239625,0.688,0.68025,46.07,baseline,21/3/2022,0.0,14:12:54,2022-03-21 14:12:54.000,5 NegTest_3_20220321_14_12.txt,0,Test 24,1,1.14,1.529,0.0,0.537,0.157,0.103,0.233,0.641,2.361,-0.038,-0.159,0.0,-0.228,0.211,-0.475,2.556,0.379,0.317,0.162,0.08,0.193,-0.192,-0.414,-0.136,46.07,baseline,21/3/2022,0.0,14:12:54,2022-03-21 14:12:54.000,5 NegTest_3_20220321_14_12.txt,Control
0,1,Test 24,1,132.57,133.46,176.735,177.622,45.758,45.235,219.841,70.157,155.294,22.42,160.074,37.169,52.154,108.781,123.656,135.52,60.528,273.972,27.764,26.257,190.057,189.285,148.791,147.358,46.52,baseline,21/3/2022,0.0,14:07:01,2022-03-21 14:07:01.000,3 PosTest_2_20220321_14_07.txt,1,Test 24,1,0.426875,0.635875,-0.199125,-0.2895,-0.16475,-0.137125,-0.2925,-0.988125,-2.294625,0.03325,0.0785,0.058125,0.056625,0.0,-0.01575,-2.912625,-0.449375,0.078875,-0.0205,-0.0605,0.048125,-2.842171e-14,1.394625,1.309625,46.52,baseline,21/3/2022,0.0,14:07:01,2022-03-21 14:07:01.000,3 PosTest_2_20220321_14_07.txt,1,Test 24,1,-0.506,-0.509,0.354,0.535,0.158,0.157,0.234,0.704,2.138,-0.076,0.0,-0.093,-0.113,0.0,0.118,2.227,0.315,0.0,0.0,0.04,0.0,0.0,-1.291,-1.136,46.52,baseline,21/3/2022,0.0,14:07:01,2022-03-21 14:07:01.000,3 PosTest_2_20220321_14_07.txt,Control
0,2,Test 24,1,133.46,134.101,176.735,177.978,45.183,44.611,218.909,71.215,158.203,22.003,162.29,36.005,52.097,110.655,124.01,138.012,60.654,273.656,27.149,25.655,188.9,188.134,142.324,140.825,46.14,baseline,21/3/2022,0.0,14:12:21,2022-03-21 14:12:21.000,5 NegTest_2_20220321_14_12.txt,2,Test 24,1,0.3975,0.32075,-0.46525,-0.179,-0.09775,-0.143,-0.49425,-0.883125,-2.52375,0.08525,0.2195,-0.11025,0.0775,-0.02625,0.412125,-2.750625,-0.521,-0.237,-0.025625,0.009625,0.096,0.0,0.478,0.269875,46.14,baseline,21/3/2022,0.0,14:12:21,2022-03-21 14:12:21.000,5 NegTest_2_20220321_14_12.txt,2,Test 24,1,-0.255,-0.129,0.532,0.358,0.0,0.156,0.698,0.712,2.343,-0.038,-0.16,0.185,-0.34,0.0,-0.236,2.273,0.378,0.316,0.041,0.0,0.0,0.382,-0.41,-0.135,46.14,baseline,21/3/2022,0.0,14:12:21,2022-03-21 14:12:21.000,5 NegTest_2_20220321_14_12.txt,Control
0,3,Test 24,1,130.31,130.434,176.03,176.912,44.198,43.685,218.909,71.927,156.359,21.927,161.971,36.888,52.611,111.498,124.601,135.132,60.151,273.972,26.58,24.98,188.134,187.561,138.277,137.22,45.62,baseline,21/3/2022,0.0,14:18:02,2022-03-21 14:18:02.000,5 PosTest_3_20220321_14_18.txt,3,Test 24,1,1.0215,0.434625,-0.308,-0.399125,-0.142125,-0.11525,-0.20325,-0.93575,-2.39125,0.033,0.218625,0.06975,0.079125,-0.01325,0.4875,-3.06925,-0.550375,-0.397125,-0.015,-0.049625,0.04775,0.16625,1.59725,1.6165,45.62,baseline,21/3/2022,0.0,14:18:02,2022-03-21 14:18:02.000,5 PosTest_3_20220321_14_18.txt,3,Test 24,1,-1.115,-0.497,0.352,0.355,0.155,0.103,0.232,0.574,1.999,-0.038,-0.159,-0.046,0.0,0.0,-0.355,2.351,0.44,0.0,0.0,0.04,0.0,-0.19,-1.189,-1.179,45.62,baseline,21/3/2022,0.0,14:18:02,2022-03-21 14:18:02.000,5 PosTest_3_20220321_14_18.txt,Control
0,4,Test 24,1,135.002,135.52,176.558,177.622,46.55,46.022,220.544,68.489,145.799,22.917,159.604,36.469,51.531,106.939,125.793,132.697,60.654,275.561,28.135,26.742,190.834,190.057,145.096,143.841,46.64,baseline,21/3/2022,0.0,14:02:49,2022-03-21 14:02:49.000,3 NegTest_3_20220321_14_02.txt,4,Test 24,1,-0.162125,-0.244,-0.376125,-0.378625,-0.246125,-0.2175,0.23475,-0.979125,-2.385,0.048125,0.31225,-0.12775,0.2045,0.113875,1.010125,-3.090125,-0.545,1.35075,-0.0725,-0.086375,0.218875,0.0965,0.524125,0.691,46.64,baseline,21/3/2022,0.0,14:02:49,2022-03-21 14:02:49.000,3 NegTest_3_20220321_14_02.txt,4,Test 24,1,0.518,0.521,0.531,0.356,0.213,0.211,-0.235,0.691,1.987,-0.039,-0.156,0.186,-0.113,-0.101,-0.597,2.435,0.378,-1.589,0.0,0.081,0.0,0.0,-0.42,-0.692,46.64,baseline,21/3/2022,0.0,14:02:49,2022-03-21 14:02:49.000,3 NegTest_3_20220321_14_02.txt,Control


### Model Training

In [None]:
# Run the training experiments on the feature frame.
# Per the "Configure AWS" section, train_experiments logs results and artifacts to an
# AWS instance, so AWS credentials must be configured before running this cell.
train_experiments(df)

### Load Model

In [None]:

# Placeholder: replace with the full path to the logged model before running.
logged_model = 'fullpath/to/logged_model'
# Load the model through the raptor_functions `load_model` helper.
loaded_model = load_model(logged_model)


In [None]:
# Load model as a PyFuncModel.
# Alternative to the previous cell: uses the `logged_model` path defined there and
# rebinds `loaded_model` to the MLflow pyfunc wrapper.
loaded_model = mlflow.pyfunc.load_model(logged_model)



In [None]:
# Load model from a pickle file.
# Placeholder: replace with the path to the pickled model before running.
# NOTE(review): if load_model unpickles this file, only point it at files from a
# trusted source — unpickling can execute arbitrary code.
model_pickle_filepath = 'path/to/pickle_model'

# Pass the pickle path itself. The cell previously passed `logged_model`, which left
# `model_pickle_filepath` unused and never loaded the pickle.
loaded_model = load_model(model_pickle_filepath)

### Prediction

In [None]:
# Build a single-sample input for prediction: the row at position 5, every column
# except the last one, promoted to a 2-D array of shape (1, n_columns - 1).
data = np.atleast_2d(df.iloc[5, :-1].values)

In [None]:
# Run the loaded model on the single-row input built in the previous cell.
prediction = loaded_model.predict(data)