In [1]:
#import libraries
import flask
from flask import Flask, jsonify, request
import pandas as pd
import pickle
import os
import numpy as np
import copy

from biosppy.signals import ecg 
from biosppy.signals import eeg
from biosppy.signals import resp
from scipy.interpolate import interp1d 

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Flask application object; the @app.route decorators below register their
# handlers on it and the __main__ block at the bottom of the file runs it.
app = Flask(__name__)


def preprocess_data(data):
    """
    Convert raw records into the encoded/scaled layout the model consumes.

    The "crew" and "seat" columns are encoded with the module-level
    ``cat_encoder``. Every column that remains after the bookkeeping columns
    are removed is scaled with the module-level ``scaler``.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw records. Must contain "crew" and "seat"; may optionally contain
        "experiment", "id" and "Unnamed: 0", which are discarded.

    Returns
    -------
    pandas.DataFrame
        Encoded categorical columns followed by the scaled numerical columns,
        on a fresh default integer index.

    Raises
    ------
    KeyError
        If "crew" or "seat" is missing from ``data``.
    """
    # get categorical and numerical features
    cat_features = data[["crew", "seat"]]

    # Drop every non-numerical/bookkeeping column independently.  Grouping the
    # drops inside a shared try/except meant that a missing "id" column
    # silently skipped the removal of "Unnamed: 0" as well.
    data = data.drop(
        columns=["crew", "seat", "experiment", "id", "Unnamed: 0"],
        errors="ignore",
    )

    # encode and scale the features
    print("Encoding categorical features")
    cat_features_encoded = cat_encoder.transform(cat_features).toarray()
    print("Scaling numerical features")
    scaled_features = scaler.transform(data)

    encoded_df = pd.DataFrame(
        cat_features_encoded, columns=cat_encoder.get_feature_names_out()
    )
    scaled_df = pd.DataFrame(scaled_features, columns=data.columns)
    return pd.concat([encoded_df, scaled_df], axis=1)

def interpolation_fn(timestamps, biosppy_ts, biosppy_values):
    """
    Resample a derived signal onto the requested time steps.

    A linear interpolant is fitted through the points
    (``biosppy_ts``, ``biosppy_values``) and evaluated at ``timestamps``;
    time steps outside the fitted range are linearly extrapolated.
    """
    resampler = interp1d(
        biosppy_ts,
        biosppy_values,
        kind="linear",
        fill_value="extrapolate",
    )
    resampled_values = resampler(timestamps)
    return resampled_values

def extract_biosignal_features(data):
    """
    Build the derived bio-signal feature table for ``data``.

    Heart rate is taken from the ECG analysis of the "ecg" column and
    respiration rate from the analysis of the "r" column. The power of each EEG
    electrode column is computed per frequency band (theta, alpha_low,
    alpha_high, beta, gamma). Every derived series is interpolated onto the
    values of the "time" column so that the result has one row per input row.

    Returns a DataFrame whose columns are "heart_rate", "resp_rate" and then
    "<electrode>_<band>" for every band (in the order above) and electrode.
    """
    features = pd.DataFrame()

    # heart rate from ECG
    print("Deriving heart rate")
    ecg_result = ecg.ecg(data["ecg"], show=False)
    features["heart_rate"] = interpolation_fn(
        data["time"], ecg_result["heart_rate_ts"], ecg_result["heart_rate"]
    )

    # respiration rate from the r signal
    print("Deriving respiration rate")
    resp_result = resp.resp(data["r"], show=False)
    features["resp_rate"] = interpolation_fn(
        data["time"], resp_result["resp_rate_ts"], resp_result["resp_rate"]
    )

    # EEG band powers: one output column per (band, electrode) pair
    print("Deriving EEG bands")
    eeg_features = [
        'eeg_fp1', 'eeg_f7', 'eeg_f8', 'eeg_t4', 'eeg_t6',
        'eeg_t5', 'eeg_t3', 'eeg_fp2', 'eeg_o1', 'eeg_p3',
        'eeg_pz', 'eeg_f3', 'eeg_fz', 'eeg_f4', 'eeg_c4',
        'eeg_p4', 'eeg_poz', 'eeg_c3', 'eeg_cz', 'eeg_o2',
    ]
    eeg_result = eeg.get_power_features(data[eeg_features])
    time_list = list(data["time"])

    # (progress message, key of the band in the EEG result), in output order
    band_steps = (
        ("1. Theta", "theta"),
        ("2. Alpha low", "alpha_low"),
        ("3. Alpha high", "alpha_high"),
        ("4. Beta", "beta"),
        ("5. Gamma", "gamma"),
    )
    for message, band in band_steps:
        print(message)
        for column_idx, electrode in enumerate(eeg_features):
            features[electrode + "_" + band] = interpolation_fn(
                time_list, eeg_result["ts"], eeg_result[band][:, column_idx]
            )

    return features


############################################################################################################################
############################################################################################################################

@app.route('/')
def main_page():
    """Serve the landing page by rendering the ``main.html`` template."""
    landing_page = flask.render_template('main.html')
    return landing_page

@app.route('/predict_batch', methods = ['POST'])
def predict_batch():
    '''
    Preprocess an uploaded CSV batch and predict a state for every record.

    The request must carry the CSV in the "file" form field and the CSV must
    contain an "id" column. The predictions are saved to ``results.csv`` and
    returned as an HTML table of (id, predicted_state).

    The route only accepts POST, so no request-method check is needed.
    '''
    import io  # local import: only this handler needs it

    upload = request.files['file']
    print("Reading input data")
    # Parse the upload from memory instead of saving it under the
    # client-supplied filename: that name is untrusted (path traversal /
    # overwriting server files) and the temporary copy was leaked whenever a
    # later step raised.
    data = pd.read_csv(io.BytesIO(upload.read()))
    id_list = list(data['id'])
    n = data.shape[0]
    print(f"Number of records: {n}")

    print("Preprocessing and extracting new features")
    new_features = extract_biosignal_features(data)
    processed_data = preprocess_data(data)
    features = pd.concat([processed_data, new_features], axis=1)
    print("PCA to reduce to 4 dimensions")
    reduced = pca.transform(features)
    print("Predicting output")
    output = rf_model.predict(reduced)

    label = {
        0: "Baseline",
        1: "Startle/Surprise",
        2: "Channelized Attention",
        3: "Diverted Attention",
    }
    output_df = pd.DataFrame(
        {
            'id': id_list,
            'predicted_state': [label[x] for x in output],
        }
    )
    print("Saving results")
    output_df.to_csv('results.csv', index=False)
    print("Completed")

    # Return the table directly rather than writing it into the templates
    # folder and rendering it: with debug off the template is cached after the
    # first render (stale results on later uploads), concurrent requests would
    # overwrite each other's file, and any "{{ ... }}" in the data would be
    # evaluated as template code.
    return output_df.to_html(index=False)

@app.route('/batch_mode')
def batch_mode():
    """Serve the batch-prediction page by rendering ``batch_mode.html``."""
    upload_page = flask.render_template('batch_mode.html')
    return upload_page

@app.route('/single_mode')
def single_mode():
    """Serve the single-record page by rendering ``single_mode.html``."""
    form_page = flask.render_template('single_mode.html')
    return form_page

@app.route('/predict_single', methods = ['POST'])
def predict_single():
    '''
    Preprocess a single record and predict its state.

    The "input_data" form field must hold comma-separated numeric values, one
    per column of the module-level ``data_sampled`` frame and in the same
    order. The record is appended to ``data_sampled`` before feature
    extraction and the prediction for that last row is returned
    (presumably because the signal-derived features need a series of samples
    rather than one point -- confirm against the training notebook).

    Returns JSON ``{"prediction": <state name>}`` on success, or
    ``{"error": <message>}`` with HTTP status 400 when the input is missing,
    has the wrong number of values, or is not numeric.
    '''
    # read input
    raw_input = request.form.get("input_data")
    if raw_input is None:
        return jsonify({'error': 'missing form field "input_data"'}), 400

    raw_values = raw_input.split(',')
    n_columns = len(data_sampled.columns)
    if len(raw_values) != n_columns:
        return jsonify(
            {'error': f'expected {n_columns} comma-separated values, got {len(raw_values)}'}
        ), 400
    try:
        # ``np.float`` was removed in NumPy 1.24; the builtin is equivalent.
        input_data = np.array(raw_values).astype(float).reshape(1, n_columns)
    except ValueError:
        return jsonify({'error': 'input_data must contain only numeric values'}), 400

    df = pd.DataFrame(input_data, columns=data_sampled.columns)
    # pd.concat builds a new frame, so the shared sample is never mutated and
    # no defensive deep copy is required.
    data = pd.concat([data_sampled, df])

    print("Preprocessing and extracting new features")
    new_features = extract_biosignal_features(data)
    processed_data = preprocess_data(data)
    data = pd.concat([processed_data, new_features], axis=1)
    print("PCA to reduce to 4 dimensions")
    data = pca.transform(data)
    print("Predicting output")
    output = rf_model.predict(data)

    label = {
        0: "Baseline",
        1: "Startle/Surprise",
        2: "Channelized Attention",
        3: "Diverted Attention",
    }

    # The submitted record is the last row, whatever the sample size is.
    prediction = label[output[-1]]
    print("Completed")
    return jsonify({'prediction': prediction})
    

if __name__ == '__main__':

    def _load_pickle(path):
        """Unpickle the object stored at ``path``, closing the file afterwards."""
        with open(path, 'rb') as fh:
            return pickle.load(fh)

    # NOTE: unpickling executes arbitrary code, so these files must be trusted
    # artifacts produced by the training pipeline, never user-supplied.

    # load categorical encoder and standard scaler which was fit on train data
    cat_encoder = _load_pickle("cat_encoder.pkl")
    scaler = _load_pickle("standard_scaler.pkl")

    # load PCA and Randomforest model
    pca = _load_pickle("pca_4_features.pkl")
    rf_model = _load_pickle("random_forest.pkl")

    # sampled data from train data; predict_single appends each submitted
    # record to this sample before extracting features
    data_sampled = pd.read_csv("../Data/train.csv").sample(50000, random_state=1)
    data_sampled = data_sampled.drop(["experiment", "event"], axis=1)

    # Listens on all network interfaces on port 8080.
    app.run(host='0.0.0.0', port=8080)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8080
 * Running on http://192.168.0.75:8080
Press CTRL+C to quit
127.0.0.1 - - [06/Jan/2023 09:13:58] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [06/Jan/2023 09:14:42] "GET /single_mode HTTP/1.1" 200 -


Preprocessing and extracting new features
Deriving heart rate
Deriving respiration rate
Deriving EEG bands
1. Theta
2. Alpha low
3. Alpha high
4. Beta
5. Gamma
Encoding categorical features
Scaling numerical features
PCA to reduce to 4 dimensions
Predicting output


127.0.0.1 - - [06/Jan/2023 09:14:57] "POST /predict_single HTTP/1.1" 200 -


Completed


127.0.0.1 - - [06/Jan/2023 09:15:04] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [06/Jan/2023 09:15:10] "GET /batch_mode HTTP/1.1" 200 -


Reading input data
Number of records: 50000
Preprocessing and extracting new features
Deriving heart rate
Deriving respiration rate
Deriving EEG bands
1. Theta
2. Alpha low
3. Alpha high
4. Beta
5. Gamma
Encoding categorical features
Scaling numerical features
PCA to reduce to 4 dimensions
Predicting output
Saving results
Completed


127.0.0.1 - - [06/Jan/2023 09:15:33] "POST /predict_batch HTTP/1.1" 200 -
