### Current Process
1. Read in data --> Done

2. Custom Imputation --> Done

3. Add Binary Class --> Done, Should Add Binary Class Later

4. Summary Statistics Features --> Done

5. Wrapper Functions --> Done, Need to Test Though

6. Sklearn Pipeline Categorical Features --> One Hot Encoding Done

7. Sklearn Pipeline Numerical Features --> StandardScaler Done

8. Create Lagged Features --> Done

9. Modeling --> Currently XgBoost, (Maybe Try: TensorFlow Decision Tree, TensorFlow Probability Model)

10. Model Evaluation --> Accuracy, Precision, Recall, F1, Confusion Matrix (Need to add Variable Importance Based on Variance)

11. PySpark: XGBoost Classification Feature Importance

In [1]:
# # Need to Run These in Notebook Version For Pandas UDF
! pip install pyarrow
! pip install pandas
! pip install scikit-learn
! pip install pyspark



In [2]:
from Input_Variables.read_vars import raw_data_storage, \
                                      analysis_group, \
                                      daily_stats_features_lower, daily_stats_features_upper, \
                                      ml_models_train_split, ml_models_test_split, model_storage_location, \
                                      time_series_lag_values_created

from Data_Schema.schema import Pandas_UDF_Data_Schema
from Read_In_Data.read_data import Reading_Data
from Data_Pipeline.sklearn_pipeline import Sklearn_Pipeline
from Feature_Generation.create_binary_labels import Create_Binary_Labels
from Feature_Generation.summary_stats import Summary_Stats_Features
from Feature_Generation.lag_features import Create_Lagged_Features
from Model_Creation.xgboost_model import XGBoost_Classification
from Model_Evaluation.classification_evaluation import Classification_Evalaution_Metrics
from Model_Plots.xgboost_classification_plots import XGBoost_Classification_Plot

from Data_Pipeline.encoding_scaling_pipeline import Feature_Transformations

# General Modules

In [3]:
# PySpark UDF Schema Activation
pandas_udf_data_schema=Pandas_UDF_Data_Schema()

# Data Location
reading_data=Reading_Data(data_location=raw_data_storage)

# Create Binary y Variables
create_binary_labels=Create_Binary_Labels()

# Sklearn Pipeline
pandas_sklearn_pipeline=Sklearn_Pipeline()

# Features Daily Stats Module
summary_stats_features=Summary_Stats_Features()

# Features Lagged Value
create_lag_features=Create_Lagged_Features()

# XGBoost Model Module
xgboost_classification=XGBoost_Classification()

# Classification Evaluation
classification_evalaution_metrics=Classification_Evalaution_Metrics()

# Model Plots Feature Importance
xgboost_classification_plot=XGBoost_Classification_Plot()

# Feature Transformations
feature_transformations=Feature_Transformations()

# PySpark

### 1. PySpark: Reading In Data

In [None]:
####### PySpark
pyspark_df=reading_data.read_in_pyspark()

In [5]:
print((pyspark_df.count(), len(pyspark_df.columns)))

(5734, 5)


In [33]:
from pyspark.ml.functions import vector_to_array

In [32]:
from pyspark.sql.functions import date_trunc, col, udf
pyspark_df=pyspark_df.withColumn("GlucoseDisplayTime", date_trunc("minute", col("GlucoseDisplayTime")))

ImportError: cannot import name 'vector_to_array' from 'pyspark.sql.functions' (/home/jovyan/glucose-data-analysis/glucose_venv/lib/python3.10/site-packages/pyspark/sql/functions.py)

In [7]:
pyspark_df=pyspark_df.distinct()

In [8]:
print((pyspark_df.count(), len(pyspark_df.columns)))

[Stage 4:>                                                          (0 + 1) / 1]

(5734, 5)


                                                                                

In [9]:
pyspark_df.show(1)

+--------------------+-----+-------------------+---------------------+------------------+
|           PatientId|Value| GlucoseDisplayTime|GlucoseDisplayTimeRaw|GlucoseDisplayDate|
+--------------------+-----+-------------------+---------------------+------------------+
|vH4j/sVPDk4luo9wf...|157.0|2022-12-28 02:52:00| 2022-12-28T02:52:...|        2022-12-28|
+--------------------+-----+-------------------+---------------------+------------------+
only showing top 1 row



### 2. PySpark: Custom Imputation Pipeline

In [10]:
pyspark_df=pyspark_df.orderBy("PatientId", 
                              "GlucoseDisplayTime",
                              ascending=True)

In [None]:
####### PySpark
pyspark_custom_imputation_schema=pandas_udf_data_schema.custom_imputation_pyspark_schema()
pyspark_custom_imputation_pipeline=pandas_sklearn_pipeline.pyspark_custom_imputation_pipeline(df=pyspark_df, 
                                                                                              output_schema=pyspark_custom_imputation_schema,
                                                                                              analysis_group=analysis_group)




In [None]:
pyspark_custom_imputation_pipeline.show(1)

[Stage 24:>                                                         (0 + 1) / 1]

+-------------------+--------------------+-----+
| GlucoseDisplayTime|           PatientId|Value|
+-------------------+--------------------+-----+
|2022-02-18 17:05:00|Zw997clFRcTAHrWiO...|260.0|
+-------------------+--------------------+-----+
only showing top 1 row



                                                                                

### 3. PySpark: Adding Binary Labels

In [None]:
pyspark_df_added_binary_labels=create_binary_labels.pyspark_binary_labels(df=pyspark_custom_imputation_pipeline, 
                                                                          lower=daily_stats_features_lower, 
                                                                          upper=daily_stats_features_upper)

pyspark_df_added_binary_labels.show(1)

[Stage 36:>                                                         (0 + 1) / 1]

+-------------------+--------------------+-----+--------+
| GlucoseDisplayTime|           PatientId|Value|y_Binary|
+-------------------+--------------------+-----+--------+
|2022-02-18 17:05:00|Zw997clFRcTAHrWiO...|260.0|       1|
+-------------------+--------------------+-----+--------+
only showing top 1 row



                                                                                

### 4. PySpark: Features: Summary Statistics

In [None]:
####### PySpark
pyspark_features_summary_stats=summary_stats_features.pyspark_summary_statistics(df=pyspark_df_added_binary_labels,
                                                                                 daily_stats_features_lower=daily_stats_features_lower,
                                                                                 daily_stats_features_upper=daily_stats_features_upper)



In [None]:
pyspark_features_summary_stats.show(1)

[Stage 58:>                                                         (0 + 1) / 1]

+--------------------+-----+-------------------+-----+--------+-----+----------------+------------------+-----------------+------+-----+-----+----------+----------+---------------+------------------+
|           PatientId|Chunk| GlucoseDisplayTime|Value|y_Binary|index|y_summary_binary|              Mean|          Std Dev|Median|  Min|  Max|CountBelow|CountAbove|PercentageBelow|   PercentageAbove|
+--------------------+-----+-------------------+-----+--------+-----+----------------+------------------+-----------------+------+-----+-----+----------+----------+---------------+------------------+
|Zw997clFRcTAHrWiO...|    0|2022-02-18 17:55:00|271.0|       1|   11|               1|273.45454545454544|8.017027333914188| 272.0|260.0|284.0|         0|        11|            0.0|0.9166666666666666|
+--------------------+-----+-------------------+-----+--------+-----+----------------+------------------+-----------------+------+-----+-----+----------+----------+---------------+------------------+


                                                                                

### 5. PySpark: Wrapper Functions

### 6. PySpark: Lag Features

In [None]:
####### PySpark
pyspark_lag_features_creation=create_lag_features.pyspark_lag_features(df=pyspark_features_summary_stats,
                                                                       time_series_lag_values_created=time_series_lag_values_created)
pyspark_lag_features_creation.show(5)

23/04/20 06:05:32 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.


[Stage 80:>                                                         (0 + 1) / 1]

+--------------------+-----+-------------------+-----+--------+-----+----------------+------------------+-----------------+------+-----+-----+----------+----------+---------------+------------------+-----------+------------------+-----------------+---------+---------+---------+-------------+-------------+---------------+------------------+-----------+------------------+-----------------+---------+---------+---------+-------------+-------------+---------------+------------------+
|           PatientId|Chunk| GlucoseDisplayTime|Value|y_Binary|index|y_summary_binary|              Mean|          Std Dev|Median|  Min|  Max|CountBelow|CountAbove|PercentageBelow|   PercentageAbove|value_lag_1|        mean_lag_1|    std_dev_lag_1|med_lag_1|min_lag_1|max_lag_1|cnt_bel_lag_1|cnt_abv_lag_1|perc_belw_lag_1|    perc_abv_lag_1|value_lag_2|        mean_lag_2|    std_dev_lag_2|med_lag_2|min_lag_2|max_lag_2|cnt_bel_lag_2|cnt_abv_lag_2|perc_belw_lag_2|    perc_abv_lag_2|
+--------------------+-----+----

                                                                                

### 8. PySpark: Sklearn Categorical Pipeline in PySpark

In [None]:
####### PySpark
pyspark_categorical_schema=pandas_udf_data_schema.sklearn_pyspark_categorical_schema()
pyspark_transformations_categorical=pandas_sklearn_pipeline.pyspark_sklearn_pipeline_categorical(df=pyspark_features_summary_stats, 
                                                                                                 output_schema=pyspark_categorical_schema,
                                                                                                 analysis_group=analysis_group)
pyspark_transformations_categorical.show(1)

### 9. PySpark: Sklearn Numerical Pipeline in PySpark

In [26]:
####### PySpark
pyspark_numerical_features=feature_transformations.numerical_scaling(df=pyspark_lag_features_creation)
pyspark_numerical_features.show(1)

[Stage 1020:>                                                       (0 + 1) / 1]

+--------------------+-----+-------------------+-----+--------+-----+----------------+------------------+-----------------+------+-----+-----+----------+----------+---------------+------------------+-----------+------------------+-----------------+---------+---------+---------+-------------+-------------+---------------+------------------+-----------+------------------+-----------------+---------+---------+---------+-------------+-------------+---------------+------------------+--------------------+-------------------+-------------+-------------------+-------------+---------------+-------------------+-------------+-------------+-------------------+--------------------+-------------+-------------+--------------------+--------------------+---------------+--------------------+----------------------+--------------------+--------------------+--------------------+--------------------+----------------------+--------------------+-------------------+--------------------+--------------------+---

                                                                                

### 11. PySpark: XGBoost Model

In [27]:
scaled_vals=pyspark_numerical_features.columns
scaled_feats=[x for x in scaled_vals if "scaled" in x]



In [28]:
scaled_feats

['mean_lag_1_scaled',
 'perc_belw_lag_2_scaled',
 'med_lag_2_scaled',
 'std_dev_lag_1_scaled',
 'min_lag_1_scaled',
 'value_lag_1_scaled',
 'perc_belw_lag_1_scaled',
 'med_lag_1_scaled',
 'max_lag_1_scaled',
 'std_dev_lag_2_scaled',
 'mean_lag_2_scaled',
 'max_lag_2_scaled',
 'min_lag_2_scaled',
 'perc_abv_lag_2_scaled',
 'perc_abv_lag_1_scaled',
 'value_lag_2_scaled']

In [34]:
for num_feature in scaled_feats:
    pyspark_numerical_features=pyspark_numerical_features.select(vector_to_array(num_feature)[0].cast('float').alias(num_feature))

AnalysisException: Column 'perc_belw_lag_2_scaled' does not exist. Did you mean one of the following? [mean_lag_1_scaled];
'Project [cast(UDF('perc_belw_lag_2_scaled)[0] as float) AS perc_belw_lag_2_scaled#18211]
+- Project [cast(UDF(mean_lag_1_scaled#16805)[0] as float) AS mean_lag_1_scaled#18209]
   +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 44 more fields]
      +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 43 more fields]
         +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 42 more fields]
            +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 41 more fields]
               +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 40 more fields]
                  +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 39 more fields]
                     +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 38 more fields]
                        +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 37 more fields]
                           +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 36 more fields]
                              +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 35 more fields]
                                 +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 34 more fields]
                                    +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 33 more fields]
                                       +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 32 more fields]
                                          +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 31 more fields]
                                             +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 30 more fields]
                                                +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 29 more fields]
                                                   +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 28 more fields]
                                                      +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 27 more fields]
                                                         +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 26 more fields]
                                                            +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 25 more fields]
                                                               +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 24 more fields]
                                                                  +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 23 more fields]
                                                                     +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 22 more fields]
                                                                        +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 21 more fields]
                                                                           +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 20 more fields]
                                                                              +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 19 more fields]
                                                                                 +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 18 more fields]
                                                                                    +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 17 more fields]
                                                                                       +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 16 more fields]
                                                                                          +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 15 more fields]
                                                                                             +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 14 more fields]
                                                                                                +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 13 more fields]
                                                                                                   +- Filter atleastnnonnulls(36, PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, ... 13 more fields)
                                                                                                      +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 12 more fields]
                                                                                                         +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 13 more fields]
                                                                                                            +- Window [lag(PercentageAbove#159, -2, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -2, -2)) AS perc_abv_lag_2#889], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                               +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 11 more fields]
                                                                                                                  +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 11 more fields]
                                                                                                                     +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 12 more fields]
                                                                                                                        +- Window [lag(PercentageBelow#157, -2, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -2, -2)) AS perc_belw_lag_2#853], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                           +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 10 more fields]
                                                                                                                              +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 10 more fields]
                                                                                                                                 +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 11 more fields]
                                                                                                                                    +- Window [lag(CountAbove#155L, -2, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -2, -2)) AS cnt_abv_lag_2#818L], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                       +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 9 more fields]
                                                                                                                                          +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 9 more fields]
                                                                                                                                             +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 10 more fields]
                                                                                                                                                +- Window [lag(CountBelow#153L, -2, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -2, -2)) AS cnt_bel_lag_2#784L], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                   +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 8 more fields]
                                                                                                                                                      +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 8 more fields]
                                                                                                                                                         +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 9 more fields]
                                                                                                                                                            +- Window [lag(Max#151, -2, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -2, -2)) AS max_lag_2#751], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                               +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 7 more fields]
                                                                                                                                                                  +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 7 more fields]
                                                                                                                                                                     +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 8 more fields]
                                                                                                                                                                        +- Window [lag(Min#149, -2, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -2, -2)) AS min_lag_2#719], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                           +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 6 more fields]
                                                                                                                                                                              +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 6 more fields]
                                                                                                                                                                                 +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 7 more fields]
                                                                                                                                                                                    +- Window [lag(Median#147, -2, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -2, -2)) AS med_lag_2#688], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                       +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 5 more fields]
                                                                                                                                                                                          +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 5 more fields]
                                                                                                                                                                                             +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 6 more fields]
                                                                                                                                                                                                +- Window [lag(Std Dev#145, -2, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -2, -2)) AS std_dev_lag_2#658], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                   +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 4 more fields]
                                                                                                                                                                                                      +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 4 more fields]
                                                                                                                                                                                                         +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 5 more fields]
                                                                                                                                                                                                            +- Window [lag(Mean#135, -2, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -2, -2)) AS mean_lag_2#629], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                               +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 3 more fields]
                                                                                                                                                                                                                  +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 3 more fields]
                                                                                                                                                                                                                     +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 4 more fields]
                                                                                                                                                                                                                        +- Window [lag(Value#66, -2, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -2, -2)) AS value_lag_2#601], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                                           +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 2 more fields]
                                                                                                                                                                                                                              +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 2 more fields]
                                                                                                                                                                                                                                 +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 3 more fields]
                                                                                                                                                                                                                                    +- Window [lag(PercentageAbove#159, -1, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS perc_abv_lag_1#574], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                                                       +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, perc_belw_lag_1#548]
                                                                                                                                                                                                                                          +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, perc_belw_lag_1#548]
                                                                                                                                                                                                                                             +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, ... 2 more fields]
                                                                                                                                                                                                                                                +- Window [lag(PercentageBelow#157, -1, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS perc_belw_lag_1#548], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                                                                   +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L]
                                                                                                                                                                                                                                                      +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L]
                                                                                                                                                                                                                                                         +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_abv_lag_1#523L, cnt_abv_lag_1#523L]
                                                                                                                                                                                                                                                            +- Window [lag(CountAbove#155L, -1, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS cnt_abv_lag_1#523L], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                                                                               +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L]
                                                                                                                                                                                                                                                                  +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L]
                                                                                                                                                                                                                                                                     +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, cnt_bel_lag_1#499L, cnt_bel_lag_1#499L]
                                                                                                                                                                                                                                                                        +- Window [lag(CountBelow#153L, -1, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS cnt_bel_lag_1#499L], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                                                                                           +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476]
                                                                                                                                                                                                                                                                              +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476]
                                                                                                                                                                                                                                                                                 +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, max_lag_1#476, max_lag_1#476]
                                                                                                                                                                                                                                                                                    +- Window [lag(Max#151, -1, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS max_lag_1#476], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                                                                                                       +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454]
                                                                                                                                                                                                                                                                                          +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454]
                                                                                                                                                                                                                                                                                             +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, min_lag_1#454, min_lag_1#454]
                                                                                                                                                                                                                                                                                                +- Window [lag(Min#149, -1, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS min_lag_1#454], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                                                                                                                   +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433]
                                                                                                                                                                                                                                                                                                      +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433]
                                                                                                                                                                                                                                                                                                         +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, med_lag_1#433, med_lag_1#433]
                                                                                                                                                                                                                                                                                                            +- Window [lag(Median#147, -1, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS med_lag_1#433], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                                                                                                                               +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413]
                                                                                                                                                                                                                                                                                                                  +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413]
                                                                                                                                                                                                                                                                                                                     +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, std_dev_lag_1#413, std_dev_lag_1#413]
                                                                                                                                                                                                                                                                                                                        +- Window [lag(Std Dev#145, -1, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS std_dev_lag_1#413], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                                                                                                                                           +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394]
                                                                                                                                                                                                                                                                                                                              +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394]
                                                                                                                                                                                                                                                                                                                                 +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, mean_lag_1#394, mean_lag_1#394]
                                                                                                                                                                                                                                                                                                                                    +- Window [lag(Mean#135, -1, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS mean_lag_1#394], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                                                                                                                                                       +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376]
                                                                                                                                                                                                                                                                                                                                          +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376]
                                                                                                                                                                                                                                                                                                                                             +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159, value_lag_1#376, value_lag_1#376]
                                                                                                                                                                                                                                                                                                                                                +- Window [lag(Value#66, -1, null) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, -1)) AS value_lag_1#376], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                                                                                                                                                                   +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159]
                                                                                                                                                                                                                                                                                                                                                      +- Project [PatientId#65, Chunk#119, GlucoseDisplayTime#64, Value#66, y_Binary#83, index#107, y_summary_binary#133, Mean#135, Std Dev#145, Median#147, Min#149, Max#151, CountBelow#153L, CountAbove#155L, PercentageBelow#157, PercentageAbove#159]
                                                                                                                                                                                                                                                                                                                                                         +- Join Inner, ((PatientId#65 = PatientId#212) AND (Chunk#119 = Chunk#209))
                                                                                                                                                                                                                                                                                                                                                            :- Project [GlucoseDisplayTime#64, PatientId#65, Value#66, y_Binary#83, index#107, cast((cast(index#107 as double) / cast(12 as double)) as int) AS Chunk#119]
                                                                                                                                                                                                                                                                                                                                                            :  +- Project [GlucoseDisplayTime#64, PatientId#65, Value#66, y_Binary#83, index#107]
                                                                                                                                                                                                                                                                                                                                                            :     +- Project [GlucoseDisplayTime#64, PatientId#65, Value#66, y_Binary#83, index#107, index#107]
                                                                                                                                                                                                                                                                                                                                                            :        +- Window [rank(GlucoseDisplayTime#64) windowspecdefinition(PatientId#65, GlucoseDisplayTime#64 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS index#107], [PatientId#65], [GlucoseDisplayTime#64 ASC NULLS FIRST]
                                                                                                                                                                                                                                                                                                                                                            :           +- Project [GlucoseDisplayTime#64, PatientId#65, Value#66, y_Binary#83]
                                                                                                                                                                                                                                                                                                                                                            :              +- Project [GlucoseDisplayTime#64, PatientId#65, Value#66, CASE WHEN (Value#66 > cast(180 as float)) THEN 1 WHEN (Value#66 < cast(70 as float)) THEN 1 ELSE 0 END AS y_Binary#83]
                                                                                                                                                                                                                                                                                                                                                            :                 +- FlatMapGroupsInPandas [PatientId#0], transform_features(PatientId#0, Value#1, GlucoseDisplayTime#20, GlucoseDisplayTimeRaw#3, GlucoseDisplayDate#4)#63, [GlucoseDisplayTime#64, PatientId#65, Value#66]
                                                                                                                                                                                                                                                                                                                                                            :                    +- Project [PatientId#0, PatientId#0, Value#1, GlucoseDisplayTime#20, GlucoseDisplayTimeRaw#3, GlucoseDisplayDate#4]
                                                                                                                                                                                                                                                                                                                                                            :                       +- Sort [PatientId#0 ASC NULLS FIRST, GlucoseDisplayTime#20 ASC NULLS FIRST], true
                                                                                                                                                                                                                                                                                                                                                            :                          +- Deduplicate [Value#1, GlucoseDisplayTime#20, GlucoseDisplayTimeRaw#3, GlucoseDisplayDate#4, PatientId#0]
                                                                                                                                                                                                                                                                                                                                                            :                             +- Project [PatientId#0, Value#1, date_trunc(minute, GlucoseDisplayTime#2, Some(Etc/UTC)) AS GlucoseDisplayTime#20, GlucoseDisplayTimeRaw#3, GlucoseDisplayDate#4]
                                                                                                                                                                                                                                                                                                                                                            :                                +- Relation [PatientId#0,Value#1,GlucoseDisplayTime#2,GlucoseDisplayTimeRaw#3,GlucoseDisplayDate#4] parquet
                                                                                                                                                                                                                                                                                                                                                            +- Aggregate [PatientId#212, Chunk#209], [PatientId#212, Chunk#209, max(y_binary#83) AS y_summary_binary#133, avg(Value#213) AS Mean#135, stddev_samp(cast(Value#213 as double)) AS Std Dev#145, percentile_approx(Value#213, 0.5, 10000, 0, 0) AS Median#147, min(Value#213) AS Min#149, max(Value#213) AS Max#151, count(CASE WHEN (Value#213 < cast(70 as float)) THEN 1 END) AS CountBelow#153L, count(CASE WHEN (Value#213 > cast(180 as float)) THEN 1 END) AS CountAbove#155L, (cast(count(CASE WHEN (Value#213 < cast(70 as float)) THEN 1 END) as double) / cast(12 as double)) AS PercentageBelow#157, (cast(count(CASE WHEN (Value#213 > cast(180 as float)) THEN 1 END) as double) / cast(12 as double)) AS PercentageAbove#159]
                                                                                                                                                                                                                                                                                                                                                               +- Project [GlucoseDisplayTime#211, PatientId#212, Value#213, y_Binary#83, index#107, cast((cast(index#107 as double) / cast(12 as double)) as int) AS Chunk#209]
                                                                                                                                                                                                                                                                                                                                                                  +- Project [GlucoseDisplayTime#211, PatientId#212, Value#213, y_Binary#83, index#107]
                                                                                                                                                                                                                                                                                                                                                                     +- Project [GlucoseDisplayTime#211, PatientId#212, Value#213, y_Binary#83, index#107, index#107]
                                                                                                                                                                                                                                                                                                                                                                        +- Window [rank(GlucoseDisplayTime#211) windowspecdefinition(PatientId#212, GlucoseDisplayTime#211 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS index#107], [PatientId#212], [GlucoseDisplayTime#211 ASC NULLS FIRST]
                                                                                                                                                                                                                                                                                                                                                                           +- Project [GlucoseDisplayTime#211, PatientId#212, Value#213, y_Binary#83]
                                                                                                                                                                                                                                                                                                                                                                              +- Project [GlucoseDisplayTime#211, PatientId#212, Value#213, CASE WHEN (Value#213 > cast(180 as float)) THEN 1 WHEN (Value#213 < cast(70 as float)) THEN 1 ELSE 0 END AS y_Binary#83]
                                                                                                                                                                                                                                                                                                                                                                                 +- FlatMapGroupsInPandas [PatientId#204], transform_features(PatientId#204, Value#205, GlucoseDisplayTime#20, GlucoseDisplayTimeRaw#207, GlucoseDisplayDate#208)#63, [GlucoseDisplayTime#211, PatientId#212, Value#213]
                                                                                                                                                                                                                                                                                                                                                                                    +- Project [PatientId#204, PatientId#204, Value#205, GlucoseDisplayTime#20, GlucoseDisplayTimeRaw#207, GlucoseDisplayDate#208]
                                                                                                                                                                                                                                                                                                                                                                                       +- Sort [PatientId#204 ASC NULLS FIRST, GlucoseDisplayTime#20 ASC NULLS FIRST], true
                                                                                                                                                                                                                                                                                                                                                                                          +- Deduplicate [Value#205, GlucoseDisplayTime#20, GlucoseDisplayTimeRaw#207, GlucoseDisplayDate#208, PatientId#204]
                                                                                                                                                                                                                                                                                                                                                                                             +- Project [PatientId#204, Value#205, date_trunc(minute, GlucoseDisplayTime#206, Some(Etc/UTC)) AS GlucoseDisplayTime#20, GlucoseDisplayTimeRaw#207, GlucoseDisplayDate#208]
                                                                                                                                                                                                                                                                                                                                                                                                +- Relation [PatientId#204,Value#205,GlucoseDisplayTime#206,GlucoseDisplayTimeRaw#207,GlucoseDisplayDate#208] parquet


In [20]:
from pyspark.sql.types import DoubleType, FloatType

In [25]:
udf1 = udf(lambda x : int(x[0]),FloatType())

for num_feature in scaled_feats:
    pyspark_numerical_features=pyspark_numerical_features.withColumn(num_feature, udf1(col(num_feature)))

AnalysisException: Reference 'mean_lag_1_scaled' is ambiguous, could be: mean_lag_1_scaled, mean_lag_1_scaled.

In [None]:
from Model_Creation.pyspark_xgboost import Create_PySpark_XGBoost
create_pyspark_xgboost=Create_PySpark_XGBoost()

In [None]:
pyspark_numerical_features.schema

In [None]:
test_model=create_pyspark_xgboost.xgboost_classifier(ml_df=pyspark_numerical_features)

In [None]:
####### PySpark
pyspark_xgboost_classsification_schema=pandas_udf_data_schema.xgboost_classification_schema()

classification_model_outputs=xgboost_classification.pyspark_xgboost(df=pyspark_lag_features_creation, 
                                                                    output_schema=pyspark_xgboost_classsification_schema, 
                                                                    train_split=ml_models_train_split, 
                                                                    test_split=ml_models_test_split)

classification_model_outputs.show()

### 12. PySpark: Model Evaluation

In [None]:
####### PySpark
pyspark_classification_metric_schema=pandas_udf_data_schema.classification_metric_schema()

classification_metric_df=classification_evalaution_metrics.pyspark_classification_model_evaluation_metrics(df=classification_model_outputs, 
                                                                                                           output_schema=pyspark_classification_metric_schema)
classification_metric_df.show()                    

### 13. PySpark: XGBoost Classification Feature Importance

In [None]:
xgboost_classification_plot.read_model_plot_variance(model_storage_location=model_storage_location)

# Pandas

### Pandas: Reading in Data

In [None]:
####### Pandas
pandas_df=reading_data.read_in_pandas()
pandas_df.head(1)

### Pandas: Custom Imputation Pipeline

In [None]:
test_pat=pandas_df[pandas_df['PatientId']=='tHu8WPnIffml5CL+AbOBkXcbFApQnP06KdrHbjinta4=']

In [None]:
pandas_custom_imputation_pipeline=pandas_sklearn_pipeline.pandas_custom_imputation_pipeline(df=pandas_df)
pandas_custom_imputation_pipeline.head(1)

In [None]:
####### Pandas
pandas_custom_imputation_pipeline=pandas_sklearn_pipeline.pandas_custom_imputation_pipeline(df=pandas_df)
pandas_custom_imputation_pipeline.head(1)

### Pandas: Aggregate Data at Level

### Pandas: Adding Binary Labels

In [None]:
pandas_df_added_binary_labels=create_binary_labels.pandas_binary_labels(df=pandas_custom_imputation_pipeline, 
                                                                        lower=daily_stats_features_lower, 
                                                                        upper=daily_stats_features_upper)
pandas_df_added_binary_labels.head(1)

### Pandas: Features: Summary Statistics

In [None]:
pandas_features_summary_stats=summary_stats_features.pandas_compressDailyValues(data=pandas_df_added_binary_labels, 
                                                                                lower=daily_stats_features_lower, 
                                                                                upper=daily_stats_features_upper)
pandas_features_summary_stats.head(1)

### Pandas: Wrapper Functions

### Pandas: Sklearn Categorical Pipeline in Pandas

In [None]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
df=pandas_features_summary_stats[['PatientId', 'Value', 'GlucoseDisplayTime', 'GlucoseDisplayDate', 'inserted', 
        'missing', 'y_Binary', 'Median', 'Mean', 'Std Dev', 'Max', 'Min', 'AreaBelow', 'AreaAbove']]

In [None]:
df.head()

In [None]:
for patient_id in df['PatientId'].unique():
    # Categorical Features
    categorical_features=['inserted', 'missing']
    categorical_transformer=Pipeline([('imputer_cat', SimpleImputer(strategy='constant', fill_value=np.nan)),
                                        ('onehot', OneHotEncoder(handle_unknown='ignore'))])

    preprocessor_2=ColumnTransformer([('categorical', categorical_transformer, categorical_features)],
                                    remainder = 'passthrough')

    cat_pipe_pipeline=Pipeline([('preprocessing_2', preprocessor_2)])

    transformed_data1=cat_pipe_pipeline.fit_transform(df)

    transformed_data_df=pd.DataFrame(transformed_data1)

    transformed_data_df['combine_inserted']=transformed_data_df[[0,1]].values.tolist()
    transformed_data_df['combine_missing']=transformed_data_df[[2,3]].values.tolist()
    transformed_data_df=transformed_data_df.drop(transformed_data_df.iloc[:, 0:4],axis = 1)

    transformed_data_df.columns=['PatientId', 'Value', 'GlucoseDisplayTime', 'GlucoseDisplayDate', 
                                    'y_Binary', 'Median', 'Mean', 'Std Dev', 'Max', 'Min', 'AreaBelow', 
                                    'AreaAbove', 'inserted', 'missing']

In [None]:
transformed_data_df.columns

In [None]:
####### Pandas
pandas_custom_categorical_pipeline=pandas_sklearn_pipeline.pandas_transform_categorical_features(df=pandas_features_summary_stats)
pandas_custom_categorical_pipeline.head(1)

### Pandas: Sklearn Numerical Pipeline in Pandas

In [None]:
####### Pandas
pandas_custom_numerical_pipeline=pandas_sklearn_pipeline.pandas_transform_numerical_features(df=pandas_custom_categorical_pipeline)
pandas_custom_numerical_pipeline.head(1)

### Pandas: Models