# <font color='darkblue'> Generate quantitative sleep features from raw activity counts </font>

# <font color='blue'> MESA dataset</font>


### The project is structured as follows:
    Teva
        /sleep-wake
            /code
            /data
                /mesa
                    /actigraphy/
                        mesa-sleep-0001.csv
                        mesa-sleep-0002.csv
                        ...
                        
            /tsfresh-outputs


### Packages

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# TSFRESH - for installation: run "conda install -c conda-forge tsfresh" 
from tsfresh import extract_features
from tsfresh.feature_extraction import EfficientFCParameters, MinimalFCParameters

### Set the project and actigraphy data directories

In [2]:
# set current actigraphy data directory
# main_dir: root folder of the sleep-wake project. This is a machine-specific
# absolute path -- adjust it before running on another computer.
main_dir = '/Users/nancy/PycharmProjects/Teva/sleep-wake/'
# dir: folder that holds the per-subject actigraphy CSV files.
# NOTE(review): this name shadows the built-in dir(); it is read as a global by
# prepare_mesa_file_name_format(), so renaming it requires changing both places.
dir = os.path.join(main_dir, 'data/mesa/actigraphy/')


# Functions

In [3]:
def read_activity_data(file_path):
    """
    Load the raw actigraphy CSV of one subject into a pandas.DataFrame.

    The first column of the CSV file becomes the index of the DataFrame.

    file_path: path of the CSV file to read
    return: pandas.DataFrame with the file content, or None when the file
            does not exist (a notice is printed so the caller can move on)
    """

    print("\nReading dataset from:", file_path)

    # only the read itself can hit a missing file, so only it is guarded
    try:
        activity_table = pd.read_csv(file_path, sep=',', index_col=0)
    except FileNotFoundError:
        print("FileNotFoundError: No such file: ", file_path, ". Moving on ..")
        return None

    return activity_table
        



def preprocess_activity_data(df):
    """
    Reduce a raw actigraphy table to the rows and columns used for feature extraction.

    The index of df is turned back into a regular column, only the columns
    'mesaid', 'linetime' and 'activity' are kept, and rows without a logged
    activity value are dropped. The remaining rows keep their original row labels.

    The input DataFrame is left untouched (no in-place operations), so the
    function can safely be called more than once on the same data.

    df: pandas.DataFrame with the raw actigraphy records of one subject
    return: new pandas.DataFrame with the columns 'mesaid', 'linetime', 'activity'
    raise: KeyError if one of the three required columns is missing
    """

    # reset_index() returns a new frame; the caller's DataFrame is not modified
    full_df = df.reset_index()

    print("Reducing dataset columns - to include only relevant columns")
    # a list is the unambiguous way to select several columns by label
    _df = full_df.loc[:, ['mesaid', 'linetime', 'activity']]

    print("Reducing dataset rows - to include only records with activity logged")
    # re-binding instead of inplace=True avoids SettingWithCopyWarning on the column subset
    _df = _df.dropna(subset=['activity'])

    print("Count data reduced from:", full_df.shape, "to:", _df.shape)

    return _df


def tsfresh_feature_extractor(activity_data, extractor_settings=None, n_jobs=4):
    """
    Auto features extractor from a pandas.DataFrame containing the different time series (e.g., "activity")

    Rows are grouped by the 'mesaid' column and ordered by the 'linetime' column;
    the features are calculated by tsfresh on the series as given (no rolling /
    windowing is applied).

    activity_data: pandas.DataFrame with the columns 'mesaid', 'linetime' and the
                   time series values
    extractor_settings: None, MinimalFCParameters() or EfficientFCParameters();
                        handed to tsfresh as default_fc_parameters
    n_jobs: number of processes tsfresh uses for the calculation (default: 4)

    return: pandas.DataFrame with the calculated features
    raise: AssertionError if extractor_settings is not one of the supported values
    """

    assert extractor_settings in [None, MinimalFCParameters(), EfficientFCParameters()], "specified extractor_settings is not supported"

    print("\nStrating to extract features..\nExtractor settings:", type(extractor_settings).__name__)

    # Rolled (windowed) copies of the data are deliberately not built here:
    # extract_features() works on activity_data itself, so building them would
    # only cost time and memory without affecting the returned features.
    extracted_features = extract_features(activity_data,
                                          column_id="mesaid",
                                          column_sort="linetime",
                                          default_fc_parameters=extractor_settings,
                                          n_jobs=n_jobs)
    print("Feature extraction completed! ", extracted_features.shape[1], "features were calculated.")

    return extracted_features


def test_tsfresh_feature_extractor(data, extractor_settings):
    """
    Quick trial run of tsfresh_feature_extractor on the first 100 rows of data.

    data: pandas.DataFrame in the format expected by tsfresh_feature_extractor
    extractor_settings: tsfresh settings forwarded unchanged to tsfresh_feature_extractor

    return: pandas.DataFrame with the features calculated on the 100-row subset
    """

    print("test_tsfresh_feature_extractor")

    # work on a small subset of rows to keep the trial run short, and honor the
    # settings chosen by the caller instead of always extracting with None
    extracted_features = tsfresh_feature_extractor(activity_data=data.head(100),
                                                   extractor_settings=extractor_settings)

    return extracted_features


def prepare_mesa_file_name_format(i):
    """
    Build the path of the actigraphy CSV file that belongs to subject number i.

    i: subject number (int or str); it is left-padded with zeros to 4 characters
    return: path of the file 'mesa-sleep-<padded i>.csv' inside the module-level
            directory `dir`
    """

    # e.g. 7 -> '0007'
    padded_id = str(i).zfill(4)
    file_name = 'mesa-sleep-' + padded_id + '.csv'

    return os.path.join(dir, file_name)



# Load raw data (activity counts) and calculate features with TSFRESH

In [None]:

def main():
    """
    The program runs on the CSV files one-by-one
    At each iteration: load raw data, pre-process and extract features by tsfresh.
    After the last iteration the features of all subjects are saved to one file.

    input: CSV file -- raw activity data (actigraphy) of a given subject
    output: CSV file -- features extracted for ALL subjects

    Subjects whose input file is missing are skipped. If no subject could be
    processed at all, nothing is written. The output folder is created when it
    does not exist yet.

    return: None
    """

    # settings for TSFRESH.  None / MinimalFCParameters() / EfficientFCParameters()
    # None will generate all features (heavy computations, take longer time)
    extractor_settings = MinimalFCParameters()

    # list of dataframes -- holding a row of features for every subject
    data_to_write = []

    # number of subjects in the study
    n = 26  # TODO: adjust this to the relevant number

    # file numbering starts at 1; the for loop advances the index on its own
    for i in range(1, n + 1):

        input_path = prepare_mesa_file_name_format(i)

        # Load sleep dataset - activity counts (CSV)
        raw_data = read_activity_data(file_path=input_path)

        # file of this subject is missing -- go to the next one
        if raw_data is None:
            continue

        # Pre-processing
        activity_data = preprocess_activity_data(df=raw_data)

        # TSFRESH, note to set "extractor_settings"
        subject_features = tsfresh_feature_extractor(activity_data,
                                                     extractor_settings=extractor_settings)

        # keep extracted features of the given subject in a list of dataframes
        data_to_write.append(subject_features)

    # pd.concat raises ValueError on an empty list, so stop here when nothing was loaded
    if not data_to_write:
        print("\n\nNo input files were processed -- nothing to save.")
        return None

    # transform list of dataframes into a single dataframe (i.e., the final dataset)
    dataset = pd.concat(data_to_write)

    # Save dataset to file; make sure the target folder exists before writing
    output_dir = os.path.join(main_dir, 'tsfresh-outputs')
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, 'mesa-sleep-tsfresh-features.csv')
    dataset.to_csv(output_file)
    print("\n\nSaving final dataset file -- dataframe with extracted features saved at:", output_file)

    return None


# Run the whole pipeline only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


Reading dataset from: /Users/nancy/PycharmProjects/Teva/sleep-wake/data/mesa/actigraphy/mesa-sleep-0001.csv
Reducing dataset columns - to include only relevant columns
Reducing dataset rows - to include only records with activity logged
Count data reduced from: (34607, 15) to: (28502, 3)

Strating to extract features..
Extractor settings: MinimalFCParameters


In [None]:
# ------------------------------------------------------------------------------------------
# TEST

#test_tsfresh_feature_extractor(data=activity_data, extractor_settings=MinimalFCParameters())