In [97]:
from imports import *
import datetime
pd.set_option('display.max_rows', None)

In [98]:
def sleep_analysis(df, sleep_break=30):
    """Locate the longest run of closely spaced timeblocks in ``df``.

    Rows are visited in their existing order. A row extends the current run
    when its ``timeblock`` lies at most ``sleep_break / 10`` blocks after the
    previous row's timeblock; otherwise it opens a new run whose counter
    restarts at zero. The run with the highest counter wins (the earliest one
    on ties).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame exposing a ``timeblock`` column. The ``* 10`` and ``/ 60``
        conversions below treat one timeblock as ten minutes.
    sleep_break : int, default 30
        Largest tolerated gap between consecutive rows; it is divided by 10
        before being compared with timeblock differences.

    Returns
    -------
    tuple
        ``(sleep_max, SPT_start_time, SPT_end_time)``: the winning counter
        converted with ``* 10 / 60`` and rounded to two decimals, followed by
        the first and last timeblock of that run formatted as ``"HH:MM"``.

    NOTE(review): the "previous" timeblock is seeded with 0, so a first row
    that lies within the allowed gap of block 0 extends a run reported as
    starting at 00:00 -- confirm this is intended.
    """
    def _as_clock(block):
        # timeblock -> minutes since midnight -> zero-padded "HH:MM"
        hours, minutes = divmod(int(block * 10), 60)
        return f"{hours:02d}:{minutes:02d}"

    allowed_gap = sleep_break / 10

    last_block = 0
    run_len = 0
    run_first = run_last = 0
    best_len = 0
    best_first = best_last = 0

    for record in df.itertuples(index=False):
        block = record.timeblock

        if np.round(block - last_block, 2) <= allowed_gap:
            # close enough to the previous row: the run keeps growing
            run_len += 1
        else:
            # gap too large: this row becomes the first of a new run
            run_len = 0
            run_first = block
        run_last = block

        # strict comparison keeps the earliest run when lengths tie
        if run_len > best_len:
            best_len = run_len
            best_first = np.round(run_first, 2)
            best_last = np.round(run_last, 2)

        last_block = block

    # the sleep window detected by the algorithm
    return np.round(best_len * 10 / 60, 2), _as_clock(best_first), _as_clock(best_last)

In [99]:
# Folder that holds the sensor CSV exports.
dataPath = "/csv/backup_frigg1"

# One input file per sensor stream. The "_temp" files are the active inputs;
# the alternative file names are kept in the trailing comments for reference.
dataFilename1 = "Lock_state_temp.csv"       # screen state   (alt: "Lock_state.csv")
dataFilename2 = "Brightness_temp.csv"       # brightness     (alt: "Brightness.csv")
dataFilename3 = "Accelerometer_temp.csv"    # accelerometer  (alt: "Accelerometer.csv")
dataFilename4 = "Gyroscope_temp.csv"        # gyroscope      (alt: "Gyroscope.csv")

Preprocessing screen-state data

In [100]:
# Load the pipe-separated lock-state export and name its columns.
# NOTE(review): unlike the other sensor reads in this notebook, no
# header=None is passed here, so the first line of the file is consumed as a
# header before the names are overwritten -- confirm the file really starts
# with a header row.
header_list = ["id", "participant", "attribute", "lockstate", "timestamp", "uploadtimestamp"]
screenstate = pd.read_csv(os.path.join(dataPath, dataFilename1), sep="|")
screenstate.columns = header_list

# Exclude participants whose id contains "DND" or "SM"
# (DND and social media participants).
for excluded_tag in ("DND", "SM"):
    screenstate = screenstate[~screenstate["participant"].str.contains(excluded_tag)]

# Parse the timestamps as UTC, then express them in Halifax local time.
screenstate["timestamp"] = (
    pd.to_datetime(screenstate["timestamp"], utc=True)
    .dt.tz_convert(tz='America/Halifax')
)

# Helper columns for feature extraction: calendar date and the index of the
# ten-minute block within the day (hour * 6 + minute // 10).
screenstate["date"] = screenstate["timestamp"].dt.date
screenstate["timeblock"] = (
    screenstate["timestamp"].dt.hour * 6
    + (screenstate["timestamp"].dt.minute // 10).astype(int)
)

# Order by participant and time, drop repeated events (keeping the last
# occurrence) and discard the columns that are no longer needed.
screenstate = (
    screenstate
    .sort_values(["participant", "timestamp"])
    .reset_index(drop=True)
    .drop_duplicates(subset=["participant", "timestamp", "lockstate"], keep="last")
    .drop(columns=["id", "timestamp", "attribute", "uploadtimestamp"])
)

# Keep only the rows that indicate screen usage.
screenstate = screenstate[screenstate["lockstate"].isin(["LOCKED", "UNLOCKED"])]

In [101]:
# Preview the first rows of the cleaned lock-state frame.
screenstate.head()

Unnamed: 0,participant,lockstate,date,timeblock
0,PROSIT0003,UNLOCKED,2020-07-24,75
1,PROSIT0003,UNLOCKED,2020-07-24,76
2,PROSIT0003,UNLOCKED,2020-07-24,77
3,PROSIT0003,UNLOCKED,2020-07-24,77
4,PROSIT0003,UNLOCKED,2020-07-25,55


In [102]:
# Impute missing values: every participant-day is expanded to the full set of
# 144 timeblocks (24 hours * 6 ten-minute blocks) and the gaps are filled from
# the neighbouring observations of that same day.
participants = screenstate.participant.unique()

timeblock = list(range(144))
allMinutes = pd.DataFrame({"timeblock": timeblock})

# Collect the per-day frames and concatenate once at the end; concatenating
# inside the loop would re-copy the accumulated frame on every iteration.
imputed_days = []

for participant in participants:
    screenstate_participant = screenstate[screenstate.participant == participant].copy()
    dates = screenstate_participant.date.unique()

    for date in dates:
        screenstate_participant_date = screenstate_participant[screenstate_participant.date == date].copy()
        # Right merge keeps one row per timeblock in allMinutes (plus any
        # repeated observations), leaving NaN where nothing was recorded.
        screenstate_participant_date = pd.merge(screenstate_participant_date, allMinutes, how="right", on="timeblock")

        # Forward fill first, then back fill the blocks before the first
        # observation of the day.
        screenstate_participant_date = screenstate_participant_date.ffill().bfill()
        imputed_days.append(screenstate_participant_date)

# pd.concat raises on an empty list, so fall back to an empty frame when
# there are no participants.
screenstateImputed = pd.concat(imputed_days, axis=0) if imputed_days else pd.DataFrame()

In [103]:
# Preview the first rows of the imputed lock-state frame.
screenstateImputed.head()

Unnamed: 0,participant,lockstate,date,timeblock
0,PROSIT0003,UNLOCKED,2020-07-24,0
1,PROSIT0003,UNLOCKED,2020-07-24,1
2,PROSIT0003,UNLOCKED,2020-07-24,2
3,PROSIT0003,UNLOCKED,2020-07-24,3
4,PROSIT0003,UNLOCKED,2020-07-24,4


Preprocessing brightness data

In [19]:
# Load the headerless, pipe-separated brightness export and name its columns.
header_list = ["id", "participant", "attribute", "brightnesslevel", "timestamp", "uploadtimestamp"]
brightness = pd.read_csv(os.path.join(dataPath, dataFilename2), sep="|", header=None)
brightness.columns = header_list

# Parse the timestamps as UTC, then express them in Halifax local time.
brightness["timestamp"] = (
    pd.to_datetime(brightness["timestamp"], utc=True)
    .dt.tz_convert(tz='America/Halifax')
)

# Helper columns for feature extraction: calendar date and the index of the
# ten-minute block within the day (hour * 6 + minute // 10).
brightness["date"] = brightness["timestamp"].dt.date
brightness["timeblock"] = (
    brightness["timestamp"].dt.hour * 6
    + (brightness["timestamp"].dt.minute // 10).astype(int)
)

# Order by participant and time, drop repeated readings (keeping the last
# occurrence) and discard the columns that are no longer needed.
brightness = (
    brightness
    .sort_values(["participant", "timestamp"])
    .reset_index(drop=True)
    .drop_duplicates(subset=["participant", "timestamp", "brightnesslevel"], keep="last")
    .drop(columns=["id", "timestamp", "attribute", "uploadtimestamp"])
)

# Summarise the brightness level per participant, day and timeblock.
brightness = (
    brightness
    .groupby(["participant", "date", "timeblock"])
    .aggregate(["mean", "std", "min", "max"])
    .reset_index()
)

# Replace the two-level column index produced by the aggregation with flat names.
header_list = ["participant", "date", "timeblock", "brightness_mean", "brightness_std", "brightness_min", "brightness_max"]
brightness.columns = header_list
print("Preprocessing completed for brightness data")

Preprocessing completed for brightness data


Preprocessing accelerometer data

In [114]:
# Load the headerless, pipe-separated accelerometer export and name its columns.
header_list = ["id", "participant", "attribute", "accx", "accy", "accz", "timestamp", "uploadtimestamp"]
accelerometer = pd.read_csv(os.path.join(dataPath, dataFilename3), sep="|", header=None)
accelerometer.columns = header_list

# Parse the timestamps as UTC, then express them in Halifax local time.
accelerometer["timestamp"] = (
    pd.to_datetime(accelerometer["timestamp"], utc=True)
    .dt.tz_convert(tz='America/Halifax')
)

# Helper columns for feature extraction: calendar date and the index of the
# ten-minute block within the day (hour * 6 + minute // 10).
accelerometer["date"] = accelerometer["timestamp"].dt.date
accelerometer["timeblock"] = (
    accelerometer["timestamp"].dt.hour * 6
    + (accelerometer["timestamp"].dt.minute // 10).astype(int)
)

# Magnitude of the acceleration vector (plain Euclidean norm of the three axes).
# ENMO (Euclidean Norm Minus One, with negative values rounded to zero) is
# NOT applied here: the value is the raw norm, without the "minus one" step.
accelerometer["acc"] = np.sqrt(
    (accelerometer["accx"]**2) + (accelerometer["accy"]**2) + (accelerometer["accz"]**2)
)

# Order by participant and time, drop repeated samples (keeping the last
# occurrence), then discard the bookkeeping columns and the per-axis readings
# now summarised by `acc`.
accelerometer = (
    accelerometer
    .sort_values(["participant", "timestamp"])
    .reset_index(drop=True)
    .drop_duplicates(subset=["participant", "timestamp", "accx", "accy", "accz"], keep="last")
    .drop(columns=["id", "timestamp", "attribute", "uploadtimestamp", "accx", "accy", "accz"])
)

# Mean magnitude per participant, day and timeblock.
accelerometer = (
    accelerometer
    .groupby(["participant", "date", "timeblock"])
    .agg(["mean"])
    .reset_index()
)

# Flatten the two-level column index: ("acc", "mean") -> "acc_mean", while the
# grouping keys (empty second level) keep their plain names.
accelerometer.columns = [
    name if stat == "" else '_'.join((name, stat)).strip()
    for name, stat in accelerometer.columns.values
]

print("Preprocessing completed for accelerometer data")

Preprocessing completed for accelerometer data


In [113]:
# ['_'.join(col).strip() for col in accelerometer.columns.values]


['participant', 'date', 'timeblock', 'acc_mean']

Preprocessing gyroscope data

In [115]:
# Load the headerless, pipe-separated gyroscope export and name its columns.
header_list = ["id", "participant", "attribute", "gyrox", "gyroy", "gyroz", "timestamp", "uploadtimestamp"]
gyroscope = pd.read_csv(os.path.join(dataPath, dataFilename4), sep="|", header=None)
# Work on a 0.1 % random subsample of the rows. The seed is fixed so that
# re-running the cell selects the same rows and yields the same features.
gyroscope = gyroscope.sample(frac=0.001, random_state=42)
gyroscope.columns = header_list

# Parse the timestamps as UTC, then express them in Halifax local time.
gyroscope["timestamp"] = pd.to_datetime(gyroscope["timestamp"], utc=True).dt.tz_convert(tz='America/Halifax')

# Helper columns for feature extraction: calendar date and the index of the
# ten-minute block within the day (hour * 6 + minute // 10).
# No string-valued helper column (such as a formatted time of day) is added:
# it would reach the mean aggregation below, which cannot average strings.
gyroscope["date"] = gyroscope["timestamp"].dt.date
gyroscope["timeblock"] = (gyroscope["timestamp"].dt.hour * 6) + (np.floor(gyroscope["timestamp"].dt.minute / 10)).astype(int)

# sort data, remove duplicates and drop unnecessary columns
gyroscope = gyroscope.sort_values(["participant", "timestamp"]).reset_index(drop=True)
gyroscope = gyroscope.drop_duplicates(subset=["participant", "timestamp", "gyrox", "gyroy", "gyroz"], keep="last")
gyroscope = gyroscope.drop(columns=["id", "timestamp", "attribute", "uploadtimestamp"])

# Magnitude of the gyroscope vector (plain Euclidean norm of the three axes).
gyroscope["gyr"] = np.sqrt((gyroscope["gyrox"]**2) + (gyroscope["gyroy"]**2) + (gyroscope["gyroz"]**2))

# The per-axis readings are now summarised by `gyr`.
gyroscope = gyroscope.drop(columns=["gyrox", "gyroy", "gyroz"])

# Mean magnitude per participant, day and timeblock; `gyr` is the only
# non-key column left, so the result has a single feature column.
gyroscope = gyroscope.groupby(["participant", "date", "timeblock"]).agg(["mean"]).reset_index()
# Flatten the two-level column index: ("gyr", "mean") -> "gyr_mean", while the
# grouping keys (empty second level) keep their plain names.
gyroscope.columns = ['_'.join(col).strip() if col[1] != "" else col[0] for col in gyroscope.columns.values]

print("Preprocessing completed for gyroscope data")

Preprocessing completed for gyroscope data
