In [198]:
import pandas as pd
import numpy

In [6]:
raw_df = pd.read_hdf('Machine Learning\data.h5', 'raw')

In [62]:
print(raw_df)

            timestamp  foreperiod_start  foreperiod_end  response_received  \
0 2016-11-06 15:58:13          57982935        57986553           57986889   

   subject  trial  test  requested_foreperiod  actual_foreperiod  \
0        1      0    80                  3607               3618   

   response_time note  tag garbage_collection  
0            336  NaN  NaN              False  


In [11]:
def get_quantile(data, q):
    """Takes series of values and returns quantile limit as well as the mean of the values above the quantile.
    data: Data as pandas Series.
    q: Quantile (0.75 -> 75%)
    returns: quantile limit, mean value of elements above quantile limit"""
    quantile_limit = data.quantile(q=q)
    quantile_mean = data[data >= quantile_limit].mean()
    return quantile_limit, quantile_mean

In [15]:
def compute_features(test_df, verbose=False):
    """ Takes PVT test results and returns feature vector as a result.
    test_df: Dataframe containing PVT test results.
    Returns: Series containing the feature vector.
    """
    test_time = test_df.timestamp.iloc[0]
    n = test_df.shape[0]
    positive_data = test_df[test_df.response_time > 0] # drop all "too early samples"
    n_positive = positive_data.shape[0]
    positive_mean = positive_data.response_time.mean()
    positive_median = positive_data.response_time.median()
    positive_std = positive_data.response_time.std()
    q50_lim, q50_mean = get_quantile(positive_data.response_time, 0.50)
    q75_lim, q75_mean = get_quantile(positive_data.response_time, 0.75)
    q90_lim, q90_mean = get_quantile(positive_data.response_time, 0.90)
    q95_lim, q95_mean = get_quantile(positive_data.response_time, 0.95)
    features = pd.Series({'Test_time': test_time, 
                          'Subject': test_df.subject.iloc[0], 
                          'Test_nr': test_df.test.iloc[0], 
                          'n_total': n,
                          'n_positive': n_positive,
                          'positive_mean': positive_mean,
                          'positive_median': positive_median,
                          'positive_std': positive_std,
                          'q50_lim': q50_lim, 
                          'q75_lim': q75_lim,
                          'q90_lim': q90_lim,
                          'q95_lim': q95_lim,
                          'q50_mean': q50_mean,
                          'q75_mean': q75_mean,
                          'q90_mean': q90_mean,
                          'q95_mean': q95_mean})
    if verbose: print(features)
    return features

In [16]:
feature_df = pd.DataFrame()

for subject_id, subject_df in raw_df.groupby(raw_df.subject):
    for test_id, test_df in subject_df.groupby(subject_df.test):
        feature_df = feature_df.append(compute_features(test_df), ignore_index=True)
        feature_df.reset_index(inplace=True, drop=True)

In [18]:
# Compute the time of day as a float
h = feature_df.Test_time.apply(lambda x: x.hour)
m = feature_df.Test_time.apply(lambda x: x.minute)
feature_df['time_of_day'] = h + (m/60.0)

In [23]:
print(feature_df[:1])

   Subject  Test_nr           Test_time  n_positive  n_total  positive_mean  \
0      1.0     28.0 2016-10-31 07:24:08        40.0     43.0        346.725   

   positive_median  positive_std  q50_lim    q50_mean  q75_lim    q75_mean  \
0            327.0       85.6723    327.0  397.238095    364.0  449.727273   

   q90_lim  q90_mean  q95_lim  q95_mean  time_of_day  
0    431.6    559.75   497.65     633.0          7.4  


In [24]:
labels_df = pd.read_hdf('Machine Learning\data.h5', 'labels')

In [119]:
print(labels_df[:1])

   Alarmclock  Alcohol  Caffeine  Food  Medication  Nicotine  Participant_ID  \
0           1        0         0     0           0         0               1   

   Sleep  Sports       Time  Workday  
0      1       0 2016-10-31        1  


In [216]:
#Some test code - before clearing
for p in set(labels_df['Participant_ID']):
    arrtryW = []
    arrtryF = []
    datesleeparr = []
    sleepper = 0
    for i in range (0,len(labels_df)):
        if labels_df['Participant_ID'][i]==p:
        # Variables
            datetime = labels_df['Time'][i]
            date = datetime.date()
            hour = datetime.hour
            mins = datetime.minute
            time = hour + mins/60
            mc=0
            
        # WORKDAYS COUNTING    
            if labels_df['Workday'][i]==1:
                arrtryW.append(date)
            if labels_df['Workday'][i]==0:
                arrtryF.append(date)
        # END OF WORKDAYS COUNTING
    
            if labels_df['Sleep'][i]==1:
                sleepper+=1
                if sleepper==1:
                    gosleepdt = datetime
                    gosleeptime = time
            else:
                if labels_df['Sleep'][i]==0 and sleepper!=0:
                    mc = (gosleeptime + sleepper*0.125)
                    datesleeparr.append([p, date, sleepper*0.25, gosleeptime, time,sleepper*0.125, mc, mc%24, labels_df['Alarmclock'][i]])
                    sleepper = 0
                   
    print('Part: ',p)
    #print(sorted(set(arrtryW)))
    #print(sorted(set(arrtryF)))
    #print(len(set(arrtryW)))
    #print(len(set(arrtryF)))
    for item in datesleeparr:
        print(item) 


Part:  1
[1, datetime.date(2016, 10, 31), 7.5, 0.0, 7.5, 3.75, 3.75, 3.75, 1]
[1, datetime.date(2016, 11, 1), 9.0, 0.0, 9.0, 4.5, 4.5, 4.5, 0]
[1, datetime.date(2016, 11, 2), 8.75, 23.0, 7.75, 4.375, 27.375, 3.375, 1]
[1, datetime.date(2016, 11, 3), 9.25, 22.5, 7.75, 4.625, 27.125, 3.125, 1]
[1, datetime.date(2016, 11, 4), 8.5, 0.75, 9.25, 4.25, 5.0, 5.0, 1]
[1, datetime.date(2016, 11, 5), 8.75, 2.25, 11.0, 4.375, 6.625, 6.625, 1]
[1, datetime.date(2016, 11, 6), 10.5, 23.5, 10.0, 5.25, 28.75, 4.75, 1]
[1, datetime.date(2016, 11, 7), 5.75, 22.25, 4.0, 2.875, 25.125, 1.125, 1]
[1, datetime.date(2016, 11, 8), 8.0, 23.0, 7.0, 4.0, 27.0, 3.0, 1]
Part:  2
[2, datetime.date(2016, 10, 31), 6.0, 0.0, 6.0, 3.0, 3.0, 3.0, 1]
[2, datetime.date(2016, 11, 1), 6.25, 0.75, 7.0, 3.125, 3.875, 3.875, 1]
[2, datetime.date(2016, 11, 2), 6.0, 1.0, 7.0, 3.0, 4.0, 4.0, 0]
[2, datetime.date(2016, 11, 4), 7.0, 23.75, 6.75, 3.5, 27.25, 3.25, 1]
[2, datetime.date(2016, 11, 5), 7.0, 23.75, 6.75, 3.5, 27.25, 3.25,

In [222]:
# Output arrays
particMSFsc = []
particMSFperday = []

#getting sleep info before cleaning
for p in set(labels_df['Participant_ID']):
    datesleeparr = []
    sleepper = 0
    for i in range (0,len(labels_df)):
        if labels_df['Participant_ID'][i]==p:
        # Variables
            datetime = labels_df['Time'][i]
            date = datetime.date()
            hour = datetime.hour
            mins = datetime.minute
            time = hour + mins/60
            mc=0
               
            if labels_df['Sleep'][i]==1:
                sleepper+=1
                if sleepper==1:
                    gosleepdt = datetime
                    gosleeptime = time
            else:
                if labels_df['Sleep'][i]==0 and sleepper!=0:
                    mc = (gosleeptime + sleepper*0.125)%24
                    datesleeparr.append([p, date, sleepper*0.25, gosleeptime, time, mc, labels_df['Alarmclock'][i], labels_df['Workday'][i]])
                    sleepper = 0
                   
    #print('Part: ',p)
    
    # Merging sleep periods, gap between which is less than 45 mins
    datesleeparrM = []
    for k in range (0, len(datesleeparr)-1):
        delt = (datesleeparr[k+1][3]-datesleeparr[k][4])
        if ((delt<0) and (24 + delt)<=0.75 and datesleeparr[k+1][1].day-datesleeparr[k][1].day<=1):
            datesleeparr[k+1][3]=datesleeparr[k][3]
            datesleeparr[k+1][2]=datesleeparr[k+1][2]+delt+24+datesleeparr[k][2]
            datesleeparr[k+1][5] =(datesleeparr[k+1][3] + datesleeparr[k+1][2]*0.5)%24
        else:
            if (delt<=0.75 and delt>=0 and datesleeparr[k+1][1].day-datesleeparr[k][1].day<=1):
                datesleeparr[k+1][3]=datesleeparr[k][3]
                datesleeparr[k+1][2]=datesleeparr[k+1][2]+delt+datesleeparr[k][2]
                datesleeparr[k+1][5] =(datesleeparr[k+1][3] + datesleeparr[k+1][2]*0.5)%24
            else:
                datesleeparrM.append(datesleeparr[k])
    datesleeparrM.append(datesleeparr[len(datesleeparr)-1])
    
    # Cleaning short sleep periods per day
    datesleeparr = []    
    a=datesleeparrM[0]
    for k in range (0, len(datesleeparrM)-1):
        if datesleeparrM[k][1]==datesleeparrM[k+1][1]:
            if datesleeparrM[k][2]>datesleeparrM[k+1][2]:
                a=datesleeparrM[k]
            else:
                a=datesleeparrM[k+1]
        else:
            datesleeparr.append(a)
            a = datesleeparrM[k+1] 
    datesleeparr.append(a)

    #for item in datesleeparr:
    #    print(item) 
    
    #---------------------
    # Counting MSFsc
    #---------------------
    
    durWD = 0
    durFD = 0
    countWD = 0
    countFD = 0

    for k1 in range (0,7):
        if datesleeparr[k1][7]==1:
            durWD += datesleeparr[k1][2]
            countWD +=1
        else:
            durFD += datesleeparr[k1][2]
            countFD +=1
    if countWD!=0:
        sdw = durWD/countWD
    else:
        sdw = 0
    if countFD!=0:
        sdf = durFD/countFD
    else:
        sdf = 0
        
    sdweek = (durWD+durFD)/7
    
    MSFmax = 0
    DurMax = 0
    for k1 in range (0,len(datesleeparr)):
        if datesleeparr[k1][6]==0:
            if datesleeparr[k1][2]>DurMax:
                MSFmax = datesleeparr[k1][5]
                DurMax = datesleeparr[k1][2]
    #print('MSFmax: ', MSFmax)        
    
    if sdw<sdf:
        MSFsc =  MSFmax - (sdf-sdweek)/2
    else: MSFsc = MSFmax
        
    particMSFsc.append([p, MSFsc])
    for k1 in range (0,len(datesleeparr)):
        particMSFperday.append([p, datesleeparr[k1][1], datesleeparr[k1][5]])
    
    #print('SDw: ', sdw, durWD, countWD)
    #print('SDf: ', sdf, durFD, countFD)
    #print('SDweek: ', sdweek)
    #print('MSFsc: ', MSFsc)

In [224]:
print(particMSFsc)
print(" ")
for item in particMSFperday:
    print(item)

[[1, 4.238095238095238], [2, 3.107142857142857], [3, 5.928571428571429], [4, 6.375], [5, 5.0], [7, 4.0], [8, 3.8571428571428568]]
 
[1, datetime.date(2016, 10, 31), 3.75]
[1, datetime.date(2016, 11, 1), 4.5]
[1, datetime.date(2016, 11, 2), 3.375]
[1, datetime.date(2016, 11, 3), 3.125]
[1, datetime.date(2016, 11, 4), 5.0]
[1, datetime.date(2016, 11, 5), 6.625]
[1, datetime.date(2016, 11, 6), 4.75]
[1, datetime.date(2016, 11, 7), 1.125]
[1, datetime.date(2016, 11, 8), 3.0]
[2, datetime.date(2016, 10, 31), 3.0]
[2, datetime.date(2016, 11, 1), 3.875]
[2, datetime.date(2016, 11, 2), 4.0]
[2, datetime.date(2016, 11, 4), 3.25]
[2, datetime.date(2016, 11, 5), 3.25]
[2, datetime.date(2016, 11, 6), 3.75]
[2, datetime.date(2016, 11, 7), 3.25]
[3, datetime.date(2016, 10, 31), 4.5]
[3, datetime.date(2016, 11, 1), 6.5]
[3, datetime.date(2016, 11, 2), 6.125]
[3, datetime.date(2016, 11, 3), 4.875]
[3, datetime.date(2016, 11, 4), 7.5]
[3, datetime.date(2016, 11, 5), 8.125]
[3, datetime.date(2016, 11, 6

In [196]:
test_time_try = labels_df['Time'][1]
#test_time_try = labels_df.Time.timestamp.iloc[0]
h = test_time_try.date()
#h = test_time_try.apply(lambda x: x.hour)
#m = test_time_try.apply(lambda x: x.minute)
print(h)
print(str(h))
t = '2016-10-31'
t1 = str(h)
#try3 = t.strftime('%Y-%m-%d %H:%M:%S')
st3 = date.strftime(t)
try123 = test_time_try.day
print(try123)

st = datetime.datetime.strptime(t,'%Y-%m-%d')

print(try123)
print(st3)
print(st)
print('jc')
st1 = datetime.datetime.strptime(t1,'%Y-%m-%d')
print(st1)

import time
import datetime

s = "01/12/2011"

2016-10-31
2016-10-31
31


AttributeError: 'Timestamp' object has no attribute 'datetime'