# Setup

In [1]:
%matplotlib inline

import numpy as np
import scipy.signal as sig
import scipy.stats as stat
import matplotlib.pyplot as plt
import seaborn as sns
import os
import h5py
import pandas as pd

from pandas import DataFrame,Series,read_table

In [2]:
# --- Output switches -------------------------------------------------------
saveAsPath = './'    # prefix prepended to the names of the csv files written by this notebook
saveData = True      # whether or not to save csv files
savePlots = False    # whether or not to save plots


In [3]:
# Dropbox data folder (not referenced by the cells visible in this part of the notebook)
path = '/Users/svcanavan/Dropbox/Coding in progress/00_BudgieSleep/Data_copies/'

# Root folder that holds the scored recordings of every bird
scoredDataRoot = '/Volumes/data/svcanavan/Documents/00_BudgieSleep/01_BudgieSleep_ScoredData/'

# One sub-folder per bird, listed in bird order
birdFolders = ['01_BudgieFemale_green1',
               '02_BudgieMale_yellow1',
               '03_BudgieFemale_white1',
               '04_BudgieMale_yellow2',
               '05_BudgieFemale_green2']

# Baseline-night recording folder of each bird
birdPaths = [scoredDataRoot + folder + '/00_Baseline_night/' for folder in birdFolders]

# Sub-folder (relative to birdPaths) containing the scored arf files of each bird
arfFilePaths = ['EEG 2 scored/',
                'EEG 3 scored/',
                'EEG 3 scored/',
                'EEG 4 scored/',
                'EEG 4 scored/']

### load BEST EEG channels - as determined during manual scoring ####
channelsToLoadEEG = [
    ['5 LEEGf-LEEGp', '6 LEEGm-LEEGp'],
    ['5 LEEGf-LEEGm', '4 LEEGf-Fgr'],
    ['4LEEGf-LEEGp', '9REEGm-REEGp'],
    ['9REEGf-REEGp', '6LEEGm-LEEGf'],
    ['4LEEGf-LEEGp', '7REEGf-REEGp'],
]

channelsToLoadEOG = [
    ['1 LEOG-Fgr', '2 REOG-Fgr'],
    ['2 LEOG-Fgr', '3 REOG-Fgr'],
    ['2LEOG-Fgr', '3REOG-Fgr'],
    ['2LEOG-Fgr', '3REOG-Fgr'],
    ['2LEOG-Fgr', '3REOG-Fgr'],
]

# 0-based indices (into the bird lists above) of the birds that have an LL recording
birds_LL = [1, 2, 3]
nBirds_LL = len(birds_LL)

# Top-level folder of each LL bird; the arf sub-folder is only listed for the first of them
LL_FilePaths = [scoredDataRoot + birdFolders[i] + '/' for i in birds_LL]
LL_arfFilePaths = ['01_Constant_light/EEG 2 preprocessed/']

# Lights off / on times in seconds from beginning of file, one entry per bird.
# Bird 3 gets 8 hours added b/c file starts at 8:00 instead of 16:00
lightsOffSec = np.array([7947, 9675, 9861 + 8*3600, 9873, 13467])
lightsOnSec = np.array([46449, 48168, 48375 + 8*3600, 48381, 52005])

# Recording / scoring constants
sr = 200
epochLength = 3
scalingFactor = (2**15)*0.195       # scaling/conversion factor from amplitude to uV (when recording arf from jrecord)

# Manual scoring codes: wake, drowsy, unihem sleep, intermediate sleep, SWS, REM
stages = ['w', 'd', 'u', 'i', 's', 'r']
stagesSleep = ['u', 'i', 's', 'r']

# Video scoring codes: moving wake, quiet wake, drowsy, sleep, unclear
stagesVideo = ['m', 'q', 'd', 's', 'u']

## Path to scores formatted as CSVs
formatted_scores_path = scoredDataRoot + 'formatted_scores/'


Calculate general variables

In [4]:
# Bird bookkeeping: how many birds, and their 0-based indices
nBirds = len(birdPaths)
birds = np.arange(nBirds)

# Epoch length in data points (epochLength multiplied by the sampling rate sr)
epochLengthPts = sr * epochLength

# Lights off / on expressed in epochs (seconds from beginning of file divided by epochLength)
lightsOffEp, lightsOnEp = lightsOffSec / epochLength, lightsOnSec / epochLength

# Number of entries in stagesSleep
nStages = len(stagesSleep)

# Read in manual & video scores: DIRECTLY FROM ARF FILES

## LD

In [5]:
# LD

# Score tables, one DataFrame per bird, keyed 'Bird 1', 'Bird 2', ...
# Each table is indexed by the start datetime of the video-scored entries and
# holds the columns 'Video Label' and 'Label' (manual score).
AllScores = {}

for b in range(nBirds):

    arf_path = birdPaths[b] + arfFilePaths[b]

    # init
    all_scores_array = np.array([], dtype='str')
    all_video_scores_array = np.array([], dtype='str')
    all_datetime_array = np.array([], dtype='datetime64')
    all_video_datetime_array = np.array([], dtype='datetime64')

    # go through each arf file in order
    # (os.listdir() returns entries in arbitrary order, so sort by file name
    #  to make the concatenation order deterministic)
    arf_files = sorted(f for f in os.listdir(arf_path) if f.endswith('.arf'))

    for file in arf_files:

        # Get datetime from file name
        date = file.split('_')[2]
        if b == 0:      # fill in known time for first bird
            if date == '2014-10-17':
                hours = '17'
                minutes = '32'
            else:
                hours = '08'
                minutes = '30'
        else:
            time = file.split('_')[3]
            hours = time.split('-')[0]
            minutes = time.split('-')[1]
        datetime_start = np.datetime64(date + 'T' + hours + ':' + minutes + ':06')    # assume 6-s delay in starting recording

        # open arf file; the context manager closes the HDF5 handle as soon as
        # the scoring entries have been read into memory
        with h5py.File(arf_path+file, 'r') as arffile:

            # Get channel names, find the manual scoring channels
            arf_channels = list(arffile['.'].keys())
            scores_channel = [x for x in arf_channels if 'Manual' in x][0]
            video_scores_channel = [x for x in arf_channels if 'Video' in x][0]

            # Read each scoring dataset once. dataset[()] replaces Dataset.value,
            # which was deprecated and then removed in h5py 3.0.
            manual_entries = arffile['.'][scores_channel][()]
            video_entries = arffile['.'][video_scores_channel][()]

        ### SCORES ###

        # Get scores
        arf_scores = manual_entries['name'].astype('str')   # Manual
        all_scores_array = np.append(all_scores_array, arf_scores)

        arf_video_scores = video_entries['name'].astype('str')   # Video
        all_video_scores_array = np.append(all_video_scores_array, arf_video_scores)

        ### TIMES ###

        # Get start times of each epoch in datetime format
        arf_starttimes_s = manual_entries['start']     # Manual
        arf_datetimes = datetime_start + arf_starttimes_s.astype('timedelta64[s]')
        all_datetime_array = np.append(all_datetime_array, arf_datetimes)

        arf_video_starttimes_s = video_entries['start']     # Video
        arf_video_datetimes = datetime_start + arf_video_starttimes_s.astype('timedelta64[s]')
        all_video_datetime_array = np.append(all_video_datetime_array, arf_video_datetimes)

    # Save in dataframe, in dict under bird number (built once per bird, after all files are read).
    # Left join on the video-score index: manual labels are attached where the datetimes match.
    BirdScores  = DataFrame(all_scores_array, index=all_datetime_array, columns=['Label'])
    videoScores = DataFrame(all_video_scores_array, index=all_video_datetime_array, columns=['Video Label'])
    BirdScores = videoScores.join(BirdScores)

    data_name = 'Bird ' + str(b+1)
    AllScores[data_name] = BirdScores

## LL

In [6]:
# LL

# Adds the constant-light recording to AllScores under the key 'Bird <n>_LL'.
# Only index 0 is processed: LL_arfFilePaths has a single entry.
for b in [0]:

    arf_path = LL_FilePaths[b] + LL_arfFilePaths[b]

    # init
    all_scores_array = np.array([], dtype='str')
    all_video_scores_array = np.array([], dtype='str')
    all_datetime_array = np.array([], dtype='datetime64')
    all_video_datetime_array = np.array([], dtype='datetime64')

    # go through each arf file in order
    # (os.listdir() returns entries in arbitrary order, so sort by file name
    #  to make the concatenation order deterministic)
    arf_files = sorted(f for f in os.listdir(arf_path) if f.endswith('.arf'))

    for file in arf_files:

        # Get datetime from file name
        date = file.split('_')[2]
        time = file.split('_')[3]
        hours = time.split('-')[0]
        minutes = time.split('-')[1]
        datetime_start = np.datetime64(date + 'T' + hours + ':' + minutes + ':06')    # assume 6-s delay in starting recording

        # open arf file; the context manager closes the HDF5 handle as soon as
        # the scoring entries have been read into memory
        with h5py.File(arf_path+file, 'r') as arffile:

            # Get channel names, find the manual scoring channels
            arf_channels = list(arffile['.'].keys())
            scores_channel = [x for x in arf_channels if 'Manual' in x][0]
            video_scores_channel = [x for x in arf_channels if 'Video' in x][0]

            # Read each scoring dataset once. dataset[()] replaces Dataset.value,
            # which was deprecated and then removed in h5py 3.0.
            manual_entries = arffile['.'][scores_channel][()]
            video_entries = arffile['.'][video_scores_channel][()]

        ### SCORES ###

        # Get scores
        arf_scores = manual_entries['name'].astype('str')   # Manual
        all_scores_array = np.append(all_scores_array, arf_scores)

        arf_video_scores = video_entries['name'].astype('str')   # Video
        all_video_scores_array = np.append(all_video_scores_array, arf_video_scores)

        ### TIMES ###

        # Get start times of each epoch in datetime format
        arf_starttimes_s = manual_entries['start']     # Manual
        arf_datetimes = datetime_start + arf_starttimes_s.astype('timedelta64[s]')
        all_datetime_array = np.append(all_datetime_array, arf_datetimes)

        arf_video_starttimes_s = video_entries['start']     # Video
        arf_video_datetimes = datetime_start + arf_video_starttimes_s.astype('timedelta64[s]')
        all_video_datetime_array = np.append(all_video_datetime_array, arf_video_datetimes)

    # Save in dataframe, in dict under bird number (built once, after all files are read).
    # Left join on the video-score index: manual labels are attached where the datetimes match.
    BirdScores  = DataFrame(all_scores_array, index=all_datetime_array, columns=['Label'])
    videoScores = DataFrame(all_video_scores_array, index=all_video_datetime_array, columns=['Video Label'])
    BirdScores = videoScores.join(BirdScores)

    data_name = 'Bird ' + str(b+2) + '_LL'
    AllScores[data_name] = BirdScores

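#### Convert score labels to numbers:
* epochs with no recognised manual label are marked -1 (presumably this covers epochs video-scored as 'moving', which carry no manual score)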

In [7]:
# LD #
# Adds a numeric 'Label (#)' column to every LD score table:
#   index of the matching code in `stages`, 2 for unihemispheric labels,
#   -1 where the manual label matches none of the codes (including empty labels).
for b_num in birds:

    b_name = 'Bird ' + str(b_num+1)

    # replace nan's with empty string (returns a new frame; stored back below)
    Scores = AllScores[b_name].fillna('')
    labels = Scores['Label']

    # Integer array (the original np.ones_like on the string column produced object dtype)
    Label_num = np.full(len(labels), -1, dtype=int)

    # Substring match per stage code, vectorised over all epochs.
    # Codes are applied in `stages` order, so a later code overrides an earlier one
    # when a label contains both.
    for st, stage in enumerate(stages):
        has_stage = labels.str.contains(stage, regex=False, na=False).to_numpy(dtype=bool)
        Label_num[has_stage] = st

    # Unihemispheric labels ('l' or 'g' in the label) are applied last and map to 2
    is_unihem = (labels.str.contains('l', regex=False, na=False)
                 | labels.str.contains('g', regex=False, na=False)).to_numpy(dtype=bool)
    Label_num[is_unihem] = 2

    # Save to dataframe
    Scores['Label (#)'] = Label_num
    AllScores[b_name] = Scores

#### Convert datetimes to s, ep, Zeitgeber time

In [8]:
# LD #
# For every LD bird: compute lights off/on as datetimes and add time columns
# (seconds, hours, Zeitgeber time, epoch number) to its score table.

lightsOffDatetime = np.array([], dtype='datetime64')
lightsOnDatetime = np.array([], dtype='datetime64')

for b_num in birds:

    b_name = 'Bird ' + str(b_num+1)
    Scores = AllScores[b_name]

    # Start of the recording = earliest timestamp in the table.
    # Using the minimum (rather than the first row) keeps this correct even if
    # the files were not concatenated in chronological order.
    startDatetime = Scores.index.values.min()

    # Calc lights off & on using datetime formats
    lightsOffTimedelta = lightsOffSec[b_num].astype('timedelta64[s]')
    lightsOffDatetime = np.append(lightsOffDatetime, startDatetime + lightsOffTimedelta)
    lightsOnTimedelta = lightsOnSec[b_num].astype('timedelta64[s]')
    lightsOnDatetime = np.append(lightsOnDatetime, startDatetime + lightsOnTimedelta)

    # Convert to seconds & hours from start of recording
    timedelta_ns = Scores.index.values - startDatetime
    timedelta_s  = timedelta_ns.astype('timedelta64[s]')

    time_s = timedelta_s.astype('int')
    time_hr = time_s/3600

    AllScores[b_name]['Time (s)'] = time_s
    AllScores[b_name]['Time (h)'] = time_hr

    # Convert to Zeitgeber time (lights on = 0)
    # NOTE: astype('int') truncates toward zero, so negative times that are not
    # whole multiples of epochLength round toward epoch 0.
    time_Zeit_s = time_s - lightsOnSec[b_num]
    time_Zeit_hr = time_Zeit_s/3600
    time_Zeir_ep = (time_Zeit_s/epochLength).astype('int')   # variable name (sic) kept unchanged

    AllScores[b_name]['Zeitgeber time (s)'] = time_Zeit_s
    AllScores[b_name]['Zeitgeber time (h)'] = time_Zeit_hr
    AllScores[b_name]['Zeitgeber time (ep)'] = time_Zeir_ep

    # Convert to epoch number (uses the configured epochLength instead of a hard-coded 3)
    ind_epoch = (time_s/epochLength).astype('int')

    AllScores[b_name]['Epoch #'] = ind_epoch

#### Calculate lights off in Zeitgeber time (s and hrs)
Lights on is defined as Zeitgeber time 0.

In [9]:
# Lights-off time of each bird relative to its lights-on time, in seconds and hours
lightsOffZeit_s = np.subtract(lightsOffSec, lightsOnSec)
lightsOffZeit_hr = np.true_divide(lightsOffZeit_s, 3600)

# Save to csv

In [10]:
# Write one csv per LD bird: '<saveAsPath>All_scores_Bird <n>.csv'
for b_num in birds:
    b_name = 'Bird ' + str(b_num+1)
    scores = AllScores[b_name]
    # Honour the saveData switch from the configuration cell
    if saveData:
        scores.to_csv(saveAsPath + 'All_scores_' + b_name + '.csv')

In [11]:
## LL
# Write the constant-light table of Bird 2 to '<saveAsPath>All_scores_Bird 2LL.csv'
b_name = 'Bird 2'
scores = AllScores['Bird 2_LL']
# Honour the saveData switch from the configuration cell
if saveData:
    scores.to_csv(saveAsPath + 'All_scores_' + b_name + 'LL.csv')

# LL: calculate TST

In [12]:
# Score table of the constant-light (LL) recording, stored under 'Bird 2_LL'
scores = AllScores['Bird 2_LL']

In [13]:
# Number of entries per video label
# (m: moving wake, q: quiet wake, d: drowsy, s: sleep, u: unclear)
scores['Video Label'].value_counts()

m    17478
s     4190
d     3640
q     2742
u      738
Name: Video Label, dtype: int64

## TST as a fraction of recording time

In [14]:
# Number of entries whose video label is exactly 's' (sleep)
TST_ep = int((scores['Video Label'] == 's').sum())

In [15]:
# Sleep entries divided by the total number of rows in the table
TST_ep / scores.shape[0]

0.14554675559260802

## TST, including epochs marked "unclear"

In [16]:
# Number of entries whose video label is 's' (sleep) or 'u' (unclear)
TST_w_unclear_ep = int(scores['Video Label'].isin(['s', 'u']).sum())

In [17]:
# Sleep + unclear entries divided by the total number of rows in the table
TST_w_unclear_ep / scores.shape[0]

0.17118243712658052