# Script

In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from collections import OrderedDict
%matplotlib inline

In [3]:
fps = 30      # Number of consecutive frames (rows) averaged into one one-second bin
seconds = 60  # Number of one-second bins kept per worm

# Root folder containing one sub-folder of feature files per experimental group.
# NOTE(review): absolute path on a mapped drive -- adjust when running on another machine.
data_root = r'Z:\Associative Conditioning Behavior Videos_2017 to 2018\Alex Loftus - Put your videos here\Psychfest\feat_manual_files'

# Group label -> folder with that group's feature files.
# Built from (label, sub-folder) pairs so that the ordering is guaranteed by
# OrderedDict itself instead of by a dict literal handed to it.
folder_dict = OrderedDict(
    (label, data_root + '\\' + subfolder)
    for label, subfolder in [
        ('Unc-43 unpaired', 'unc-43 unpaired'),
        ('Unc-43 paired', 'unc-43 paired'),
        ('N2 unpaired', 'wt unpaired'),
        ('N2 paired', 'wt paired'),
    ]
)

## Define functions necessary to parse worm data

In [4]:
def parseWormMovement(wormId, curWorm, frames_per_second=None, total_seconds=None):
    '''
    Convert one worm's per-frame motion modes into per-second net movement.

    Rows are grouped into consecutive bins of `frames_per_second` rows. For each
    complete bin the result is (number of rows with motion mode 1.0 minus number
    of rows with motion mode -1.0) / frames_per_second. Rows with any other value
    (including NaN) contribute 0.

    Parameters
    ----------
    wormId : identifier stored as the first element of the returned list.
    curWorm : mapping/DataFrame whose 'motion_modes' entry holds one value per
        frame for this worm, in frame order.
    frames_per_second : int, optional
        Rows per one-second bin. Defaults to the notebook-level `fps`.
    total_seconds : int, optional
        Number of bins to return. Defaults to the notebook-level `seconds`.

    Returns
    -------
    list
        [wormId, bin_1, ..., bin_total_seconds]. Bins for which a full
        `frames_per_second` rows are not available are NaN.
    '''
    if frames_per_second is None:
        frames_per_second = fps
    if total_seconds is None:
        total_seconds = seconds

    modes = np.asarray(curWorm['motion_modes'])
    worm = [wormId]  # Output row starts with the worm ID

    # Every bin covers exactly `frames_per_second` rows. (The previous counter-based
    # loop silently skipped the row on which it flushed a bin, so each "second"
    # consumed one row too many and a final, exactly-full bin was never emitted.)
    complete_seconds = min(total_seconds, len(modes) // frames_per_second)
    for sec in range(complete_seconds):
        window = modes[sec * frames_per_second:(sec + 1) * frames_per_second]
        netMovement = int((window == 1.0).sum()) - int((window == -1.0).sum())
        worm.append(netMovement / frames_per_second)

    # Pad with NaN: the tracker output does not always cover the whole recording.
    worm.extend([np.nan] * (total_seconds - complete_seconds))

    return worm
#end parseWormMovement

In [5]:
def parseFile(filename):
    '''
    Return a DataFrame of per-second motion data for every worm in one HDF5 file.

    Reads the 'features_timeseries' table from `filename`, splits it by
    'worm_index' and runs `parseWormMovement` on each worm.

    Parameters
    ----------
    filename : str
        Path to an HDF5 file containing a 'features_timeseries' table with
        'worm_index' and 'motion_modes' columns.

    Returns
    -------
    pandas.DataFrame
        One row per worm (in order of first appearance in the table). Columns are
        'Worm ID' followed by '1' ... str(seconds); values are rounded to 2 decimals.
    '''
    # Open read-only and close the store as soon as the table has been loaded,
    # so the file handle is not leaked.
    with pd.HDFStore(filename, mode='r') as hdf:
        ts = hdf['features_timeseries']  # all worm data

    # sort=False keeps worms in order of first appearance; grouping by column name
    # avoids assuming that 'worm_index' is the first column of the table.
    wormData = [
        parseWormMovement(wormId, curWorm)
        for wormId, curWorm in ts.groupby('worm_index', sort=False)
    ]

    column_names = ['Worm ID'] + [str(sec) for sec in range(1, seconds + 1)]

    return pd.DataFrame(wormData, columns=column_names).round(2)  # Final DataFrame before excel
#end parseFile


## Making a big DF for all worm data files

### Define functions to make big dataframe

In [6]:
def make_big_df(folder_name):
    '''
    Concatenate the parsed data of every file in one folder into a single DataFrame.

    Runs `parseFile` on each entry of `folder_name` and stacks the results
    row-wise. The outer index level is the file name with its last 17 characters
    removed.

    Parameters
    ----------
    folder_name : str
        Folder whose entries are all passed to `parseFile`.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of the per-file frames, keyed by trimmed file name.

    Raises
    ------
    ValueError
        If the folder contains no entries.
    '''
    # NOTE(review): presumably strips a fixed suffix such as '_feat_manual.hdf5'
    # (17 characters) from each file name -- confirm against the actual file names.
    suffix_length = 17

    # List the folder once so paths and keys are guaranteed to line up; sort so
    # the result does not depend on the unspecified order of os.listdir.
    file_names = sorted(os.listdir(folder_name))
    if not file_names:
        raise ValueError('No files found in {!r}'.format(folder_name))

    frames = [parseFile(os.path.join(folder_name, name)) for name in file_names]
    keys = [name[:-suffix_length] for name in file_names]
    return pd.concat(frames, axis=0, keys=keys)

In [7]:
def avg_movement(big_df):
    '''
    Average all worms within each file and return the result transposed.

    Parameters
    ----------
    big_df : pandas.DataFrame
        Frame whose outer index level identifies the source file and which has a
        'Worm ID' column plus one column per second (as built by `make_big_df`).

    Returns
    -------
    pandas.DataFrame
        Rows are the per-second columns of `big_df`; columns are the outer index
        keys; each cell is the mean over all worms of that file.
    '''
    # DataFrame.mean(level=...) no longer exists in pandas >= 2.0; groupby on the
    # level is the equivalent. sort=False keeps the keys in their original order.
    per_file_mean = big_df.groupby(level=0, sort=False).mean()
    return per_file_mean.drop('Worm ID', axis=1).T

In [8]:
def concat_dates(folder_name):
    '''
    Return [pre, post]: per-second averages consolidated by time point.

    The per-file averages of `folder_name` (from `avg_movement(make_big_df(...))`)
    are split into columns whose name contains 'Pre' and columns whose name
    contains 'Post'. Only the first 10 rows (seconds) are kept. Within each split,
    all columns whose name contains '1min', '5min' or '10min' are averaged into
    one column per time point, merging the different recording dates.

    Parameters
    ----------
    folder_name : str
        Folder passed on to `make_big_df`.

    Returns
    -------
    list of pandas.DataFrame
        result[0] : Pre data.
        result[1] : Post data.
        Each frame has the columns '1min', '5min', '10min'. A time point with no
        matching source column is all-NaN.
    '''
    time_points = ('1min', '5min', '10min')
    n_rows = 10  # NOTE(review): only the first 10 seconds are kept; rationale not stated

    averaged = avg_movement(make_big_df(folder_name))

    def consolidate(phase):
        '''Average the columns of one phase ('Pre' or 'Post') per time point.'''
        phase_cols = [col for col in averaged.columns if phase in col]
        phase_df = averaged[phase_cols].iloc[:n_rows]
        # Build a fresh frame instead of assigning new columns onto a slice,
        # which would trigger pandas' chained-assignment warning.
        return pd.DataFrame(
            {
                tp: phase_df[[col for col in phase_cols if tp in col]].mean(axis=1)
                for tp in time_points
            },
            columns=list(time_points),
        )

    return [consolidate('Pre'), consolidate('Post')]

In [9]:
def send_dfs_to_excel(cd, output):
    '''
    Write the output of `concat_dates` to an Excel workbook.

    Parameters
    ----------
    cd : sequence of two pandas.DataFrame
        cd[0] is written to the sheet 'Pre', cd[1] to the sheet 'Post'.
    output : str
        Path of the .xlsx file to create.
    '''
    bdf_means_pre, bdf_means_post = cd[0], cd[1]

    # The context manager saves and closes the workbook, also when writing fails;
    # ExcelWriter.save() no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        bdf_means_pre.to_excel(writer, sheet_name='Pre')
        bdf_means_post.to_excel(writer, sheet_name='Post')

### Write one Excel file per experimental group (one per entry in `folder_dict`)

In [112]:
# Export each group: the workbook is named after the group's label in folder_dict.
for group_label, group_folder in folder_dict.items():
    workbook_name = group_label + '.xlsx'
    send_dfs_to_excel(concat_dates(group_folder), workbook_name)

# ------------------------------------------------------

# Playing

In [11]:
# Scratch: combined per-worm frame for one group. The folder is looked up in
# folder_dict instead of repeating the absolute path, so the two cannot drift apart.
df = make_big_df(folder_dict['Unc-43 unpaired'])

In [17]:
#df.index[0]

('V3 2.19.18 VC1052 Unpaired ARL 10min Post', 0)