In [398]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# =============================================================================
# ** == STILL TO DO
# =============================================================================
# Timeseries variables 
# start time = can get from the ini file, or leave blank?
# rate = 
# =============================================================================
# Script to build NWB Files
# =============================================================================

# Python Imports
import datetime
import h5py
import numpy as np
import glob
import os
import pandas as pd
from openpyxl import Workbook, load_workbook

# matlab Imports
import matlab.engine
eng = matlab.engine.start_matlab()

# Reading the Experiment Data sheet
from docx import Document

# NWB imports
from pynwb import NWBFile, get_manager
from pynwb import TimeSeries, behavior
from pynwb import NWBHDF5IO

# my scripts
#import from_ini # this has crashed for some reason????? 

In [399]:
# =============================================================================
# Reading metadata from Excel spreadsheet
# =============================================================================

def sbj_sheet_list(datas_dir):
    """Return the names of all sheets in an Excel workbook.

    Parameters
    ----------
    datas_dir : str or path-like
        Location of the workbook; passed straight to ``pandas.ExcelFile``.

    Returns
    -------
    list
        The workbook's ``sheet_names``.
    """
    # Use the workbook as a context manager so its file handle is closed
    # as soon as the sheet names have been read.
    with pd.ExcelFile(datas_dir) as data_sheet:
        return data_sheet.sheet_names

def get_exp_data(data_sheet):
    """Load the 'Experiment Details' sheet of a workbook.

    ``data_sheet`` is handed to ``pandas.read_excel``; the sheet's first
    column is used as the index of the returned DataFrame.
    """
    read_options = {'sheet_name': 'Experiment Details', 'index_col': 0}
    return pd.read_excel(data_sheet, **read_options)

def create_mouse_id_folder(mouse_id, nwb_output_path):
    """Make sure the per-mouse output folder exists and return its path.

    Parameters
    ----------
    mouse_id : str
        Folder name to create inside ``nwb_output_path``.
    nwb_output_path : str
        Root folder for NWB output.

    Returns
    -------
    str
        ``os.path.join(nwb_output_path, mouse_id)``.

    Raises
    ------
    FileExistsError
        If the target path already exists but is not a directory.
    """
    target_dir = os.path.join(nwb_output_path, mouse_id)
    # exist_ok=True replaces the separate exists() check, so there is no
    # window in which another process can create the folder in between.
    os.makedirs(target_dir, exist_ok=True)
    return target_dir

def create_mouse_date_folder(mouse_id, exp_date, nwb_output_path):
    """Make sure the dated folder inside the mouse folder exists.

    Parameters
    ----------
    mouse_id : str
        Name of the mouse folder inside ``nwb_output_path``.
    exp_date : str
        Name of the dated sub-folder inside the mouse folder.
    nwb_output_path : str
        Root folder for NWB output.

    Returns
    -------
    str
        ``os.path.join(nwb_output_path, mouse_id, exp_date)``.

    Raises
    ------
    FileExistsError
        If the target path already exists but is not a directory.
    """
    target_dir = os.path.join(nwb_output_path, mouse_id, exp_date)
    # makedirs creates the intermediate mouse folder as well, and
    # exist_ok=True removes the racy "check, then create" sequence.
    os.makedirs(target_dir, exist_ok=True)
    return target_dir

def is_sbj_sheet(sheet):
    """Tell whether a sheet name denotes a subject-details sheet.

    The comparison is case-insensitive: the name must start with
    'subject details' and must not contain 'template' anywhere.
    """
    lowered = sheet.lower()
    return lowered.startswith('subject details') and 'template' not in lowered

# =============================================================================
# Writing NWB File
# =============================================================================

def get_times_from_sheet(subject_sheet):
    """Split a subject sheet into its time table and imaging/behaviour rows.

    Returns a 3-tuple:
      * every row below the 'Time' row, with that row's values as column names,
      * the subset of those rows whose '2P (X)' cell is filled in,
      * the subset of the imaging rows whose 'IR (X)' cell is also filled in.
    """
    # Everything from the 'Time' row down; its first row holds the real headers
    from_header = subject_sheet.loc['Time':].copy()
    from_header.columns = from_header.iloc[0]
    times = from_header.drop(['Time'])

    # A non-null marker in '2P (X)' means the row was imaged
    was_imaged = times['2P (X)'].notna()
    imaging_times_list = times[was_imaged]

    # Of the imaged rows, keep those that also carry a behaviour marker
    has_behaviour = imaging_times_list['IR (X)'].notna()
    behav_img_list = imaging_times_list[has_behaviour]

    return times, imaging_times_list, behav_img_list

def mouse_folder_details(subject_sheet, data_sheet, imaging_times_list, nwb_output_path):
    """Work out where this session's NWB file belongs, creating the folder.

    Reads the mouse number from the subject sheet and the experiment date
    (dots stripped) from the workbook's experiment details, makes sure the
    mouse/date folder exists, and derives the output file name from the
    first five characters of the first imaging time.

    Returns
    -------
    tuple
        ``(mouse_id, exp_date, session_start, mouse_date_folder, output_filepath)``
    """
    value_column = 'Unnamed: 1'
    mouse_id = subject_sheet.loc['Number', value_column]

    experiment_details = get_exp_data(data_sheet)
    exp_date = experiment_details.at['Date', value_column].replace('.', '')
    mouse_date_folder = create_mouse_date_folder(mouse_id, exp_date, nwb_output_path)

    # The file is named after the first imaging time, e.g. '16:16' -> '16_16'
    first_imaging_time = str(imaging_times_list.index[0])
    session_start = first_imaging_time[:5]
    name_parts = (mouse_id, exp_date, session_start.replace(':', '_'))
    output_filepath = os.path.join(mouse_date_folder, '_'.join(name_parts) + '.nwb')

    return mouse_id, exp_date, session_start, mouse_date_folder, output_filepath

def nwb_file_variables(subject_sheet, mouse_id, exp_date, session_start):
    """Assemble the values that are passed on to the NWBFile constructor.

    Returns
    -------
    tuple
        ``(source, session_description, identifier, session_start_time,
        lab, institution, experiment_description, virus)``.
        The description and identifier are both
        '<mouse_id> <exp_date> <session_start>'; ``virus`` joins the 7th and
        8th index labels of the sheet with the 'GCaMP Inj' and 'Methoxy Inj'
        values from column 'Unnamed: 1'.
    """
    def labelled_value(position, row_label):
        # '<index label at position>:<value of row_label>'
        return ''.join((subject_sheet.index[position], ':',
                        str(subject_sheet.loc[row_label, 'Unnamed: 1'])))

    session_label = ' '.join((mouse_id, exp_date, session_start))
    virus = ', '.join((labelled_value(6, 'GCaMP Inj'),
                       labelled_value(7, 'Methoxy Inj')))

    return ('',                          # source
            session_label,               # session_description
            session_label,               # identifier
            session_start,               # session_start_time
            'Neural Coding',             # lab
            'Imperial College London',   # institution
            'GCamp Imaging',             # experiment_description
            virus)

# returns a list of the 2p epoch folders
# need to match each time from exp sheet with this list
def update_2P_spreadsheet(data_sheet, i, imaging_folder, exp_date, imaging_times_list):
    """Reconcile the spreadsheet's imaging times with the 2P folder names.

    The 2P folders whose name contains ``exp_date`` are sorted and paired, in
    order, with the imaging times from the spreadsheet. Wherever the time in
    the folder name (characters 9-13, 'HH_MM') differs from the spreadsheet
    time, the matching cell in column A of sheet ``i`` is overwritten with
    the folder time and the workbook is saved.

    Parameters
    ----------
    data_sheet : str
        Path of the workbook. It is saved to ``data_sheet[:-4] + 'xlsm'``.
    i : str
        Name of the subject sheet to correct.
    imaging_folder : str
        Folder containing one sub-folder per 2P imaging epoch.
    exp_date : str
        Date string used to select the relevant sub-folders.
    imaging_times_list : pandas.DataFrame
        Imaging rows of the subject sheet; only its index (the times) is read.

    Returns
    -------
    list of str
        The sorted names of the 2P folders for ``exp_date``.
    """
    twop_folders = sorted(name for name in os.listdir(imaging_folder) if exp_date in name)

    if len(twop_folders) != len(imaging_times_list.index):
        # Folders and rows cannot be paired up reliably: report and change nothing
        print("Spreadsheet times don't match the imaging folders for " + i + ' ' + data_sheet,
              twop_folders, imaging_times_list.index)
        return twop_folders

    # Wrong spreadsheet time -> time taken from the folder name.
    # NOTE: if two rows share the same HH:MM, the last pairing wins.
    corrections = {}
    for folder, img_time in zip(twop_folders, imaging_times_list.index):
        folder_time = folder[9:14].replace('_', ':')
        sheet_time = str(img_time)[0:5]
        if folder_time != sheet_time:
            corrections[sheet_time] = folder_time

    if corrections:
        # Load and save the workbook once instead of once per mismatch
        wb = load_workbook(data_sheet, read_only=False, keep_vba=True)
        sheet = wb[i]
        changed = False
        for cell in sheet['A']:
            replacement = corrections.get(str(cell.value)[0:5])
            if replacement is not None:
                # Write the folder time; the previous code wrote the
                # incorrect spreadsheet time straight back into the cell.
                cell.value = replacement
                changed = True
        if changed:
            wb.save(data_sheet[:-4] + 'xlsm')

    return twop_folders
            
# Building path to 2P.tif file, using imaging time as name for epoch
def twop_TS(imaging_times_list, nwb_file, twop_folders, imaging_folder, exp_date):
    """Walk the 2P epoch folders and locate each epoch's '_XYT.tif' stacks.

    Work in progress: the TimeSeries/epoch creation below is still disabled,
    so at present the function only builds the .tif paths and parses the
    epoch time encoded in each folder name. Nothing is added to ``nwb_file``.

    Parameters
    ----------
    imaging_times_list : pandas.DataFrame
        Only referenced by the disabled epoch code.
    nwb_file : NWBFile
        Only referenced by the disabled acquisition/epoch code.
    twop_folders : iterable of str
        Names of the epoch folders inside ``imaging_folder``; characters
        9-13 of each name are read as the epoch time ('HH_MM').
    imaging_folder : str
        Folder containing the epoch folders.
    exp_date : str
        Currently unused.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a folder holding '_XYT.tif' files has a name whose characters
        9-13 cannot be parsed as 'HH:MM'.
    """
    for folder in twop_folders:
        folder_path = os.path.join(imaging_folder, folder)
        # The stacks live inside the epoch folder, so the folder name has to
        # be part of the path (it was previously left out).
        tif_paths = [os.path.join(folder_path, file_name)
                     for file_name in os.listdir(folder_path)
                     if file_name.endswith('_XYT.tif')]
        if not tif_paths:
            continue

        # The imaging time doubles as the epoch name
        epoch_name = folder[9:14].replace('_', ':')
        # Converted to datetime.time so it can index the spreadsheet rows.
        # `datetime` is imported as a module, hence datetime.datetime.
        date_time = datetime.datetime.strptime(epoch_name, '%H:%M').time()

        for tif_file in tif_paths:
            # TODO: placeholder data until the Matlab pipeline output is wired in
            #     data = list(range(100, 200, 10))
            #     timestamps = list(range(10))
            #
            # Matlab script loading and running is working; the composite
            # image generation will be incorporated into Siegfred's script:
            #     g_ts, r_ts = eng.pipeline_neuroSEE_batch(tif_file, nargout=2)
            #
            #     red_ts = TimeSeries(epoch_name + ' ' + 'red_timeseries',  # names the TS in the epoch
            #                         'PyNWB tutorial',  # THIS NEEDS TO BE CHANGED
            #                         data,
            #                         'SI unit',
            #                         timestamps=timestamps)
            #     green_ts = TimeSeries(epoch_name + ' ' + 'green_timeseries',  # names the TS in the epoch
            #                           'PyNWB tutorial',  # THIS NEEDS TO BE CHANGED
            #                           data,
            #                           'SI unit',
            #                           starting_time=0.0,
            #                           rate=1.0)
            #
            #     nwb_file.add_acquisition(red_ts)
            #     nwb_file.add_acquisition(green_ts)
            #     nwb_file.create_epoch(epoch_name, ' ', 2.0, 4.0, [1, 2],
            #                           imaging_times_list.loc[date_time, 'Imaging details'] + '. '
            #                           + imaging_times_list.loc[date_time, 'Remarks'])
            #
            #     nwb_file.set_epoch_timeseries(epoch_name, red_ts)
            #     nwb_file.set_epoch_timeseries(epoch_name, green_ts)
            pass

    # TODO: return tif_paths
    
def behav_TS(behav_img_list, nwb_file):
    """Add one placeholder behaviour TimeSeries per behaviour entry.

    For every label in ``behav_img_list.index`` the first five characters of
    its string form are used as the epoch name. A TimeSeries named
    '<epoch> behaviour' is created with dummy data, added to ``nwb_file`` as
    an acquisition and attached to the epoch of the same name.
    """
    for session_time in behav_img_list.index:
        epoch_name = str(session_time)[:5]
        # Dummy samples 100, 110, ..., 190 until real behaviour data is loaded
        placeholder_samples = [100 + 10 * step for step in range(10)]

        behav_ts = TimeSeries(
            epoch_name + ' behaviour',
            'PyNWB tutorial',  # THIS NEEDS TO BE CHANGED
            placeholder_samples,
            'SI Unit',
            starting_time=0.0,
            rate=1.0,
        )
        nwb_file.add_acquisition(behav_ts)
        nwb_file.set_epoch_timeseries(epoch_name, behav_ts)
    


In [430]:
def get_data(data_sheet, imaging_folder, behav_folder, nwb_output_path):
    """Build an NWBFile for every subject sheet that has imaging sessions.

    Work in progress: the NWBFile object is created in memory and the
    spreadsheet times are reconciled with the 2P folders, but the
    time-series population and the write to disk are still disabled.

    Parameters
    ----------
    data_sheet : str
        Path of the experiment workbook.
    imaging_folder : str
        Folder containing the 2P epoch folders.
    behav_folder : str
        Folder with the behaviour recordings (only used by disabled code).
    nwb_output_path : str
        Root folder under which the mouse/date folders are created.

    Returns
    -------
    None
    """
    sbj_details = [name for name in sbj_sheet_list(data_sheet) if is_sbj_sheet(name)]

    for i in sbj_details:
        subject_sheet = pd.read_excel(data_sheet, sheet_name=i, index_col=0)
        times, imaging_times_list, behav_img_list = get_times_from_sheet(subject_sheet)

        # Subjects without any imaging session have nothing to export
        if len(imaging_times_list) == 0:
            continue

        (mouse_id, exp_date, session_start,
         mouse_date_folder, output_filepath) = mouse_folder_details(
            subject_sheet, data_sheet, imaging_times_list, nwb_output_path)

        if os.path.exists(output_filepath):
            # TODO: open the existing file and inspect its epochs
            #     io = NWBHDF5IO(output_filepath)
            #     nwbfile = io.read()
            #     find_epoch = nwbfile.get_epochs()
            #     print(find_epoch)
            continue

        (source, session_description, identifier, session_start_time,
         lab, institution, experiment_description, virus) = nwb_file_variables(
            subject_sheet, mouse_id, exp_date, session_start)

        # NOTE(review): session_start_time is an 'HH:MM' string here; check
        # whether the installed pynwb version requires a datetime instead.
        nwb_file = NWBFile(
            source=source,
            session_description=session_description,
            identifier=identifier,
            session_start_time=session_start_time,
            lab=lab,
            institution=institution,
            experiment_description=experiment_description,
            virus=virus,
        )

        # Get the correct times from the 2P folder list and write them to
        # the spreadsheet
        twop_folders = update_2P_spreadsheet(
            data_sheet, i, imaging_folder, exp_date, imaging_times_list)

        # The workbook may just have been rewritten on disk, so the sheet has
        # to be read again; re-parsing the DataFrame loaded before the update
        # would only reproduce the stale times.
        # NOTE(review): update_2P_spreadsheet saves to data_sheet[:-4] + 'xlsm',
        # which is this same file only when data_sheet ends in 'xlsm'.
        subject_sheet = pd.read_excel(data_sheet, sheet_name=i, index_col=0)
        updated_times, updated_imaging_times_list, updated_behav_img_list = \
            get_times_from_sheet(subject_sheet)

        # Index as 'HH:MM' strings (corrected cells are stored as strings,
        # untouched ones may still be datetime.time objects).
        # TODO: set the updated_imaging_times_list index to this
        times_to_string = [
            stamp.strftime('%H:%M') if isinstance(stamp, datetime.time) else stamp
            for stamp in updated_imaging_times_list.index
        ]

        # GET PATHS
        # TODO: twop_TS(imaging_times_list, nwb_file, twop_folders, imaging_folder, exp_date)
        # TODO: behav_TS(behav_img_list, nwb_file)

        # Build path to behaviour file.
        # If there is a corresponding X in the behaviour column, then we look
        # for the file. This builds a file name, but you can also get a list
        # of all available ones and link it back? Which one is better? Getting
        # a list would allow for name changes, but then how would it link back?
        #     behav_file_names = [mouse_id + '_' + x[1] + '.mp4' for x in behav_img_list['IR (X)']]
        #     behav_file_paths = [os.path.join(behav_folder, x) for x in behav_file_names]

        # TODO: write the file
        #     file = NWBHDF5IO(output_filepath, manager=get_manager(), mode='w')
        #     file.write(nwb_file)
        #     file.close()

In [431]:
# Run the conversion for the 2018.03.05 session
get_data(
    data_sheet='/Volumes/Schultz_group_data/Crazy Eights/Ann/GCaMP6 imaging/2018.03.05/Experiment Data Sheet - 2018.03.05.xlsm',
    imaging_folder='/Volumes/Schultz_group_data/Crazy Eights/Ann/GCaMP6 imaging/2018.03.05/2P/',
    behav_folder='/Volumes/Schultz_group_data/Crazy Eights/Ann/GCaMP6 imaging/2018.03.05/IR tracking',
    nwb_output_path='/Users/solomia/Dropbox/2p_da/data_files/nwb_folder/',
)

['16:16', '16:26', '16:33', '16:40', '16:43', '16:47', '16:53', '16:59', '17:04', '17:11', '17:16', '17:22', '17:28', '17:34', '17:37', '17:43', '17:44', '17:51', '17:56', '18:00', '18:01']
