# Daily Log to SQL

This notebook helps clean up data from the daily logs and insert it into the Limblab MySQL database. To use it, you will need to know the sesames and either be connected to the VPN or be running this remotely on Shrek or Donkey.


## Required Dependencies:

- sqlalchemy
- pymysql
- numpy
- pandas


### Linux specific
You'll need to run <code> sudo apt install libmysqlclient mysql-client-core </code>

### macOS specific
You'll need to run <code> brew install mysql </code>

In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from os import path, system
from sys import platform
import glob
from tkinter import Tk, ttk # to allow for graphical folder loading
from Blackrock_Python_Utilities import brpylib # added an init file so it's seen as a package

### Background data

What is the monkey's name? What directory are we going to be scraping? (It is better to do this once per monkey.)

In [2]:
# --- Recording location ---------------------------------------------------
# Root of the folder tree that will be scraped. A missing folder only
# produces a warning; execution carries on.
scrape_dir = '/home/kevin/Documents/L_MillerLab/data/Greyson_17L2/CerebusData'
if not path.exists(scrape_dir):
    print(f"{scrape_dir} does not exist! Try again!")

# --- Animal identifiers ---------------------------------------------------
monkeyName = "Greyson"
ccmID = "18E2"

### Get a list of all threshold files

We'll work with nev and plx files for now. We are going to assume that all recordings have those, so we'll work off that.

In [3]:
# Collect every threshold file that sits exactly one folder below scrape_dir.
# glob is used instead of shelling out to `dir` / `ls` through os.system,
# because os.system returns the command's exit status, not its listing.
# glob.glob returns matches in arbitrary order, so the results are sorted to
# keep nev_list[0] and the iteration order reproducible between runs.
nev_list = sorted(glob.glob(f"{scrape_dir}/*/*.nev"))
plx_list = sorted(glob.glob(f"{scrape_dir}/*/*.plx"))

### Create pandas DataFrames to keep track of the files as we go through them

Not sure whether it would be better to insert an item as we go through or just insert the entire dataframe when we're done, but we'll just start with the dataframe

In [4]:
# Column layouts for the tables filled in while scraping. Fields that have to
# be parsed out of the daily log are skipped here.

# Session table: a mix of per-day and per-session info. Not the cleanest
# thing, but here we are.
session_cols = [
    'day_key',        # the date
    'rec_date',       # when this was recorded -- we'll want to start with that
    'rec_time',       # the recording time
    'task_id',        # foreign key for the task type
    'lab_num',        # 1, 2, 6, cage etc
    'duration',       # in seconds
    'numChannels',    # channels recorded; may differ from num_chans with two simultaneous recordings
    'hasBumps',       # from the S1 jobs
    'numTrials',      # from the trial table
    'numReward',      # from the trial table
    'numAbort',       # from the trial table
    'numFail',        # from the trial table
    'numIncomplete',  # from the trial table
]

# One row per .nev file.
nev_cols = [
    'array_serial',
    'nev_filename',       # like it says on the box...
    'sessions_key',       # key of the associated session -- incremented automatically
    'setting_file',       # should be just the filename with .ccf, but can check
    'rec_system',         # should be cerebus for the .nev files
    'threshold_quality',  # based on average firing rates, artifact rejection... maybe for later
    'num_chans',          # differs from numChannels in session if there are multiple arrays
    'num_units',          # number of sorted units, meaning not 0 or 255
]

# Kinematics recordings.
kin_cols = [
    'sessions_key',   # session foreign key
    'filename',       # obviously
    'sampling_rate',  # once again
    'kin_quality',    # how good is the recording? maybe look at psd or something?
]

# EMG recordings.
emg_cols = [
    'sessions_key',
    'filename',
    'rec_system',     # if .ns3 should be jim baker's or tucker's; could add rhd and tbsi support
    'sampling_rate',  # typically 2k, but sometimes other stuff
    'emg_quality',    # refer to Josie's code
    'muscle_list',    # what did we record?
]

# Force recordings.
force_cols = [
    'sessions_key',
    'filename',
    'force_labels',  # from the .nsx or whatever file
]

# Build one empty dataframe per column layout.
session_df, nev_df, kin_df, emg_df, force_df = (
    pd.DataFrame(columns=layout)
    for layout in (session_cols, nev_cols, kin_cols, emg_cols, force_cols)
)

Define the word and task codes

In [5]:
# Codes that the counting code in this file compares against the high byte
# of each digital event word.
words = dict(
    reward=0x20,
    abort=0x21,
    fail=0x22,
    incomp=0x23,
)

# Task lookup table (presumably task identifiers -- confirm where they are read).
tasks = dict(
    CO=0x01,
    RW=0x02,
    FC=0x03,
    MG=0x06,
    WF=0x07,
)


Now for the interesting stuff -- start ripping through each nev file.

In [None]:
# Walk through the .nev files and pull out per-session information.
for i_nev, nev_file in enumerate(nev_list):
    # Still under development: only the first file is processed for now.
    if i_nev != 0:
        continue

    NevFileObj = brpylib.NevFile(nev_file)  # initialize the Nev
    output = NevFileObj.getdata()  # load the data -- need it for the num trial info etc

    # TODO: first see if we can find out the task

    # parse the date info etc
    time_origin = NevFileObj.basic_header['TimeOrigin']
    session_dict = {
        'rec_date': time_origin.date(),
        'rec_time': time_origin.time(),
    }

    # The code compared against `words` is the high byte of each digital
    # event. Extract it once for all events instead of once per word. The
    # explicit integer dtype keeps the bit operations valid (and the counts
    # integral) even when the file holds no digital events.
    event_codes = (
        np.asarray(output['digital_events']['UnparsedData'], dtype=np.int64) & 0xFF00
    ) >> 8

    # word counts, stored as plain Python ints
    words_count = {'reward': 0, 'abort': 0, 'fail': 0, 'incomp': 0}
    for word, code in words.items():
        words_count[word] += int(np.sum(event_codes == code))

    # TODO: fill in the remaining session_dict fields. The original cell
    # ended in an unfinished `if`, which kept the whole cell from parsing.



In [None]:
# Scratch cell: open the first file in nev_list and load its contents so the
# returned structure can be inspected in the cells below.
NevFileObj = brpylib.NevFile(nev_list[0])
# NevFileObj.extended_headers
output = NevFileObj.getdata()

In [None]:
# Restrict the run to the first file while debugging. Slice rather than index
# so nev_list stays a list: rebinding it to a single filename string would
# make later `enumerate(nev_list)` loops walk the characters of the path and
# `nev_list[0]` return a single character.
nev_list = nev_list[:1]

In [None]:
# Show which top-level entries getdata() returned for this file.
output.keys()

In [None]:
# Find the rows of session_df whose rec_date / rec_time equal the values
# parsed into session_dict. With a single argument np.where returns the
# indices at which the condition is true.
# NOTE(review): the first result is neither stored nor displayed (a notebook
# cell only shows its last expression) -- confirm whether the two conditions
# were meant to be combined into one lookup.
np.where(session_df['rec_date'] == session_dict['rec_date'])
np.where(session_df['rec_time'] == session_dict['rec_time'])

In [None]:
# Print every entry of nev_list as "index,path" to eyeball what was found.
for ii,nev in enumerate(nev_list):
    print(f"{ii},{nev}")

In [None]:
# Show the current working directory. The top-of-file import only pulls
# `path` and `system` out of os, so the module itself is imported here.
import os
os.getcwd()

In [None]:
# NOTE(review): not plain Python -- presumably relies on IPython's automagic
# to run the `ls` line magic and list the working directory; confirm.
ls

In [6]:
# Open the first file in nev_list and load its data; the cells that follow
# read the digital events out of `output`.
NevFileObj = brpylib.NevFile(nev_list[0])
output = NevFileObj.getdata()



20181220_Greyson_FreeReaching_001.nev opened


In [12]:
# List the fields available under the digital events entry.
output['digital_events'].keys()

dict_keys(['TimeStamps', 'InsertionReason', 'UnparsedData'])

In [14]:

words = {'reward':0x20, 'abort':0x21, 'fail':0x22, 'incomp':0x23}
words_count = {'reward':0, 'abort':0, 'fail':0, 'incomp':0}

# The code compared against `words` is the high byte of each digital event.
# Extract it once for all events rather than re-scanning the event list in
# Python for every word. The explicit integer dtype keeps the bit operations
# valid (and the counts integral) even when there are no digital events.
event_codes = (
    np.asarray(output['digital_events']['UnparsedData'], dtype=np.int64) & 0xFF00
) >> 8

# Tally how many events carry each word code, stored as plain Python ints.
for word,code in words.items():
    words_count[word] += int(np.sum(event_codes == code))

In [None]:
# List the distinct high-byte codes below 0x10 that occur in the digital
# events. Every value in the `tasks` table defined earlier falls in that
# range, so this is presumably a way to spot the task code -- confirm.
np.unique([((out & 0xFF00) >> 8) for out in output['digital_events']['UnparsedData'] if ((out & 0xFF00)>>8)<0x10])