In [None]:
import os
from boxsdk import OAuth2, Client
import pandas as pd
import re

In [None]:
# ----- Support functions -----

def visit_all_dirs_files(usedir, full_list, curr_path, box_client=None):
    """Recursively collect the path of every file below a Box folder.

    Parameters
    ----------
    usedir : str
        Box folder id to start listing from.
    full_list : list of str
        Accumulator; one ``os.path.join``-ed path per file is appended to it
        in place.
    curr_path : list of str
        Path components leading to ``usedir``. It is extended while descending
        into sub-folders and restored before returning. May be empty, in which
        case files directly inside ``usedir`` are recorded by bare name.
    box_client : optional
        Object exposing ``folder(folder_id=...).get_items()``. Defaults to the
        notebook-level ``client`` so existing three-argument calls keep working.

    Returns
    -------
    list of str
        The same ``full_list`` object that was passed in.
    """
    if box_client is None:
        box_client = client  # fall back to the connection opened in the notebook

    for item in box_client.folder(folder_id=usedir).get_items():
        if item.type == 'folder':
            curr_path.append(item.name)  # step into the sub-folder
            try:
                visit_all_dirs_files(item.id, full_list, curr_path, box_client)
            finally:
                # always step back out so the caller's path list is left as it was found
                curr_path.pop()
        else:
            # a single join also works when curr_path is empty
            full_list.append(os.path.join(*curr_path, item.name))

    return full_list

def find_in_list(lst, item):
    """Return the indices of the strings in ``lst`` that contain ``item`` as a tag.

    A string matches when ``item`` (taken literally, compared case-insensitively)
    is immediately followed by ``_``, ``-`` or the letter ``i``/``I``. Requiring
    one of those delimiters keeps a tag such as ``A1`` from matching ``A10``.

    Parameters
    ----------
    lst : iterable of str
        Candidate strings (file names in this notebook).
    item : object
        Tag to look for; converted with ``str`` and escaped so characters such
        as ``.`` or ``+`` are not interpreted as regex syntax.

    Returns
    -------
    list of int
        Positions in ``lst`` of every matching string, in order.
    """
    # A leading '-' inside [...] is a literal hyphen. The previous class
    # '[_|-|I|i]' parsed '|-|' as a range and therefore never matched '-'.
    pattern = re.compile(re.escape(str(item)) + r'[-_i]', re.IGNORECASE)
    return [i for i, x in enumerate(lst) if pattern.search(x)]

In [None]:
# ---- Get login credentials from environment variables -----

# - note this needs to be adjusted for each authorized user... ask Nerissa about getting and setting this information
# - developer access tokens expire quickly: update the `box_access_token` environment variable
#   (or set os.environ['box_access_token'] in a scratch cell) and re-run this cell when that happens
# - never paste a token into the notebook itself; it would be saved and shared along with the file
client_id = os.environ.get('box_client_id')
client_secret = os.environ.get('box_client_secret')
access_token = os.environ.get('box_access_token')

# stop here with a readable message when no token has been provided
if not access_token:
    raise RuntimeError(
        "Box access token not found: set the 'box_access_token' environment variable and re-run this cell"
    )


# ----- Open a connection to the Box server -----

auth = OAuth2(
    client_id=client_id,
    client_secret=client_secret,
    access_token=access_token,
)
client = Client(auth)

# ----- Work on directory structure -----

# get the list of items in the whole Scn2a folder
items = client.folder(folder_id='196168550606').get_items()

# list contents
for item in items:
    print(f'{item.type.capitalize()} {item.id} is named "{item.name}"')

In [None]:
# ----- Figure out which directories have the assay of interest

assaykeyword = 'Intros' # set up for intros
projectDir = '196168550606'
rootpath = ['Scn2a_X1Behavior_Nov22'] # path prefix for every collected file (presumably the name of folder projectDir; confirm)

# usedirs keeps the ids of the matching top-level folders; usedirnames maps each id
# to the name already returned by the listing, so no extra request per folder is needed
usedirs = []
usedirnames = {}
items = client.folder(folder_id=projectDir).get_items() # top level folder

for item in items:
    if assaykeyword in item.name:
        usedirs.append(item.id)
        usedirnames[item.id] = item.name

# --- collect all the paths and filenames

allpaths = []

for usedir in usedirs:

    # start each walk at <rootpath>/<matching folder name>
    curr_path = rootpath + [usedirnames[usedir]]

    # search for files below this folder and consolidate
    allpaths += visit_all_dirs_files(usedir, [], curr_path)

print(allpaths)

In [None]:
# ----- Reduce every collected path to its file name (same order as allpaths) -----
justfiles = [os.path.basename(full_path) for full_path in allpaths]

In [None]:
# ----- Use metadata table to find expected files and add those columns to the table -----

# load up metadata
meta = pd.read_csv('metadata_intros_v1.csv')

# For every row's PairTag, look for exactly one matching file name.
# fileIndex always gets one entry per metadata row (None when there is no
# unique match) so that its positions stay aligned with the table.
fileIndex = []
for tag in meta.PairTag:
    match = find_in_list(justfiles, tag)
    if len(match) == 1:
        fileIndex.append(match[0])
    else:
        fileIndex.append(None)
        if match:
            print(f'Multiple matches for {tag}: {[justfiles[m] for m in match]}')
        else:
            print(f'No match for {tag}:')

# use fileIndex to look up file names and paths; rows without a unique match stay empty
metafiles = [None if idx is None else justfiles[idx] for idx in fileIndex]
metapaths = [None if idx is None else '\\' + os.path.dirname(allpaths[idx]) for idx in fileIndex]

# add files and paths to the metadata table
# (bracket assignment creates the column if it is missing; attribute assignment would not)
meta['VideoFile'] = metafiles
meta['VideoPath'] = metapaths

In [None]:
# ----- Collect the names of the BORIS aggregated-events files -----
keyword = "Intro"
items = client.folder(folder_id='238576018931').get_items()

# keep only the entries whose name contains the keyword
aggfiles = [entry.name for entry in items if keyword in entry.name]

print(aggfiles)
print(len(aggfiles))