In [33]:
import os
from boxsdk import OAuth2, Client
import pandas as pd
import re

In [63]:
# ---- Get login credentials from environment variables -----

# - note this needs to be adjusted for each authorized user... ask Nerissa about getting and setting this information
# - developer access tokens expire quickly, so this probably needs to be set at startup and perhaps even while working
client_id = os.environ.get('box_client_id')
client_secret = os.environ.get('box_client_secret')
# access_token = os.environ.get('box_access_token') # just put this one in here because it expires anyway
access_token = '0mpL1aOubspUV7vl7VT43oa8G5HD8yJa'


# ----- Open a connection to the Box server -----

auth = OAuth2(
    client_id=client_id, # put these in environmental variables
    client_secret=client_secret,
    access_token=access_token,
)
client = Client(auth)

In [4]:
# ----- Work on directory structure -----

# get the list of items in the whole Scn2a folder
items = client.folder(folder_id='196168550606').get_items()

# list contents
for item in items:
    print(f'{item.type.capitalize()} {item.id} is named "{item.name}"')

Folder 238576018931 is named "Aggregated_Events"
Folder 202955487033 is named "April2023_ShortCoHab_Females"
Folder 226890392440 is named "BorisFiles"
Folder 196172227913 is named "Female Intros"
Folder 196173015061 is named "Female PPTs"
Folder 196170422974 is named "Female RI"
Folder 196169230497 is named "Female SepReunion"
Folder 196171174766 is named "Female TMs"
Folder 214178119262 is named "June2023_ControlBehaviors"
Folder 196166676922 is named "Male Intros"
Folder 196172624162 is named "Male PPTs"
Folder 196171675763 is named "Male RI"
Folder 196172896956 is named "Male SepReunion"
Folder 196172739371 is named "Male TMs"
Folder 248394520156 is named "Naive_Choice"
Folder 229587753245 is named "Oct2023_JuvenileBehavior"
File 1177298175085 is named "Scn2aX1Nov22_Key.xlsx"


In [26]:
def visit_all_dirs_files(usedir,full_list,curr_path):
    
    # get all the items in the current folder
    theseitems = client.folder(folder_id=usedir).get_items()
    
    for item in theseitems:     # loop over items
        tp = item.type # get key fields from directory items
        nm = item.name
        iid = item.id
        
        if tp == 'folder': # recursion if a folder is found
            curr_path.append(nm) # add directory to path
            full_list = visit_all_dirs_files(iid,full_list,curr_path) # look for more folders/files
            curr_path.pop() # clean the folder back off the path when going up a level
        else: # keep track of all the paths when there are files
            tpath = os.path.join(*curr_path)
            full_list.append(os.path.join(tpath,nm))           
    
    return full_list

In [25]:
# --- figure out which directories have intros

assaykeyword = 'Intros'

usedirs = []
items = client.folder(folder_id='196168550606').get_items()

for item in items:
    iid = item.id
    inm = item.name
    if inm.find(assaykeyword)>-1:
        usedirs.append(iid)

# print(usedirs)

# --- collect all the paths and filenames

rootpath = ['Scn2a_X1Behavior_Nov22']

allpaths = []

for usedir in usedirs: 
    
    # add starting folder to curr_path
    udnm = client.folder(folder_id=usedir).get(fields=['name']).name
    curr_path = rootpath + [udnm]
    
    # search for files
    thesepaths = visit_all_dirs_files(usedir,[],curr_path)

    # consolidate
    allpaths += thesepaths
    
print(allpaths)
    
# --- loop over info in metadata to find intro video files

# load up metadata
# meta = pd.read_csv('metadata_intros_v1.csv')

# keep track of paths to log file names and full paths
# do the same thing for the Boris aggregated events
# do the same thing for the Boris files

['Scn2a_X1Behavior_Nov22', 'Female Intros', '01032023']
['Scn2a_X1Behavior_Nov22', 'Female Intros', '01242023']
['Scn2a_X1Behavior_Nov22', 'Female Intros', '02072023']
['Scn2a_X1Behavior_Nov22', 'Female Intros', '02282023']
['Scn2a_X1Behavior_Nov22', 'Female Intros', '08082023']
['Scn2a_X1Behavior_Nov22', 'Female Intros', '09052023']
['Scn2a_X1Behavior_Nov22', 'Female Intros', '11072023']
['Scn2a_X1Behavior_Nov22', 'Female Intros', '11142022']
['Scn2a_X1Behavior_Nov22', 'Female Intros', '11292022']
['Scn2a_X1Behavior_Nov22', 'Male Intros', '01042023']
['Scn2a_X1Behavior_Nov22', 'Male Intros', '01252023']
['Scn2a_X1Behavior_Nov22', 'Male Intros', '02082023']
['Scn2a_X1Behavior_Nov22', 'Male Intros', '03012023']
['Scn2a_X1Behavior_Nov22', 'Male Intros', '03152023']
['Scn2a_X1Behavior_Nov22', 'Male Intros', '03222023']
['Scn2a_X1Behavior_Nov22', 'Male Intros', '04052023']
['Scn2a_X1Behavior_Nov22', 'Male Intros', '04122023']
['Scn2a_X1Behavior_Nov22', 'Male Intros', '04192023']
['Scn2a_X1

In [28]:
# get a list of just file names to hunt through
justfiles = []
for pth in allpaths:
    justfiles.append(os.path.basename(pth))

In [30]:
for fname in justfiles:
    print(fname)

Nov22_Pair13_Intro_WIN_20230103_13_45_14_Pro.mp4
Nov22_Pair14-Intro_WIN_20230103_13_45_36_Pro.mp4
Nov22_Pair15_Intro.mov
Nov22_Pair16_Intro_WIN_20230103_13_46_48_Pro.mp4
Nov22_Pair21_Intro_2023-01-24 13-25-51.mp4
Nov22_Pair22_Intro_2023-01-24 13-25-52.mp4
Nov22_Pair23_Intro_2023-01-24 13-25-55.mp4
Nov22_Pair28_Intro_2023-02-07 13-18-12.mp4
Nov22_Pair29_Intro_2023-02-07 13-18-14.mp4
Nov22_Pair30_Intro_2023-02-07 13-18-15.mp4
Nov22_Pair31_Intro_WIN_20230207_13_18_15_Pro.mp4
Nov22_Pair35_Intro_2023-02-28 14-02-19.mp4
Nov22_Pair36_Intro_2023-02-28 14-02-28.mp4
Nov22_Pair56_Intro_2023-08-08 13-13-52.mp4
Nov22_Pair59_Intro_2023-09-05 13-52-22.mp4
Nov22_Pair60_Intro_2023-09-05 13-52-23.mp4
Nov22_Pair61_Intro_2023-11-07 13-05-43.mp4
Nov22_Pair62_Intro_2023-11-07 13-05-44.mp4
Nov22_Pair63_Intro_2023-11-07 13-05-47.mp4
Nov22_Pair64_Intro.mov
Nov22_Pair1Intro_WIN_20221114_12_47_25_Pro.mp4
Nov22_Pair2Intro_WIN_20221114_12_47_37_Pro.mp4
Nov22_Pair3Intro.mov
Nov22_Pair4Intro.mov
Nov22_Pair5_Intro_WI

In [44]:
def find_in_list(lst, item):
    result = []
    for i, x in enumerate(lst):
        if re.search(f'{item}[_|-|I|i]',x,re.IGNORECASE):
            result.append(i)
    return result

In [46]:
# loop over metadata table
# load up metadata
meta = pd.read_csv('metadata_intros_v1.csv')

# generate a regex of possible matches for filename
fileIndex = []
for tag in meta.PairTag:
    match = find_in_list(justfiles,tag)
    if len(match)==1:
        fileIndex.append(match[0])
    else:
        print(f'No match for {tag}:')

# will have pair number, dash, underscore, or no space and then intro or Intro

In [55]:
# use fileIndex to look up file names and paths and add to the metadata table -- then save table
metafiles = []
metapaths = []
for i,tag in enumerate(meta.PairTag):
    metafiles.append(justfiles[fileIndex[i]])
    metapaths.append('\\'+os.path.dirname(allpaths[fileIndex[i]]))

In [51]:
os.path.dirname(allpaths[0])

'Scn2a_X1Behavior_Nov22\\Female Intros\\01032023'

In [67]:
len(metafiles)

53

In [58]:
meta.VideoFile = metafiles

In [60]:
meta.VideoPath = metapaths

In [66]:
# ----- Get BORIS scored aggregated events file names -----
keyword = "Intro"
aggfiles = []
items = client.folder(folder_id='238576018931').get_items()

for item in items:
    inm = item.name
    if inm.find(keyword)>-1:
        aggfiles.append(inm)
        
print(aggfiles)
print(len(aggfiles))

['Nov22_Pair15_Intro.csv', 'Nov22_Pair16_Intro.csv', 'Nov22_Pair17_Intro.csv', 'Nov22_Pair18_Intro.csv', 'Nov22_Pair19_Intro.csv', 'Nov22_Pair1_Intro.csv', 'Nov22_Pair21_Intro.csv', 'Nov22_Pair22_Intro.csv', 'Nov22_Pair23_Intro.csv', 'Nov22_Pair24_Intro.csv', 'Nov22_Pair25_Intro.csv', 'Nov22_Pair26_Intro.csv', 'Nov22_Pair27_Intro.csv', 'Nov22_Pair2_Intro.csv', 'Nov22_Pair3_Intro.csv', 'Nov22_Pair4_Intro.csv', 'Nov22_Pair5_Intro.csv', 'Nov22_Pair6_Intro.csv', 'Nov22_Pair7_Intro.csv', 'Nov22_Pair8_Intro.csv']
20


In [61]:
meta.head()

Unnamed: 0,PairTag,AssayType,RecDate,VideoFile,ScoreFile,FemaleID,FemaleGT,FemaleFam,FemaleDOB,MaleID,...,Timeline,Ethogram,RanBy,ScoredBy,FullTimeline,FocalColor,StrangerID,StrangerGT,PPTlane,PartnerChamber
0,Nov22_Pair1,introduction,11/14/2022,Nov22_Pair1Intro_WIN_20221114_12_47_25_Pro.mp4,,B8002,Het,,,,...,,,Gina Williams,Josh Steighner,True,,,,,
1,Nov22_Pair15,introduction,1/3/2023,Nov22_Pair15_Intro.mov,,B6614,Het,,,,...,,,Gina Williams,Josh Steighner,True,,,,,
2,Nov22_Pair16,introduction,1/3/2023,Nov22_Pair16_Intro_WIN_20230103_13_46_48_Pro.mp4,,B6615,WT,,,,...,,,Gina Williams,Josh Steighner,True,,,,,
3,Nov22_Pair17,introduction,1/4/2023,Nov22_Pair17_Intro_WIN_20230104_12_24_30_Pro.mp4,,,,,,B6611,...,,,Gina Williams,Josh Steighner,True,,,,,
4,Nov22_Pair18,introduction,1/4/2023,Nov22_Pair18_Intro_WIN_20230104_12_25_08_Pro.mp4,,,,,,B6613,...,,,Gina Williams,Josh Steighner,True,,,,,


In [None]:
# ----- BELOW THIS IS OUTDATED SCRATCH TESTING -----

In [2]:
from boxsdk import DevelopmentClient
client = DevelopmentClient()

Enter developer token: H8AyUtNZ9dFurHIpYNSgjza5FeH7E2jA


In [3]:
user = client.user().get()

[36mGET https://api.box.com/2.0/users/me {'headers': {'Authorization': '---E2jA',
             'User-Agent': 'box-python-sdk-3.9.2',
             'X-Box-UA': 'agent=box-python-sdk/3.9.2; env=python/3.10.6'},
 'params': None}[0m
[32m"GET https://api.box.com/2.0/users/me" 200 468
{'Date': 'Fri, 29 Mar 2024 00:52:41 GMT', 'Content-Type': 'application/json', 'x-envoy-upstream-service-time': '126', 'box-request-id': '05a3a064a1faabb3244d7c86099a582a1', 'cache-control': 'no-cache, no-store', 'strict-transport-security': 'max-age=31536000', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'Transfer-Encoding': 'chunked'}
{'address': '',
 'avatar_url': 'https://ucsf.app.box.com/api/avatar/large/226757841',
 'created_at': '2014-10-31T14:13:47-07:00',
 'id': '226757841',
 'job_title': '',
 'language': 'en',
 'login': 'nerissa.hoglen@ucsf.edu',
 'max_upload_size': 53687091200,
 'modified_at': '2024-03-28T17:41:33-07:00',
 'name': 'Nerissa Hoglen',
 'notification

In [4]:
print(f'The current user ID is {user.id}')

The current user ID is 226757841


In [5]:
from boxsdk import OAuth2, Client

auth = OAuth2(
    client_id='xxx', # put these in environmental variables
    client_secret='xxx',
    access_token='xxx',
)
client = Client(auth)

user = client.user().get()
print(f'The current user ID is {user.id}')

# box_client_id
# box_client_secret
# box_access_token

[36mGET https://api.box.com/2.0/users/me {'headers': {'Authorization': '---E2jA',
             'User-Agent': 'box-python-sdk-3.9.2',
             'X-Box-UA': 'agent=box-python-sdk/3.9.2; env=python/3.10.6'},
 'params': None}[0m
[32m"GET https://api.box.com/2.0/users/me" 200 468
{'Date': 'Fri, 29 Mar 2024 00:54:30 GMT', 'Content-Type': 'application/json', 'x-envoy-upstream-service-time': '299', 'box-request-id': '039ca7333399583dc1c05f00d61dd6b99', 'cache-control': 'no-cache, no-store', 'strict-transport-security': 'max-age=31536000', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'Transfer-Encoding': 'chunked'}
{'address': '',
 'avatar_url': 'https://ucsf.app.box.com/api/avatar/large/226757841',
 'created_at': '2014-10-31T14:13:47-07:00',
 'id': '226757841',
 'job_title': '',
 'language': 'en',
 'login': 'nerissa.hoglen@ucsf.edu',
 'max_upload_size': 53687091200,
 'modified_at': '2024-03-28T17:41:33-07:00',
 'name': 'Nerissa Hoglen',
 'notification

In [7]:
file = client.get_shared_item('https://ucsf.box.com/s/bqdy1uhw0i0l3dxfq06reaoylwoqc9sd')
# https://ucsf.app.box.com/folder/196172739371
print(file)

[36mGET https://api.box.com/2.0/shared_items {'headers': {'Authorization': '---E2jA',
             'BoxApi': 'shared_link=https://ucsf.box.com/s/bqdy1uhw0i0l3dxfq06reaoylwoqc9sd',
             'User-Agent': 'box-python-sdk-3.9.2',
             'X-Box-UA': 'agent=box-python-sdk/3.9.2; env=python/3.10.6'}}[0m
[32m"GET https://api.box.com/2.0/shared_items" 200 1117
{'Date': 'Fri, 29 Mar 2024 01:03:57 GMT', 'Content-Type': 'application/json', 'x-envoy-upstream-service-time': '484', 'etag': '"0"', 'box-request-id': '03787fbc377a00a44b7dfdf600c85038f', 'cache-control': 'no-cache, no-store', 'strict-transport-security': 'max-age=31536000', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'Transfer-Encoding': 'chunked'}
{'content_created_at': '2023-03-15T12:24:04-07:00',
 'content_modified_at': '2023-03-16T12:19:46-07:00',
 'created_at': '2023-03-15T12:24:04-07:00',
 'created_by': {'id': '6932270721',
                'login': 'gina.williams@ucsf.edu',
      

In [8]:
colls = client.collections()

<boxsdk.pagination.limit_offset_based_object_collection.LimitOffsetBasedObjectCollection object at 0x000001EDFCB32F80>


In [12]:
file

<Box Folder - 199141947208 (02102023)>

In [13]:
items = client.folder(folder_id='199141947208').get_items()
for item in items:
    print(f'{item.type.capitalize()} {item.id} is named "{item.name}"')

[36mGET https://api.box.com/2.0/folders/199141947208/items {'headers': {'Authorization': '---E2jA',
             'User-Agent': 'box-python-sdk-3.9.2',
             'X-Box-UA': 'agent=box-python-sdk/3.9.2; env=python/3.10.6'},
 'params': {'offset': 0}}[0m
[32m"GET https://api.box.com/2.0/folders/199141947208/items?offset=0" 200 951
{'Date': 'Fri, 29 Mar 2024 01:07:57 GMT', 'Content-Type': 'application/json', 'x-envoy-upstream-service-time': '252', 'box-request-id': '0718c90a6e576119b95f1c8eccccfe176', 'cache-control': 'no-cache, no-store', 'strict-transport-security': 'max-age=31536000', 'Via': '1.1 google', 'Alt-Svc': 'h3=":443"; ma=2592000,h3-29=":443"; ma=2592000', 'Transfer-Encoding': 'chunked'}
{'entries': [{'etag': '1',
              'file_version': {'id': '1270039394831',
                               'sha1': 'efb1bd1b1d2bf768216b3d2c7125afb2f2d537bc',
                               'type': 'file_version'},
              'id': '1166052074831',
              'name': 'Nov22_Pai

In [2]:
test = os.environ.get('my_var')
print(test)

value
