In [1]:
import os
import re

from med_associates_utils import parse_ma_directory, SessionCollection

In [2]:
# Root folder that holds one sub-directory per genotype/day combination.
base_path = r'../sample_data/CRF MedAssoc Files/CRF MedAssoc Files'

# Map every "<genotype> - Day <n>" directory to the metadata stamped onto the
# sessions read from it. Insertion order: all MT days first, then all WT days.
data_sources = {
    os.path.join(base_path, f"{genotype} - Day {day}"): {'Genotype': genotype, 'Day': f'Day{day}'}
    for genotype in ('MT', 'WT')
    for day in range(1, 5)
}

# Read each directory in turn, attach its genotype/day metadata to the
# result, and pool everything into a single collection.
sessions = SessionCollection()
for path, meta in data_sources.items():
    print(f'reading path "{path}"')
    ss = parse_ma_directory(path)
    ss.update_metadata(meta)
    sessions.extend(ss)

def set_animal_id(session):
    """Extract the animal ID from the session's 'Subject' metadata and store it as 'Animal'.

    The ID is the first ``EN#<digits>`` token found in the subject string,
    e.g. ``'1_EN#20_1215'`` -> ``'EN#20'``.

    Args:
        session: object exposing a dict-like ``metadata`` attribute that
            contains a 'Subject' string.

    Raises:
        KeyError: if the session has no 'Subject' metadata entry.
        ValueError: if the subject string contains no ``EN#<digits>`` token.
    """
    subject = session.metadata['Subject']
    # Raw string: "\d" inside a normal literal is an invalid escape sequence
    # (a warning today, scheduled to become an error in a future Python).
    match = re.search(r"EN#\d+", subject)
    if match is None:
        # Fail with the offending value instead of an opaque
        # "'NoneType' object has no attribute 'group'".
        raise ValueError(f'no animal ID of the form "EN#<digits>" found in Subject {subject!r}')
    session.metadata['Animal'] = match.group(0)

# Hand set_animal_id to the collection so the loaded sessions gain an
# 'Animal' metadata entry (presumably called once per session — confirm
# against SessionCollection.apply).
sessions.apply(set_animal_id)

reading path "../sample_data/CRF MedAssoc Files/CRF MedAssoc Files\MT - Day 1"


  0%|          | 0/30 [00:00<?, ?it/s]

reading path "../sample_data/CRF MedAssoc Files/CRF MedAssoc Files\MT - Day 2"


  0%|          | 0/30 [00:00<?, ?it/s]

reading path "../sample_data/CRF MedAssoc Files/CRF MedAssoc Files\MT - Day 3"


  0%|          | 0/30 [00:00<?, ?it/s]

reading path "../sample_data/CRF MedAssoc Files/CRF MedAssoc Files\MT - Day 4"


  0%|          | 0/30 [00:00<?, ?it/s]

reading path "../sample_data/CRF MedAssoc Files/CRF MedAssoc Files\WT - Day 1"


  0%|          | 0/26 [00:00<?, ?it/s]

reading path "../sample_data/CRF MedAssoc Files/CRF MedAssoc Files\WT - Day 2"


  0%|          | 0/26 [00:00<?, ?it/s]

reading path "../sample_data/CRF MedAssoc Files/CRF MedAssoc Files\WT - Day 3"


  0%|          | 0/26 [00:00<?, ?it/s]

reading path "../sample_data/CRF MedAssoc Files/CRF MedAssoc Files\WT - Day 4"


  0%|          | 0/26 [00:00<?, ?it/s]

In [3]:
# Display the collection's metadata table; useful to check that the
# 'Genotype', 'Day' and 'Animal' entries added above are present.
sessions.metadata

Unnamed: 0,StartDate,EndDate,Subject,Experiment,Group,Box,StartTime,StartDateTime,EndTime,EndDateTime,MSN,Genotype,Day,Animal
0,2022-05-08,2022-05-08,1_EN#20_1215,20220508,1,4,07:16:07,2022-05-08 07:16:07,08:46:25,2022-05-08 08:46:25,CRF,MT,Day1,EN#20
1,2022-05-08,2022-05-08,EN#26_1016,20220508,7,4,17:41:51,2022-05-08 17:41:51,19:11:59,2022-05-08 19:11:59,CRF,MT,Day1,EN#26
2,2022-05-08,2022-05-08,EN#76_0924,20220508,7,1,17:41:21,2022-05-08 17:41:21,19:11:54,2022-05-08 19:11:54,CRF,MT,Day1,EN#76
3,2022-05-08,2022-05-08,EN#77_0924,20220508,7,2,17:41:27,2022-05-08 17:41:27,19:11:56,2022-05-08 19:11:56,CRF,MT,Day1,EN#77
4,2022-05-13,2022-05-13,EN#35_1101,20220513,8,2,12:58:24,2022-05-13 12:58:24,13:54:32,2022-05-13 13:54:32,CRF,MT,Day1,EN#35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219,2023-03-04,2023-03-04,2_EN#50-1125,20230304,2,2,10:05:08,2023-03-04 10:05:08,10:31:15,2023-03-04 10:31:15,CRFv2023,WT,Day4,EN#50
220,2023-03-04,2023-03-04,4_EN#33-1012,20230304,4,4,11:10:05,2023-03-04 11:10:05,12:34:00,2023-03-04 12:34:00,CRFv2023,WT,Day4,EN#33
221,2023-03-04,2023-03-04,4_EN#44-1126,20230304,4,3,11:39:38,2023-03-04 11:39:38,11:54:45,2023-03-04 11:54:45,CRFv2023,WT,Day4,EN#44
222,2023-03-04,2023-03-04,4_EN#31-1012,20230304,4,1,11:45:14,2023-03-04 11:45:14,12:44:01,2023-03-04 12:44:01,CRFv2023,WT,Day4,EN#31


In [4]:
# Summarize the collection before renaming: number of sessions plus the
# array and scalar names present, with how many sessions carry each.
sessions.describe()

Number of sessions: 224

Arrays present in data with counts:
(224) "A"
(224) "B"
(224) "C"
(224) "D"
(60) "F"

Scalars present in data with counts:
(224) "R"
(224) "S"




In [5]:
def rename_arrays_by_program(session):
    """Give a session's lettered arrays descriptive names based on its 'MSN' metadata.

    Renames applied, in order:

    * 'CRF' / 'CRF-Cue2-rewire': B -> rewarded_nosepoke, C -> magazine_entry,
      D -> reward_retrieval_latency
    * 'CRFv2023': B -> nosepoke, C -> magazine_entry,
      D -> reward_retrieval_latency, F -> rewarded_nosepoke

    Sessions with any other 'MSN' value are left untouched.
    """
    # Renames common to every handled program.
    shared = (
        ('C', 'magazine_entry'),
        ('D', 'reward_retrieval_latency'),
    )

    program = session.metadata['MSN']
    if program in ('CRF', 'CRF-Cue2-rewire'):
        renames = (('B', 'rewarded_nosepoke'),) + shared
    elif program == 'CRFv2023':
        # In this program B holds every nosepoke and F the rewarded ones.
        renames = (('B', 'nosepoke'),) + shared + (('F', 'rewarded_nosepoke'),)
    else:
        return

    for old_name, new_name in renames:
        session.rename_array(old_name, new_name)

# Rename the lettered arrays across the collection, then summarize again to
# confirm the descriptive names replaced the letters.
sessions.apply(rename_arrays_by_program)
sessions.describe()

Number of sessions: 224

Arrays present in data with counts:
(224) "A"
(224) "rewarded_nosepoke"
(224) "magazine_entry"
(224) "reward_retrieval_latency"
(60) "nosepoke"

Scalars present in data with counts:
(224) "R"
(224) "S"




In [6]:
# Export the collection to a DataFrame, carrying the listed metadata fields
# along as columns, and display it.
event_df = sessions.to_dataframe(include_meta=['Genotype', 'Day', 'Animal', 'Subject'])
event_df

Unnamed: 0,Subject,Genotype,Day,Animal,event,time
0,7_EN#59-0709,MT,Day4,EN#59,reward_retrieval_latency,1.39
1,3_EN#61-0101,MT,Day4,EN#61,reward_retrieval_latency,1.44
2,6_EN#60-0823,WT,Day4,EN#60,reward_retrieval_latency,1.45
3,EN#26_1220,MT,Day4,EN#26,reward_retrieval_latency,1.46
4,EN#26_1220,MT,Day4,EN#26,reward_retrieval_latency,1.48
...,...,...,...,...,...,...
18658,4_EN#31-1012,WT,Day3,EN#31,nosepoke,5385.06
18659,2_EN#81-0824,MT,Day1,EN#81,rewarded_nosepoke,5390.22
18660,EN#49_1224,WT,Day2,EN#49,rewarded_nosepoke,5392.14
18661,2_EN#81-0824,MT,Day1,EN#81,magazine_entry,5396.94
