# Calculating mean durations of SEM schemas and scripted actions

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap, LogNorm, Normalize
from matplotlib.offsetbox import AnchoredOffsetbox, TextArea, HPacker, VPacker
from matplotlib import animation
import seaborn as sns
import pickle as pkl
from glob import glob
from sklearn.metrics import adjusted_mutual_info_score
from copy import deepcopy
from tqdm import tqdm
from random import shuffle
from scipy.optimize import linear_sum_assignment
import sys

In [2]:
# Frame rate used to turn frame indices into seconds (tagged "kinect" by the
# original author).
fps = 25

# Table of high-level event annotations, one row per annotated event.
annotdf = pd.read_csv("event_annotation_timing_average.csv")

In [3]:
def compute_schemadf(diagfiles):
    """Build a table of active SEM events and high-level event annotations.

    For every diagnostic pickle in ``diagfiles`` the run id and epoch are
    parsed from the file name, the SEM readout ``e_hat`` is loaded, and the
    matching input pickle supplies the frame index, which is divided by the
    module-level ``fps`` to obtain seconds.  Each row is then labelled with
    the annotated event from the module-level ``annotdf`` whose
    ``[startsec, endsec]`` interval (inclusive) contains it; rows covered by
    no interval keep the label ``'none'``.  If intervals overlap, the one that
    appears later in ``annotdf`` wins.

    Relies on the module-level names ``cache_tag``, ``fps`` and ``annotdf``.

    Args:
        diagfiles: iterable of paths to diagnostic pickle files, named
            ``<run>_..._<epoch>.pkl``.

    Returns:
        DataFrame with columns ``run``, ``epoch``, ``e_hat``, ``sec``, ``ev``
        and ``ev_fact`` (integer codes from ``pd.factorize`` on ``ev``).  Each
        file contributes its own index, so the combined index is not unique.
        An empty ``diagfiles`` yields an empty frame with these columns.

    Raises:
        IndexError: if no input pickle matches a run found in ``diagfiles``.
    """
    import os  # local import: only needed for portable file-name parsing

    frames = []
    for dfile in tqdm(diagfiles):
        fname = os.path.basename(dfile)
        run = fname.split('_')[0]
        epoch = fname.split('_')[-1].split('.')[0]

        # NOTE: pickle can execute arbitrary code on load; only point this at
        # trusted output directories.
        with open(dfile, 'rb') as fh:
            readout_dataframes = pkl.load(fh)
        e_hat = readout_dataframes['e_hat']  # SEM active events

        # find frame index from input df:
        input_file = glob(f'output/run_sem/{cache_tag}/{run}_kinect_trim{cache_tag}_inputdf_*.pkl')[0]
        with open(input_file, 'rb') as fh:
            input_dataframes = pkl.load(fh)
        sec = input_dataframes.x_train.index / fps

        tempdf = pd.DataFrame({'run': run, 'epoch': epoch, 'e_hat': e_hat, 'sec': sec})
        tempdf['ev'] = 'none'
        rundf = annotdf[annotdf['run'] == run]
        for start, end, evname in zip(rundf['startsec'], rundf['endsec'], rundf['evname']):
            in_event = (tempdf['sec'] >= start) & (tempdf['sec'] <= end)
            tempdf.loc[in_event, 'ev'] = evname
        frames.append(tempdf)

    if not frames:
        # Nothing to concatenate: return an empty table with the usual columns
        # instead of failing on a missing 'ev' column.
        return pd.DataFrame(columns=['run', 'epoch', 'e_hat', 'sec', 'ev', 'ev_fact'])

    # Concatenate once; growing the frame inside the loop re-copies all
    # previously accumulated rows on every iteration.
    schemadf = pd.concat(frames)
    # factorize event labels for numeric analyses:
    schemadf['ev_fact'] = pd.factorize(schemadf['ev'])[0]
    return schemadf

In [4]:
# Validation runs, taken from:
# https://github.com/mbezdek/extended-event-modeling/blob/main/qualified_valid_.8_.8.txt
valid_runs = [
    '1.3.3',
    '2.2.10',
    '4.4.3',
    '6.1.8',
    '2.2.9',
    '1.1.6',
    '3.4.3',
    '1.3.6',
    '2.2.1',
    '6.3.4',
    '1.2.7',
    '4.4.2',
    '6.2.3',
    '4.3.5',
    '6.3.8',
    '2.4.9',
    '2.4.2',
    '3.1.3',
    '6.1.5',
    '1.1.8',
]

# Tag naming the SEM output directory under output/run_sem/ (also embedded in
# the input-pickle file names). The commented line is an alternative tag.
#cache_tag='april_04_grid_lr1E-03_alfa1E-01_lmda1E+05'
cache_tag = 'july_18_full_.8_.8_1070_1E-03_1E-01_1E+07'

# glob() returns matches in arbitrary, filesystem-dependent order; sort so the
# row order of schemadf and the factorized codes are reproducible.
diagfiles = sorted(glob(f'output/run_sem/{cache_tag}/*_diagnostic*.pkl'))

schemadf = compute_schemadf(diagfiles)

# Add factorized instances: one label (and integer code) per event-and-run pair
schemadf['ev_instance'] = schemadf['ev'] + ' ' + schemadf['run']
schemadf['ev_fact_instance'] = pd.factorize(schemadf['ev_instance'])[0]

  0%|                                                   | 0/328 [06:47<?, ?it/s]


OSError: [Errno 89] Operation canceled

In [None]:
# Filter to late epoch (101) and validation runs:
testdf = schemadf[(schemadf['epoch'] == '101') & (schemadf['run'].isin(valid_runs))]


def _run_lengths(labels, groups=None):
    """Return the length, in rows, of each stretch of consecutive equal labels.

    Args:
        labels: Series whose consecutive equal values form one segment.
        groups: optional Series of the same length as ``labels``; a change in
            its value also starts a new segment, so a segment never spans two
            groups even when the label is the same on both sides.

    Returns:
        Series named ``'counts'`` with one entry per segment, in order of
        appearance, indexed by the segment's label value.
    """
    # Work on plain arrays so the non-unique index of the concatenated frame
    # never takes part in alignment.
    starts = labels.ne(labels.shift()).to_numpy()
    if groups is not None:
        starts = starts | groups.ne(groups.shift()).to_numpy()
    segment_id = starts.cumsum()
    lengths = labels.groupby(segment_id).size().to_numpy()
    return pd.Series(lengths, index=labels[starts].to_numpy(), name='counts')


# Scripted actions:
ev_instance_count = _run_lengths(testdf['ev_fact_instance'], testdf['run'])

# SEM schemas (split at run boundaries so that a schema active at the end of
# one run and the start of the next is not counted as a single long segment):
e_hat_count = _run_lengths(testdf['e_hat'], testdf['run'])


In [None]:
# ev_instance_count holds segment lengths in rows of testdf; dividing by 3
# yields seconds only if rows are 1/3 s apart.
# NOTE(review): the divisor 3 is hard-coded and differs from fps (25) —
# presumably the rows are sampled at 3 per second; confirm.
print(f'Average scripted action length is {ev_instance_count.mean()/3} seconds')

In [None]:
# e_hat_count holds segment lengths in rows of testdf; dividing by 3 yields
# seconds only if rows are 1/3 s apart.
# NOTE(review): the divisor 3 is hard-coded and differs from fps (25) —
# presumably the rows are sampled at 3 per second; confirm.
print(f'Average SEM schema length is {e_hat_count.mean()/3} seconds')