# Computing PE for last scene and recent scene models

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as st
import pickle as pkl
from glob import glob
from tqdm import tqdm
import sys


In [2]:
# https://github.com/mbezdek/extended-event-modeling/blob/main/qualified_valid_.8_.8.txt
valid_runs=['1.3.3',
'2.2.10',
'4.4.3',
'6.1.8',
'2.2.9',
'1.1.6',
'3.4.3',
'1.3.6',
'2.2.1',
'6.3.4',
'1.2.7',
'4.4.2',
'6.2.3',
'4.3.5',
'6.3.8',
'2.4.9',
'2.4.2',
'3.1.3',
'6.1.5',
'1.1.8']

#cache_tag='april_04_grid_lr1E-03_alfa1E-01_lmda1E+05'
cache_tag='july_18_full_.8_.8_1070_1E-03_1E-01_1E+07'

In [3]:
def compute_change_models(runs):    
    # build a dataframe of active SEM models and high-level event annotations
    inputdf=pd.DataFrame()
    # Combine PE for all timepoints
    pe_last_df=pd.DataFrame()
    pe_recent_df=pd.DataFrame()
    # Average PE for each run separately, this matches the SEM calculation averaging by run first
    pe_last_run_df=pd.DataFrame(columns=['pe','run'])
    pe_recent_run_df=pd.DataFrame(columns=['pe','run'])
    for run in tqdm(runs):
        input_file=glob(f'output/run_sem/{cache_tag}/{run}_kinect_trim{cache_tag}_inputdf_*.pkl')[0]
        input_dataframes = pkl.load(open(input_file, 'rb'))
        tempdf=input_dataframes.x_train_pca
        # Compute pe, predicting current scene from 'last scene':
        pe = np.linalg.norm(tempdf.shift(1)-tempdf,axis=1)      
        temp_pe_df = pd.DataFrame({'pe':pe,'run':run})
        pe_last_df = pd.concat([pe_last_df,temp_pe_df],ignore_index=True)
        pe_last_run_df = pd.concat([pe_last_run_df,pd.DataFrame({'pe':[pe_last_df['pe'].mean()],'run':run})],ignore_index=True)
        # Compute pe, predicting current from average of n 'recent scenes':
        pe = np.linalg.norm(tempdf - ((tempdf.shift(1)+tempdf.shift(2)+tempdf.shift(3))/3), axis=1)
        temp_pe_df = pd.DataFrame({'pe':pe,'run':run})
        pe_recent_df = pd.concat([pe_recent_df,temp_pe_df],ignore_index=True)
        pe_recent_run_df = pd.concat([pe_recent_run_df,pd.DataFrame({'pe':[pe_recent_df['pe'].mean()],'run':run})],ignore_index=True)
        # save inputdf pca:
        tempdf['run'] = run
        inputdf=pd.concat([inputdf,tempdf],ignore_index=True)
    
    return inputdf,pe_last_df,pe_recent_df,pe_last_run_df,pe_recent_run_df

In [4]:
inputdf,pe_last_df,pe_recent_df,pe_last_run_df,pe_recent_run_df = compute_change_models(valid_runs)

100%|███████████████████████████████████████████| 20/20 [00:00<00:00, 37.40it/s]


# Compute mean and 95 percent confidence interval

In [5]:
m = pe_last_run_df['pe'].mean()
a = pe_last_run_df['pe']
ci = st.t.interval(0.95, len(a)-1, loc=np.mean(a), scale=st.sem(a))
print(f'Last scene model mean is {m}, 95 percent CI = {ci}')

Last scene model mean is 1.988403749399147, 95 percent CI = (1.9265505277364483, 2.0502569710618457)


In [6]:
m = pe_recent_run_df['pe'].mean()
a = pe_recent_run_df['pe']
ci = st.t.interval(0.95, len(a)-1, loc=np.mean(a), scale=st.sem(a))
print(f'Recent scenes model mean is {m}, 95 percent CI = {ci}')

Recent scenes model mean is 2.2253720333920217, 95 percent CI = (2.168199644947134, 2.2825444218369095)
