In [10]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
import os, shutil

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import glob

import pandas as pd
import numpy as np

import seaborn as sns
from matplotlib import pyplot as plt
# from matplotlib.ticker import AutoMinorLocator
# import matplotlib as mpl

from factor_analyzer import FactorAnalyzer
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo
from copy import deepcopy

from argparse import Namespace

In [12]:
from utils import *
from pca import CustomPCA
from fa import CustomFactorAnalysis as CustomFA

In [13]:
# Resolve the data directory relative to the current working directory:
# DATA points at `data/` next to the directory the kernel was started in.
HOME = os.getcwd()
DATA = os.path.join(HOME, '..', 'data')
# Calibration mode forwarded to load_data.
# NOTE(review): the set of valid modes is not visible in this notebook.
calib_mode = 'hats'

# load_data is not defined in this notebook (presumably star-imported from
# `utils` -- confirm). Its result is used below as a pandas DataFrame that is
# grouped by 'stimulus_id' and indexed with PAQ_CCW.
df = load_data(DATA, calib_mode)

In [14]:
# Global plotting defaults: 'paper' scaling context with the 'whitegrid' axes style.
sns.set_theme(context='paper', style='whitegrid')

In [15]:
# Read p1tha.csv from the shared DATA directory (instead of repeating the
# relative '../data' path) and index the table by its 'PAQ' column.
p1df = pd.read_csv(os.path.join(DATA, 'p1tha.csv')).set_index('PAQ')

# PCA (Bartlett's test of sphericity and KMO test)

In [16]:
def unitary_transform(pca, targets_proj):
    """Rotate the first two components of a fitted PCA toward a target basis.

    Let ``A = pca.components_[:2, :] @ targets_proj`` (a 2x2 matrix). The
    function solves the orthogonal Procrustes problem of finding the
    orthogonal ``R`` that minimises ``||R @ A - I||_F`` (``R = U @ Vt`` from
    the SVD of ``I @ A.T``) and applies ``R`` to the first two rows of
    ``components_``. ``R`` may include a reflection.

    Parameters
    ----------
    pca : object
        Fitted model exposing a 2-D ``components_`` array. It is deep-copied;
        the object passed in is left untouched.
    targets_proj : array-like
        Matrix with one row per column of ``components_`` and two columns.

    Returns
    -------
    (rotated, R)
        ``rotated`` is the deep copy whose first two component rows have been
        replaced by ``R @ components_[:2, :]``; ``R`` is the 2x2 orthogonal
        matrix that was applied.
    """
    rotated = deepcopy(pca)

    # 2x2 image of the leading components under the target projection.
    projected = rotated.components_[:2, :] @ targets_proj

    # Procrustes target is the 2x2 identity; the cross matrix is target @ A.T.
    target = np.eye(2, 2)
    cross = target @ projected.T

    # Singular values are not needed, only the two orthogonal factors.
    left, _, right_t = np.linalg.svd(cross)
    R = left @ right_t

    # The right-hand side is evaluated in full before the rows are overwritten.
    rotated.components_[:2, :] = R @ rotated.components_[:2, :]

    return rotated, R

In [17]:
# Per-stimulus mean of every numeric column. numeric_only=True reproduces what
# older pandas did implicitly (non-numeric columns were dropped) and avoids a
# TypeError on pandas >= 2.0 should `df` carry any non-numeric column.
dfx = df.groupby('stimulus_id').mean(numeric_only=True)

# Two 8-component PCAs without whitening or centring: one fitted on the raw
# rows of `df`, one on the per-stimulus means in `dfx`.
pca_raw = CustomPCA(n_components=8, whiten=False, center=False).fit(df[PAQ_CCW])
pca_mean = CustomPCA(n_components=8, whiten=False, center=False).fit(dfx[PAQ_CCW])

# Copies whose first two components are rotated toward `iso_proj`;
# Rr / Rm are the 2x2 rotation matrices that were applied.
rpca_raw, Rr = unitary_transform(pca_raw, iso_proj)
rpca_mean, Rm = unitary_transform(pca_mean, iso_proj)

# Project both tables with every model and store the scores as columns named
# '<prefix><k>proj' (k = 1..n_components). Prefix, first letter: u = model as
# fitted, r = after unitary_transform; second letter: r = fitted on `df`,
# m = fitted on `dfx`.
for n, pca in [('ur', pca_raw), ('um', pca_mean), ('rr', rpca_raw), ('rm', rpca_mean)]:
    pcm = pca.transform(dfx[PAQ_CCW])
    pcr = pca.transform(df[PAQ_CCW])
    for i in range(pca.n_components):
        dfx[f'{n}{i+1}proj'] = pcm[:, i]
        df[f'{n}{i+1}proj'] = pcr[:, i]

Note that only the results from the raw data are used; the same analyses are also run on the per-stimulus mean data for reference.

In [40]:
# Bartlett's test of sphericity and the Kaiser-Meyer-Olkin (KMO) measure on the
# PAQ_CCW columns of the raw table (`df`) and the per-stimulus means (`dfx`).
statistic_raw, p_value_raw = calculate_bartlett_sphericity(df[PAQ_CCW])
statistic_mean, p_value_mean = calculate_bartlett_sphericity(dfx[PAQ_CCW])
kmo_per_variable_raw, kmo_total_raw = calculate_kmo(df[PAQ_CCW])
kmo_per_variable_mean, kmo_total_mean = calculate_kmo(dfx[PAQ_CCW]) # Gives warning
# Sample sizes are read from the tables rather than hard-coded, so the report
# stays correct if the underlying data changes.
print(f"Bartlett's test of sphericity: Statistic on raw data (n={len(df)}) = {statistic_raw:.4f}, p-val = {p_value_raw:.4f}")
print(f"Bartlett's test of sphericity: Statistic on averaged data (n={len(dfx)}) = {statistic_mean:.4f}, p-val = {p_value_mean:.4f}")

Bartlett's test of sphericity: Statistic on raw data (n=675) = 4780.9290, p-val = 0.0000
Bartlett's test of sphericity: Statistic on averaged data (n=27) = 414.1433, p-val = 0.0000




In [59]:
# Plain-text table of the KMO measure: one row per PAQ attribute, then the
# overall value, for the raw and the averaged data.
print('  PAQ |   raw  |  mean')
for PAQ, val_raw, val_mean in zip(PAQ_CCW, kmo_per_variable_raw, kmo_per_variable_mean):
    print(f'   {PAQ}', f'{val_raw:.4f}', f'{val_mean:.4f}', sep=' | ')
print('total', f'{kmo_total_raw:.4f}', f'{kmo_total_mean:.4f}', sep=' | ')

  PAQ |   raw  |  mean
   pl | 0.8800 | 0.7613
   vi | 0.8812 | 0.8988
   ev | 0.6466 | 0.6587
   ch | 0.8180 | 0.7043
   an | 0.8948 | 0.8111
   mo | 0.9072 | 0.8000
   un | 0.6508 | 0.6739
   ca | 0.8531 | 0.7960
total | 0.8217 | 0.7665
