In [1]:
import pandas as pd
from questions_columns import sci_af_ca
from factor_analyzer import ConfirmatoryFactorAnalyzer, ModelSpecificationParser
import re



In [2]:
df = pd.read_csv(r"../create_dataset/data_for_research.csv")

In [3]:
f = pd.read_excel("Factors.xlsx")
f = f.dropna(how='all', subset = ['question', 'F'])
f['question_number'] = f.question.str.extract(r'(\d+)')
f['factor_number'] = f.F.str.extract(r'(\d+)')

In [4]:
model_dict = {str(i): [] for i in range(1,6)}
for i, row in f.iterrows():
    model_dict[row['factor_number']].append(f"sci_af_ca_{row['question_number']}")

In [26]:
model_dict

{'1': ['sci_af_ca_5',
  'sci_af_ca_6',
  'sci_af_ca_13',
  'sci_af_ca_15',
  'sci_af_ca_16',
  'sci_af_ca_19',
  'sci_af_ca_20',
  'sci_af_ca_22',
  'sci_af_ca_24',
  'sci_af_ca_25'],
 '2': ['sci_af_ca_1',
  'sci_af_ca_2',
  'sci_af_ca_3',
  'sci_af_ca_7',
  'sci_af_ca_8',
  'sci_af_ca_9',
  'sci_af_ca_10',
  'sci_af_ca_11',
  'sci_af_ca_14',
  'sci_af_ca_17',
  'sci_af_ca_26'],
 '3': ['sci_af_ca_4',
  'sci_af_ca_12',
  'sci_af_ca_18',
  'sci_af_ca_21',
  'sci_af_ca_23',
  'sci_af_ca_27',
  'sci_af_ca_28'],
 '4': ['sci_af_ca_29',
  'sci_af_ca_30',
  'sci_af_ca_31',
  'sci_af_ca_32',
  'sci_af_ca_33',
  'sci_af_ca_34',
  'sci_af_ca_40'],
 '5': ['sci_af_ca_35',
  'sci_af_ca_36',
  'sci_af_ca_37',
  'sci_af_ca_38',
  'sci_af_ca_39']}

In [5]:
df_features  = df[sci_af_ca]
model_spec = ModelSpecificationParser.parse_model_specification_from_dict(df_features, model_dict)

In [22]:
cfa = ConfirmatoryFactorAnalyzer(model_spec)

In [23]:
cfa.fit(df_features.values)



ConfirmatoryFactorAnalyzer(n_obs=237,
                           specification=<factor_analyzer.confirmatory_factor_analyzer.ModelSpecification object at 0x0000018C9789B6A0>)

In [16]:
from factor_analyzer.factor_analyzer import calculate_bartlett_sphericity, calculate_kmo, FactorAnalyzer

In [18]:
chi_square_value,p_value=calculate_bartlett_sphericity(df_features)
kmo_all,kmo_model=calculate_kmo(df_features)
print('Chi-square value:',chi_square_value)
print('P-value:',p_value)
print('KMO Model:',kmo_model)

Chi-square value: 9313.148344339079
P-value: 0.0
KMO Model: 0.961215407671803


In [25]:
cfa.__dict__

{'specification': <factor_analyzer.confirmatory_factor_analyzer.ModelSpecification at 0x18c9789b6a0>,
 'n_obs': 237,
 'is_cov_matrix': False,
 'bounds': None,
 'max_iter': 200,
 'tol': None,
 'impute': 'median',
 'disp': True,
 'cov_': array([[1.75228329, 0.67633392, 0.51577813, ..., 0.44610017, 0.49629589,
         0.56171882],
        [0.67633392, 2.04201606, 1.22015297, ..., 0.82567424, 0.9455981 ,
         1.09967531],
        [0.51577813, 1.22015297, 2.38344398, ..., 1.13068059, 1.17431835,
         1.14369905],
        ...,
        [0.44610017, 0.82567424, 1.13068059, ..., 1.98728476, 1.46443158,
         1.46154144],
        [0.49629589, 0.9455981 , 1.17431835, ..., 1.46443158, 1.81446454,
         1.45739793],
        [0.56171882, 1.09967531, 1.14369905, ..., 1.46154144, 1.45739793,
         2.05580694]]),
 'mean_': array([1.24050633, 1.57383966, 1.86313123, 2.23206751, 2.2742616 ,
        2.16726535, 1.17721519, 1.8264167 , 1.9535865 , 1.27848101,
        1.17299578, 2.2152676

In [24]:
cfa.loadings_

array([[0.45462069, 0.        , 0.        , 0.        , 0.        ],
       [0.8294226 , 0.        , 0.        , 0.        , 0.        ],
       [1.05211291, 0.        , 0.        , 0.        , 0.        ],
       [1.08183858, 0.        , 0.        , 0.        , 0.        ],
       [1.19666567, 0.        , 0.        , 0.        , 0.        ],
       [1.15890849, 0.        , 0.        , 0.        , 0.        ],
       [0.83678197, 0.        , 0.        , 0.        , 0.        ],
       [0.91814784, 0.        , 0.        , 0.        , 0.        ],
       [1.11084853, 0.        , 0.        , 0.        , 0.        ],
       [0.80622513, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.60990279, 0.        , 0.        , 0.        ],
       [0.        , 1.00843487, 0.        , 0.        , 0.        ],
       [0.        , 1.07826527, 0.        , 0.        , 0.        ],
       [0.        , 1.03104985, 0.        , 0.        , 0.        ],
       [0.        , 0.96870557, 0.