In [1]:
import sys
import numpy as np
import pandas as pd

from sklearn.datasets import load_digits
from sklearn.decomposition import FactorAnalysis

In [6]:
# Fixed seed so that "Restart Kernel -> Run All" regenerates the same
# synthetic data on every run.
# NOTE(review): the outputs stored in this notebook were produced before the
# seed was introduced; re-run the notebook to refresh them.
SEED = 42

# Synthetic data: 1000 rows x 100 columns of integers drawn from {1, 2, 3, 4}
# (the upper bound 5 passed to randint is exclusive).
# A dedicated RandomState is used so NumPy's global random state is not touched.
X_load = np.random.RandomState(SEED).randint(1, 5, (1000, 100))
X = pd.DataFrame(X_load)
X.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,2,4,4,4,2,4,2,1,2,1,...,4,4,1,1,3,2,2,4,1,3
1,2,3,3,2,4,1,2,4,1,4,...,2,4,4,2,3,2,4,2,3,2
2,1,3,1,1,1,3,3,2,4,3,...,3,4,3,4,4,4,1,4,1,4
3,2,2,2,3,4,1,4,4,2,3,...,3,3,2,4,2,4,1,4,3,1
4,2,2,4,2,1,3,3,2,4,2,...,4,2,1,2,2,3,4,1,1,3


In [3]:
# Fit a six-component factor model on X and keep the transformed values in a
# DataFrame (one column per component) so they display as a table.
analysis = FactorAnalysis(n_components=6)
factor_scores = analysis.fit_transform(X)
transformed = pd.DataFrame(factor_scores)
transformed.head()

Unnamed: 0,0,1,2,3,4,5
0,0.526543,-0.33842,-0.448368,0.30086,-0.189331,0.511701
1,0.378077,-0.099612,0.053779,-0.012358,-0.286883,0.81299
2,2.335834,-0.617197,-0.163898,-0.5431,-0.038211,-0.224474
3,-0.199622,1.620684,0.373131,0.214629,0.465672,-0.330972
4,-0.742452,0.138031,0.067242,0.393575,-0.311945,0.210839


In [4]:
"""
Factor analysis using MINRES or ML,
with optional rotation using Varimax or Promax.

:author: Jeremy Biggs (jbiggs@ets.org)
:date: 10/25/2017
:organization: ETS
"""

import warnings

import numpy as np
import scipy as sp
import pandas as pd

from scipy.stats import chi2, pearsonr
from scipy.optimize import minimize

from sklearn.base import BaseEstimator, TransformerMixin

from factor_analyzer.utils import (corr,
                                   impute_values,
                                   partial_correlations,
                                   smc)
from factor_analyzer.rotator import Rotator
from factor_analyzer.rotator import POSSIBLE_ROTATIONS, OBLIQUE_ROTATIONS


from sklearn.utils.extmath import randomized_svd
from sklearn.utils import check_array
from sklearn.utils.validation import check_is_fitted


# Option names kept as module-level lists; nothing in the visible part of this
# notebook reads them.
# NOTE(review): presumably the accepted imputation strategies -- confirm.
POSSIBLE_IMPUTATIONS = [
    'mean',
    'median',
    'drop',
]

# NOTE(review): presumably the accepted fitting methods -- confirm.
POSSIBLE_METHODS = [
    'ml',
    'mle',
    'uls',
    'minres',
    'principal',
]


def calculate_kmo(x):
    """
    Compute the Kaiser-Meyer-Olkin (KMO) criterion, both
    per item and for the data set as a whole. The statistic
    represents the degree to which each observed variable is
    predicted, without error, by the other variables in the
    dataset. In general, a KMO < 0.6 is considered inadequate.

    Each score is the ratio

        sum(r**2) / (sum(r**2) + sum(p**2))

    where ``r`` are the off-diagonal entries of the matrix
    returned by ``corr(x)`` and ``p`` are the off-diagonal
    entries of the matrix returned by ``partial_correlations(x)``.
    The per-item scores sum over axis 0 of those matrices; the
    overall score sums over every entry.

    Parameters
    ----------
    x : array-like
        The array from which to calculate KMOs.

    Returns
    -------
    kmo_per_item : numpy array
        The KMO score per item.
    kmo_total : float
        The KMO score overall.
    """

    # partial correlations first, then the plain pair-wise correlations
    partials = partial_correlations(x)
    pairwise = corr(x)

    # zero both diagonals (in place) so that self-correlations
    # do not contribute to any of the sums below
    np.fill_diagonal(pairwise, 0)
    np.fill_diagonal(partials, 0)

    # element-wise squares of the off-diagonal entries
    partials_sq = partials**2
    pairwise_sq = pairwise**2

    # per-item score: sums taken along axis 0
    partials_by_item = np.sum(partials_sq, axis=0)
    pairwise_by_item = np.sum(pairwise_sq, axis=0)
    kmo_per_item = pairwise_by_item / (pairwise_by_item + partials_by_item)

    # overall score: sums taken over every entry
    pairwise_total = np.sum(pairwise_sq)
    partials_total = np.sum(partials_sq)
    kmo_total = pairwise_total / (pairwise_total + partials_total)

    return kmo_per_item, kmo_total


In [5]:
# KMO on the synthetic frame: the per-item scores are printed first,
# followed by the overall score on its own line.
kmo_per_item, kmo_total = calculate_kmo(X)
print(kmo_per_item, kmo_total, sep="\n")

[0.44614781 0.45551409 0.45679191 0.49585853 0.4750126  0.48798351
 0.4829226  0.45644812 0.48743424 0.4745612  0.48782277 0.43788245
 0.47217048 0.46187206 0.44116425 0.5226065  0.46808566 0.47792646
 0.46940553 0.46387614 0.47950636 0.48675037 0.45187976 0.50684477
 0.48881365 0.42919886 0.445915   0.51656755 0.51690057 0.45603665
 0.43624149 0.43985226 0.47052643 0.43728097 0.49046798 0.48211519
 0.45300306 0.49381324 0.47301107 0.46042152 0.51321001 0.44667276
 0.51680631 0.44697677 0.4265617  0.47986522 0.46280774 0.4891087
 0.43123981 0.4520366  0.4637498  0.45745468 0.43254125 0.45081727
 0.48152002 0.47841148 0.44269902 0.48510523 0.42316827 0.4623309
 0.46168909 0.50094191 0.45065914 0.46926844 0.46587476 0.44984673
 0.49149428 0.50806232 0.4875381  0.48509003 0.5176     0.48807497
 0.49323756 0.49146787 0.43453942 0.45128684 0.47526069 0.46633743
 0.4888387  0.47597508 0.45376336 0.47828944 0.46716672 0.46043578
 0.49652382 0.46314809 0.4487402  0.51215954 0.481356   0.451481