# Factor Analysis: World Values Survey - Cultural Dimensions\n## Global values, beliefs, and attitudes across countries revealing Post-Materialism and Social Trust\n\n**Focus:** Complex dimensionality reduction and cultural/psychological factor identification

In [None]:
# Numerical / tabular stack
import numpy as np
import pandas as pd
# Plotting
import matplotlib.pyplot as plt
import seaborn as sns
# Factor-analysis estimator plus the Bartlett and KMO suitability tests used below
from factor_analyzer import FactorAnalyzer, calculate_bartlett_sphericity, calculate_kmo
# Used to standardize the variables before the eigenvalue / factor-extraction cells
from sklearn.preprocessing import StandardScaler
import warnings
# NOTE(review): this silences every warning for the whole session, including
# any convergence or deprecation warnings from the libraries above.
warnings.filterwarnings('ignore')

# Notebook-wide plot defaults: seaborn 'whitegrid' style and a 14x8 default figure size
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 8)
print('Ready!')

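## 1. Data Loading\n\nThis notebook demonstrates factor analysis (FA) methodology on synthetic data. To run a real analysis, replace the synthetic data generated in the next cell with your actual dataset.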

In [None]:
# Synthetic stand-in dataset: each observed variable is a linear mix of a few
# latent factors plus independent Gaussian noise.
np.random.seed(42)  # fixed seed so every run draws the same data
n_samples = 2000
n_vars = 30
n_factors = 4

# Latent factor scores, one row per sample
factors = np.random.randn(n_samples, n_factors)

# Loading matrix: small random cross-loadings everywhere, then each factor
# gets a contiguous block of variables whose loading on it is raised by 0.7.
# The block width is n_vars // n_factors, so remainder variables at the end
# (here the last 2 of 30) receive no boost on any factor.
loadings_true = 0.5 * np.random.randn(n_vars, n_factors)
block_size = n_vars // n_factors
for i in range(n_factors):
    loadings_true[i * block_size:(i + 1) * block_size, i] += 0.7

# Observed data = factor scores projected through the loadings, plus noise
noise = 0.4 * np.random.randn(n_samples, n_vars)
X = factors @ loadings_true.T + noise

column_names = [f'V{i+1}' for i in range(n_vars)]
df = pd.DataFrame(X, columns=column_names)
print(f'Dataset: {df.shape}')
display(df.head())

## 2. Exploratory Analysis

In [None]:
# Pairwise correlations between all observed variables, drawn as a heatmap
# centred on zero so positive and negative correlations get opposite colours.
corr = df.corr()

fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr, cmap='coolwarm', center=0, square=True, ax=ax)
ax.set_title('Correlation Matrix')
fig.tight_layout()
plt.show()

## 3. Suitability Tests

In [None]:
# Bartlett's test of sphericity: a small p-value means the correlation matrix
# differs from the identity, i.e. there is shared variance to factor.
chi2, p = calculate_bartlett_sphericity(df)
# Kaiser-Meyer-Olkin measure: per-variable values and the overall value.
kmo_all, kmo_model = calculate_kmo(df)

# The data is treated as suitable when Bartlett is significant at 5% and the
# overall KMO exceeds 0.6.
is_suitable = p < 0.05 and kmo_model > 0.6

report_lines = [
    f'Bartlett: p={p:.6f}',
    f'KMO: {kmo_model:.3f}',
    f'Suitable: {is_suitable}',
]
for line in report_lines:
    print(line)

## 4. Determine Number of Factors

In [None]:
# Standardize every variable; the scaled frame is also what the later
# factor-extraction and factor-score cells operate on.
scaler = StandardScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)

# Eigenvalues of the correlation matrix, largest first.
# Keep this an ndarray (not a Python list): the element-wise `> 1` comparison
# used for the Kaiser count below is only defined for arrays.
corr_matrix = df_scaled.corr()
eigenvalues = np.sort(np.linalg.eigvalsh(corr_matrix))[::-1]

# Scree plot: eigenvalue against factor rank, with the Kaiser cut-off at 1
factor_ranks = np.arange(1, len(eigenvalues) + 1)
plt.figure(figsize=(12, 6))
plt.plot(factor_ranks, eigenvalues, 'bo-')
plt.axhline(y=1, color='r', linestyle='--', label='Kaiser criterion')
plt.xlabel('Factor')
plt.ylabel('Eigenvalue')
plt.title('Scree Plot')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# Kaiser criterion: retain as many factors as there are eigenvalues above 1.
# Cast to a built-in int so downstream range(...) / n_factors=... get a plain integer.
n_factors_suggested = int((eigenvalues > 1).sum())
print(f'Suggested factors: {n_factors_suggested}')

## 5. Factor Analysis

In [None]:
# Fit a varimax-rotated factor model with the number of factors chosen above
fa = FactorAnalyzer(n_factors=n_factors_suggested, rotation='varimax')
fa.fit(df_scaled)

# Loadings table: one row per observed variable, one column per factor
factor_names = [f'Factor_{i+1}' for i in range(n_factors_suggested)]
loadings_df = pd.DataFrame(fa.loadings_, index=df.columns, columns=factor_names)

print('Factor Loadings:')
display(loadings_df.round(3))

# Heatmap of the same table, centred on zero so the sign of a loading is visible
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(loadings_df, annot=True, cmap='RdBu_r', center=0, fmt='.2f', ax=ax)
ax.set_title('Factor Loadings')
fig.tight_layout()
plt.show()

## 6. Interpretation

In [None]:
# get_factor_variance() result is indexed below as:
#   [1] -> proportion of variance per factor, [2] -> cumulative proportion
variance = fa.get_factor_variance()
proportion_var = variance[1]
cumulative_var = variance[2]

# The last cumulative entry is the share explained by all retained factors
print(f'Total variance explained: {cumulative_var[-1]*100:.1f}%')

for i in range(n_factors_suggested):
    print(f'\nFactor {i+1}: {proportion_var[i]*100:.1f}% variance')
    factor_loadings = loadings_df.iloc[:, i]
    # Rank variables by absolute loading, but report the signed value
    top_vars = factor_loadings.abs().sort_values(ascending=False).head(5).index
    for var in top_vars:
        print(f'  {var}: {factor_loadings.loc[var]:.3f}')

## 7. Factor Scores

In [None]:
# Project every sample onto the fitted factors
factor_scores = fa.transform(df_scaled)
score_columns = [f'Factor_{i+1}' for i in range(n_factors_suggested)]
scores_df = pd.DataFrame(factor_scores, columns=score_columns)

print('Factor Scores:')
display(scores_df.describe())

# A 2-D scatter needs at least two factors; skip the plot otherwise
if n_factors_suggested >= 2:
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.scatter(scores_df.iloc[:, 0], scores_df.iloc[:, 1], alpha=0.3)
    ax.set_xlabel('Factor 1')
    ax.set_ylabel('Factor 2')
    ax.set_title('Factor Scores Distribution')
    ax.grid(True, alpha=0.3)
    plt.show()

## Summary\n\nThis notebook demonstrated Factor Analysis methodology for the World Values Survey - Cultural Dimensions use case, using synthetic stand-in data.\n\nKey steps:\n1. Data preparation and cleaning\n2. Suitability assessment (Bartlett, KMO)\n3. Factor number determination (scree plot, Kaiser criterion)\n4. Factor extraction with rotation\n5. Interpretation and naming\n6. Factor score calculation\n\nReplace the synthetic data with a real dataset for actual analysis.