In [1]:
!pip install factor-analyzer pandas

Collecting factor-analyzer
  Downloading factor_analyzer-0.5.1.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m564.9 kB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: factor-analyzer
  Building wheel for factor-analyzer (pyproject.toml) ... [?25l[?25hdone
  Created wheel for factor-analyzer: filename=factor_analyzer-0.5.1-py2.py3-none-any.whl size=42564 sha256=e6db1ba615aea0219d8fbb33612b56fa4c3da2dd4bf094aeb3fcecf3fe7be49f
  Stored in directory: /root/.cache/pip/wheels/24/59/82/6493618e30ed1cb7a013b9e1b0c9e17de80b04dfcef4ba8a4d
Successfully built factor-analyzer
Installing collected packages: factor-analyzer
Successfully installed factor-analyzer-0.5.1


# 2.1

In [9]:
from factor_analyzer import FactorAnalyzer
import numpy as np
import pandas as pd

# Đọc dữ liệu
data_path = '/content/places_tf.csv'
data = pd.read_csv(data_path)

# Log transformation trên các cột (trừ cột cuối cùng)
transformed_columns = data.columns[:-1]
data[transformed_columns] = data[transformed_columns].applymap(lambda x: np.log10(x))

# Xác định các biến để thực hiện phân tích nhân tố
variable = data.columns[:-1]
X = data[variable]

# Số factor
m = 3

# Tạo và fit model
fa = FactorAnalyzer(n_factors=m, method='principal', rotation = None )
fa.fit(X)

# Kiểm tra eigenvalues
eigenvalues, _ = fa.get_eigenvalues()
print("Eigenvalues:", eigenvalues)

# Lấy factor loadings
factor_loadings = fa.loadings_

# Hiển thị kết quả dưới dạng bảng
print("Factor Loadings:")
print(pd.DataFrame(factor_loadings, index=variable, columns=[f"Factor {i+1}" for i in range(m)]))

communalities = np.sum(factor_loadings**2, axis=1)
total_communality = np.sum(communalities)

print("Communalities:", communalities)
print("Total Communality:", total_communality)

Eigenvalues: [3.20788406 1.21888092 1.10461342 0.92433502 0.86004479 0.57763963
 0.48214885 0.33014925 0.29430406]
Factor Loadings:
          Factor 1  Factor 2  Factor 3
climate   0.264538  0.104883  0.858217
housing   0.699044  0.144250  0.050406
health    0.709545 -0.436887  0.003515
crime     0.465943  0.532585  0.168383
trans     0.686249 -0.162201 -0.137764
educate   0.489905 -0.499309 -0.193239
arts      0.839382 -0.103141  0.006206
recreate  0.647737  0.308219  0.004496
econ      0.305964  0.575587 -0.529898
Communalities: [0.81751791 0.51201084 0.6943371  0.52910188 0.51622613 0.5266573
 0.71523884 0.51458278 0.70570563]
Total Communality: 5.53137839855572


In [5]:
# Hiển thị ma trận tương quan của các biến
correlation_matrix = X.corr()
print("Correlation Matrix:")
correlation_matrix

Correlation Matrix:


Unnamed: 0,climate,housing,health,crime,trans,educate,arts,recreate,econ
climate,1.0,0.252183,0.131649,0.234255,0.011641,0.078954,0.143408,0.10588,-0.099372
housing,0.252183,1.0,0.411334,0.139176,0.321329,0.201507,0.48901,0.461754,0.298056
health,0.131649,0.411334,1.0,0.161644,0.400603,0.44699,0.611331,0.22867,0.044961
crime,0.234255,0.139176,0.161644,1.0,0.271709,0.052474,0.327249,0.282848,0.276923
trans,0.011641,0.321329,0.400603,0.271709,1.0,0.308652,0.538818,0.392046,0.062694
educate,0.078954,0.201507,0.44699,0.052474,0.308652,1.0,0.316654,0.094467,0.129097
arts,0.143408,0.48901,0.611331,0.327249,0.538818,0.316654,1.0,0.495983,0.134248
recreate,0.10588,0.461754,0.22867,0.282848,0.392046,0.094467,0.495983,1.0,0.175348
econ,-0.099372,0.298056,0.044961,0.276923,0.062694,0.129097,0.134248,0.175348,1.0


In [6]:
# Tính phương sai đặc thù (uniquenesses)
uniquenesses = 1 - communalities

# Tính toán ma trận tương quan tái tạo
reproduced_corr_matrix = factor_loadings @ factor_loadings.T + np.diag(uniquenesses)

# Tính toán ma trận tương quan dư
residual_matrix = correlation_matrix - reproduced_corr_matrix

# Chuyển đổi ma trận tương quan dư thành mảng NumPy
residual_matrix_np = residual_matrix.to_numpy()

# Thêm phương sai đặc thù vào đường chéo của ma trận tương quan dư
np.fill_diagonal(residual_matrix_np, uniquenesses)

# Chuyển đổi trở lại thành DataFrame
residual_matrix_df = pd.DataFrame(residual_matrix_np, index=variable, columns=variable)

print("Residual Correlation Matrix with Uniqueness on the Diagonal:")
residual_matrix_df

Residual Correlation Matrix with Uniqueness on the Diagonal:


Unnamed: 0,climate,housing,health,crime,trans,educate,arts,recreate,econ
climate,0.182482,0.008871,-0.013247,-0.089372,-0.034654,0.167566,-0.073148,-0.101656,0.214087
housing,0.008871,0.487989,-0.021826,-0.271851,-0.128048,-0.059192,-0.083189,-0.03573,0.027856
health,-0.013247,-0.021826,0.305663,0.063124,-0.156701,-0.118082,-0.029332,-0.096288,0.081195
crime,-0.089372,-0.271851,0.063124,0.470898,0.061539,0.122669,-0.009968,-0.18387,-0.082962
trans,-0.034654,-0.128048,-0.156701,0.061539,0.483774,-0.135154,-0.053082,-0.00185,-0.126913
educate,0.167566,-0.059192,-0.118082,0.122669,-0.135154,0.473343,-0.144864,-0.068097,0.164203
arts,-0.073148,-0.083189,-0.029332,-0.009968,-0.053082,-0.144864,0.284761,-0.015954,-0.059918
recreate,-0.101656,-0.03573,-0.096288,-0.18387,-0.00185,-0.068097,-0.015954,0.485417,-0.197861
econ,0.214087,0.027856,0.081195,-0.082962,-0.126913,0.164203,-0.059918,-0.197861,0.294294


# 2.2

In [10]:
from factor_analyzer import FactorAnalyzer, calculate_bartlett_sphericity
import numpy as np
import pandas as pd
from scipy.stats import chi2

# Đọc dữ liệu
data_path = '/content/places_tf.csv'
data = pd.read_csv(data_path)

# Chuyển đổi các cột bằng log10, trừ cột 'id'
transformed_columns = data.columns[:-1]
data[transformed_columns] = data[transformed_columns].applymap(lambda x: np.log10(x))

# Chọn các biến để phân tích
variables = data.columns[:-1]
X = data[variables]

# Số lượng nhân tố cần trích xuất
m = 3

# Tạo và fit model với phương pháp MLE
fa = FactorAnalyzer(n_factors=m, method='ml', rotation=None)
fa.fit(X)

# Kiểm tra eigenvalues
eigenvalues, _ = fa.get_eigenvalues()
print("Eigenvalues:", eigenvalues)

# Lấy factor loadings
factor_loadings = fa.loadings_

# Hiển thị kết quả dưới dạng bảng
factor_loadings_df = pd.DataFrame(factor_loadings, index=variables, columns=[f"Factor {i+1}" for i in range(m)])
print("Factor Loadings:")
print(factor_loadings_df)

# Tính communalities
communalities = np.sum(factor_loadings**2, axis=1)
total_communality = np.sum(communalities)

print("Communalities:", communalities)
print("Total Communality:", total_communality)

# Bartlett's test of sphericity
chi_square_value, p_value = calculate_bartlett_sphericity(X)

# Test: No common factors
df_no_factors = X.shape[1] * (X.shape[1] - 1) / 2
chi_square_no_factors = chi_square_value
p_value_no_factors = chi2.sf(chi_square_no_factors, df_no_factors)
print(f"Test: No Common Factors: DF = {df_no_factors}, Chi-Square = {chi_square_no_factors:.4f}, p-value = {p_value_no_factors:.4f}")

# Test: 3 Factors are sufficient
correlation_matrix = np.corrcoef(X, rowvar=False)
uniquenesses = np.diag(fa.get_uniquenesses())
residual_matrix = np.dot(factor_loadings, factor_loadings.T) + uniquenesses
chi_square_3_factors = (X.shape[0] - 1 - (2 * X.shape[1] + 4 * m - 5) / 6) * np.log(np.linalg.det(residual_matrix) / np.linalg.det(correlation_matrix))
df_3_factors = ((X.shape[1] - m)**2 - X.shape[1] - m)/2

# Tính p-value
p_value_3_factor = chi2.sf(chi_square_3_factors, df_3_factors)
print(f"Test: 3 Factors are sufficient: DF = {df_3_factors}, Chi-Square = {chi_square_3_factors:.4f}, p-value = {p_value:.4f}")

Eigenvalues: [3.20788406 1.21888092 1.10461342 0.92433502 0.86004479 0.57763963
 0.48214885 0.33014925 0.29430406]
Factor Loadings:
          Factor 1  Factor 2  Factor 3
climate   0.252908  0.031210 -0.003257
housing   0.997367 -0.015998 -0.002332
health    0.423219  0.716957 -0.296106
crime     0.145312  0.274142  0.368471
trans     0.330388  0.487917  0.249484
educate   0.208555  0.423197 -0.177128
arts      0.501175  0.646240  0.226077
recreate  0.467759  0.234278  0.460929
econ      0.298108 -0.042619  0.159950
Communalities: [0.06494716 0.99500168 0.78082161 0.23203989 0.40946163 0.25396516
 0.71991403 0.48614021 0.11626855]
Total Communality: 4.058559918253862
Test: No Common Factors: DF = 36.0, Chi-Square = 770.2431, p-value = 0.0000
Test: 3 Factors are sufficient: DF = 12.0, Chi-Square = 84.0472, p-value = 0.0000
