In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns
from factor_analyzer import FactorAnalyzer
from factor_analyzer.rotator import Promax
from factor_analyzer.factor_analyzer import calculate_kmo


In [None]:
# Load the survey responses from the CSV file into a DataFrame.
# NOTE(review): the path below is an absolute, machine-specific Windows path;
# the notebook will only run where this exact location exists.
survey_df = pd.read_csv(filepath_or_buffer='D:\\Assignments_SCMA632\\Data\\Survey.csv')


In [None]:
# Overview of the raw survey data: dimensions, column labels, first rows, and structure.
print(survey_df.shape)
print(survey_df.columns)
print(survey_df.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
survey_df.info()


In [None]:
# Count the missing values in each column of the raw survey data.
missing_per_column = survey_df.isna().sum()
print(missing_per_column)


In [None]:
# Keep only the columns used in the analysis, selected by position:
# zero-based column positions 19 through 45 (the slice end, 46, is exclusive).
sur_int = survey_df.iloc[:, slice(19, 46)]

In [None]:
# Show the structure and dimensions of the selected columns.
# DataFrame.info() prints its own report and returns None, so it is called
# directly rather than through print(), which would add a stray "None" line.
sur_int.info()
print(sur_int.shape)


In [None]:
# Standardize the selected columns so they are on a common scale before PCA.
scaler = StandardScaler()
sur_int_scaled = scaler.fit(sur_int).transform(sur_int)


In [None]:
# Perform PCA on the standardized data, keeping the first five components.
# random_state is fixed for reproducibility: with svd_solver left at its
# default ('auto'), scikit-learn may select a randomized SVD solver depending
# on the data shape and library version. The seed is ignored when an exact
# solver is used, so it is harmless in that case.
pca = PCA(n_components=5, random_state=42)
pca_result = pca.fit_transform(sur_int_scaled)


In [None]:
# Show the fitted component vectors, then the share of variance explained by
# each retained component.
for pca_attribute in (pca.components_, pca.explained_variance_ratio_):
    print(pca_attribute)


In [None]:
# Biplot of the first two principal components: observations are drawn as
# points (component scores) and the original variables as arrows (loadings).
fig, ax = plt.subplots(figsize=(10, 7))
scores = pca_result[:, :2]
ax.scatter(scores[:, 0], scores[:, 1], alpha=0.5)

# One row per variable, one column per component.
loadings = pca.components_[:2].T
# The loadings are on a much smaller scale than the scores, so plotting them
# as-is collapses every variable label onto the origin. Stretch them by a
# single common factor (preserving arrow directions), chosen as the smaller
# per-axis ratio so the longest arrow stays within the extent of the scores.
arrow_scale = (np.abs(scores).max(axis=0) / np.abs(loadings).max(axis=0)).min()
head_size = 0.015 * np.abs(scores).max()
for name, (x_tip, y_tip) in zip(sur_int.columns, loadings * arrow_scale):
    ax.arrow(0, 0, x_tip, y_tip, color='red', alpha=0.7,
             head_width=head_size, length_includes_head=True)
    ax.annotate(name, (x_tip, y_tip), color='red')

ax.set_xlabel('PC1')
ax.set_ylabel('PC2')
ax.set_title('PCA Biplot')
ax.grid(True)
plt.show()


In [None]:
# Fit a five-factor model to the standardized data with no rotation applied.
fa = FactorAnalyzer(rotation=None, n_factors=5)
fa.fit(sur_int_scaled)

In [None]:
# Scree plot: eigenvalues reported by the fitted factor model, plotted against
# factor number (one per input variable) as both markers and a connecting line.
ev, v = fa.get_eigenvalues()
factor_numbers = range(1, sur_int_scaled.shape[1] + 1)
plt.scatter(factor_numbers, ev)
plt.plot(factor_numbers, ev)
plt.xlabel('Factors')
plt.ylabel('Eigenvalue')
plt.title('Scree Plot')
plt.grid(True)
plt.show()

In [None]:
# Refit the five-factor model, this time with a promax rotation, and show the
# resulting loadings. This replaces the unrotated model held in `fa`.
fa = FactorAnalyzer(rotation='promax', n_factors=5)
fa.fit(sur_int_scaled)
print(fa.loadings_)


In [None]:
# Kaiser-Meyer-Olkin (KMO) check on the standardized data: calculate_kmo
# returns a pair, of which only the second (kmo_model) is reported here.
kmo_result = calculate_kmo(sur_int_scaled)
kmo_all, kmo_model = kmo_result
print('KMO: ', kmo_model)

In [None]:
# Descriptive statistics of the PCA scores (one column per retained component).
pca_scores_summary = pd.DataFrame(pca_result).describe()
print(pca_scores_summary)


In [None]:
# Show the structure, dimensions, and first rows of the selected columns.
# DataFrame.info() prints its own report and returns None, so it is called
# directly rather than through print(), which would add a stray "None" line.
sur_int.info()
print(sur_int.shape)
print(sur_int.head())