<a href="https://colab.research.google.com/github/kirthana729/EDA-DA1/blob/main/Module6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.manifold import MDS, Isomap
from sklearn.preprocessing import StandardScaler

# Load the dataset
file_path = "/content/elem94_95.csv"  # Replace with the correct file path
data = pd.read_csv(file_path)

# Select numerical columns for analysis
numerical_columns = ['lunch', 'enrol', 'staff', 'exppp', 'avgsal',
                     'avgben', 'math4', 'story4', 'bs', 'lavgsal',
                     'lenrol', 'lstaff']
data_numeric = data[numerical_columns]

# Standardize the data
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data_numeric)

# Dimensionality Reduction Techniques

# 1. Principal Component Analysis (PCA)
print("\nPrincipal Component Analysis (PCA):")
pca = PCA(n_components=2)
data_pca = pca.fit_transform(data_scaled)
print("Explained Variance Ratio:", pca.explained_variance_ratio_)
print("PCA Components:\n", pca.components_)

# 2. Singular Value Decomposition (SVD)
print("\nSingular Value Decomposition (SVD):")
U, S, VT = np.linalg.svd(data_scaled)
print("Singular Values:", S)

# 3. Factor Analysis
print("\nFactor Analysis:")
factor = FactorAnalysis(n_components=2, random_state=42)
data_factor = factor.fit_transform(data_scaled)
print("Factor Loadings:\n", factor.components_)

# 4. Intrinsic Dimensionality Estimation (using PCA)
explained_variance = np.cumsum(pca.explained_variance_ratio_)
intrinsic_dim = np.argmax(explained_variance >= 0.95) + 1  # 95% variance threshold
print("\nIntrinsic Dimensionality (95% Variance):", intrinsic_dim)

# Non-Linear Methods

# 5. Multidimensional Scaling (MDS)
print("\nMultidimensional Scaling (MDS):")
mds = MDS(n_components=2, random_state=42)
data_mds = mds.fit_transform(data_scaled)
print("MDS Results Shape:", data_mds.shape)

# 6. Manifold Learning (Isomap)
print("\nManifold Learning (Isomap):")
isomap = Isomap(n_neighbors=5, n_components=2)
data_isomap = isomap.fit_transform(data_scaled)
print("Isomap Results Shape:", data_isomap.shape)




Principal Component Analysis (PCA):
Explained Variance Ratio: [0.35822487 0.19840339]
PCA Components:
 [[-0.20689666  0.08509308 -0.39963335  0.03293186  0.4398398   0.41032275
   0.14933661  0.13670695 -0.02675931  0.44620752  0.12344638 -0.41387361]
 [ 0.31262986  0.4556952  -0.18327787 -0.32216128 -0.05750379 -0.01124903
  -0.39651867 -0.38051808  0.12704976 -0.05155537  0.4466991  -0.18284366]]

Singular Value Decomposition (SVD):
Singular Values: [89.12908959 66.33093899 62.63127279 48.01331162 43.3801589  29.54987122
 18.7448734  15.57333179 12.35413178  7.7885686   4.86224096  4.28925651]

Factor Analysis:
Factor Loadings:
 [[ 0.28632156 -0.10702247  0.8219671  -0.07983204 -0.94418281 -0.87289259
  -0.15442652 -0.12136259  0.07327442 -0.94720965 -0.18473252  0.8670861 ]
 [ 0.02340106 -0.28014947  0.51189798  0.87296727  0.32057836  0.2924608
  -0.00824924 -0.01302364 -0.04123828  0.27457713 -0.224881    0.49065866]]

Intrinsic Dimensionality (95% Variance): 1

Multidimensional 