# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# 1. Load High-Dimensional Sensor Data
# In autonomous systems, we often deal with n >> m scenarios.
# Every column of the CSV is handed to the pipeline as a feature, so the
# file is assumed to contain only numeric sensor readings.
# NOTE(review): confirm the CSV has no timestamp/label/index columns.
data = pd.read_csv('data/high_dim_sensors.csv')
X = data.values

# 2. Pre-processing: Mean Normalization and Scaling
# Standardize each column before PCA so that no single sensor dominates
# the principal components merely because of its units or value range.
scaler = StandardScaler()
X_norm = scaler.fit_transform(X)

# 3. Principal Component Analysis (PCA) Implementation
# Objective: reduce the sensor channels to 2 principal components so the
# data can be drawn on a flat scatter plot.
pca = PCA(n_components=2)
X_reduced = pca.fit_transform(X_norm)

# 4. Variance Analysis
# Understanding how much information is retained after compression: the
# per-component ratios are printed first, then their sum.
explained_variance = pca.explained_variance_ratio_
print(f"Explained Variance Ratio: {explained_variance}")
print(f"Total Variance Retained: {np.sum(explained_variance):.2f}")

# 5. Visualization
# One point per input row, positioned by its first two component scores.
plt.figure(figsize=(8, 6))
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], alpha=0.7, c='green', edgecolors='k')
# The input dimensionality in the title is taken from the loaded data rather
# than hard-coded, so the label stays truthful if the sensor count changes.
plt.title(f'Autonomous Systems: {X.shape[1]}-D Sensor Data Projected to 2-D PCA Space')
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.grid(alpha=0.3)
plt.show()