# Visualizing Data Distribution with PCA, t-SNE, and UMAP
This notebook loads the dataset `MSL_train.npy`, standardizes its features, and visualizes side-by-side 2-D projections of it computed with PCA, t-SNE, and UMAP.

In [ ]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import umap.umap_ as umap
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Read the dataset from the current working directory (relative path).
data = np.load('MSL_train.npy')

# Bring every feature onto a comparable scale before any projection is fitted.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(data)

# Configure the three 2-D reducers up front; each one is seeded with 42.
pca = PCA(n_components=2, random_state=42)
tsne = TSNE(n_components=2, random_state=42, init='random', learning_rate='auto')
umap_model = umap.UMAP(n_components=2, random_state=42)

# Fit each reducer on the standardized data (PCA, then t-SNE, then UMAP).
X_pca = pca.fit_transform(X_scaled)
X_tsne = tsne.fit_transform(X_scaled)
X_umap = umap_model.fit_transform(X_scaled)

# One row of three panels, one scatter plot per embedding.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
panels = (('PCA', X_pca), ('t-SNE', X_tsne), ('UMAP', X_umap))
for ax, (title, embedding) in zip(axes, panels):
    # First embedding column on the x-axis, second on the y-axis.
    ax.scatter(embedding[:, 0], embedding[:, 1], s=2, alpha=0.7)
    ax.set_title(title)
plt.tight_layout()
plt.show()


*Ensure that the required packages (`numpy`, `scikit-learn`, `umap-learn`, `matplotlib`) are installed in your Python environment before running the notebook.*