# Visualizing Data Segments with PCA
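This notebook standardizes the dataset `MSL_train.npy`, projects it onto its first two principal components with PCA, and then splits the projected points into five consecutive chronological sections, plotting each section in a different color.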

In [ ]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Load data
# NOTE(review): assumes a 2-D (n_samples, n_features) array whose rows are in
# chronological order -- confirm against how MSL_train.npy was produced.
data = np.load('MSL_train.npy')

# Standardize features so each one contributes on the same scale; PCA is
# variance-driven and would otherwise be dominated by large-valued features.
scaler = StandardScaler()
data_scaled = scaler.fit_transform(data)

# Apply PCA to reduce dimensions to 2 for visualization.
# The projection is fitted on the whole dataset, so every section is drawn in
# the same coordinate system and the sections are directly comparable.
pca = PCA(n_components=2, random_state=42)
data_pca = pca.fit_transform(data_scaled)

# Split the rows into consecutive, equally sized sections; the last section
# also absorbs the remainder when the row count is not divisible by n_sections.
n_sections = 5
section_len = len(data_pca) // n_sections
colors = plt.cm.viridis(np.linspace(0, 1, n_sections))

plt.figure(figsize=(8, 6))
for i in range(n_sections):
    start = i * section_len
    end = len(data_pca) if i == n_sections - 1 else (i + 1) * section_len
    plt.scatter(
        data_pca[start:end, 0],
        data_pca[start:end, 1],
        # Wrap the single RGBA row in a list so it is read as one color for
        # the whole section rather than as a sequence of scalar values.
        c=[colors[i]],
        s=2,
        alpha=0.7,
        label=f'Section {i + 1}',
    )
# The points are drawn with s=2, which would make the legend handles almost
# invisible; enlarge only the legend markers so the colors stay readable.
plt.legend(markerscale=5)
plt.title('Data distribution by time sections (PCA)')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.tight_layout()
plt.show()


*Ensure that `numpy`, `scikit-learn`, and `matplotlib` are installed, and that `MSL_train.npy` is located in the notebook's working directory, before running the notebook.*