# EDA Raman Spectra

Этот ноутбук выполняет базовый анализ спектров:
- загрузка данных (здоровые / больные),
- сводка по размерам,
- средние спектры и стандартное отклонение по классам,
- PCA-визуализация.


In [None]:
# Notebook parameters.
# Path to the Excel workbook read by the loading cell.
file_path: str = "Raman_krov_SSZ-zdorovye.xlsx"

In [None]:
import pandas as pd
import numpy as np

# Read both class sheets in a single pass over the workbook: passing a list of
# sheet names makes read_excel return a dict keyed by sheet name, so the file
# is opened and parsed once instead of twice.
sheets = pd.read_excel(file_path, sheet_name=["health", "heart disease"])
df_health = sheets["health"]
df_disease = sheets["heart disease"]

# The healthy-sheet axis is used as the common x-axis for every spectrum in the
# rest of the notebook, so both sheets must share the same wavenumber grid.
# Fail early with a clear message instead of silently misplotting the disease
# spectra or hitting an opaque shape error further down.
wavenumber = df_health["wavenumber"].to_numpy()
wavenumber_disease = df_disease["wavenumber"].to_numpy()
if wavenumber.shape != wavenumber_disease.shape:
    raise ValueError(
        "Sheets 'health' and 'heart disease' have different numbers of "
        f"wavenumber points: {wavenumber.shape[0]} vs "
        f"{wavenumber_disease.shape[0]}"
    )
if not np.allclose(wavenumber, wavenumber_disease, equal_nan=True):
    raise ValueError(
        "Sheets 'health' and 'heart disease' use different wavenumber axes"
    )

# Every column other than "wavenumber" is treated as one spectrum; the
# transpose puts spectra in rows and spectral points in columns.
X_health = df_health.drop(columns=["wavenumber"]).T.to_numpy()
X_disease = df_disease.drop(columns=["wavenumber"]).T.to_numpy()

print("Healthy shape:", X_health.shape)
print("Disease shape:", X_disease.shape)
print("Wavenumber points:", len(wavenumber))

In [None]:
import matplotlib.pyplot as plt

# One figure per per-class statistic, computed across spectra (axis=0) at each
# wavenumber point. Each entry is:
# (ndarray reduction method, healthy label, disease label, y-axis label, title)
panels = (
    ("mean", "Healthy mean", "Disease mean", "Intensity",
     "Mean spectra by class"),
    ("std", "Healthy std", "Disease std", "Std",
     "Spectral variability by class"),
)

for reducer, healthy_label, disease_label, y_label, title in panels:
    plt.figure()
    # Healthy is drawn first so both figures keep the same colour assignment.
    plt.plot(wavenumber, getattr(X_health, reducer)(axis=0), label=healthy_label)
    plt.plot(wavenumber, getattr(X_disease, reducer)(axis=0), label=disease_label)
    plt.legend()
    plt.xlabel("Wavenumber")
    plt.ylabel(y_label)
    plt.title(title)
    plt.show()

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Stack both classes into one sample matrix (healthy rows first) and build the
# matching label vector: 0 = healthy, 1 = disease.
n_health = X_health.shape[0]
n_disease = X_disease.shape[0]
X = np.vstack((X_health, X_disease))
y = np.array([0] * n_health + [1] * n_disease)

# Standardize the columns of X, then project onto the first two principal
# components.
scaler = StandardScaler()
Xn = scaler.fit_transform(X)
pca = PCA(n_components=2, random_state=42)
Z = pca.fit_transform(Xn)

plt.figure()
for class_id, class_name in ((0, "Healthy"), (1, "Disease")):
    in_class = y == class_id
    plt.scatter(Z[in_class, 0], Z[in_class, 1], label=class_name, alpha=0.7)
plt.xlabel("PC1")
plt.ylabel("PC2")
plt.legend()
plt.title("PCA scatter (StandardScaler)")
plt.show()