# Import Libraries

In [4]:
import numpy as np
import json

# Import Data
- import pre-processed data
- import z scores
- import comfort scores


# SVM Info

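Like other classifiers, SVC, NuSVC and LinearSVC take two arrays as input: an array X of shape (n_samples, n_features) holding the training samples, 
and an array y of class labels (strings or integers) of shape (n_samples,). The regression variant used below, SVR, takes the same X, but y holds numeric target values instead of class labels.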

Support Vector Machine algorithms are not scale invariant, so it is highly recommended to scale your data. For example, scale each attribute of the input vector X to [0,1] or [-1,+1],
or standardize it to have mean 0 and variance 1. Note that the same scaling must be applied to the test vector to obtain meaningful results. This is easily done by using a Pipeline, which fits the scaler on the training data only and reuses it at prediction time.

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# Simulate an example dataset. A seeded generator keeps the synthetic EEG
# epochs (and therefore every number printed below) reproducible across runs.
rng = np.random.default_rng(42)
data = pd.DataFrame({
    # One (32 channels x 1280 time points) array per epoch:
    # 128 Hz sampling rate * 10 s = 1280 samples.
    "Epoch_Features": [rng.random((32, 1280)) for _ in range(24)],
    "Size": [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3],
    "Contrast": [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4],
    "Comfort_Score": [2, 3, 4, 7, 2, 4, 6, 7, 2, 4, 5, 7, 2, 3, 4, 6, 2, 4, 6, 7, 4, 4, 5, 7],
    "Z_Score": [2.1, 3, 2.2, 2.2, 2.2, 2.1, 1.5, 1, 3, 3, 4, 2.1, 2.1, 3, 2.2, 2.2, 2.2, 2.1, 1.5, 1, 3, 3, 4, 2.1],
})

# Filter by Z-Score: keep only epochs whose Z-score exceeds 2.0
filtered_data = data[data['Z_Score'] > 2.0]

# Flatten each (channels x time) epoch into a single feature vector
flattened_features = np.stack(
    [epoch.ravel() for epoch in filtered_data['Epoch_Features']]
)

# Combine features: flattened EEG followed by the Size and Contrast columns
X = np.hstack([
    flattened_features,
    filtered_data[['Size', 'Contrast']].to_numpy(),
])
y = filtered_data['Comfort_Score'].to_numpy()

# Train-test split on the RAW features. The positional indices are split
# alongside X and y so the Size/Contrast of each test epoch can be looked up.
# Scaling is deliberately NOT done here: fitting the scaler before the split
# would leak test-set statistics into training.
positions = np.arange(len(y))
X_train, X_test, y_train, y_test, train_indices, test_indices = train_test_split(
    X, y, positions, test_size=0.2, random_state=42
)

# Get the corresponding Size and Contrast for the test set using test_indices
test_data = filtered_data.iloc[test_indices]
test_sizes = test_data['Size'].to_numpy()
test_contrasts = test_data['Contrast'].to_numpy()

# Define the model as a pipeline so the scaler is fitted only on whatever data
# the SVR is trained on (each CV training fold, then the full training set)
# and the same fitted scaling is applied at prediction time.
model = Pipeline([
    ("scaler", StandardScaler()),
    ("svr", SVR(kernel='rbf', C=1.0, gamma='scale')),
])

# Cross-validated predictions on the TRAINING set only. Up to 10 folds, capped
# at the number of training epochs because the Z-score filter decides how many
# samples are available.
n_folds = min(10, len(y_train))
y_pred_cv = cross_val_predict(model, X_train, y_train, cv=n_folds)
rmse_cv = np.sqrt(mean_squared_error(y_train, y_pred_cv))

# Fit on the full training set and predict the held-out test epochs, which the
# model has never seen during fitting or scaling.
model.fit(X_train, y_train)
y_pred_test = model.predict(X_test)

# Print out the Size, Contrast, and Prediction for each test epoch
for prediction, size, contrast in zip(y_pred_test, test_sizes, test_contrasts):
    print(f"Prediction: {prediction}, Size: {size}, Contrast: {contrast}")

# Report the cross-validation error (training data) and the hold-out error
# (test data) separately so the two estimates are not confused.
rmse_test = np.sqrt(mean_squared_error(y_test, y_pred_test))
print(f"RMSE (Cross-Validation): {rmse_cv}")
print(f"RMSE (Test): {rmse_test}")

print(f"Training epochs: {len(X_train)}")
print(f"Testing epochs: {len(X_test)}")
