In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import NuSVR
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Load the Diabetes dataset
diabetes = load_diabetes()
diabetes_df = pd.DataFrame(data=np.c_[diabetes['data'], diabetes['target']],
                            columns=np.append(diabetes['feature_names'], 'target'))


In [3]:
# Feature Selection
features = diabetes['feature_names']

In [4]:
# Split the data into training and testing sets
train_data, test_data = train_test_split(diabetes_df, test_size=0.2, random_state=42)


In [5]:
# Preprocessing
def preprocess_data(data, features):
    X = data[features]
    y = data['target']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    y = np.array(y).reshape(-1, 1)  # NuSVR expects 2D array for the target
    y_scaled = scaler.fit_transform(y)
    return X_scaled, y_scaled

In [6]:
# Train a Nu-SVR model
def train_nusvr(X_train, y_train):
    nusvr_model = NuSVR(kernel='linear', nu=0.5)
    nusvr_model.fit(X_train, y_train.ravel())
    return nusvr_model

In [7]:
# Evaluate the model
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred = y_pred.reshape(-1, 1)  # Reshape predictions to match the target shape
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    return mse, r2

In [8]:
# Cross-validate the model
def cross_validate_nusvr(X, y, folds=5):
    nusvr_model = NuSVR(kernel='linear', nu=0.5)
    scores = cross_val_score(nusvr_model, X, y.ravel(), cv=folds, scoring='neg_mean_squared_error')
    return scores

In [9]:
# Preprocess the training data
X_train_scaled, y_train_scaled = preprocess_data(train_data, features)

In [10]:
# Train a Nu-SVR model
nusvr_model = train_nusvr(X_train_scaled, y_train_scaled)

In [11]:
# Preprocess the test data
X_test_scaled, y_test_scaled = preprocess_data(test_data, features)

In [12]:
# Evaluate the model
mse, r2 = evaluate_model(nusvr_model, X_test_scaled, y_test_scaled)
print(f'Mean Squared Error on the test set: {mse:.2f}')
print(f'R-squared score on the test set: {r2:.2f}')

Mean Squared Error on the test set: 0.55
R-squared score on the test set: 0.45


In [13]:
# Cross-validate the model
X_all_scaled, y_all_scaled = preprocess_data(diabetes_df, features)
cross_val_scores = cross_validate_nusvr(X_all_scaled, y_all_scaled)
print(f'\nCross-Validation Mean Squared Errors: {cross_val_scores}')



Cross-Validation Mean Squared Errors: [-0.4675857  -0.52208899 -0.54907258 -0.51230895 -0.49350211]
