In [2]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
# Load the Diabetes dataset
diabetes = load_diabetes()
diabetes_df = pd.DataFrame(data=np.c_[diabetes['data'], diabetes['target']],
                            columns=np.append(diabetes['feature_names'], 'target'))

In [4]:
# Feature Selection
features = diabetes['feature_names']

In [5]:
# Split the data into training and testing sets
train_data, test_data = train_test_split(diabetes_df, test_size=0.2, random_state=42)

In [6]:
# Preprocessing
def preprocess_data(data, features):
    X = data[features]
    y = data['target']
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    y = np.array(y).reshape(-1, 1)  # SVR expects 2D array for the target
    y_scaled = scaler.fit_transform(y)
    return X_scaled, y_scaled

In [7]:
# Train an SVR model
def train_svr(X_train, y_train):
    svr_model = SVR(kernel='linear', C=1)
    svr_model.fit(X_train, y_train)
    return svr_model

In [8]:
# Evaluate the model
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred = y_pred.reshape(-1, 1)  # Reshape predictions to match the target shape
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    return mse, r2

In [9]:
# Cross-validate the model
def cross_validate_svr(X, y, folds=5):
    svr_model = SVR(kernel='linear', C=1)
    scores = cross_val_score(svr_model, X, y.ravel(), cv=folds, scoring='neg_mean_squared_error')
    return scores

In [10]:
# Preprocess the training data
X_train_scaled, y_train_scaled = preprocess_data(train_data, features)

In [11]:
# Train an SVR model
svr_model = train_svr(X_train_scaled, y_train_scaled.ravel())

In [13]:
# Preprocess the test data
X_test_scaled, y_test_scaled = preprocess_data(test_data, features)

In [14]:
# Evaluate the model
mse, r2 = evaluate_model(svr_model, X_test_scaled, y_test_scaled)
print(f'Mean Squared Error on the test set: {mse:.2f}')
print(f'R-squared score on the test set: {r2:.2f}')

Mean Squared Error on the test set: 0.55
R-squared score on the test set: 0.45


In [15]:
# Cross-validate the model
X_all_scaled, y_all_scaled = preprocess_data(diabetes_df, features)
cross_val_scores = cross_validate_svr(X_all_scaled, y_all_scaled)
print(f'\nCross-Validation Mean Squared Errors: {cross_val_scores}')


Cross-Validation Mean Squared Errors: [-0.45176582 -0.5244183  -0.55535588 -0.53405758 -0.49162234]
