<a href="https://colab.research.google.com/github/solobt777/Python_solobt777/blob/main/Regression_Assignment/Support_Vector_Machine_Regression_Assignment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Regression Assignment
- 1. Domain -- Supervised Learning
- 2. Type - Regression
- 3. Multiple Linear Regression

- kernel - Type of kernel function: 'rbf', 'linear', 'poly', 'sigmoid'
- C - Regularization (higher = fit data more closely, lower = more regularization)
- epsilon - Size of epsilon-tube (predictions within this margin have no penalty)
- gamma - Kernel coefficient for 'rbf', 'poly' and 'sigmoid'; controls how far the influence of a single training point reaches (higher = more wiggly fit)
- degree - Polynomial degree (only for 'poly' kernel)
- coef0 - Independent term for 'poly' and 'sigmoid' kernels
- shrinking - Heuristic to speed up training
- cache_size - Kernel cache memory in MB

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
from sklearn.preprocessing import StandardScaler
import time




In [None]:
# Load the insurance data and one-hot encode the categorical columns.
# drop_first=True keeps one dummy column fewer than the number of levels.
df = pd.read_csv('insurance_pre.csv')
df = pd.get_dummies(df, drop_first=True)

# Separate the features from the regression target. These must be defined
# before the split below; referencing X_train / X_test ahead of the split
# fails on a fresh kernel.
X = df.drop('charges', axis=1)
y = df['charges']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

df.head()

Unnamed: 0,age,bmi,children,charges,sex_male,smoker_yes
0,19,27.9,0,16884.924,False,True
1,18,33.77,1,1725.5523,True,False
2,28,33.0,3,4449.462,True,False
3,33,22.705,0,21984.47061,True,False
4,32,28.88,0,3866.8552,True,False


In [None]:
# Standardise the feature matrices with StandardScaler.
# The scaler is fitted on the training split only and then applied to both
# splits, so the test split does not influence the fitted statistics.
# Note: this cell scales the features only; y_train and y_test are left
# untouched here.
sc = StandardScaler()
sc.fit(X_train)

X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Hyper-parameter sets to evaluate: each entry is a dict of keyword
# arguments that is unpacked into SVR(**params).
param_combinations = [
    # RBF kernel with a very large C (i.e. weak regularization)
    dict(kernel='rbf', C=3000, epsilon=0.1, gamma='scale'),

    # RBF kernel, varying C (regularization strength)
    dict(kernel='rbf', C=0.1, epsilon=0.1, gamma='scale'),
    dict(kernel='rbf', C=10.0, epsilon=0.1, gamma='scale'),

    # RBF kernel, varying epsilon (width of the no-penalty tube)
    dict(kernel='rbf', C=1.0, epsilon=0.01, gamma='scale'),
    dict(kernel='rbf', C=1.0, epsilon=0.5, gamma='scale'),

    # RBF kernel, varying gamma
    dict(kernel='rbf', C=1.0, epsilon=0.1, gamma='auto'),
    dict(kernel='rbf', C=1.0, epsilon=0.1, gamma=0.1),

    # Linear kernel
    dict(kernel='linear', C=1.0, epsilon=0.1),

    # Polynomial kernel, degree 2 and 3
    dict(kernel='poly', degree=2, C=1.0, epsilon=0.1, coef0=1.0),
    dict(kernel='poly', degree=3, C=1.0, epsilon=0.1, coef0=1.0),

    # Sigmoid kernel
    dict(kernel='sigmoid', C=1.0, epsilon=0.1, gamma='scale', coef0=0.0),

    # Solver options: shrinking heuristic off, larger kernel cache
    dict(kernel='rbf', C=1.0, epsilon=0.1, gamma='scale', shrinking=False, cache_size=500),
]

In [None]:
# Standardise the target using the training split only. The models below
# are fitted on this scaled target, so scaler_y.inverse_transform maps their
# predictions back to the original units of y_test. Fitting on the raw
# target while inverse-transforming the predictions would score values on
# the wrong scale.
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(
    np.asarray(y_train, dtype=float).reshape(-1, 1)
).ravel()

# Train one SVR per parameter set and collect test-set metrics in `results`
# (one dict per configuration).
results = []
for idx, params in enumerate(param_combinations):
    print(f"\n--- Configuration {idx} ---")
    print(f"Parameters: {params}")

    # Create and train the model on the standardised target.
    # perf_counter is a monotonic clock intended for measuring elapsed time.
    start_time = time.perf_counter()
    model = SVR(**params)
    model.fit(X_train, y_train_scaled)
    train_time = time.perf_counter() - start_time

    # Predict in scaled space, then convert back to the original target units.
    y_pred_scaled = model.predict(X_test)
    y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()

    # Score against the untouched y_test.
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print(f"R² Score: {r2:.4f}")

    results.append({
        'config': idx,
        'kernel': params.get('kernel', 'rbf'),
        'C': params.get('C', 1.0),
        'epsilon': params.get('epsilon', 0.1),
        'gamma': params.get('gamma', 'scale'),
        'mse': mse,
        'mae': mae,
        'r2': r2,
        'train_time': train_time,
        'n_support': len(model.support_)
    })



--- Configuration 0 ---
Parameters: {'kernel': 'rbf', 'C': 3000, 'epsilon': 0.1, 'gamma': 'scale'}
R² Score: -247231295.6832

--- Configuration 1 ---
Parameters: {'kernel': 'rbf', 'C': 0.1, 'epsilon': 0.1, 'gamma': 'scale'}
R² Score: -85221683.8562

--- Configuration 2 ---
Parameters: {'kernel': 'rbf', 'C': 10.0, 'epsilon': 0.1, 'gamma': 'scale'}
R² Score: -82564963.5655

--- Configuration 3 ---
Parameters: {'kernel': 'rbf', 'C': 1.0, 'epsilon': 0.01, 'gamma': 'scale'}
R² Score: -85031298.9455

--- Configuration 4 ---
Parameters: {'kernel': 'rbf', 'C': 1.0, 'epsilon': 0.5, 'gamma': 'scale'}
R² Score: -85031298.9455

--- Configuration 5 ---
Parameters: {'kernel': 'rbf', 'C': 1.0, 'epsilon': 0.1, 'gamma': 'auto'}
R² Score: -85031298.9455

--- Configuration 6 ---
Parameters: {'kernel': 'rbf', 'C': 1.0, 'epsilon': 0.1, 'gamma': 0.1}
R² Score: -85100145.2597

--- Configuration 7 ---
Parameters: {'kernel': 'linear', 'C': 1.0, 'epsilon': 0.1}
R² Score: -83262125.9485

--- Configuration 8 ---

In [None]:
# Baseline: fit a LinearRegression on the same train/test split and score it
# with the same three metrics used for the SVR configurations.
regressor = LinearRegression().fit(X_train, y_train)
y_pred = regressor.predict(X_test)

mse, mae, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

# Display the baseline R² as the cell output.
r2

0.7810112863132487