In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder ,MinMaxScaler,StandardScaler,RobustScaler
from sklearn.model_selection import train_test_split, cross_val_score,GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error,r2_score,accuracy_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
from sklearn.svm import SVR,  LinearSVR, NuSVR,OneClassSVM
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')



In [None]:
# Read the hosted insurance CSV into a DataFrame (requires network access).
df = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/mujahidashraf/data/refs/heads/main/insurance.csv"
)

In [None]:
# Integer-encode the 'sex', 'smoker' and 'region' columns in place.
# One encoder instance is re-fitted for every column, so once the loop ends
# `le` only remembers the classes of the last column ('region').
le = LabelEncoder()
for column in ('sex', 'smoker', 'region'):
    df[column] = le.fit_transform(df[column])

# Feature matrix (six predictors) and the regression target.
feature_columns = ['age', 'sex', 'bmi', 'children', 'smoker', 'region']
X = df[feature_columns]
y = df['expenses']

In [None]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Learn the scaling statistics from the training split only, then apply that
# same transform to both splits so no test information leaks into the fit.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Baseline: LinearSVR with library defaults (only the seed is pinned).
linear_svr = LinearSVR(random_state=42).fit(X_train, y_train)
y_pred = linear_svr.predict(X_test)

# Hold-out metrics; RMSE is derived from the MSE computed just before it.
mae, mse = mean_absolute_error(y_test, y_pred), mean_squared_error(y_test, y_pred)
rmse, r2 = np.sqrt(mse), r2_score(y_test, y_pred)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")
print('_' * 100)


MAE: 12081.248308457712
MSE: 292580869.701092
RMSE: 17104.995460423015
R2: -0.9954458552993437
____________________________________________________________________________________________________


In [None]:
# Candidate hyper-parameters for LinearSVR.
param_grid = dict(
    C=[0.1, 1, 10, 100, 200, 500, 600, 700],  # Regularization parameter
    epsilon=[0.01, 0.1, 0.2, 0.3],            # Tube width parameter
)

# Exhaustive search with 5-fold cross-validation, ranking candidates by R^2.
grid_search = GridSearchCV(
    LinearSVR(random_state=42),
    param_grid,
    scoring='r2',
    cv=5,
)
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

print(f"Best Parameters: {best_params}")

# Score the winning estimator on the held-out split.
y_pred_tuned = best_model.predict(X_test)

mae, mse = mean_absolute_error(y_test, y_pred_tuned), mean_squared_error(y_test, y_pred_tuned)
rmse, r2 = np.sqrt(mse), r2_score(y_test, y_pred_tuned)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")
print('_' * 100)

Best Parameters: {'C': 700, 'epsilon': 0.1}
MAE: 3248.153815598796
MSE: 37762764.975462615
RMSE: 6145.141574891714
R2: 0.7424522220748295
____________________________________________________________________________________________________


In [None]:
# Baseline: NuSVR with every hyper-parameter left at its default.
nu_svr = NuSVR().fit(X_train, y_train)
y_pred_nu = nu_svr.predict(X_test)

# Hold-out metrics; RMSE is derived from the MSE computed just before it.
mae, mse = mean_absolute_error(y_test, y_pred_nu), mean_squared_error(y_test, y_pred_nu)
rmse, r2 = np.sqrt(mse), r2_score(y_test, y_pred_nu)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")
print('_' * 100)

MAE: 8431.824855041963
MSE: 149972510.72088766
RMSE: 12246.326417374627
R2: -0.022835242893923136
____________________________________________________________________________________________________


In [None]:
# Hyper-parameter search space for NuSVR.
#
# `degree` is only read by the polynomial kernel. Crossing it with 'rbf' and
# 'linear' (as a single flat dict would) refits identical models three times
# each: 81 candidates instead of the 45 distinct ones. GridSearchCV accepts a
# list of dicts, so `degree` is varied only where it has an effect.
# Consequence: when a non-poly kernel wins, `best_params_` has no 'degree' key.
param_grid_nu = [
    {
        'nu': [0.1, 0.3, 0.5],
        'C': [0.1, 1, 10],
        'kernel': ['rbf', 'linear'],
    },
    {
        'nu': [0.1, 0.3, 0.5],
        'C': [0.1, 1, 10],
        'kernel': ['poly'],
        'degree': [2, 3, 4],
    },
]

# 5-fold cross-validated search, ranking candidates by R^2.
grid_search_nu = GridSearchCV(NuSVR(), param_grid=param_grid_nu, cv=5, scoring='r2')
grid_search_nu.fit(X_train, y_train)
best_params_nu = grid_search_nu.best_params_
best_nu_svr = grid_search_nu.best_estimator_
print(f"Best Parameters (NuSVR): {best_params_nu}")

# Evaluate the refitted best estimator on the held-out split.
y_pred_best_nu = best_nu_svr.predict(X_test)

mae = mean_absolute_error(y_test, y_pred_best_nu)
mse = mean_squared_error(y_test, y_pred_best_nu)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred_best_nu)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")
print(100*'_')

Best Parameters (NuSVR): {'C': 10, 'degree': 2, 'kernel': 'linear', 'nu': 0.5}
MAE: 5132.680597668994
MSE: 79996218.45999084
RMSE: 8944.060512987982
R2: 0.4544136712400585
____________________________________________________________________________________________________


In [None]:
# Baseline: SVR with every hyper-parameter left at its default.
svr = SVR().fit(X_train, y_train)
y_pred_svr = svr.predict(X_test)

# Hold-out metrics; RMSE is derived from the MSE computed just before it.
mae, mse = mean_absolute_error(y_test, y_pred_svr), mean_squared_error(y_test, y_pred_svr)
rmse, r2 = np.sqrt(mse), r2_score(y_test, y_pred_svr)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")
print('_' * 100)

MAE: 8233.450210996702
MSE: 158121240.06907204
RMSE: 12574.626836175777
R2: -0.07841081152355556
____________________________________________________________________________________________________


In [None]:
# Hyper-parameter search space for SVR.
#
# Not every parameter applies to every kernel: `gamma` is ignored by the
# linear kernel and `degree` is ignored by everything except 'poly'. A single
# flat dict crosses them anyway (540 candidates, 2700 fits with cv=5), most of
# which are duplicates of one another. Splitting the grid per kernel family
# keeps every distinct model (234 candidates) and drops the redundant refits.
param_grid_svr = [
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10],
        'epsilon': [0.01, 0.1, 0.2],
    },
    {
        'kernel': ['rbf', 'sigmoid'],
        'C': [0.1, 1, 10],
        'epsilon': [0.01, 0.1, 0.2],
        'gamma': ['scale', 'auto', 0.01, 0.1, 1],  # Custom gamma values
    },
    {
        'kernel': ['poly'],
        'C': [0.1, 1, 10],
        'epsilon': [0.01, 0.1, 0.2],
        'gamma': ['scale', 'auto', 0.01, 0.1, 1],  # Custom gamma values
        'degree': [2, 3, 4],
    },
]

# 5-fold cross-validated search, ranking candidates by R^2.
grid_search_svr = GridSearchCV(SVR(), param_grid=param_grid_svr, cv=5, scoring='r2')
grid_search_svr.fit(X_train, y_train)

best_params_svr = grid_search_svr.best_params_
best_svr_model = grid_search_svr.best_estimator_

print(f"Best Parameters (SVR): {best_params_svr}")

# Evaluate the refitted best estimator on the held-out split.
y_pred_best_svr = best_svr_model.predict(X_test)

mae = mean_absolute_error(y_test, y_pred_best_svr)
mse = mean_squared_error(y_test, y_pred_best_svr)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred_best_svr)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R2: {r2}")
print(100*'_')

Best Parameters (SVR): {'C': 10, 'degree': 3, 'epsilon': 0.2, 'gamma': 1, 'kernel': 'poly'}
MAE: 2158.0867617006907
MSE: 21790130.6535045
RMSE: 4667.990001435789
R2: 0.8513880078920127
____________________________________________________________________________________________________


In [None]:
# Fit an unsupervised one-class SVM (RBF kernel) on the training features;
# no target values are passed to fit().
clf = OneClassSVM(nu=0.1, gamma=0.1, kernel="rbf").fit(X_train)

# Label every row of both splits. Per the scikit-learn docs, predict()
# returns +1 for inliers and -1 for outliers.
y_pred_train = clf.predict(X_train)
y_pred_test = clf.predict(X_test)

print(y_pred_train)
print(y_pred_test)



[ 1  1  1  1 -1  1  1  1  1  1 -1  1  1  1  1 -1  1 -1  1  1  1 -1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1
  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1 -1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1 -1  1  1 -1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1 -1  1  1  1  1  1  1
 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1 -1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1
 -1 -1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1 -1  1
  1  1  1  1  1 -1  1  1 -1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1 -1  1  1 -1  1  1  1  1  1  1