In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor

In [4]:
from sklearn.datasets import load_diabetes
diabetes = load_diabetes()
df = pd.DataFrame(
    diabetes.data, columns=diabetes.feature_names
)
df.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,0.038076,0.05068,0.061696,0.021872,-0.044223,-0.034821,-0.043401,-0.002592,0.019908,-0.017646
1,-0.001882,-0.044642,-0.051474,-0.026328,-0.008449,-0.019163,0.074412,-0.039493,-0.06833,-0.092204
2,0.085299,0.05068,0.044451,-0.005671,-0.045599,-0.034194,-0.032356,-0.002592,0.002864,-0.02593
3,-0.089063,-0.044642,-0.011595,-0.036656,0.012191,0.024991,-0.036038,0.034309,0.022692,-0.009362
4,0.005383,-0.044642,-0.036385,0.021872,0.003935,0.015596,0.008142,-0.002592,-0.031991,-0.046641


In [5]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data, diabetes.target, test_size=0.1, random_state=2022
)

In [6]:
lr = LinearRegression()
sv = SVR()
rf = RandomForestRegressor()
xgb = XGBRegressor()

In [7]:
lr.fit(X_train, y_train)
sv.fit(X_train, y_train)
rf.fit(X_train, y_train)
xgb.fit(X_train, y_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.300000012,
             max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=100, n_jobs=16,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)

In [8]:
from sklearn.metrics import r2_score, mean_squared_error
pred_lr = lr.predict(X_test)
pred_sv = sv.predict(X_test)
pred_rf = rf.predict(X_test)
pred_xgb = rf.predict(X_test)

In [9]:
r2_lr = r2_score(y_test, pred_lr)
r2_sv = r2_score(y_test, pred_sv)
r2_rf = r2_score(y_test, pred_rf)
r2_xgb = r2_score(y_test, pred_xgb)

mse_lr = mean_squared_error(y_test, pred_lr)
mse_sv = mean_squared_error(y_test, pred_sv)
mse_rf = mean_squared_error(y_test, pred_rf)
mse_xgb = mean_squared_error(y_test, pred_xgb)

In [11]:
predict = pd.DataFrame({
    'target':y_test, 'LR':pred_lr, 'SVR':pred_sv, 'RF':pred_rf, 'XGB':pred_xgb
})
predict.head()

Unnamed: 0,target,LR,SVR,RF,XGB
0,55.0,67.62636,129.028779,129.61,129.61
1,96.0,56.43958,118.128113,73.16,73.16
2,317.0,221.344882,149.680325,193.23,193.23
3,40.0,137.305152,133.980151,128.44,128.44
4,178.0,168.678201,149.156143,179.74,179.74


In [14]:
print('Comparison')
print(f'R2_score\nLR:{r2_lr:.4f}\tSVR:{r2_sv:.4f}\tRF:{r2_rf:.4f}\tXGB:{r2_xgb:.4f}')
print(f'MSE\nLR:{mse_lr:.4f}\tSVR:{mse_sv:.4f}\tRF:{mse_rf:.4f}\tXGB:{mse_xgb:.4f}')

Comparison
R2_score
LR:0.5944	SVR:0.1632	RF:0.4673	XGB:0.4673
MSE
LR:2391.6049	SVR:4933.6951	RF:3140.7733	XGB:3140.7733
