In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import cross_val_score
from xgboost import XGBRegressor, XGBClassifier, XGBRFRegressor, XGBRFClassifier
from lightgbm import LGBMRegressor,LGBMClassifier
from catboost import CatBoostRegressor,CatBoostClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error as MSE
import warnings
warnings.filterwarnings('ignore')

In [2]:
X, y = load_diabetes(return_X_y=True)

In [3]:
kfold = KFold(n_splits=5, shuffle=True, random_state=0)

## Regression

In [4]:
def regression_model(model):
    # Obtain scores of cross-validation using 10 splits and mean squared error
    scores = cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=kfold)

    # Take square root of the scores
    rmse = (-scores)**0.5

    # Return mean score
    return rmse.mean()

In [5]:
regression_model(LinearRegression())

54.527072461271416

In [6]:
regression_model(Lasso())

62.162444800807904

In [7]:
regression_model(Ridge())

58.357301395247546

In [8]:
regression_model(RandomForestRegressor())

57.76574055785875

In [9]:
regression_model(XGBRegressor(booster='gblinear'))

54.51729900480238

In [10]:
regression_model(XGBRegressor(booster='gbtree'))

62.410594913347246

In [11]:
regression_model(XGBRegressor(booster='dart'))

62.4105945214063

In [12]:
regression_model(XGBRegressor(num_parallel_tree=25))

62.43212372005752

In [13]:
regression_model(LGBMRegressor())

57.69405016579075

In [14]:
regression_model(LGBMRegressor(boosting_type='goss'))

56.01286579552364

In [15]:
regression_model(LGBMRegressor(boosting_type='rf'))

nan

In [16]:
regression_model(CatBoostRegressor())

Learning rate set to 0.032427
0:	learn: 77.2301931	total: 144ms	remaining: 2m 23s
1:	learn: 76.1167619	total: 147ms	remaining: 1m 13s
2:	learn: 75.0878579	total: 149ms	remaining: 49.6s
3:	learn: 74.1413601	total: 152ms	remaining: 37.7s
4:	learn: 73.2458988	total: 154ms	remaining: 30.7s
5:	learn: 72.4287346	total: 157ms	remaining: 26s
6:	learn: 71.6074244	total: 159ms	remaining: 22.6s
7:	learn: 70.8223454	total: 161ms	remaining: 20s
8:	learn: 70.0099643	total: 163ms	remaining: 18s
9:	learn: 69.2367242	total: 165ms	remaining: 16.4s
10:	learn: 68.4539074	total: 167ms	remaining: 15.1s
11:	learn: 67.7438959	total: 169ms	remaining: 13.9s
12:	learn: 67.0746252	total: 171ms	remaining: 13s
13:	learn: 66.5705214	total: 173ms	remaining: 12.1s
14:	learn: 65.9825608	total: 174ms	remaining: 11.4s
15:	learn: 65.3864625	total: 175ms	remaining: 10.8s
16:	learn: 64.7486786	total: 177ms	remaining: 10.2s
17:	learn: 64.1239684	total: 178ms	remaining: 9.7s
18:	learn: 63.6515519	total: 179ms	remaining: 9.23s

57.81780366543895

In [17]:
regression_model(CatBoostRegressor(boosting_type='Plain'))

Learning rate set to 0.032427
0:	learn: 77.2301931	total: 1.55ms	remaining: 1.55s
1:	learn: 76.1167619	total: 3.07ms	remaining: 1.53s
2:	learn: 75.0878579	total: 4.16ms	remaining: 1.38s
3:	learn: 74.1413601	total: 5.25ms	remaining: 1.31s
4:	learn: 73.2458988	total: 6.47ms	remaining: 1.29s
5:	learn: 72.4287346	total: 7.58ms	remaining: 1.25s
6:	learn: 71.6074244	total: 8.75ms	remaining: 1.24s
7:	learn: 70.8223454	total: 9.96ms	remaining: 1.23s
8:	learn: 70.0099643	total: 10.9ms	remaining: 1.2s
9:	learn: 69.2367242	total: 11.8ms	remaining: 1.16s
10:	learn: 68.4539074	total: 12.6ms	remaining: 1.14s
11:	learn: 67.7438959	total: 13.5ms	remaining: 1.11s
12:	learn: 67.0746252	total: 14.3ms	remaining: 1.09s
13:	learn: 66.5705214	total: 15.2ms	remaining: 1.07s
14:	learn: 65.9825608	total: 16ms	remaining: 1.05s
15:	learn: 65.3864625	total: 16.9ms	remaining: 1.04s
16:	learn: 64.7486786	total: 17.8ms	remaining: 1.03s
17:	learn: 64.1239684	total: 18.8ms	remaining: 1.02s
18:	learn: 63.6515519	total: 

57.81780366543895

## Classification

In [18]:
df_census = pd.read_csv('census_cleaned.csv')
X_census = df_census.iloc[:, :-1]
y_census = df_census.iloc[:, -1]

In [19]:
def classification_model(model):
    # Obtain scores of cross-validation using 10 splits and mean squared error
    scores = cross_val_score(model, X_census, y_census, scoring='accuracy', cv=kfold)

    # Return mean score
    return scores.mean()

In [21]:
classification_model(LogisticRegression())

0.7972729696532092

In [22]:
classification_model(RandomForestClassifier())

0.8555635091563236

In [23]:
classification_model(XGBClassifier(booster='gbtree'))



0.8728848341872293

In [24]:
classification_model(XGBClassifier(booster='dart'))



0.8728848341872293

In [25]:
classification_model(XGBClassifier(booster='gblinear'))



0.8508339161782276

In [27]:
classification_model(LGBMClassifier())

0.8740519245010263

In [28]:
classification_model(LGBMClassifier(boosting_type='goss'))

0.8703665575671563

In [35]:
classification_model(LGBMClassifier(begging_fraction=1,boosting_type='rf'))

nan

In [34]:
classification_model(CatBoostClassifier())

Learning rate set to 0.041445
0:	learn: 0.6529395	total: 9.9ms	remaining: 9.89s
1:	learn: 0.6167712	total: 17.4ms	remaining: 8.66s
2:	learn: 0.5821931	total: 24.8ms	remaining: 8.24s
3:	learn: 0.5536346	total: 31.9ms	remaining: 7.93s
4:	learn: 0.5270338	total: 39.1ms	remaining: 7.77s
5:	learn: 0.5044125	total: 46.4ms	remaining: 7.69s
6:	learn: 0.4830628	total: 53.5ms	remaining: 7.59s
7:	learn: 0.4672588	total: 60.6ms	remaining: 7.51s
8:	learn: 0.4520490	total: 67.5ms	remaining: 7.43s
9:	learn: 0.4369097	total: 74.5ms	remaining: 7.38s
10:	learn: 0.4245415	total: 81.5ms	remaining: 7.33s
11:	learn: 0.4141656	total: 88.3ms	remaining: 7.27s
12:	learn: 0.4046016	total: 94.9ms	remaining: 7.2s
13:	learn: 0.3964742	total: 101ms	remaining: 7.14s
14:	learn: 0.3894544	total: 108ms	remaining: 7.11s
15:	learn: 0.3825403	total: 115ms	remaining: 7.07s
16:	learn: 0.3765838	total: 122ms	remaining: 7.06s
17:	learn: 0.3702204	total: 129ms	remaining: 7.04s
18:	learn: 0.3649580	total: 136ms	remaining: 7.02s


0.8750961411141052

In [None]:
classification_model(CatBoostClassifier(boosting_type='Plain'))

Learning rate set to 0.041445
0:	learn: 0.6529395	total: 8.47ms	remaining: 8.46s
1:	learn: 0.6167712	total: 15.8ms	remaining: 7.87s
2:	learn: 0.5821931	total: 23.3ms	remaining: 7.73s
3:	learn: 0.5536346	total: 30ms	remaining: 7.47s
4:	learn: 0.5270338	total: 37.6ms	remaining: 7.48s
5:	learn: 0.5044125	total: 44.5ms	remaining: 7.37s
6:	learn: 0.4830628	total: 51.6ms	remaining: 7.31s
7:	learn: 0.4672588	total: 58.4ms	remaining: 7.24s
8:	learn: 0.4520490	total: 65.4ms	remaining: 7.2s
9:	learn: 0.4369097	total: 72.4ms	remaining: 7.17s
10:	learn: 0.4245415	total: 78.9ms	remaining: 7.1s
11:	learn: 0.4141656	total: 85.8ms	remaining: 7.07s
12:	learn: 0.4046016	total: 92.9ms	remaining: 7.05s
13:	learn: 0.3964742	total: 100ms	remaining: 7.06s
14:	learn: 0.3894544	total: 107ms	remaining: 7.05s
15:	learn: 0.3825403	total: 115ms	remaining: 7.05s
16:	learn: 0.3765838	total: 122ms	remaining: 7.05s
17:	learn: 0.3702204	total: 129ms	remaining: 7.04s
18:	learn: 0.3649580	total: 136ms	remaining: 7.02s
19