# CatBoost for Classification

In [2]:
!pip install catboost

Collecting catboost
  Downloading https://files.pythonhosted.org/packages/15/90/b2b8c7f2ed46071741cefc8f522104abc81068b0231ce7171c78059b6682/catboost-0.21-cp37-none-win_amd64.whl (63.4MB)
Collecting plotly (from catboost)
  Downloading https://files.pythonhosted.org/packages/06/e1/88762ade699460dc3229c890f9845d16484a40955a590b65052f0958613c/plotly-4.5.0-py2.py3-none-any.whl (7.1MB)
Collecting graphviz (from catboost)
  Downloading https://files.pythonhosted.org/packages/f5/74/dbed754c0abd63768d3a7a7b472da35b08ac442cf87d73d5850a6f32391e/graphviz-0.13.2-py2.py3-none-any.whl
Collecting retrying>=1.3.3 (from plotly->catboost)
  Downloading https://files.pythonhosted.org/packages/44/ef/beae4b4ef80902f22e3af073397f079c96969c69b2c7d52a57ea9ae61c9d/retrying-1.3.3.tar.gz
Building wheels for collected packages: retrying
  Building wheel for retrying (setup.py): started
  Building wheel for retrying (setup.py): finished with status 'done'
  Created wheel for retrying: filename=retrying-1.3.3-cp37

In [3]:
def cat_boost_classifier(random_state=42):
    """Tune a CatBoost classifier on the wine dataset with a randomized search.

    Loads scikit-learn's bundled wine dataset, holds out 25% of the rows,
    runs a 10-candidate, 2-fold ``RandomizedSearchCV`` over ``depth``,
    ``learning_rate`` and ``iterations`` on the remaining rows, and prints
    the best estimator, its cross-validated score, its parameters and its
    score on the held-out rows.

    Parameters
    ----------
    random_state : int or None, default=42
        Seed used for the train/test split, the parameter sampling and the
        CatBoost model, so repeated runs print the same result. Pass ``None``
        to get a different split and different sampled candidates each run.

    Returns
    -------
    None
        Results are only printed to stdout.
    """
    import warnings
    # NOTE: this installs a process-wide "ignore" filter; warnings stay
    # silenced for the rest of the session after this function is called.
    warnings.filterwarnings("ignore")

    # load libraries
    from sklearn import datasets
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import RandomizedSearchCV
    from scipy.stats import uniform as sp_randFloat
    from scipy.stats import randint as sp_randInt
    from catboost import CatBoostClassifier

    # load the wine dataset
    dataset = datasets.load_wine()
    X = dataset.data; y = dataset.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=random_state)

    model = CatBoostClassifier(random_seed=random_state)
    # randint(low, high) excludes `high`: depth is drawn from 4..9 and
    # iterations from 10..99. uniform() with no arguments samples [0, 1].
    parameters = {'depth'         : sp_randInt(4, 10),
                  'learning_rate' : sp_randFloat(),
                  'iterations'    : sp_randInt(10, 100)
                 }

    randm = RandomizedSearchCV(estimator=model, param_distributions = parameters,
                               cv = 2, n_iter = 10, n_jobs=-1,
                               random_state=random_state)
    randm.fit(X_train, y_train)

    # Score the refitted best estimator on rows the search never saw; with no
    # explicit `scoring`, this uses the estimator's own score method, the same
    # metric that produced best_score_.
    test_score = randm.score(X_test, y_test)

    # Results from Random Search
    print("\n========================================================")
    print(" Results from Random Search " )
    print("========================================================")
    print("\n The best estimator across ALL searched params:\n",
          randm.best_estimator_)
    print("\n The best score across ALL searched params:\n",
          randm.best_score_)
    print("\n The best parameters across ALL searched params:\n",
          randm.best_params_)
    print("\n The held-out test score of the best estimator:\n",
          test_score)
    print("\n ========================================================")

# Run the classification demo; the search summary is printed to stdout.
cat_boost_classifier()

0:	learn: 0.7170821	total: 66.8ms	remaining: 5.75s
1:	learn: 0.5790565	total: 78.7ms	remaining: 3.35s
2:	learn: 0.4564394	total: 90.2ms	remaining: 2.52s
3:	learn: 0.3683137	total: 101ms	remaining: 2.1s
4:	learn: 0.3150130	total: 112ms	remaining: 1.84s
5:	learn: 0.2734426	total: 124ms	remaining: 1.67s
6:	learn: 0.2402901	total: 135ms	remaining: 1.55s
7:	learn: 0.2086175	total: 146ms	remaining: 1.44s
8:	learn: 0.1858602	total: 158ms	remaining: 1.37s
9:	learn: 0.1677474	total: 169ms	remaining: 1.3s
10:	learn: 0.1424875	total: 180ms	remaining: 1.24s
11:	learn: 0.1280245	total: 192ms	remaining: 1.2s
12:	learn: 0.1138532	total: 204ms	remaining: 1.16s
13:	learn: 0.1049740	total: 216ms	remaining: 1.13s
14:	learn: 0.0949547	total: 228ms	remaining: 1.1s
15:	learn: 0.0877344	total: 240ms	remaining: 1.06s
16:	learn: 0.0814523	total: 251ms	remaining: 1.03s
17:	learn: 0.0773797	total: 265ms	remaining: 1.02s
18:	learn: 0.0723603	total: 281ms	remaining: 1s
19:	learn: 0.0668466	total: 294ms	remaining: 

# CatBoost for Regression

In [4]:
def cat_boost_regressor(random_state=42):
    """Tune a CatBoost regressor with an exhaustive grid search.

    Loads a bundled scikit-learn regression dataset, holds out 25% of the
    rows, runs a 2-fold ``GridSearchCV`` over a 3 x 3 x 3 grid of ``depth``,
    ``learning_rate`` and ``iterations`` on the remaining rows, and prints
    the best estimator, its cross-validated score, its parameters and its
    score on the held-out rows.

    The Boston housing dataset is used when the installed scikit-learn still
    provides ``load_boston`` (it was removed in scikit-learn 1.2); otherwise
    the bundled diabetes dataset is used so the demo keeps running.

    Parameters
    ----------
    random_state : int or None, default=42
        Seed used for the train/test split and the CatBoost model, so
        repeated runs print the same result. Pass ``None`` to get a
        different split on each run.

    Returns
    -------
    None
        Results are only printed to stdout.
    """
    import warnings
    # NOTE: this installs a process-wide "ignore" filter; warnings stay
    # silenced for the rest of the session after this function is called.
    warnings.filterwarnings("ignore")

    # load libraries
    from sklearn import datasets
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import GridSearchCV
    from catboost import CatBoostRegressor

    # load the regression dataset
    try:
        dataset = datasets.load_boston()
    except (ImportError, AttributeError):
        # scikit-learn >= 1.2 no longer ships load_boston; fall back to a
        # bundled regression dataset that needs no download.
        dataset = datasets.load_diabetes()
    X = dataset.data; y = dataset.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=random_state)

    model = CatBoostRegressor(random_seed=random_state)
    # 3 x 3 x 3 = 27 candidate settings, each fitted on 2 folds.
    parameters = {'depth'         : [6,8,10],
                  'learning_rate' : [0.01, 0.05, 0.1],
                  'iterations'    : [30, 50, 100]
                 }
    grid = GridSearchCV(estimator=model, param_grid = parameters, cv = 2, n_jobs=-1)
    grid.fit(X_train, y_train)

    # Score the refitted best estimator on rows the search never saw; with no
    # explicit `scoring`, this uses the estimator's own score method, the same
    # metric that produced best_score_.
    test_score = grid.score(X_test, y_test)

    # Results from Grid Search
    print("\n========================================================")
    print(" Results from Grid Search " )
    print("========================================================")
    print("\n The best estimator across ALL searched params:\n",
          grid.best_estimator_)
    print("\n The best score across ALL searched params:\n",
          grid.best_score_)
    print("\n The best parameters across ALL searched params:\n",
          grid.best_params_)
    print("\n The held-out test score of the best estimator:\n",
          test_score)
    print("\n ========================================================")

# Run the regression demo; the search summary is printed to stdout.
cat_boost_regressor()

0:	learn: 8.5984183	total: 4.09ms	remaining: 405ms
1:	learn: 8.1133053	total: 6.24ms	remaining: 306ms
2:	learn: 7.6550405	total: 7.91ms	remaining: 256ms
3:	learn: 7.2670334	total: 9.6ms	remaining: 231ms
4:	learn: 6.9222042	total: 11ms	remaining: 209ms
5:	learn: 6.5717330	total: 12.6ms	remaining: 198ms
6:	learn: 6.2671699	total: 14.1ms	remaining: 188ms
7:	learn: 5.9805047	total: 15.6ms	remaining: 179ms
8:	learn: 5.7299902	total: 16.9ms	remaining: 171ms
9:	learn: 5.4616342	total: 18.3ms	remaining: 164ms
10:	learn: 5.2558051	total: 19.7ms	remaining: 159ms
11:	learn: 5.0335481	total: 22.4ms	remaining: 164ms
12:	learn: 4.8395881	total: 23.7ms	remaining: 159ms
13:	learn: 4.6736155	total: 25.2ms	remaining: 155ms
14:	learn: 4.5418971	total: 26.9ms	remaining: 152ms
15:	learn: 4.3692954	total: 28.3ms	remaining: 149ms
16:	learn: 4.2446208	total: 29.6ms	remaining: 145ms
17:	learn: 4.1344792	total: 31.1ms	remaining: 142ms
18:	learn: 4.0270480	total: 33.7ms	remaining: 144ms
19:	learn: 3.8963991	tota