In [54]:
from catboost import CatBoostClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import pandas as pd

## Load data from CSV

In [55]:
data = pd.read_csv('datasets/social-network-ads-dataset/Social_Network_Ads.csv')
X = data.iloc[:, 1:4]
y = data.iloc[:, 4]

## Split dataset into training and testing sets

In [56]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Data preprocessing

In [57]:
lb_encoder = LabelEncoder()
X_train_encoded = X_train
X_test_encoded = X_test
X_train_encoded.iloc[:, 0] = lb_encoder.fit_transform(X_train_encoded.iloc[:, 0])
X_test_encoded.iloc[:, 0] = lb_encoder.fit_transform(X_test_encoded.iloc[:, 0])

sc_scaler = StandardScaler()
X_train_scaled = sc_scaler.fit_transform(X_train_encoded)
X_test_scaled = sc_scaler.fit_transform(X_test_encoded)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


## Fit LightGBM Classifier

In [58]:
xg_boost_classifier = CatBoostClassifier(iterations=15,
                                         learning_rate = 0.01,
                                         depth = 5)
xg_boost_classifier.fit(X_train_scaled, y_train)

0:	learn: 0.6882216	total: 1.42ms	remaining: 19.9ms
1:	learn: 0.6829541	total: 3.31ms	remaining: 21.5ms
2:	learn: 0.6784713	total: 4.8ms	remaining: 19.2ms
3:	learn: 0.6732795	total: 6.17ms	remaining: 17ms
4:	learn: 0.6686383	total: 7.6ms	remaining: 15.2ms
5:	learn: 0.6638580	total: 9.3ms	remaining: 13.9ms
6:	learn: 0.6592573	total: 10.9ms	remaining: 12.4ms
7:	learn: 0.6550542	total: 12.5ms	remaining: 10.9ms
8:	learn: 0.6506416	total: 14ms	remaining: 9.32ms
9:	learn: 0.6461100	total: 15.4ms	remaining: 7.7ms
10:	learn: 0.6424343	total: 16.9ms	remaining: 6.13ms
11:	learn: 0.6383960	total: 18ms	remaining: 4.49ms
12:	learn: 0.6339691	total: 19.3ms	remaining: 2.97ms
13:	learn: 0.6307781	total: 20.7ms	remaining: 1.48ms
14:	learn: 0.6265923	total: 21.7ms	remaining: 0us


<catboost.core.CatBoostClassifier at 0x7f8a06d935c0>

## Evaluate model

In [59]:
predictions = xg_boost_classifier.predict(X_test_scaled)
print('Training Accuracy = {} \n'.format(xg_boost_classifier.score(X_train_scaled, y_train) * 100))
print('Testing Accuracy = {} \n'.format(xg_boost_classifier.score(X_test_scaled, y_test) * 100))
print('Confusion Matrix : \n {} \n'.format(confusion_matrix(y_true=y_test, y_pred=predictions)))
print('Classification Report : \n {}'.format(classification_report(y_true=y_test, y_pred=predictions)))

Training Accuracy = 90.0 

Testing Accuracy = 93.33333333333333 

Confusion Matrix : 
 [[68  5]
 [ 3 44]] 

Classification Report : 
               precision    recall  f1-score   support

           0       0.96      0.93      0.94        73
           1       0.90      0.94      0.92        47

    accuracy                           0.93       120
   macro avg       0.93      0.93      0.93       120
weighted avg       0.93      0.93      0.93       120

