# Boost of Power

Add some power to your fraud detection algorithm by using a GradientBoostingClassifier to leverage ensemble learning.

In [None]:
# Initial imports
import pandas as pd
from path import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.ensemble import GradientBoostingClassifier

# Needed for decision tree visualization
import pydotplus
from IPython.display import Image

### Preprocessing

In [None]:
# Load the encoded loan dataset into a DataFrame.
# The path is relative, so it resolves against the notebook's working directory.
file_path = Path("../Resources/sba_loans_encoded.csv")
df_transactions = pd.read_csv(file_path)
# Preview the first rows to sanity-check the columns before modelling.
df_transactions.head()

In [None]:
# Build the feature matrix: every column except the "Default" label.
# drop() returns a new DataFrame, so df_transactions itself is left untouched.
X = df_transactions.drop(columns="Default")
X.head()

In [None]:
# Build the target: the "Default" column reshaped into an (n_samples, 1) column vector.
default_labels = df_transactions["Default"].values
y = default_labels.reshape(-1, 1)
# Peek at the first five labels.
y[:5]

In [None]:
# Split features and target into training and test subsets.
# No test_size is passed, so scikit-learn's default split proportion applies;
# random_state=78 pins the shuffle so the split is reproducible across runs.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# "Default" classes are imbalanced (class balance is not visible here).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [None]:
# Create the StandardScaler instance (unfitted here; it is fitted in a later step)
scaler = StandardScaler()

In [None]:
# Fit the scaler on the training data only, so no statistics from the test
# set leak into preprocessing.
# fit() returns the estimator itself, so X_scaler and scaler name the same object.
X_scaler = scaler.fit(X_train)

In [None]:
# Scale both the training and the test data with the scaler fitted on X_train,
# so the two sets go through the identical transformation.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

### Choose Optimal Learning Rate

In [None]:
# Compare candidate learning rates.
# For each rate, fit a small gradient-boosting ensemble whose other
# hyperparameters are held fixed, then print its accuracy on the training
# and test sets so the rates can be compared side by side.
learning_rates = [0.05, 0.1, 0.25, 0.5, 0.75, 1]
fixed_params = {
    "n_estimators": 20,
    "max_features": 2,
    "max_depth": 3,
    "random_state": 0,
}
train_template = "Accuracy score (training): {0:.3f}"
validation_template = "Accuracy score (validation): {0:.3f}"

for learning_rate in learning_rates:
    model = GradientBoostingClassifier(learning_rate=learning_rate, **fixed_params)
    # The targets are column vectors; ravel() flattens them to 1-D for the estimator.
    model.fit(X_train_scaled, y_train.ravel())
    print("Learning rate: ", learning_rate)

    # Score the model
    train_accuracy = model.score(X_train_scaled, y_train.ravel())
    print(train_template.format(train_accuracy))
    validation_accuracy = model.score(X_test_scaled, y_test.ravel())
    print(validation_template.format(validation_accuracy))
    print()

### Build Model with Optimal Learning Rate

In [None]:
# Create GradientBoostingClassifier model
# Building the model here matters: without it, the scoring below would silently
# reuse the last estimator left over from the learning-rate loop (learning_rate=1).
# NOTE(review): 0.5 is a placeholder — set it to the rate with the best
# validation accuracy printed by the learning-rate comparison above.
optimal_learning_rate = 0.5
model = GradientBoostingClassifier(n_estimators=20,
                                   learning_rate=optimal_learning_rate,
                                   max_features=2,
                                   max_depth=3,
                                   random_state=0)

# Fit the model
# ravel() flattens the (n_samples, 1) target into the 1-D shape the estimator
# expects, matching how the learning-rate loop passes its targets.
model.fit(X_train_scaled, y_train.ravel())

# Score the model
print("Accuracy score (training): {0:.3f}".format(
    model.score(
        X_train_scaled,
        y_train.ravel())))
print("Accuracy score (validation): {0:.3f}".format(
    model.score(
        X_test_scaled,
        y_test.ravel())))

In [None]:
# Make predictions


# Generate accuracy score for predictions using y_test


### Model Evaluation

In [None]:
# Generating the confusion matrix


In [None]:
# Generate classification report

