# Gradient Boosting model training

- Reference: https://www.kaggle.com/beagle01/prediction-with-gradient-boosting-classifier

## Importing the libraries

In [2]:
import pandas as pd

## Importing the dataset

In [4]:
# Read the preprocessed training table, then split it positionally:
# every column except the last becomes the feature matrix X, and the
# last column becomes the target vector y (both as NumPy arrays).
train = pd.read_csv('./data/processed/preprocessed_application_train.csv')
X, y = train.iloc[:, :-1].values, train.iloc[:, -1].values

In [5]:
# Display the (rows, columns) dimensions of the loaded frame as a quick sanity check.
train.shape

(65789, 78)

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation. The shuffle is seeded so that a
# fresh "Restart Kernel -> Run All" reproduces the same partition; without a
# seed the reported metrics change on every run even though the classifier
# itself is trained with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

## Training the Gradient Boosting model on the training set

In [7]:
from sklearn.ensemble import GradientBoostingClassifier

# Build and fit the classifier in one expression; fit() returns the fitted
# estimator, so clf ends up bound to the trained model.
clf = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=1.0,
    max_depth=1,
    random_state=0,
).fit(X_train, y_train)

## Adding the MLflow workflow

In [8]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

def eval_metrics(actual, pred):
    """Score a set of predictions against the true labels.

    Args:
        actual: Ground-truth labels.
        pred: Predicted labels, aligned element-wise with ``actual``.

    Returns:
        A ``(accuracy, conf_matrix)`` tuple holding the results of
        ``accuracy_score(actual, pred)`` and ``confusion_matrix(actual, pred)``.
    """
    return accuracy_score(actual, pred), confusion_matrix(actual, pred)

In [None]:
import logging
from urllib.parse import urlparse

import mlflow
import mlflow.sklearn

logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

# Train, evaluate, and record one Gradient Boosting run in MLflow.
# Everything inside the `with` block is attached to the same MLflow run.
with mlflow.start_run():
    clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=1, random_state=0)
    clf = clf.fit(X_train, y_train)
    predicted_repayments = clf.predict(X_test)

    (accuracy, conf_matrix) = eval_metrics(y_test, predicted_repayments)

    # Record the estimator's full hyper-parameter set with the run so the
    # configuration that produced these metrics is stored alongside them.
    mlflow.log_params(clf.get_params())

    mlflow.log_metric('accuracy', accuracy)

    # log_metric accepts a single number per key, so the confusion matrix is
    # logged cell by cell. eval_metrics calls confusion_matrix(actual, pred),
    # so the first index is the actual class and the second the predicted one.
    for actual_idx, row in enumerate(conf_matrix):
        for predicted_idx, count in enumerate(row):
            mlflow.log_metric(f'conf_matrix_{actual_idx}_{predicted_idx}', float(count))

    mlflow_tracking_uri = mlflow.get_tracking_uri()
    print(mlflow_tracking_uri)

    tracking_url_type_store = urlparse(mlflow_tracking_uri).scheme

    # Model registry does not work with file store
    if tracking_url_type_store != 'file':
        # Register the model
        # There are other ways to use the Model Registry, which depends on the use case,
        # please refer to the doc for more information:
        # https://mlflow.org/docs/latest/model-registry.html#api-workflow
        mlflow.sklearn.log_model(clf, 'model', registered_model_name='GradientBoostingClassifier')
    else:
        # File-based tracking: store the model artifact without registering it.
        mlflow.sklearn.log_model(clf, 'model')