In [1]:
import os
import pandas as pd
import neptune.new as neptune
from pathlib import Path
from sklearn import tree
from sklearn.metrics import f1_score
from getpass import getpass
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Integer
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn_genetic.callbacks.base import BaseCallback

In [2]:
# Ask for the Neptune API token interactively only when the environment does
# not already hold a non-empty one, so the secret never lives in the notebook.
if not os.environ.get("NEPTUNE_API_TOKEN"):
    os.environ["NEPTUNE_API_TOKEN"] = getpass("Enter your Neptune API token: ")

In [3]:
# Load the training dataset (path is relative to the notebook's working directory).
training_data_path = "../data/loan_table_training.parquet"
training_df = pd.read_parquet(training_data_path)

In [4]:
target = "loan_status"

# Columns left out of the feature matrix: the label itself plus the
# timestamp, identifier, zipcode and dob_ssn columns.
non_feature_columns = [
    target,
    "event_timestamp",
    "created_timestamp",
    "loan_id",
    "zipcode",
    "dob_ssn",
]

x = training_df.drop(columns=non_feature_columns)
# Put the feature columns in sorted name order so the layout is deterministic.
X = x.reindex(columns=sorted(x.columns))
y = training_df[target]

In [5]:
# Hold out 40% of the rows for testing; the fixed seed keeps the split repeatable.
split = train_test_split(X, y, test_size=0.4, random_state=1234)
X_train, X_test, y_train, y_test = split

In [6]:
# Open a Neptune run for this experiment and attach the notebook file to it.

run = neptune.init_run(
    project="zsotnich/mltest",
    source_files=["neptune_experiment.ipynb"],
)

https://app.neptune.ai/zsotnich/mltest/e/MLTES-30


Info (NVML): NVML Shared Library Not Found. GPU usage metrics may not be reported. For more information, see https://docs.neptune.ai/help/nvml_error/


Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [7]:
# Base estimator whose hyperparameters are tuned by the genetic search.
# A fixed random_state makes tree construction repeatable between runs
# (same seed value as the train/test split).
classifier = tree.DecisionTreeClassifier(random_state=1234)

In [8]:
# 3-fold stratified cross-validation used to score each candidate.
# shuffle=True without a seed would produce different folds (and therefore
# different fitness values) on every run, so the seed is pinned.
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=1234)

In [11]:
# Search space for the tuning step: both hyperparameters are drawn from
# Integer(5, 20).
param_grid = dict(
    max_depth=Integer(5, 20),
    max_leaf_nodes=Integer(5, 20),
)

In [13]:
# Callback that logs the per-step fitness to Neptune during parameter tuning.

class NeptuneCallback(BaseCallback):
    """Search callback that appends each step's fitness to a Neptune run.

    Every value is written to the ``accuracy`` series of the run supplied at
    construction time.
    """

    def __init__(self, neptune_run):
        """Remember the run that receives the logged values.

        Args:
            neptune_run: Object supporting ``neptune_run[key].append(value)``,
                e.g. the run returned by ``neptune.init_run``.
        """
        # Retained so existing attribute access keeps working; nothing in
        # this class reads or updates it.
        self.epoc = 0
        self.neptune_run = neptune_run

    def on_step(self, record=None, logbook=None, estimator=None):
        """Log ``record['fitness']`` for the current step.

        Args:
            record: Mapping for the current step; must contain a
                ``'fitness'`` entry when provided.
            logbook: Unused here.
            estimator: Unused here.

        Returns:
            Always ``False``. NOTE(review): per the sklearn-genetic-opt
            callback contract this should mean "do not stop the search" —
            confirm against the installed version.
        """
        # The signature allows record=None; subscripting None would raise
        # TypeError, so skip logging for that step instead of crashing.
        if record is not None:
            self.neptune_run['accuracy'].append(record['fitness'])
        return False

In [14]:
# Configure the genetic hyperparameter search around the base classifier.

search_options = dict(
    estimator=classifier,
    param_grid=param_grid,
    scoring="accuracy",
    cv=cv,
    n_jobs=-1,
    verbose=True,
)
evolved_estimator = GASearchCV(**search_options)

In [15]:
# Run the search on the training split; the callback sends each step's
# fitness to the Neptune run.

n_callback = NeptuneCallback(neptune_run=run)
evolved_estimator.fit(X=X_train, y=y_train, callbacks=n_callback)

gen	nevals	fitness 	fitness_std	fitness_max	fitness_min
0  	50    	0.829076	0.00719423 	0.842681   	0.817023   
1  	100   	0.835314	0.00431951 	0.842273   	0.819816   
2  	100   	0.836943	0.00297068 	0.842273   	0.823132   
3  	100   	0.837483	0.00244798 	0.842273   	0.827961   
4  	100   	0.838828	0.00167844 	0.842273   	0.833837   
5  	100   	0.838496	0.00209772 	0.842274   	0.830929   
6  	100   	0.838358	0.00213081 	0.842273   	0.832615   
7  	100   	0.837803	0.00241783 	0.841518   	0.832615   
8  	100   	0.837632	0.0022122  	0.841518   	0.831743   
9  	100   	0.837847	0.0021935  	0.841518   	0.831744   
10 	100   	0.838228	0.00209793 	0.841518   	0.830522   
11 	100   	0.837883	0.00208143 	0.841518   	0.830987   
12 	100   	0.838407	0.00203439 	0.841925   	0.833954   
13 	100   	0.838543	0.00275672 	0.841925   	0.83151    
14 	100   	0.837971	0.00287556 	0.841925   	0.831044   
15 	100   	0.837932	0.00255751 	0.841925   	0.832383   
16 	100   	0.837943	0.00309595 	0.841925   	0.83

In [16]:
# Log the tuned model's scores in Neptune.

# Score the search object (which predicts with the best estimator it found) on
# the held-out split. `classifier` is never fitted directly in the visible
# cells, so scoring it would not measure the tuned model.
score = evolved_estimator.score(X_test, y_test)

run["test/score"] = score
# best_score_ comes from cross-validation during the search, not from the test
# split, so it is logged under its own key instead of "test/score".
run["train/cv_best_score"] = evolved_estimator.best_score_

In [17]:
# Use predict() so the predictions are actual class labels. Taking argmax over
# predict_proba() yields column indices, which only coincide with the labels
# when the classes happen to be encoded as 0..k-1.
y_train_pred = evolved_estimator.predict(X_train)
y_test_pred = evolved_estimator.predict(X_test)

# Macro-averaged F1 on both splits.
train_f1 = f1_score(y_train, y_train_pred, average="macro")
test_f1 = f1_score(y_test, y_test_pred, average="macro")

run["train/f1"] = train_f1
run["test/f1"] = test_f1

In [18]:
# Record the best hyperparameters found by the search under "model/parameters".
run["model/parameters"] = evolved_estimator.best_params_

In [19]:
# Stop the Neptune run explicitly; in a notebook it is otherwise only stopped
# when the kernel terminates.
run.stop()

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 19 operations to synchronize with Neptune. Do not kill this process.
All 19 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/zsotnich/mltest/e/MLTES-30
