In [60]:
import os
import pandas as pd
import neptune.new as neptune
from pathlib import Path
from sklearn import tree
from sklearn.metrics import f1_score
from getpass import getpass
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Integer
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn_genetic.callbacks.base import BaseCallback

In [61]:
# Ask for the Neptune API token interactively only when the environment
# does not already carry a non-empty value (missing and '' are both falsy).
if not os.environ.get("NEPTUNE_API_TOKEN"):
    os.environ["NEPTUNE_API_TOKEN"] = getpass("Enter your Neptune API token: ")

In [62]:
# Home directory of the current user as a plain string; used as the data root.
home_dir = os.fspath(Path.home())

In [63]:
# Load the training dataset from the parquet file in the user's home directory.
training_path = f"{home_dir}/loan_table_training.parquet"
training_df = pd.read_parquet(training_path)

In [64]:
target = "loan_status"

# Columns excluded from the feature matrix: the label itself plus the
# timestamp / identifier columns that the original cell dropped one by one.
non_feature_columns = [
    target,
    "event_timestamp",
    "created_timestamp",
    "loan_id",
    "zipcode",
    "dob_ssn",
]

feature_columns = training_df.columns.drop(non_feature_columns)
x = training_df[feature_columns]

# Alphabetical column order so the feature layout does not depend on the
# column order stored in the file.
X = x.reindex(columns=sorted(x.columns))
y = training_df[target]

In [65]:
# Hold out 40% of the rows for evaluation; the split is seeded (1234).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=1234)

In [66]:
# Open a Neptune run in the target project and attach this notebook file
# as the run's source snapshot.
run = neptune.init_run(
    project="zsotnich/mltest",
    source_files=["neptune_experiment.ipynb"],
)

https://app.neptune.ai/zsotnich/mltest/e/MLTES-28
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [67]:
# Base estimator handed to the hyperparameter search. The seed matches the one
# used for the train/test split so that tree fitting is repeatable across runs
# (scikit-learn permutes features at each split unless random_state is fixed).
classifier = tree.DecisionTreeClassifier(random_state=1234)

In [69]:
# 3-fold stratified cross-validation. Because shuffle=True, the fold assignment
# is random; pin it with the same seed as the train/test split so repeated runs
# evaluate candidates on identical folds.
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=1234)

In [70]:
# Search space: both tree-size limits are integers sampled from [5, 20].
param_grid = dict(
    max_depth=Integer(5, 20),
    max_leaf_nodes=Integer(5, 20),
)

In [71]:
class NeptuneCallback(BaseCallback):
    """Search callback that appends each step's fitness to a Neptune run.

    Parameters
    ----------
    neptune_run
        Object supporting ``neptune_run[key].append(value)``; every recorded
        fitness value is appended to its ``'accuracy'`` series.
    """

    def __init__(self, neptune_run):
        # Not read anywhere in this class; kept so existing attribute access
        # on instances keeps working.
        self.epoc = 0
        self.neptune_run = neptune_run

    def on_step(self, record=None, logbook=None, estimator=None):
        """Log ``record['fitness']`` to the ``'accuracy'`` series of the run.

        ``logbook`` and ``estimator`` are accepted but unused.

        Returns
        -------
        bool
            Always ``False``. NOTE(review): in sklearn-genetic-opt a truthy
            return is presumably the "stop the search" signal - confirm.
        """
        # `record` defaults to None, so guard before subscripting; without
        # this check a call with no record raised TypeError.
        if record is not None:
            self.neptune_run['accuracy'].append(record['fitness'])
        return False

In [72]:
# Genetic-algorithm hyperparameter search over `param_grid`, scoring each
# candidate by accuracy on the `cv` folds, using all available cores.
evolved_estimator = GASearchCV(
    estimator=classifier,
    param_grid=param_grid,
    scoring="accuracy",
    cv=cv,
    verbose=True,
    n_jobs=-1,
)

In [74]:
# Stream the per-generation fitness to the open Neptune run while the search
# is fitted on the training split.
n_callback = NeptuneCallback(neptune_run=run)
evolved_estimator.fit(X_train, y_train, callbacks=n_callback)

gen	nevals	fitness	fitness_std	fitness_max	fitness_min
0  	50    	0.86487	0.0166308  	0.886956   	0.823307   
1  	100   	0.878228	0.00766693 	0.888236   	0.852455   
2  	100   	0.882508	0.00395099 	0.888236   	0.872469   
3  	100   	0.884762	0.00212494 	0.888236   	0.878229   
4  	100   	0.885561	0.00167193 	0.888236   	0.880905   
5  	100   	0.885973	0.00108913 	0.888236   	0.882301   
6  	100   	0.885469	0.00187353 	0.887596   	0.877531   
7  	100   	0.885789	0.00170404 	0.887596   	0.877356   
8  	100   	0.885567	0.0027715  	0.888469   	0.873225   
9  	100   	0.885802	0.00233534 	0.888469   	0.874273   
10 	100   	0.88556 	0.00197853 	0.888469   	0.880614   
11 	100   	0.885455	0.00213582 	0.888469   	0.876833   
12 	100   	0.88581 	0.00232036 	0.888469   	0.873225   
13 	100   	0.886235	0.00149992 	0.888469   	0.882185   
14 	100   	0.88532 	0.00259582 	0.888469   	0.872935   
15 	100   	0.885683	0.00204072 	0.888469   	0.876483   
16 	100   	0.886587	0.00128799 	0.88876    	0.8824



36 	100   	0.884892	0.00441712 	0.888876   	0.858331   
37 	100   	0.885704	0.00328809 	0.888876   	0.867232   
38 	100   	0.88619 	0.00238943 	0.888876   	0.875727   
39 	100   	0.88597 	0.00289551 	0.888876   	0.872644   
40 	100   	0.886043	0.00281143 	0.888876   	0.871771   
41 	100   	0.885792	0.00237556 	0.888876   	0.8766     
42 	100   	0.885706	0.00557884 	0.888876   	0.848324   
43 	100   	0.886066	0.00196205 	0.888178   	0.876193   
44 	100   	0.885602	0.00265193 	0.887654   	0.876425   
45 	100   	0.885532	0.00298192 	0.887654   	0.870433   
46 	100   	0.885916	0.00254207 	0.888644   	0.875786   
47 	100   	0.885754	0.00217481 	0.888644   	0.878229   
48 	100   	0.885471	0.00262562 	0.888644   	0.871596   
49 	100   	0.885669	0.0020204  	0.888644   	0.877589   
50 	100   	0.885975	0.00202717 	0.888644   	0.878637   
51 	100   	0.886119	0.00294191 	0.888644   	0.869618   
52 	100   	0.886244	0.00280165 	0.888644   	0.873749   
53 	100   	0.886521	0.00179991 	0.888644   	0.88

In [75]:
# Score the fitted search object on the held-out split. The original cell
# scored `classifier`, the template estimator passed to GASearchCV, rather than
# the tuned model, and then discarded the result.
score = evolved_estimator.score(X_test, y_test)

# Log the genuine held-out score under "test/score"; previously this key
# received the cross-validation score of the search instead.
run["test/score"] = score
# The cross-validated best score is still recorded, under its own key.
run["cv/best_score"] = evolved_estimator.best_score_

In [76]:
# Class-probability predictions, kept under their original names.
y_train_pred = evolved_estimator.predict_proba(X_train)
y_test_pred = evolved_estimator.predict_proba(X_test)

# Use predict() for the label predictions: argmax over predict_proba yields
# column positions, which only coincide with the class labels when the labels
# are exactly 0..n_classes-1.
y_train_label = evolved_estimator.predict(X_train)
y_test_label = evolved_estimator.predict(X_test)

# Macro-averaged F1: unweighted mean of the per-class F1 scores.
train_f1 = f1_score(y_train, y_train_label, average="macro")
test_f1 = f1_score(y_test, y_test_label, average="macro")

run["train/f1"] = train_f1
run["test/f1"] = test_f1

In [77]:
# Record the winning hyperparameter combination alongside the metrics.
best_params = evolved_estimator.best_params_
run["model/parameters"] = best_params

In [78]:
# Close the Neptune run explicitly; per the init message above, it is otherwise
# only stopped when the notebook kernel terminates.
run.stop()

Shutting down background jobs, please wait a moment...
Done!
Waiting for the remaining 9 operations to synchronize with Neptune. Do not kill this process.
All 9 operations synced, thanks for waiting!
Explore the metadata in the Neptune app:
https://app.neptune.ai/zsotnich/mltest/e/MLTES-28
