In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
from tpot.export_utils import set_param_recursive

In [7]:
# Read the raw loan table from the CSV file in the working directory.
df = pd.read_csv("loan_data.csv")

In [8]:
# Work on an independent copy so later column edits do not touch `df`
# (DataFrame.copy() is deep by default).
tpot_data = df.copy()

In [9]:
from sklearn.preprocessing import LabelEncoder

# Swap the 'purpose' column for the integer codes produced by a LabelEncoder,
# overwriting the column on the working copy.
enc = LabelEncoder()
purpose_codes = enc.fit_transform(tpot_data['purpose'].values)
tpot_data['purpose'] = purpose_codes

In [10]:
# Feature matrix: every column except the 'not.fully.paid' label.
features = tpot_data.drop(columns='not.fully.paid')

In [11]:
# Split features and the 'not.fully.paid' label into train/test partitions.
# The seed is fixed; the test size is left at train_test_split's default and
# no stratification is requested.
(
    training_features,
    testing_features,
    training_target,
    testing_target,
) = train_test_split(
    features,
    tpot_data['not.fully.paid'],
    random_state=42,
)

In [12]:
# First candidate model: min-max scaling followed by a random forest.
exported_pipeline = make_pipeline(
    MinMaxScaler(),
    RandomForestClassifier(
        bootstrap=True,
        criterion="gini",
        max_features=0.2,
        min_samples_leaf=8,
        min_samples_split=4,
        n_estimators=100,
    ),
)

In [13]:
# Pin random_state to 42 on the pipeline steps (the forest picks it up, as the
# fitted repr shows) so training is repeatable.
set_param_recursive(
    exported_pipeline.steps,
    'random_state',
    42,
)

In [14]:
# Train scaler + forest on the training partition; as the cell's last
# expression, the fitted pipeline is also displayed.
exported_pipeline.fit(training_features, y=training_target)

Pipeline(steps=[('minmaxscaler', MinMaxScaler()),
                ('randomforestclassifier',
                 RandomForestClassifier(max_features=0.2, min_samples_leaf=8,
                                        min_samples_split=4,
                                        random_state=42))])

In [15]:
# Predicted labels for the held-out features from the forest pipeline.
results = exported_pipeline.predict(
    testing_features
)


In [16]:
from sklearn.metrics import classification_report

In [17]:
# Per-class precision / recall / F1 of the forest pipeline on the test partition.
print(classification_report(y_true=testing_target, y_pred=results))

              precision    recall  f1-score   support

           0       0.84      1.00      0.91      2012
           1       1.00      0.00      0.01       383

    accuracy                           0.84      2395
   macro avg       0.92      0.50      0.46      2395
weighted avg       0.87      0.84      0.77      2395



In [18]:
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from tpot.builtins import StackingEstimator
from tpot.export_utils import set_param_recursive
from sklearn.preprocessing import FunctionTransformer
from copy import copy

In [30]:
from imblearn.over_sampling import SMOTE

In [31]:
# Seed the oversampler with the same value (42) used for the split and the
# estimators in this notebook. An unseeded SMOTE() synthesises different
# minority samples on every run, so the fitted model and its report would
# not be reproducible.
# NOTE(review): 'purpose' holds integer category codes at this point and plain
# SMOTE interpolates between neighbouring rows; consider SMOTENC for that
# column -- TODO confirm.
sm = SMOTE(random_state=42)

In [32]:
# Oversample the training partition only; the test partition is not resampled.
X_os, y_os = sm.fit_resample(
    training_features,
    training_target,
)

In [23]:
# Second candidate model: a feature union feeding a gradient-boosted classifier.
# Both union branches are identity transforms (copy.copy), so the booster
# receives every input column twice.
exported_pipeline_sm = make_pipeline(
    make_union(
        FunctionTransformer(copy),
        FunctionTransformer(copy),
    ),
    GradientBoostingClassifier(
        learning_rate=0.1,
        max_depth=8,
        max_features=0.9000000000000001,
        min_samples_leaf=2,
        min_samples_split=18,
        n_estimators=100,
        subsample=0.4,
    ),
)

In [24]:
# Pin random_state to 42 on the boosted pipeline's steps so training is repeatable.
set_param_recursive(
    exported_pipeline_sm.steps,
    'random_state',
    42,
)

In [33]:
# Train the boosted pipeline on the oversampled training data; as the cell's
# last expression, the fitted pipeline is also displayed.
exported_pipeline_sm.fit(X_os, y=y_os)

Pipeline(steps=[('featureunion',
                 FeatureUnion(transformer_list=[('functiontransformer-1',
                                                 FunctionTransformer(func=<function copy at 0x000002AF4BC97B80>)),
                                                ('functiontransformer-2',
                                                 FunctionTransformer(func=<function copy at 0x000002AF4BC97B80>))])),
                ('gradientboostingclassifier',
                 GradientBoostingClassifier(max_depth=8,
                                            max_features=0.9000000000000001,
                                            min_samples_leaf=2,
                                            min_samples_split=18,
                                            random_state=42, subsample=0.4))])

In [34]:
# Predicted labels for the original (non-resampled) test features.
results1 = exported_pipeline_sm.predict(
    testing_features
)

In [35]:
# Per-class precision / recall / F1 of the boosted pipeline on the test partition.
print(classification_report(y_true=testing_target, y_pred=results1))

              precision    recall  f1-score   support

           0       0.85      0.87      0.86      2012
           1       0.24      0.21      0.22       383

    accuracy                           0.77      2395
   macro avg       0.54      0.54      0.54      2395
weighted avg       0.75      0.77      0.76      2395



array([1, 0, 0, ..., 0, 0, 0], dtype=int64)