In [6]:
import numpy as np

In [2]:
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

In [3]:
import pickle
# Read the pre-split credit dataset from disk.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load pickles that come from a trusted source.
with open('data/credit.pkl', 'rb') as credit_file:
    credit_splits = pickle.load(credit_file)

# The pickle is unpacked as four objects, in this order.
x_training, y_training, x_test, y_test = credit_splits

In [4]:
# Sanity check: shapes of the training features and the training labels.
(x_training.shape, y_training.shape)

((1500, 3), (1500,))

In [5]:
# Sanity check: shapes of the test features and the test labels.
(x_test.shape, y_test.shape)

((500, 3), (500,))

In [7]:
# Cross-validation produces its own train/validation folds, so the existing
# training and test feature arrays are merged back into one array first.
# np.concatenate joins along axis 0 (rows) by default.
x_credit = np.concatenate([x_training, x_test])
x_credit.shape

(2000, 3)

In [9]:
# Merge the label arrays in the same order as the features (training first,
# then test) so that rows and labels stay aligned.
y_credit = np.concatenate([y_training, y_test])
y_credit.shape

(2000,)

Decision Tree

In [11]:
# Hyper-parameter grid for the decision tree: every combination of the values
# below is evaluated by the grid search.
parameters = dict(
    criterion=['gini', 'entropy'],
    splitter=['best', 'random'],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 5, 10],
)

In [15]:
# Grid search (with GridSearchCV's default cross-validation) over the
# decision-tree grid defined in `parameters`.
# The estimator is given a fixed random_state so that the randomised parts of
# tree building (e.g. splitter='random') give the same result on every run;
# without it the best parameters/score can change between executions.
grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=0),
    param_grid=parameters,
)
grid_search.fit(x_credit, y_credit)

# Best parameter combination and its mean cross-validated score.
best_parameters = grid_search.best_params_
best_results = grid_search.best_score_
print(best_parameters)
print(best_results)

{'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 5, 'splitter': 'best'}
0.983


Random Forest

In [16]:
# Hyper-parameter grid for the random forest: split criterion, number of
# trees, and the two minimum-sample constraints.
parameters = dict(
    criterion=['gini', 'entropy'],
    n_estimators=[10, 40, 100, 150],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 5, 10],
)

In [17]:
# Grid search (with GridSearchCV's default cross-validation) over the
# random-forest grid defined in `parameters`.
# A random forest is stochastic (bootstrap samples, random feature subsets),
# so the estimator gets a fixed random_state; otherwise the reported best
# parameters and score differ from run to run.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=0),
    param_grid=parameters,
)
grid_search.fit(x_credit, y_credit)

# Best parameter combination and its mean cross-validated score.
best_parameters = grid_search.best_params_
best_results = grid_search.best_score_
print(best_parameters)
print(best_results)

{'criterion': 'entropy', 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 40}
0.986


kNN

In [20]:
# Hyper-parameter grid for k-nearest neighbours: neighbourhood size and the
# Minkowski power parameter `p`.
parameters = dict(
    n_neighbors=[3, 5, 10, 20],
    p=[1, 2],
)

In [21]:
# Fit a grid search over the kNN grid; `fit` returns the fitted search object,
# so construction and fitting are chained.
grid_search = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=parameters,
).fit(x_credit, y_credit)

# Best parameter combination and its mean cross-validated score, one per line.
best_parameters, best_results = grid_search.best_params_, grid_search.best_score_
print(best_parameters, best_results, sep='\n')

{'n_neighbors': 20, 'p': 1}
0.9800000000000001


Logistic Regression

In [24]:
# Hyper-parameter grid for logistic regression: stopping tolerance, inverse
# regularisation strength C, and the optimisation solver.
parameters = dict(
    tol=[1e-4, 1e-5, 1e-6],
    C=[1.0, 1.5, 2.0],
    solver=['lbfgs', 'sag', 'saga'],
)

In [25]:
# Grid search (with GridSearchCV's default cross-validation) over the
# logistic-regression grid defined in `parameters`.
# The grid includes the 'sag' and 'saga' solvers, which shuffle the data
# randomly; a fixed random_state makes those candidates (and therefore the
# selected best parameters/score) reproducible across runs.
grid_search = GridSearchCV(
    estimator=LogisticRegression(random_state=0),
    param_grid=parameters,
)
grid_search.fit(x_credit, y_credit)

# Best parameter combination and its mean cross-validated score.
best_parameters = grid_search.best_params_
best_results = grid_search.best_score_
print(best_parameters)
print(best_results)

{'C': 1.0, 'solver': 'lbfgs', 'tol': 0.0001}
0.9484999999999999


SVM

In [26]:
# Hyper-parameter grid for the SVM: stopping tolerance, regularisation
# parameter C, and the kernel type.
parameters = dict(
    tol=[1e-3, 1e-4, 1e-5],
    C=[1.0, 1.5, 2.0],
    kernel=['rbf', 'linear', 'poly', 'sigmoid'],
)

In [27]:
# Fit a grid search over the SVM grid; `fit` returns the fitted search object,
# so construction and fitting are chained.
grid_search = GridSearchCV(
    estimator=SVC(),
    param_grid=parameters,
).fit(x_credit, y_credit)

# Best parameter combination and its mean cross-validated score, one per line.
best_parameters, best_results = grid_search.best_params_, grid_search.best_score_
print(best_parameters, best_results, sep='\n')

{'C': 1.5, 'kernel': 'rbf', 'tol': 0.001}
0.9829999999999999


Neural Networks

In [28]:
# Hyper-parameter grid for the neural network: activation function,
# weight-optimisation solver, and mini-batch size.
parameters = dict(
    activation=['relu', 'logistic', 'tanh'],
    solver=['adam', 'sgd'],
    batch_size=[10, 56],
)

In [None]:
# Grid search (with GridSearchCV's default cross-validation) over the
# neural-network grid defined in `parameters`.
# MLP training is stochastic (random weight initialisation, mini-batch
# shuffling), so the estimator gets a fixed random_state; without it the
# selected parameters and score change on every run of this cell.
grid_search = GridSearchCV(
    estimator=MLPClassifier(random_state=0),
    param_grid=parameters,
)
grid_search.fit(x_credit, y_credit)

# Best parameter combination and its mean cross-validated score; these are
# stored here without being printed.
best_parameters = grid_search.best_params_
best_results = grid_search.best_score_


In [30]:
# Show the best parameter combination and its score, one per line.
print(best_parameters, best_results, sep='\n')

{'activation': 'relu', 'batch_size': 56, 'solver': 'adam'}
0.9970000000000001
