# Machine learning for Drug – Nanoparticle (DADNP) Systems in HCC

This notebook lists the hyperparameters used by each of the ML classifiers.


In [1]:
# Load (or reload) the autoreload extension; mode 2 re-imports all modules
# before each cell executes, so edits to imported .py files are picked up.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Suppress DeprecationWarning messages so they do not clutter the cell output.
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

In [2]:
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score, GridSearchCV, StratifiedKFold
from sklearn.metrics import confusion_matrix,accuracy_score, roc_auc_score,f1_score, recall_score, precision_score
from sklearn.utils import class_weight

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression, LassoCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process.kernels import RBF
from sklearn.svm import LinearSVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.feature_selection import RFECV, VarianceThreshold, SelectKBest, chi2
from sklearn.feature_selection import SelectFromModel, SelectPercentile, f_classif

In [3]:
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, BaggingClassifier, AdaBoostClassifier
from xgboost import XGBClassifier

### Define script parameters

In [4]:
# --- reproducibility -------------------------------------------------------
# Single seed shared by NumPy's global RNG and by every estimator that takes one.
seed = 42
np.random.seed(seed)

# --- run configuration -----------------------------------------------------
# Output variable(s) of the dataset.
outVars = ['f(vnj)obs06']

# List of fold counts.
foldTypes = [3]

# Label used for output files.
targetName = 'sel'

In [8]:
# Per-class sample weights (label -> weight): class 0 is weighted below 1,
# class 1 above 1.
class_weights = {0: 0.815068493150685, 1: 1.2934782608695652}

# The same two weights rescaled so that they sum to 1, ordered [class 0, class 1].
priors = [
    weight / (class_weights[0] + class_weights[1])
    for weight in (class_weights[0], class_weights[1])
]

In [12]:
# Display the normalised weights: [class 0, class 1].
priors

[0.3865546218487395, 0.6134453781512604]

In [11]:
# Short labels for the estimators, in the same order as the `classifiers` list.
names = [
    'KNN',   # KNeighborsClassifier
    'GNB',   # GaussianNB
    'LDA',   # LinearDiscriminantAnalysis
    'LogR',  # LogisticRegression
    'MLP',   # MLPClassifier
    'DT',    # DecisionTreeClassifier
    'RF',    # RandomForestClassifier
    'XGB',   # XGBClassifier
    'GB',    # GradientBoostingClassifier
    'BAG',   # BaggingClassifier
    'ADA',   # AdaBoostClassifier
]

In [9]:
# Baseline estimators whose hyperparameters are listed below; the order matches
# the `names` list. Estimators that accept a seed receive `seed`; LogR, DT and
# RF receive `class_weights`, LDA receives the normalised `priors`, and XGB
# receives the equivalent positive-class multiplier.
classifiers = [KNeighborsClassifier(n_jobs=-1),
               GaussianNB(),
               LinearDiscriminantAnalysis(solver='svd',priors=priors), # has no random_state parameter
               LogisticRegression(solver='lbfgs',random_state=seed,class_weight=class_weights,max_iter=20000),
               # NOTE: `(5)` is the plain int 5 (not a tuple); scikit-learn
               # treats it as a single hidden layer of 5 units.
               MLPClassifier(hidden_layer_sizes= (5), random_state = seed, max_iter=50000, shuffle=False),
               DecisionTreeClassifier(random_state = seed,class_weight=class_weights),
               RandomForestClassifier(n_jobs=-1,random_state=seed,class_weight=class_weights),
               # scale_pos_weight multiplies the weight of the positive class (1),
               # so it must be weight(1) / weight(0) to agree with the
               # `class_weight=class_weights` setting of the other estimators.
               # XGBoost's suggested value, sum(negative) / sum(positive), is the
               # same ratio when the weights are inversely proportional to the
               # class counts. The previous weight(0) / weight(1) was inverted
               # and down-weighted the class the other models up-weight.
               XGBClassifier(n_jobs=-1,seed=seed,scale_pos_weight=class_weights[1]/class_weights[0]),
               GradientBoostingClassifier(random_state=seed),
               BaggingClassifier(random_state=seed),
               AdaBoostClassifier(random_state = seed)]

In [10]:
# Print, for every estimator, its class name followed by the full
# hyperparameter dictionary and two blank lines as a separator.
for clf in classifiers:
    print(
        f"{clf.__class__.__name__} parameters:",
        clf.get_params(),
        "\n",
        sep="\n",
    )

KNeighborsClassifier parameters:
{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': -1, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}


GaussianNB parameters:
{'priors': None, 'var_smoothing': 1e-09}


LinearDiscriminantAnalysis parameters:
{'n_components': None, 'priors': [0.3865546218487395, 0.6134453781512604], 'shrinkage': None, 'solver': 'svd', 'store_covariance': False, 'tol': 0.0001}


LogisticRegression parameters:
{'C': 1.0, 'class_weight': {0: 0.815068493150685, 1: 1.2934782608695652}, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 20000, 'multi_class': 'auto', 'n_jobs': None, 'penalty': 'l2', 'random_state': 42, 'solver': 'lbfgs', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}


MLPClassifier parameters:
{'activation': 'relu', 'alpha': 0.0001, 'batch_size': 'auto', 'beta_1': 0.9, 'beta_2': 0.999, 'early_stopping': False, 'epsilon': 1e-08, 'hidden_layer_sizes': 5, 'learning_rate': 'c