# GridSearchCv - HyperParameter Tuning

In [22]:
import pandas as pd
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import OneHotEncoder,LabelEncoder,StandardScaler
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
from sklearn.impute import SimpleImputer

In [None]:
df=pd.read_csv('loan_default_risk_dataset.csv')
df.head(5)

Unnamed: 0,Retirement_Age,Debt_Amount,Monthly_Savings,Loan_Default_Risk
0,60.0,2996.52,2378.49,0
1,66.4,4137.23,1538.92,1
2,58.5,19865.75,2434.8,1
3,49.8,16855.7,2677.82,1
4,67.3,7902.37,2206.72,0


In [24]:
X=df.drop('Loan_Default_Risk',axis=1)

y=df['Loan_Default_Risk']

In [25]:
xtrain,xtest,ytrain,ytest=train_test_split(X,y,train_size=0.8,random_state=42)

In [26]:
model = DecisionTreeClassifier(random_state=42)

In [None]:
model.fit(xtrain,ytrain)
model.score(xtest,ytest)

In [None]:
model.get_depth()
ypred=model.predict(xtest)
accuracy_score(ytest,ypred)

# Column Transformer

In [29]:
num_cols=X.select_dtypes(include=['int64','Float64']).columns

In [30]:
preprocessing=ColumnTransformer(
    transformers = [
        ('simpleimputer',SimpleImputer(strategy='mean'),num_cols)
        # ('standardscalar',StandardScaler(),num_cols)

    ],remainder="passthrough"
)

# Pipeline

In [31]:
pipeline=Pipeline(
    steps=[
        ('preprocessing',preprocessing),
        ('decisiontree',DecisionTreeClassifier(random_state=42,criterion='gini'))
    ]
)

In [None]:
params = {
    'decisiontree__criterion':['entropy'],
    'decisiontree__min_samples_split' : [2,3,5,10],
    'decisiontree__max_depth' : [5,10,50,100,200],
    'decisiontree__min_samples_leaf' : [2,3,5,7,10]
}

gridsearch =GridSearchCV(pipeline,params,cv=5,n_jobs=-1)

In [33]:
gridsearch.fit(xtrain,ytrain)

0,1,2
,estimator,Pipeline(step...m_state=42))])
,param_grid,"{'decisiontree__criterion': ['entropy'], 'decisiontree__max_depth': [5, 10, ...], 'decisiontree__min_samples_leaf': [2, 3, ...], 'decisiontree__min_samples_split': [2, 3, ...]}"
,scoring,
,n_jobs,-1
,refit,True
,cv,5
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,transformers,"[('simpleimputer', ...)]"
,remainder,'passthrough'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,missing_values,
,strategy,'mean'
,fill_value,
,copy,True
,add_indicator,False
,keep_empty_features,False

0,1,2
,criterion,'entropy'
,splitter,'best'
,max_depth,10
,min_samples_split,2
,min_samples_leaf,3
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,42
,max_leaf_nodes,
,min_impurity_decrease,0.0


In [34]:
gridsearch.score(xtest,ytest)

0.9666666666666667

In [35]:
ypred=gridsearch.predict(xtest)
gridsearch.score(xtest,ytest)

0.9666666666666667

In [36]:
gridsearch.best_params_

{'decisiontree__criterion': 'entropy',
 'decisiontree__max_depth': 10,
 'decisiontree__min_samples_leaf': 3,
 'decisiontree__min_samples_split': 2}

In [None]:
gridsearch.best_score_

np.float64(0.8541666666666666)