In [1]:
import pandas as pd

In [2]:
ds = pd.read_csv("CKD.csv")

In [3]:
#Check the balanced or imbalanced values
ds["classification"].value_counts()

classification
yes    249
no     150
Name: count, dtype: int64

In [4]:
ds = pd.get_dummies(ds,drop_first=True)

In [5]:
#ds.columns

In [6]:
#Input fields
independent = ds[['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv','wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal',
       'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes','appet_yes', 'pe_yes', 'ane_yes']]

#output fields
dependent =ds[['classification_yes']]

In [7]:
#Split the data for train test
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(independent,dependent,test_size=0.30,random_state=False)

In [8]:
#preprocessing tool used standardize features by removing the mean and scaling to unit variance. z-score normalization.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train) # x - mean/std deviation
x_test = sc.transform(x_test)

In [9]:
from sklearn.model_selection import GridSearchCV
#Model creation 
from sklearn.linear_model import LogisticRegression
parm_grid = {
            'penalty' : ['l1', 'l2', 'elasticnet', 'None'],
            'solver' : ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
            'multi_class' : ['auto', 'ovr', 'multinomial']
            }
Classifier = GridSearchCV(LogisticRegression(), parm_grid, refit = True, verbose = 3, n_jobs=-1)

Classifier.fit(x_train,y_train)

Fitting 5 folds for each of 72 candidates, totalling 360 fits


250 fits failed out of a total of 360.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
15 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\1309048\AppData\Local\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
    ~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\1309048\AppData\Local\anaconda3\Lib\site-packages\sklearn\base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "C:\Users\1309048\AppData\Local\anaconda3\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  Fil

In [10]:
result = Classifier.cv_results_

In [11]:
y_pred = Classifier.predict(x_test)
table = pd.DataFrame.from_dict(result)
table

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_multi_class,param_penalty,param_solver,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002257,0.000901,0.000000,0.000000,auto,l1,lbfgs,"{'multi_class': 'auto', 'penalty': 'l1', 'solv...",,,,,,,,23
1,0.247562,0.224422,0.004383,0.000299,auto,l1,liblinear,"{'multi_class': 'auto', 'penalty': 'l1', 'solv...",1.0,0.982143,0.982143,0.946429,0.963636,0.97487,0.018289,19
2,0.001084,0.000036,0.000000,0.000000,auto,l1,newton-cg,"{'multi_class': 'auto', 'penalty': 'l1', 'solv...",,,,,,,,23
3,0.000959,0.000145,0.000000,0.000000,auto,l1,newton-cholesky,"{'multi_class': 'auto', 'penalty': 'l1', 'solv...",,,,,,,,23
4,0.001138,0.000423,0.000000,0.000000,auto,l1,sag,"{'multi_class': 'auto', 'penalty': 'l1', 'solv...",,,,,,,,23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,0.000756,0.000166,0.000000,0.000000,multinomial,,liblinear,"{'multi_class': 'multinomial', 'penalty': 'Non...",,,,,,,,23
68,0.000730,0.000075,0.000000,0.000000,multinomial,,newton-cg,"{'multi_class': 'multinomial', 'penalty': 'Non...",,,,,,,,23
69,0.000745,0.000070,0.000000,0.000000,multinomial,,newton-cholesky,"{'multi_class': 'multinomial', 'penalty': 'Non...",,,,,,,,23
70,0.000711,0.000138,0.000000,0.000000,multinomial,,sag,"{'multi_class': 'multinomial', 'penalty': 'Non...",,,,,,,,23


In [12]:
# Get the metrics
from sklearn.metrics import confusion_matrix
matric = confusion_matrix(y_test,y_pred)
print(matric)

[[45  0]
 [ 1 74]]


In [13]:
# get the classification report
from sklearn.metrics import classification_report
report = classification_report(y_test,y_pred)
print(report)

              precision    recall  f1-score   support

       False       0.98      1.00      0.99        45
        True       1.00      0.99      0.99        75

    accuracy                           0.99       120
   macro avg       0.99      0.99      0.99       120
weighted avg       0.99      0.99      0.99       120



In [14]:
# Key to built the model
# How well model supports for overall call model
from sklearn.metrics import f1_score
f1_macro = f1_score (y_test,y_pred,average='weighted')
print ("The best value for Parameter {}:".format(Classifier.best_params_),f1_macro)

The best value for Parameter {'multi_class': 'auto', 'penalty': 'l2', 'solver': 'lbfgs'}: 0.9916844900066377


In [15]:
# receiver operating characteristic and Area under the curve
# How well model supports for Class 0 and class 1
from sklearn.metrics import roc_auc_score
roc_auc = roc_auc_score (y_test,Classifier.predict_proba(x_test)[:,1]) # : tell all rows, take 1 ist colum of all rows for probability 
print ("The best value for Parameter {}:".format(Classifier.best_params_),roc_auc)

The best value for Parameter {'multi_class': 'auto', 'penalty': 'l2', 'solver': 'lbfgs'}: 1.0
