In [70]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix
import lightgbm as lgb


df = pd.read_csv('breast-cancer.csv')
df.shape


(569, 32)

In [71]:
for i in df.columns:
    print(i, ',', df[i].dtype,',', df[i].nunique(dropna=False),',', df[i].isna().sum())

id , int64 , 569 , 0
diagnosis , object , 2 , 0
radius_mean , float64 , 456 , 0
texture_mean , float64 , 479 , 0
perimeter_mean , float64 , 522 , 0
area_mean , float64 , 539 , 0
smoothness_mean , float64 , 474 , 0
compactness_mean , float64 , 537 , 0
concavity_mean , float64 , 537 , 0
concave points_mean , float64 , 542 , 0
symmetry_mean , float64 , 432 , 0
fractal_dimension_mean , float64 , 499 , 0
radius_se , float64 , 540 , 0
texture_se , float64 , 519 , 0
perimeter_se , float64 , 533 , 0
area_se , float64 , 528 , 0
smoothness_se , float64 , 547 , 0
compactness_se , float64 , 541 , 0
concavity_se , float64 , 533 , 0
concave points_se , float64 , 507 , 0
symmetry_se , float64 , 498 , 0
fractal_dimension_se , float64 , 545 , 0
radius_worst , float64 , 457 , 0
texture_worst , float64 , 511 , 0
perimeter_worst , float64 , 514 , 0
area_worst , float64 , 544 , 0
smoothness_worst , float64 , 411 , 0
compactness_worst , float64 , 529 , 0
concavity_worst , float64 , 539 , 0
concave points_wo

In [72]:
df['diagnosis'].value_counts(dropna=False)

diagnosis
B    357
M    212
Name: count, dtype: int64

In [73]:
df_x = df.drop('diagnosis', axis=1)
df_y = df['diagnosis']

x_train, x_test, y_train, y_test = train_test_split(df_x, df_y, test_size= 0.3, random_state=37)

In [74]:
model = lgb.LGBMClassifier(random_state=37, verbose = -1)
param_grid = {'learning_rate': [0.1,0.2,0.3,0.4],
              'n_estimators': [25,50,75,100,150],
              'num_leaves': [15,25,30,35,40]
              }
model_cv = GridSearchCV(model, param_grid,cv=5)
model_cv.fit(x_train, y_train)
best_param = model_cv.best_params_

print(f'Best Parameter = {model_cv.best_params_}')
print("Best score is {}".format(model_cv.best_score_))

Best Parameter = {'learning_rate': 0.3, 'n_estimators': 50, 'num_leaves': 25}
Best score is 0.9673101265822786


In [75]:
model = lgb.LGBMClassifier(learning_rate=0.3, n_estimators=50, num_leaves=25, random_state=37, verbose = -1)
model.fit(x_train,y_train)
labels = model.predict(x_test)
model_accuracy = accuracy_score(labels, y_test)
print(f'LightGBM Model Accuray = {model_accuracy}')

cm = confusion_matrix(y_test, labels)
print(f'\nConfusion Matrix = \n{cm}')

LightGBM Model Accuray = 0.9707602339181286

Confusion Matrix = 
[[110   2]
 [  3  56]]


In [76]:
hyperparameters = model.get_params(deep=True)  # Hyperparameters are stored in a Python dictionary
hyperparameters

{'boosting_type': 'gbdt',
 'class_weight': None,
 'colsample_bytree': 1.0,
 'importance_type': 'split',
 'learning_rate': 0.3,
 'max_depth': -1,
 'min_child_samples': 20,
 'min_child_weight': 0.001,
 'min_split_gain': 0.0,
 'n_estimators': 50,
 'n_jobs': None,
 'num_leaves': 25,
 'objective': None,
 'random_state': 37,
 'reg_alpha': 0.0,
 'reg_lambda': 0.0,
 'subsample': 1.0,
 'subsample_for_bin': 200000,
 'subsample_freq': 0,
 'verbose': -1}

In [77]:
model = lgb.LGBMClassifier(random_state=37, verbose = -1)
model.fit(x_train,y_train)
labels = model.predict(x_test)
model_accuracy = accuracy_score(labels, y_test)
print(f'LightGBM Model Accuray = {model_accuracy}')

cm = confusion_matrix(y_test, labels)
print(f'\nConfusion Matrix = \n{cm}')

LightGBM Model Accuray = 0.9590643274853801

Confusion Matrix = 
[[108   4]
 [  3  56]]


In [78]:
hyperparameters = model.get_params(deep=True)  # Hyperparameters are stored in a Python dictionary
hyperparameters

{'boosting_type': 'gbdt',
 'class_weight': None,
 'colsample_bytree': 1.0,
 'importance_type': 'split',
 'learning_rate': 0.1,
 'max_depth': -1,
 'min_child_samples': 20,
 'min_child_weight': 0.001,
 'min_split_gain': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'num_leaves': 31,
 'objective': None,
 'random_state': 37,
 'reg_alpha': 0.0,
 'reg_lambda': 0.0,
 'subsample': 1.0,
 'subsample_for_bin': 200000,
 'subsample_freq': 0,
 'verbose': -1}