# Light GBM

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score
from warnings import filterwarnings
filterwarnings("ignore")

In [2]:
data_file = "diabetes.csv"

In [3]:
df = pd.read_csv(data_file)
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
X = df.drop(["Outcome"], axis=1)
y = df["Outcome"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3232)

In [5]:
lgbm_model = LGBMClassifier().fit(X_train, y_train)

# Tahmin

In [6]:
y_pred = lgbm_model.predict(X_test)
accuracy_score(y_test, y_pred)

0.7467532467532467

# Model Tuning

In [7]:
lgbm_params = {
    'n_estimators': [100, 250, 500],
    'subsample': [0.6, 0.8, 1.0],
    'max_depth': [3, 4, 5],
    'learning_rate': [0.05, 0.01, 0.1],
    'min_child_samples': [5, 10, 15]}
lgbm_model = LGBMClassifier()
lgbm_cv_model = GridSearchCV(lgbm_model, lgbm_params, cv=10, n_jobs=-1).fit(X_train, y_train)

In [8]:
lgbm_cv_model.best_params_

{'learning_rate': 0.01,
 'max_depth': 3,
 'min_child_samples': 15,
 'n_estimators': 250,
 'subsample': 0.6}

In [9]:
lgbm_tuned = LGBMClassifier(learning_rate=0.01, max_depth=3, min_child_samples=15, n_estimators=250, subsample=0.6).fit(X_train, y_train)
y_pred = lgbm_tuned.predict(X_test)
accuracy_score(y_test, y_pred)

0.7597402597402597







