In [None]:
import numpy as np
import pandas as pd
import random

from sklearn.metrics import classification_report, log_loss, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit

import lightgbm as lgbm

# Data Processing

In [None]:
# Read the heart-disease table from the relative input directory.
data = pd.read_csv("../input/heart-disease-uci/heart.csv")
# Show the first two rows as this cell's output.
data.head(2)

In [None]:
# Row count of the loaded table; later cells use it to build the shuffled
# index list and to compute the train/test split point.
n = data.shape[0]
print(n)

In [None]:
# Shuffled row positions 0..n-1, used further down to carve the data into
# train and hold-out parts.
# A dedicated, seeded generator makes the split identical on every
# Restart & Run All without touching the global `random` state
# (71 matches the seed already used for the model and for KFold).
N = list(range(n))
random.Random(71).shuffle(N)

In [None]:
# Separate the label column ('target') from the remaining feature columns.
datay = data['target']
data2 = data.drop(columns=['target'])

In [None]:
# Convert features and labels to NumPy arrays so they can be indexed with
# lists of row positions in the split below.
data2, datay = (np.array(obj) for obj in (data2, datay))

In [None]:
# The first (n // 10) * 8 shuffled positions form the training part,
# everything after that is the hold-out part.
n_train = (n // 10) * 8
train_rows, test_rows = N[:n_train], N[n_train:]
train, trainy = data2[train_rows], datay[train_rows]
test, testy = data2[test_rows], datay[test_rows]

In [None]:
# Short aliases for the training features and labels used by the modelling cells.
X, y = train, trainy

# Modeling

In [None]:
from contextlib import contextmanager
from time import time

class Timer:
    """Context manager that times a ``with`` block and reports the elapsed seconds.

    On exit the elapsed wall-clock time is formatted with ``format_str`` and
    sent to ``logger.info`` when a logger was given, otherwise printed.

    Args:
        logger: Optional object with an ``info(str)`` method. ``None`` means
            the message is printed.
        format_str: ``str.format`` template that receives the duration as its
            single positional argument.
        prefix: Optional value prepended (via ``str``) to the template.
        suffix: Optional value appended (via ``str``) to the template.
        sep: Separator placed between prefix/suffix and the template.

    Attributes:
        start: Timestamp taken on entry, or ``None`` before first use.
        end: Timestamp taken on exit, or ``None`` while no run has finished.
    """

    def __init__(self, logger=None, format_str='{:.3f}[s]', prefix=None, suffix=None, sep=' '):
        if prefix:
            format_str = str(prefix) + sep + format_str
        if suffix:
            format_str = format_str + sep + str(suffix)
        self.format_str = format_str
        self.logger = logger
        self.start = None
        self.end = None

    @property
    def duration(self):
        """Elapsed seconds of the last completed run; 0 while none has finished."""
        if self.end is None:
            return 0
        return self.end - self.start

    def __enter__(self):
        self.start = time()
        # Forget the end of any previous run so `duration` is not computed
        # from a stale timestamp while this block is still executing.
        self.end = None
        # Return the timer so `with Timer() as t:` binds a usable object.
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.end = time()
        out_str = self.format_str.format(self.duration)
        if self.logger:
            self.logger.info(out_str)
        else:
            print(out_str)
        # Falls through returning None: exceptions raised in the block propagate.

In [None]:
import lightgbm as lgbm
from sklearn.metrics import mean_squared_error

def fit_lgbm(X, y, cv, 
             params: dict=None, 
             verbose: int=50):
    """Train one LightGBM regressor per CV fold and collect out-of-fold predictions.

    For every ``(train_idx, valid_idx)`` pair in ``cv`` a fresh
    ``lgbm.LGBMRegressor(**params)`` is fitted on the training rows with the
    validation rows as ``eval_set`` and early stopping after 100 rounds
    without improvement. Each fold's RMSE and the overall out-of-fold RMSE
    are printed.

    Requires LightGBM >= 3.3 (callback-based early stopping / logging).

    Args:
        X: Feature array; must support indexing with an array of row positions.
        y: Target array aligned with ``X``; indexed the same way.
        cv: Iterable of ``(idx_train, idx_valid)`` index pairs.
        params: Keyword arguments for ``LGBMRegressor``; ``None`` means defaults.
        verbose: Period (in boosting rounds) of evaluation logging; a falsy
            value disables both evaluation and early-stopping messages.

    Returns:
        Tuple ``(oof_pred, models)``: a float array shaped like ``y`` holding
        each row's validation-fold prediction (rows never used for validation
        stay 0), and the list of fitted models in fold order.
    """

    if params is None:
        params = {}

    models = []
    # `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
    oof_pred = np.zeros_like(y, dtype=float)

    for i, (idx_train, idx_valid) in enumerate(cv): 
        x_train, y_train = X[idx_train], y[idx_train]
        x_valid, y_valid = X[idx_valid], y[idx_valid]

        clf = lgbm.LGBMRegressor(**params)

        # The `early_stopping_rounds=` / `verbose=` fit keywords were removed in
        # LightGBM 4.0; the equivalent callbacks work on 3.3+ and 4.x alike.
        callbacks = [
            lgbm.early_stopping(stopping_rounds=100, verbose=bool(verbose)),
            lgbm.log_evaluation(period=int(verbose)),
        ]

        with Timer(prefix='fit fold={} '.format(i)):
            clf.fit(x_train, y_train, 
                    eval_set=[(x_valid, y_valid)],  
                    callbacks=callbacks)

        pred_i = clf.predict(x_valid)
        oof_pred[idx_valid] = pred_i
        models.append(clf)
        # The metric is plain RMSE (square root of MSE), not RMSLE.
        print(f'Fold {i} RMSE: {mean_squared_error(y_valid, pred_i) ** .5:.4f}')
        print()

    score = mean_squared_error(y, oof_pred) ** .5
    print('-' * 50)
    print('FINISHED | Whole RMSE: {:.4f}'.format(score))
    return oof_pred, models

In [None]:
# Keyword arguments handed to fit_lgbm, which forwards them to lgbm.LGBMRegressor.
# n_estimators is only an upper bound: fit_lgbm stops each fold early after
# 100 rounds without improvement on the validation rows.
params = dict(
    objective='rmse',
    learning_rate=0.1,
    reg_lambda=1.0,
    reg_alpha=0.1,
    max_depth=5,
    n_estimators=10000,
    colsample_bytree=0.5,
    min_child_samples=10,
    subsample_freq=3,
    subsample=0.9,
    importance_type='gain',
    random_state=71,
)

In [None]:
from sklearn.model_selection import KFold

# Ten shuffled folds with a fixed seed; the split generator is materialised
# into a list of (train indices, validation indices) pairs before training.
fold = KFold(n_splits=10, shuffle=True, random_state=71)
cv = [(idx_train, idx_valid) for idx_train, idx_valid in fold.split(X, y)]
oof, models = fit_lgbm(X, y, cv=cv, params=params, verbose=10000)

# Predict

In [None]:
def revert_to_real(preda):
    """Round every prediction to the nearest integer and return them as a pandas Series.

    Uses Python's built-in ``round`` element by element, so exact halves go
    to the even neighbour (0.5 -> 0, 1.5 -> 2).
    """
    def _nearest_int(value):
        return round(value)

    return pd.Series(preda).apply(_nearest_int)

In [None]:
# Score the hold-out rows with every fold model, average the predictions
# across models, then round the averages to integer labels.
pred0 = np.array([fold_model.predict(test) for fold_model in models])
pred1 = pred0.mean(axis=0)
pred2 = revert_to_real(pred1)
PRED = np.array(pred2)
# Preview the first 20 predicted labels.
PRED[:20]

In [None]:
# Ground-truth labels of the hold-out rows, previewed next to PRED above.
ANS = testy
ANS[:20]

In [None]:
# Fraction of hold-out rows whose rounded prediction equals the true label.
accuracy = accuracy_score(y_true=ANS, y_pred=PRED)
print(accuracy)