In [1]:
import pandas as pd
import numpy as np
import lightgbm as lgb 
import matplotlib.pyplot as plt
from numpy import loadtxt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import recall_score, accuracy_score, precision_score 
from sklearn.metrics import f1_score, balanced_accuracy_score,roc_auc_score

In [2]:
def lightGBM(x_train_filename, x_test_filename, y_train_filename, y_test_filename):
    """Train a LightGBM binary classifier from CSV files and print test metrics.

    The four files are read with ``header=None``, i.e. every row is treated as
    data.  The feature files provide the design matrices; the label files are
    flattened to 1-D vectors before use.

    Parameters
    ----------
    x_train_filename, x_test_filename : str or path-like
        CSV files holding the training / test feature matrices.
    y_train_filename, y_test_filename : str or path-like
        CSV files holding the training / test labels.
        NOTE(review): labels are presumably encoded as 0/1 (the model is
        trained with ``objective='binary'`` and predictions are mapped to
        0/1) -- confirm against the data files.

    Returns
    -------
    None
        The weighted F1 score, the ROC AUC and the confusion matrix are
        printed to stdout.
    """
    # Load dataset
    X_train = pd.read_csv(x_train_filename, header=None).to_numpy()
    X_test = pd.read_csv(x_test_filename, header=None).to_numpy()

    # Labels are flattened to 1-D: both lgb.Dataset and the sklearn metrics
    # expect a vector of targets, not an (n, 1) column.
    Y_train = pd.read_csv(y_train_filename, header=None).to_numpy().ravel()
    Y_test = pd.read_csv(y_test_filename, header=None).to_numpy().ravel()

    # Set parameters
    d_train = lgb.Dataset(X_train, label=Y_train)

    params = {
        'learning_rate': 0.3,
        'boosting_type': 'gbdt',
        'objective': 'binary',
        'metric': 'binary_logloss',
        # Canonical names for the former 'sub_feature' / 'min_data' aliases.
        'feature_fraction': 0.5,
        'min_data_in_leaf': 50,
        'num_leaves': 3,
        'max_depth': 10,
    }

    clf = lgb.train(params, d_train, num_boost_round=100)

    # Prediction: for the binary objective, predict() yields one score per row.
    y_prob = clf.predict(X_test)

    # Convert into binary values (scores >= 0.5 become class 1)
    y_hat = (np.asarray(y_prob) >= 0.5).astype(int)

    # f1: 2 tp / (2 tp + fp + fn), averaged over both classes weighted by
    # support.  Only labels 0 and 1 can occur here; listing labels that never
    # appear only triggers undefined-metric warnings without changing the
    # weighted score (their support, and hence their weight, is zero).
    f1 = f1_score(Y_test, y_hat, labels=[0, 1], average='weighted')
    f1_str = 'F1 score: %f' % f1
    print(f1_str)

    # Calculate roc auc (uses the raw scores, not the thresholded labels)
    roc = roc_auc_score(Y_test, y_prob)
    roc_str = 'ROC: %f' % roc
    print(roc_str)

    # Confusion matrix
    cm = confusion_matrix(Y_test, y_hat)
    print(cm)
