In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.graph_objects as go
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import KFold, train_test_split
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Load the training split from the competition's input directory.
train = pd.read_csv(
    filepath_or_buffer=r'../input/tabular-playground-series-feb-2022/train.csv',
)
# Display the first five rows as a quick look at the columns.
train.head(n=5)

In [None]:
# Load the test split from the competition's input directory.
test = pd.read_csv(
    filepath_or_buffer=r'../input/tabular-playground-series-feb-2022/test.csv',
)
# Display the first five rows as a quick look at the columns.
test.head(n=5)

In [None]:
# Load the sample submission; its 'target' column is overwritten with the
# model's predictions before the file is written back out.
sub = pd.read_csv(
    filepath_or_buffer=r'../input/tabular-playground-series-feb-2022/sample_submission.csv',
)
# Display the first five rows to see the expected submission layout.
sub.head(n=5)

In [None]:
# Remove `row_id` from both frames so it does not end up in the feature list.
# Rebinding with errors='ignore' (instead of an in-place drop) keeps the cell
# idempotent: running it a second time no longer raises KeyError because the
# column is already gone.
train = train.drop(columns='row_id', errors='ignore')
test = test.drop(columns='row_id', errors='ignore')

In [None]:
# Report the dimensions of each loaded frame, one line per dataset.
for label, frame in (('train', train), ('test', test), ('sample_submission', sub)):
    n_rows, n_cols = frame.shape
    print(f'{label} set have {n_rows} rows and {n_cols} columns.')

In [None]:
# Donut chart of how often each target class occurs in the training data.
#
# Count the classes up front: handing the raw column to `labels` embeds one
# label per training row in the figure and leaves the counting to plotly,
# which bloats the notebook without changing the picture.
target_counts = train['target'].value_counts()

fig = go.Figure(data=[go.Pie(labels=target_counts.index,
                             values=target_counts.values,
                             hole=.3)])
# Caption placed in the middle of the donut hole.
fig.add_annotation(text='Target',
                   x=0.5, y=0.5, showarrow=False, font_size=14, opacity=0.7, font_family='monospace')
# All trace-level styling in a single call (hover content, slice outline,
# and the outside percent+label text).
fig.update_traces(hoverinfo='label+percent+value',
                  textposition='outside', textinfo='percent+label',
                  marker=dict(line=dict(color='#000000', width=2)))
fig.update_layout(
    font_family='monospace',
    title=dict(text='Target Unique Values', x=0.47, y=0.98,
               font=dict(color='black', size=20)),
    legend=dict(orientation='v', traceorder='reversed'),
    hoverlabel=dict(bgcolor='white'))
fig.show()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Replace the target labels with integer codes. The fitted encoder stays in
# `le` so the codes can be turned back into the original labels when the
# submission is written.
le = LabelEncoder()
le.fit(train['target'])
train['target'] = le.transform(train['target'])

In [None]:
# Split the encoded target off the training frame: `pop` returns the column
# and removes it from `train` in place, leaving only feature columns behind.
y = train.pop('target')

In [None]:
# Every column still present in `train` is used as a model feature.
features = train.columns.tolist()
print(features)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the feature columns. The scaler's statistics are estimated on
# the training frame only and then reused unchanged for the test frame.
scale = StandardScaler()
scale.fit(train[features])
train[features] = scale.transform(train[features])
test[features] = scale.transform(test[features])

In [None]:
def fit_lgb(trial, x_train, y_train, x_test, y_test):
    """Fit one LGBMClassifier using hyper-parameters sampled from ``trial``.

    Parameters
    ----------
    trial
        Object exposing ``suggest_loguniform``, ``suggest_uniform`` and
        ``suggest_int`` (presumably an Optuna trial -- confirm against the
        caller).
    x_train, y_train
        Features and labels the model is fitted on.
    x_test, y_test
        Hold-out features and labels. They are used both as the early-stopping
        evaluation set and to compute the reported validation accuracy.

    Returns
    -------
    tuple
        ``(model, log)`` where ``model`` is the fitted classifier and ``log``
        is a dict with the keys ``"train accuracy"`` and ``"valid accuracy"``.
    """
    params = {
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-4, 1e4),
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-4, 1e4),
        'learning_rate': trial.suggest_uniform('learning_rate', 0.03, 0.07),
        'max_depth': trial.suggest_int('max_depth', 1, 20),
        'n_estimators': trial.suggest_int('n_estimators', 100, 20000),
        'min_child_weight': trial.suggest_loguniform('min_child_weight', 1e-4, 1e4),
        # LightGBM only accepts a row-sampling fraction in (0, 1]. The former
        # upper bound of 2.0 made roughly half of the sampled values invalid,
        # so those trials aborted inside LightGBM.
        # NOTE(review): `subsample` has no effect while `subsample_freq` is
        # left at its default of 0 -- decide whether bagging should be enabled.
        'subsample': trial.suggest_uniform('subsample', 0.01, 1.0),
        'min_child_samples': trial.suggest_int('min_child_samples', 76, 80),
        'device_type': 'gpu', 'n_jobs': 4
    }

    model = LGBMClassifier(**params)
    # The hold-out split drives early stopping (150 rounds without improvement).
    # NOTE(review): `early_stopping_rounds` / `verbose` as fit() keywords are
    # deprecated in recent LightGBM releases; move to callbacks when upgrading.
    model.fit(x_train, y_train, eval_set=[(x_test, y_test)], early_stopping_rounds=150, verbose=False)

    # predict() already returns class labels, so the previous element-wise
    # round() pass over the predictions was a no-op and has been removed.
    y_train_pred = model.predict(x_train)
    y_test_pred = model.predict(x_test)

    log = {
        "train accuracy": accuracy_score(y_train, y_train_pred),
        "valid accuracy": accuracy_score(y_test, y_test_pred)
    }

    return model, log

In [None]:
def objective(trial):
    """Return the validation accuracy obtained for one trial.

    A new random 70/30 split of the module-level ``train`` / ``y`` is drawn on
    every call, a model is fitted through ``fit_lgb``, and the accuracy on the
    30% hold-out part is returned. The fitted model itself is discarded.
    """
    split = train_test_split(train, y, test_size=0.30)
    fit_part_x, holdout_x, fit_part_y, holdout_y = split
    _, scores = fit_lgb(trial, fit_part_x, fit_part_y, holdout_x, holdout_y)
    return scores['valid accuracy']

In [None]:
# Fixed LightGBM hyper-parameters used for the cross-validated training run.
params = dict(
    reg_alpha=0.0038667681867618933,
    reg_lambda=0.636197588712714,
    learning_rate=0.047206888009302006,
    max_depth=11,
    n_estimators=6786,
    min_child_samples=79,
    device_type='gpu',
)

In [None]:
# 5-fold cross-validation that also serves as a fold ensemble for the test set.
#
# Previously the zero-initialised `lgb_predictions` accumulator was never
# updated and the test set was predicted by the last fold's model alone, so
# the other four fitted models were thrown away. Each fold model now
# contributes its class probabilities and the final label is their soft vote.
N_SPLITS = 5
# A fixed seed makes the fold assignment reproducible across runs.
folds = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

fold_scores = []
# Running mean of the per-class probabilities predicted for `test`; starts as
# a scalar and becomes a (len(test), n_classes) array after the first fold.
test_proba = 0.0

for fold, (trn_idx, val_idx) in enumerate(folds.split(train)):
    print(f"Fold: {fold}")
    X_train, X_test = train.iloc[trn_idx], train.iloc[val_idx]
    y_train, y_test = y.iloc[trn_idx], y.iloc[val_idx]

    model = LGBMClassifier(**params)

    # The held-out fold doubles as the early-stopping evaluation set.
    model.fit(X_train, y_train,
              eval_set=[(X_test, y_test)],
              early_stopping_rounds=400,
              verbose=False)

    # predict() returns class labels directly, so no rounding is required.
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    fold_scores.append(acc)

    # Each fold model contributes an equal share to the averaged probabilities.
    test_proba = test_proba + model.predict_proba(test) / N_SPLITS

    print(f" accuracy: {acc*100}%")
    print("-"*50)

print(f"mean CV accuracy: {np.mean(fold_scores)*100}%")

# Soft vote: pick the class with the highest averaged probability. Indexing
# through `classes_` maps probability columns back to encoded labels; this
# assumes every fold saw the same set of classes during training.
lgb_predictions = model.classes_[np.argmax(test_proba, axis=1)]

In [None]:
# Convert the encoded predictions back to the original target labels and
# write the submission file (without the DataFrame index).
decoded_target = le.inverse_transform(lgb_predictions)
sub['target'] = decoded_target
sub.to_csv('lgb.csv', index=False)

### WORK IN PROGRESS....