In [None]:
import numpy as np, pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import lightgbm as lgb

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

In [None]:
# Read the training feature table from the competition input folder.
train = pd.read_csv(filepath_or_buffer="../input/lish-moa/train_features.csv")

In [None]:
# Fuse dose and duration into a single string key (e.g. "D1_24"), then encode
# each of the six listed combinations as an integer class label 0-5.
# Keys that are not in the mapping come out of `.map` as NaN.
dose_time_key = train["cp_dose"] + "_" + train["cp_time"].astype("str")
dose_time_codes = {
    "D1_24": 0, "D1_48": 1, "D1_72": 2,
    "D2_24": 3, "D2_48": 4, "D2_72": 5,
}
train["cp_dose_time"] = dose_time_key.map(dose_time_codes)

In [None]:
# Strip `sig_id`, `cp_type` and the two columns that `cp_dose_time` was built
# from, leaving only the remaining feature columns plus the encoded target.
# Reassigning (instead of `inplace=True`) together with `errors="ignore"` keeps
# this cell re-runnable: a second execution is a no-op rather than a KeyError
# on columns that have already been removed.
train = train.drop(columns=['sig_id', 'cp_type', 'cp_time', 'cp_dose'], errors="ignore")

# Feature matrix = every column except the target; target = encoded dose/time.
train_X = train.drop(columns="cp_dose_time").values
train_y = train["cp_dose_time"].values

In [None]:
# Sanity check: show the shapes of the feature matrix and the target vector.
tuple(arr.shape for arr in (train_X, train_y))

In [None]:
# Stratified 5-fold cross-validation of a LightGBM multiclass classifier that
# predicts the encoded dose/time label (`train_y`) from the features (`train_X`).

# The hyper-parameters are the same for every fold, so build them once.
# `learning_rate` is not set and therefore stays at the library default.
params = {
    'boosting_type': 'gbdt',     # gradient-boosted decision trees
    'objective': 'multiclass',   # multi-class target
    'metric': 'multi_logloss',   # evaluation metric for the multi-class objective
    'max_depth': 10,
    'n_estimators': 1000,        # upper bound; early stopping may end training sooner
    'num_class': 6,              # number of distinct labels (0..5)
}

skf = StratifiedKFold(n_splits=5)
fold_accuracies = []

for fold, (train_idx, valid_idx) in enumerate(skf.split(train_X, y=train_y)):

    X_train, X_valid = train_X[train_idx], train_X[valid_idx]
    y_train, y_valid = train_y[train_idx], train_y[valid_idx]

    clf = lgb.LGBMClassifier(**params)
    # Log the validation metric every 50 rounds and stop once it has not
    # improved for 50 consecutive rounds.
    # NOTE(review): the `verbose` / `early_stopping_rounds` fit arguments were
    # removed in LightGBM 4.0; on newer versions pass
    # callbacks=[lgb.early_stopping(50), lgb.log_evaluation(50)] instead.
    clf.fit(X=X_train, y=y_train, eval_set=[(X_valid, y_valid)], verbose=50, early_stopping_rounds=50)

    y_pred = clf.predict(X_valid)

    fold_acc = accuracy_score(y_valid, y_pred)
    fold_accuracies.append(fold_acc)
    print(f"FOLD {fold} ACC : ", fold_acc)

# Summarise across folds. After the loop `clf` still refers to the model
# fitted on the last fold.
print("MEAN ACC : ", np.mean(fold_accuracies))

In [None]:
# Pair every feature name with the importance reported by `clf` (the classifier
# left over from the last cross-validation fold), ordered from most to least
# important.
columns = train.columns.drop("cp_dose_time").tolist()

imp_df = pd.DataFrame(
    {"columns": columns, "imp": clf.feature_importances_}
).sort_values(by="imp", ascending=False)

# Horizontal bar chart of the 25 highest-ranked features.
sns.barplot(data=imp_df.head(25), x="imp", y="columns")
plt.show()

In [None]:
# Plot the importance values against their row position in `imp_df` to show
# how they fall off across the features.
sorted_importances = imp_df["imp"].to_numpy()
sns.lineplot(x=range(sorted_importances.size), y=sorted_importances)
plt.show()