# Importing packages

In [None]:
import pandas as pd
import numpy as np
import missingno
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from imblearn.over_sampling import KMeansSMOTE
from sklearn.calibration import CalibratedClassifierCV
import warnings

# Importing packages for SMOTE
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import BorderlineSMOTE

# Seed NumPy's global random generator so code drawing from it repeats across runs.
np.random.seed(32)

# Suppress all warnings from here on.
# NOTE(review): this also hides deprecation/future warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

# Importing Data

In [None]:
# Load the competition training set into a DataFrame.
TRAIN_CSV = "../input/tabular-playground-series-may-2021/train.csv"
train = pd.read_csv(TRAIN_CSV)

In [None]:
# Remove the "id" column from the training frame in place.
train.drop(columns=["id"], inplace=True)

# Display the first rows of what is left.
train.head()

In [None]:
# Map the string class labels to the integer codes 0-3.
dic={"Class_1":0,"Class_2":1,"Class_3":2,"Class_4":3}
# Assign the result back instead of calling replace(..., inplace=True) on the
# column selection: the in-place form operates on an intermediate object, which
# pandas warns about and which leaves `train` unchanged under copy-on-write.
# The int cast makes the dtype explicit and fails loudly on an unmapped label.
train['target']=train['target'].replace(dic).astype(int)

In [None]:
# Show the first rows again to check the target column after the label encoding above.
train.head()

# Checking For Multicollinearity

# Doing train test split

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Separate the feature matrix from the label vector.
X = train.drop(columns='target').values
y = train['target'].values

# Disabled oversampling step (KMeansSMOTE), kept for reference:
# undersample = KMeansSMOTE()
# X, y = undersample.fit_resample(X, y)

# Rescale every feature to [0, 1]; `mm` stays fitted on the full matrix.
# NOTE(review): the scaler is fitted before the split, so its min/max also
# reflect the rows that end up in X_test.
mm = MinMaxScaler()
X = mm.fit(X).transform(X)

# Stratified 90/10 split of the scaled data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    stratify=y,
    random_state=42,
)

# BernoulliNB

In [None]:
# Bernoulli naive Bayes model
nb = BernoulliNB(alpha=0.2)
nb.fit(X, y)

# Sigmoid calibration wrapped around the already-fitted estimator.
# NOTE(review): `nb` was fitted on X, which contains the X_test rows, and it is
# calibrated on X_train, which it was also trained on - the report below is
# therefore not a clean hold-out score.
nb_clf = CalibratedClassifierCV(nb, cv="prefit", method="sigmoid").fit(X_train, y_train)

nb_predict = nb_clf.predict_proba(X_test)
nb_labels = nb_clf.predict(X_test)
print(classification_report(y_test, nb_labels))

# Using MLP Classifier

In [None]:
# Multi-layer perceptron with library-default settings.
mlp = MLPClassifier()
mlp.fit(X, y)

# Sigmoid calibration wrapped around the already-fitted estimator.
# NOTE(review): `mlp` was fitted on X, which contains the X_test rows, and it
# is calibrated on X_train, which it was also trained on - the report below is
# therefore not a clean hold-out score.
mlp_clf = CalibratedClassifierCV(mlp, cv="prefit", method="sigmoid").fit(X_train, y_train)

mlp_predict = mlp_clf.predict_proba(X_test)
mlp_labels = mlp_clf.predict(X_test)
print(classification_report(y_test, mlp_labels))

# Using LGBMClassifier

In [None]:
from sklearn.metrics import accuracy_score
import optuna

In [None]:
# Hand-picked LightGBM settings kept outside the Optuna search space.
# n_estimators is a large cap meant to be cut short by early stopping.
# NOTE(review): nothing in the visible code passes BEST_PARAMS to a model, and
# 'auc' looks like a binary-classification metric - confirm it suits the
# 4-class target before wiring this in.
BEST_PARAMS = dict(
    n_estimators=10000,
    learning_rate=0.05,
    metric='auc',
)
# Objective function
def objective(trial):
    """Score one Optuna trial of LGBMClassifier hyperparameters.

    Args:
        trial: Object providing ``suggest_float`` and ``suggest_int``; each
            hyperparameter is drawn from it within the bounds listed below.

    Returns:
        Accuracy of the fitted model's predictions on ``X_test``.
    """
    # (name, sampler, low, high), listed in the order the values are drawn.
    search_space = (
        ('reg_alpha', trial.suggest_float, 0.001, 10.0),
        ('reg_lambda', trial.suggest_float, 0.001, 10.0),
        ('num_leaves', trial.suggest_int, 5, 1000),
        ('min_child_samples', trial.suggest_int, 5, 100),
        ('max_depth', trial.suggest_int, 5, 64),
        ('colsample_bytree', trial.suggest_float, 0.1, 0.5),
        ('cat_smooth', trial.suggest_int, 10, 100),
        ('cat_l2', trial.suggest_int, 1, 20),
        ('min_data_per_group', trial.suggest_int, 50, 200),
    )
    sampled = {
        name: suggest(name, low, high)
        for name, suggest, low, high in search_space
    }

    # Train on the training split and score on the held-out split.
    candidate = LGBMClassifier(**sampled)
    candidate.fit(X_train, y_train)
    predicted = candidate.predict(X_test)
    return accuracy_score(y_test, predicted)


In [None]:
# Optimization
# study = optuna.create_study(direction='maximize')
# study.optimize(objective, timeout=600)

In [None]:
# Fixed LightGBM hyperparameters used for the final model.
params = {
    'reg_alpha': 4.434234469181169,
    'reg_lambda': 2.2973211767274404,
    'num_leaves': 192,
    'min_child_samples': 13,
    'max_depth': 38,
    'colsample_bytree': 0.2902544137376231,
    'cat_smooth': 87,
    'cat_l2': 17,
    'min_data_per_group': 150,
}
lgbc = LGBMClassifier(**params)
lgbc.fit(X, y)

# Sigmoid calibration wrapped around the already-fitted estimator.
# NOTE(review): `lgbc` was fitted on X, which contains the X_test rows, and it
# is calibrated on X_train, which it was also trained on - the report below is
# therefore not a clean hold-out score.
lgbc_clf = CalibratedClassifierCV(lgbc, cv="prefit", method="sigmoid").fit(X_train, y_train)

predict_lgbc = lgbc_clf.predict_proba(X_test)
lgbc_labels = lgbc_clf.predict(X_test)
print(classification_report(y_test, lgbc_labels))

# Using Catboost Classifier

In [None]:
# CatBoost classifier limited to 400 boosting iterations (used uncalibrated).
cbc = CatBoostClassifier(iterations=400)
cbc.fit(X, y)
# NOTE(review): X contains the X_test rows, so the report below is not a clean
# hold-out score.
cbc_predict = cbc.predict_proba(X_test)
cbc_labels = cbc.predict(X_test)
print(classification_report(y_test, cbc_labels))

# KNN model

In [None]:
# k-nearest-neighbours model (k=40) with sigmoid calibration on top of the
# already-fitted estimator.
# NOTE(review): `kn` is fitted on X, which contains the X_test rows, so the
# report below is not a clean hold-out score.
kn=KNeighborsClassifier(n_neighbors=40)
kn.fit(X,y)
kn_clf = CalibratedClassifierCV(kn, method="sigmoid", cv="prefit")
kn_clf.fit(X_train,y_train)
prediction=kn_clf.predict(X_test)
# classification_report takes (y_true, y_pred); passing them the other way
# round swaps precision with recall and reports support for the predictions.
print(classification_report(y_test,prediction))

# Bagging Classifier

In [None]:
# Bagging ensemble with library-default settings, with sigmoid calibration on
# top of the already-fitted estimator.
# NOTE(review): `bc` is fitted on X, which contains the X_test rows, so the
# report below is not a clean hold-out score.
bc=BaggingClassifier()
bc.fit(X,y)
bc_clf = CalibratedClassifierCV(bc, method="sigmoid", cv="prefit")
bc_clf.fit(X_train,y_train)
prediction=bc_clf.predict(X_test)
# classification_report takes (y_true, y_pred); passing them the other way
# round swaps precision with recall and reports support for the predictions.
print(classification_report(y_test,prediction))

# Making the submission file

In [None]:
# Load the competition test set and preview its first rows.
TEST_CSV = "../input/tabular-playground-series-may-2021/test.csv"
test = pd.read_csv(TEST_CSV)
test.head()

In [None]:
ids=test['id'].values
# Every model used below was trained on features rescaled by `mm`, so the
# competition test features must pass through the same fitted scaler before
# prediction; raw values are on a different scale than the models were fitted on.
X_rtest=mm.transform(test.drop('id',axis=1).values)

# Per-model class probabilities, each clipped to [0.08, 0.95] so that no single
# model contributes an extreme probability to the blend.
pred1=np.clip(nb_clf.predict_proba(X_rtest),0.08,0.95)
pred2=np.clip(mlp_clf.predict_proba(X_rtest),0.08,0.95)
pred3=np.clip(lgbc_clf.predict_proba(X_rtest),0.08,0.95)
pred4=np.clip(cbc.predict_proba(X_rtest),0.08,0.95)
pred5=np.clip(kn_clf.predict_proba(X_rtest),0.08,0.95)
pred6=np.clip(bc_clf.predict_proba(X_rtest),0.08,0.95)

# Weighted average of the six models (the weights add up to 1). Because of the
# clipping, a blended row is not guaranteed to sum to exactly 1.
total=(pred1*0.1+pred2*0.25+pred3*0.2+pred4*0.25+pred5*0.1+pred6*0.1)

In [None]:
# Load the sample submission file into a DataFrame.
SAMPLE_SUBMISSION_CSV = "../input/tabular-playground-series-may-2021/sample_submission.csv"
sample = pd.read_csv(SAMPLE_SUBMISSION_CSV)

In [None]:
# Copy each column of the blended probability matrix into its class column.
for col_idx, class_col in enumerate(('Class_1', 'Class_2', 'Class_3', 'Class_4')):
    sample[class_col] = total[:, col_idx]

In [None]:
# Write the submission frame to "Sample.csv" without the index column.
sample.to_csv("Sample.csv",index=False)

In [None]:
# Preview the first rows of the submission frame.
sample.head()