In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [None]:
# Directory that holds the three input CSVs; change it here to read the
# data from a different location.
DATA_DIR = '../input/tabular-playground-series-sep-2021'

train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')
sample_df = pd.read_csv(f'{DATA_DIR}/sample_solution.csv')

In [None]:
# Shuffle the training rows once. The fixed seed keeps the row order (and
# therefore everything computed downstream) identical across kernel restarts.
train_df = train_df.sample(frac=1, random_state=0)

In [None]:
# Use the 'id' column as the row index of both frames so that it is not
# treated as a feature further down.
train_df = train_df.set_index('id')
test_df = test_df.set_index('id')

In [None]:
# Impute missing values with per-column means learned from the training
# data only, and apply those same means to the test frame so both frames go
# through identical preprocessing. Entries of `train_means` whose label is
# not a column of `test_df` are simply ignored by fillna.
train_means = train_df.mean()
train_df = train_df.fillna(train_means)
test_df = test_df.fillna(train_means)

In [None]:
# Min-max scale the features to [0, 1] using statistics learned from the
# training data only, and apply the *same* statistics to the test frame.
# Scaling each frame with its own min/max would map identical raw values to
# different scaled values in train and test.
# The 'claim' target is excluded so that it keeps its original values/dtype.
feature_cols = train_df.columns.drop('claim')
min_ = train_df[feature_cols].min(axis=0)
max_ = train_df[feature_cols].max(axis=0)

# A constant column has zero range; divide by 1 instead so it becomes 0
# rather than NaN.
span = (max_ - min_).replace(0, 1)

train_df = ((train_df[feature_cols] - min_) / span).assign(claim=train_df['claim'])
test_df = (test_df[feature_cols] - min_) / span

In [None]:
# Separate the 'claim' target from the feature columns; the test frame has
# no target, so it is copied as-is.
X_train = train_df.drop(columns=['claim'])
y_train = train_df['claim']
X_test = test_df.copy()

In [None]:
# Three LightGBM variants that differ only in learning rate and number of
# boosting rounds.
lgbm1 = LGBMClassifier(n_estimators=100, learning_rate=0.05, reg_lambda=1)
lgbm2 = LGBMClassifier(n_estimators=1000, learning_rate=0.05, reg_lambda=1)
lgbm3 = LGBMClassifier(n_estimators=1000, learning_rate=0.01, reg_lambda=1)

# Candidate models to compare: the LightGBM variants followed by a set of
# scikit-learn classifiers.
model_list = [
    lgbm1,
    lgbm2,
    lgbm3,
    KNeighborsClassifier(n_neighbors=3),
    SVC(C=0.025, kernel="linear"),
    SVC(C=1, gamma=2),
    GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0)),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(n_estimators=10, max_depth=5, max_features=1),
    MLPClassifier(alpha=1, max_iter=1000),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis(),
]

In [None]:
# Restrict the comparison to the first two LightGBM models; this replaces
# the longer candidate list built in the previous cell.
model_list = [lgbm1, lgbm2]

In [None]:
from sklearn.model_selection import train_test_split

# Step 1: keep 80 % of the labelled rows for training, hold out 20 %.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X_train, y_train, test_size=0.2, random_state=0
)

# Step 2: split the hold-out part again (80 % validation, 20 % local test).
# Note that this rebinds X_test, which previously referred to the copy of
# test_df.
X_valid, X_test, y_valid, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.2, random_state=1
)

In [None]:
def model_calculator(my_model, X_train, y_train, X_valid, y_valid):
    """Fit a model and report its accuracy on a validation set.

    Parameters
    ----------
    my_model : object exposing ``fit(X, y)`` and ``predict(X)``
    X_train, y_train : data passed to ``my_model.fit``
    X_valid, y_valid : data the fitted model is scored on

    Returns
    -------
    The fraction of validation predictions equal to ``y_valid``. The value
    is also printed together with the model's string representation.
    """
    my_model.fit(X_train, y_train)

    # Element-wise comparison of predictions against the true labels.
    hits = my_model.predict(X_valid) == y_valid
    acc = hits.sum() / len(X_valid)

    print(f'The accuracy of {my_model} is: {acc}')
    return acc

In [None]:
# accs = [model_calculator(my_model, X_train, y_train, X_valid, y_valid) for my_model in model_list]

In [None]:
# from sklearn.metrics import roc_auc_score
# from sklearn.model_selection import cross_val_score

# scores = cross_val_score(lgbm2, X_train, y_train,
#                          cv=5,
#                          scoring='roc_auc')

# print("roc_auc_score:\n", scores)

In [None]:
# Train the second LightGBM model and measure its accuracy on the
# validation split.
lgbm2.fit(X_train, y_train)
predictions = lgbm2.predict(X_valid)

n_correct = (predictions == y_valid).sum()
acc = n_correct / len(X_valid)
print(f'The accuracy of {lgbm2} is: {acc}')

In [None]:
from sklearn.metrics import roc_auc_score

# ROC AUC measures how well continuous scores rank positives above
# negatives. Feeding it hard class labels collapses the curve to a single
# operating point, so score the positive-class probabilities instead
# (column 1 of predict_proba, i.e. the second entry of lgbm2.classes_).
valid_scores = lgbm2.predict_proba(X_valid)[:, 1]
roc_auc_score(y_valid, valid_scores)

In [None]:
# Accuracy of the fitted model on the local hold-out split (X_test / y_test
# as produced by train_test_split).
predictions = lgbm2.predict(X_test)
test_hits = predictions == y_test
acc = test_hits.sum() / len(X_test)

In [None]:
# Display the accuracy most recently stored in `acc`.
acc

In [None]:
# Shape of X_test, which at this point is the local hold-out split created
# by train_test_split.
X_test.shape

In [None]:
# Shape of the frame loaded from test.csv, for comparison with X_test.
test_df.shape

In [None]:
# X_test was rebound to a slice of the labelled data by the train_test_split
# cell; point it back at the frame loaded from test.csv before predicting.
X_test = test_df.copy()

In [None]:
# Predict the positive-class probability (column 1 of predict_proba) rather
# than a hard 0/1 label. NOTE(review): the notebook evaluates with ROC AUC,
# so the submission is presumably scored on ranking quality, where
# probabilities are strictly more informative than labels. Confirm against
# the competition's evaluation page.
predictions = lgbm2.predict_proba(X_test)[:, 1]

In [None]:
# Turn the 'id' index back into a regular column. Deriving the result from
# test_df (which X_test is a copy of) instead of resetting X_test in place
# makes this cell safe to re-run: a second in-place reset would add a
# spurious 'index' column.
X_test = test_df.reset_index()

In [None]:
# Assemble the submission frame: one row per test id with its prediction.
id_col = X_test['id'].to_numpy()
pred = pd.DataFrame({'id': id_col}).assign(claim=predictions)

In [None]:
# Write the submission without the row index; the header row is id,claim.
pred.to_csv(
    'file.csv',
    index=False,
    header=['id', 'claim'],
)