In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# List every file found under the Kaggle input directory, one full path per line.
input_paths = (
    os.path.join(folder, fname)
    for folder, _, fnames in os.walk('/kaggle/input')
    for fname in fnames
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from sklearn.preprocessing import PowerTransformer,MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier,StackingClassifier,ExtraTreesClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import roc_curve,auc
import statsmodels.api as sm
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Lift pandas' display truncation so every column and every row is rendered.
for display_option in ('max_columns', 'max_rows'):
    setattr(pd.options.display, display_option, None)

In [None]:
# Read the competition training set, then work on an independent copy (`df`)
# so that `data` still holds the frame exactly as it was loaded.
train_csv_path = '../input/tabular-playground-series-nov-2021/train.csv'
data = pd.read_csv(train_csv_path)
df = data.copy()
df.head()

In [None]:
# Dimensions of the training frame, shown as (rows, columns).
n_rows, n_cols = df.shape
(n_rows, n_cols)

In [None]:
# Count missing values per column and show only the columns that have any.
missing_per_column = df.isna().sum()
null = missing_per_column.to_frame('null_count')
has_missing = null['null_count'] > 0
null[has_missing]

In [None]:
# Separate the label from the predictors; `id` is dropped along with `target`
# so it is not used as a feature.
y = df['target']
X = df.drop(columns=['id', 'target'])

In [None]:
# Hold out 20% of the rows for evaluation; the seed is fixed so the split is repeatable.
split_parts = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)
x_train, x_test, y_train, y_test = split_parts

In [None]:
# Exploratory PCA: fit on the training split with the default PCA() settings
# (no n_components given), then display the share of variance explained by
# each component.
# NOTE: `pca` and `x_train_pca` are both reassigned by the later 2-component
# PCA cell, so the values produced here are only used for inspection.
pca=PCA()
x_train_pca=pca.fit_transform(x_train)
pca.explained_variance_ratio_

In [None]:
# Running total of explained variance: entry k is the fraction of variance
# captured by the first k+1 principal components.
variance_ratios = pca.explained_variance_ratio_
total_variance = np.cumsum(variance_ratios)
total_variance

In [None]:
# Project the features onto the first two principal components.
# The projection is fitted on the training split only and then applied to the
# hold-out split, so no information from the hold-out rows leaks into the fit.
# random_state is fixed because PCA may select a randomized SVD solver for a
# large input with few requested components; without a seed the projected
# values (and every score computed from them) can change between runs.
pca = PCA(n_components=2, random_state=0)
x_train_pca = pca.fit_transform(x_train)
# NOTE: the misspelled name `x_tesr_pca` is kept on purpose - later cells
# reference it under exactly this name.
x_tesr_pca = pca.transform(x_test)

In [None]:
# Candidate classifiers as (report label, pipeline step name, estimator).
# Hyper-parameters and seeds are exactly those of the original stanzas; the
# table replaces eight copy-pasted `models.append(...)` blocks.
candidate_estimators = [
    ("LR", "log_reg", LogisticRegression(n_jobs=-1)),
    ("RF", "random_forest", RandomForestClassifier(random_state=42, n_jobs=-1)),
    ("GBM", "gradient_boosting", GradientBoostingClassifier(random_state=42)),
    ("EXT", "EXT", ExtraTreesClassifier(random_state=42)),
    ("DTREE", "decision_tree", DecisionTreeClassifier(random_state=42)),
    ("XGB", "xgboost", XGBClassifier(random_state=42, eval_metric='logloss', n_jobs=-1)),
    ("LGBM", "lgbm", LGBMClassifier(random_state=42)),
    ("CAT", "cat", CatBoostClassifier(verbose=0)),
]

# `models` keeps its original shape: a list of (label, single-step Pipeline) tuples.
models = [
    (label, Pipeline(steps=[(step_name, estimator)]))
    for label, step_name, estimator in candidate_estimators
]

names = []    # model labels, in evaluation order
results = []  # hold-out AUC per model, aligned index-for-index with `names`

# Fit each model on the PCA-projected training split and score it by ROC AUC
# on the PCA-projected hold-out split.
for name, model in models:
    model.fit(x_train_pca, y_train)
    probas = model.predict_proba(x_tesr_pca)
    # Column 1 of predict_proba is the probability of the positive class.
    fpr, tpr, thresholds = roc_curve(y_test, probas[:, 1])
    roc_auc = auc(fpr, tpr)
    # Record the label of the current model (the original appended the `names`
    # list to itself) and keep its score, which was previously discarded.
    names.append(name)
    results.append(roc_auc)
    print("AUC of {}: {}".format(name, roc_auc))

In [None]:
# Read the competition test set (the rows to be scored for the submission).
test_csv_path = '../input/tabular-playground-series-nov-2021/test.csv'
test = pd.read_csv(test_csv_path)
test.head()

In [None]:
# Drop the identifier column, then project the test features with the
# already-fitted `pca` (transform only - nothing is refitted here).
test_final = test.drop(columns='id')
test_pca = pca.transform(test_final)

In [None]:
# Final model: gradient boosting fitted on the PCA-projected training split.
# The seed matches the one used for the same estimator in the model-comparison
# cell, so the score printed here is reproducible and consistent with it.
model = GradientBoostingClassifier(random_state=42)
model.fit(x_train_pca, y_train)

# Score on the hold-out split using the positive-class probability (column 1).
probas = model.predict_proba(x_tesr_pca)
fpr, tpr, thresholds = roc_curve(y_test, probas[:, 1])
roc_auc = auc(fpr, tpr)
print(roc_auc)

In [None]:
# Build the submission file.
# The model is evaluated throughout this notebook by ROC AUC, a ranking metric,
# so the positive-class probability (column 1 of predict_proba) is submitted.
# Hard 0/1 labels from predict() would discard the ranking information.
predictions = model.predict_proba(test_pca)[:, 1]
submission = pd.DataFrame({'id': test['id'], 'target': predictions})
submission.to_csv("submission.csv", index=False)