In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_files = (
    os.path.join(folder, file_name)
    for folder, _subdirs, files in os.walk('/kaggle/input')
    for file_name in files
)
for file_path in input_files:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from catboost import CatBoostClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.decomposition import PCA
from sklearn.metrics import roc_auc_score
from sklearn.pipeline import Pipeline
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.svm import LinearSVC 
from sklearn.model_selection import train_test_split

In [None]:
# Read train.csv and test.csv from the Kaggle input directory into DataFrames.
df, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/tabular-playground-series-nov-2021/train.csv",
        "/kaggle/input/tabular-playground-series-nov-2021/test.csv",
    )
)

In [None]:
# Separate the label column from the feature columns.
# The label is kept as integers: casting it to str made every classifier's
# predict() return strings and forced later cells to cast back with .astype(int).
y_train = df['target'].astype(int)
X_train = df.drop(columns='target')
# NOTE(review): if the table carries a row-identifier column (e.g. `id`), it is
# still part of X_train and will be used as a feature — confirm this is intended.
X_train.describe()

In [None]:
# Number of missing values in each feature column.
X_train.isnull().sum()

In [None]:
# Hold out 20% of the rows for validation.
# random_state pins the shuffle so the validation scores computed on this
# split are reproducible across kernel restarts.
X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(
    X_train,
    y_train.values,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# **LogisticRegression**

In [None]:
# Grid-search the regularisation strength C and the solver of a
# standardised logistic regression, scored by ROC AUC with 3-fold CV.
params = {
    'log__C': [0.01, 0.1, 1, 10, 100, 1000],
    'log__solver': ['lbfgs', 'liblinear'],
}
log_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('log', LogisticRegression(dual=False, max_iter=2000)),
])
gr_srch = GridSearchCV(log_clf, params, cv=3, scoring='roc_auc')
# Search on the training split only. Fitting the search on all rows would let
# the hold-out rows (X_test_1) influence the chosen hyper-parameters, so any
# score later computed on X_test_1 would be optimistically biased.
gr_srch.fit(X_train_1, y_train_1)

In [None]:
# Display the parameter combination that achieved the best cross-validated
# score in the `gr_srch` grid search.
gr_srch.best_params_

In [None]:
# Standardise the features, then fit a logistic regression with fixed
# hyper-parameters (C=10, liblinear solver).
tuned_log_reg = LogisticRegression(
    C=10,
    solver='liblinear',
    max_iter=2000,
    dual=False,
)
log_clf = Pipeline(steps=[('scaler', StandardScaler()), ('log', tuned_log_reg)])

In [None]:
# Fit on the training split and score the hold-out split.
log_clf.fit(X_train_1, y_train_1)
# ROC AUC is a ranking metric: give it the predicted probability of the
# positive class (column 1 of predict_proba, i.e. classes_[1]) rather than
# hard labels, which reduce the ROC curve to a single operating point.
roc_auc_score(y_test_1, log_clf.predict_proba(X_test_1)[:, 1])

In [None]:
# Refit the logistic-regression pipeline on all training rows and write a submission file.
submit = pd.read_csv('/kaggle/input/tabular-playground-series-nov-2021/sample_submission.csv')
log_clf.fit(X_train, y_train)
# Models in this notebook are scored by ROC AUC, so submit the positive-class
# probability (column 1 of predict_proba) instead of thresholded 0/1 labels.
submit['target'] = log_clf.predict_proba(test)[:, 1]
submit.to_csv('subm_log_clf_tunned.csv', index=False)

# **Linear SVC**

In [None]:
# Standardise the features, then fit a linear SVM (primal formulation, C=100).
linear_svc = LinearSVC(C=100, max_iter=2000, tol=1e-4, dual=False)
svm_clf = Pipeline(steps=[('scaler', StandardScaler()), ('svm', linear_svc)])

In [None]:
# Fit on the training split and score the hold-out split.
svm_clf.fit(X_train_1, y_train_1)
# LinearSVC has no predict_proba; its signed distance to the separating
# hyperplane (decision_function) is a valid ranking score for ROC AUC,
# whereas hard labels would reduce the ROC curve to a single operating point.
roc_auc_score(y_test_1, svm_clf.decision_function(X_test_1))

In [None]:
# Refit the linear SVM pipeline on all training rows and write a submission file.
svm_clf.fit(X_train, y_train)
# Submit a continuous score rather than hard labels (the models here are
# scored by ROC AUC). The sigmoid is monotonic, so it keeps the ranking of
# the decision scores while mapping them into the [0, 1] range.
svm_scores = svm_clf.decision_function(test)
submit['target'] = 1.0 / (1.0 + np.exp(-svm_scores))
submit.to_csv('subm_linear_svc_c100.csv', index=False)

# **Catboost with PCA**

In [None]:
# Standardise the features, project them onto 80 principal components,
# then fit a CatBoost classifier on the projected features.
pca_catboost = CatBoostClassifier(
    learning_rate=0.1,
    l2_leaf_reg=1,
    iterations=2000,
    depth=3,
    border_count=100,
    verbose=False,
)
clf_cat_pca = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA(80)),
    ('catboost', pca_catboost),
])

In [None]:
# Fit on the training split and score the hold-out split.
clf_cat_pca.fit(X_train_1, y_train_1)
# ROC AUC needs a ranking score: use the positive-class probability
# (column 1 of predict_proba) instead of hard labels from predict().
roc_auc_score(y_test_1, clf_cat_pca.predict_proba(X_test_1)[:, 1])

In [None]:
# Refit the PCA + CatBoost pipeline on all training rows and write a submission file.
clf_cat_pca.fit(X_train, y_train)
# Submit the positive-class probability rather than thresholded labels,
# since the models in this notebook are scored by ROC AUC.
submit['target'] = clf_cat_pca.predict_proba(test)[:, 1]
submit.to_csv('subm_cat_pca.csv', index=False)

# **Catboost without PCA**

In [None]:
# CatBoost classifier applied directly to the raw features (no scaling, no PCA).
cat_params = dict(
    depth=3,
    l2_leaf_reg=1,
    n_estimators=2000,
    border_count=256,
    bagging_temperature=0.75,
    random_strength=0.8,
    max_ctr_complexity=3,
    verbose=False,
)
cat = CatBoostClassifier(**cat_params)

In [None]:
# Fit on the training split and score the hold-out split.
cat.fit(X_train_1, y_train_1)
# ROC AUC needs a ranking score: use the positive-class probability
# (column 1 of predict_proba) instead of hard labels from predict().
roc_auc_score(y_test_1, cat.predict_proba(X_test_1)[:, 1])

In [None]:
# Refit CatBoost on all training rows and write a submission file.
cat.fit(X_train, y_train)
# Submit the positive-class probability rather than thresholded labels,
# since the models in this notebook are scored by ROC AUC.
submit['target'] = cat.predict_proba(test)[:, 1]
submit.to_csv('subm_cat_tun.csv', index=False)

# **Neural network (NN)**

In [None]:
# Standardise the features for the neural network and re-create the
# train/validation split on the scaled arrays.
# NOTE: X_train is rebound from the raw DataFrame to a scaled NumPy array here.
# NOTE(review): the scaler is fit on all training rows before the split, so the
# validation rows contribute to the scaling statistics.
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
X_test = ss.transform(test)
X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(
    X_train,
    y_train.values,
    test_size=0.2,
    shuffle=True,
    random_state=42,  # pin the shuffle so validation AUCs are reproducible
)

In [None]:
# Convert the split arrays to tensors: float features, integer (long) class labels.
X_train_1, X_test_1 = (
    torch.FloatTensor(features) for features in (X_train_1, X_test_1)
)
y_train_1, y_test_1 = (
    torch.LongTensor(labels.astype(int)) for labels in (y_train_1, y_test_1)
)

In [None]:
class MyNet(torch.nn.Module):
    """Fully connected two-class classifier: n_features -> 256 -> 64 -> 8 -> 2.

    Layers 1 and 2 use Tanh followed by dropout (p=0.7 and p=0.5); layer 3 uses
    ReLU with no dropout applied. ``forward`` returns raw logits (the form
    ``torch.nn.CrossEntropyLoss`` expects); ``inference`` returns softmax
    probabilities.

    Args:
        n_features: Width of the input layer. When omitted, it is taken from
            the notebook-level ``X_train_1`` tensor, so ``MyNet()`` keeps
            working exactly as before.
    """

    def __init__(self, n_features=None):
        super().__init__()
        if n_features is None:
            # Backward-compatible default: size the input layer from the
            # global training tensor, as the original no-argument constructor did.
            n_features = X_train_1.shape[1]
        self.fc1 = torch.nn.Linear(n_features, 256)
        self.activ1 = torch.nn.Tanh()
        self.drop1 = torch.nn.Dropout(p=0.7)
        self.fc2 = torch.nn.Linear(256, 64)
        self.activ2 = torch.nn.Tanh()
        self.drop2 = torch.nn.Dropout(p=0.5)
        self.fc3 = torch.nn.Linear(64, 8)
        self.activ3 = torch.nn.ReLU()
        # Kept as an attribute for compatibility; it is not applied in forward().
        self.drop3 = torch.nn.Dropout(p=0.3)
        self.fc5 = torch.nn.Linear(8, 2)
        # Explicit class axis: Softmax() without `dim` relies on a deprecated
        # implicit choice and emits a warning. dim=-1 handles both a batch
        # (N, 2) and a single sample (2,).
        self.sm = torch.nn.Softmax(dim=-1)

    def forward(self, x):
        """Return the raw class logits for ``x``; the last axis has size 2."""
        x = self.drop1(self.activ1(self.fc1(x)))
        x = self.drop2(self.activ2(self.fc2(x)))
        x = self.activ3(self.fc3(x))
        return self.fc5(x)

    def inference(self, x):
        """Return class probabilities: softmax of the logits over the last axis.

        Dropout is only disabled when the module is in eval mode, so call
        ``.eval()`` first for deterministic predictions.
        """
        return self.sm(self.forward(x))

# Seed the RNGs used by the network cells so runs are reproducible:
# torch drives weight initialisation and dropout, NumPy drives the
# per-epoch batch permutation (np.random.permutation) in the training loops.
torch.manual_seed(42)
np.random.seed(42)

my_net = MyNet()
# Cross-entropy on raw logits: MyNet.forward returns logits, not probabilities.
loss = torch.nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(my_net.parameters(), lr=1.0e-4)

In [None]:
# Train for 100 epochs on the training split and print the hold-out ROC AUC
# after every epoch.
batch_size = 2048

for epoch in range(100):
    # Training mode: dropout must be active during the optimisation steps.
    my_net.train()
    order = np.random.permutation(len(X_train_1))
    for start_index in range(0, len(X_train_1), batch_size):
        optimizer.zero_grad()

        batch_indexes = order[start_index:start_index + batch_size]
        x_batch = X_train_1[batch_indexes]
        y_batch = y_train_1[batch_indexes]

        preds = my_net(x_batch)
        loss_value = loss(preds, y_batch)
        loss_value.backward()

        optimizer.step()

    # Evaluation mode disables dropout (otherwise the validation predictions
    # are randomly perturbed); no_grad avoids building an autograd graph.
    my_net.eval()
    with torch.no_grad():
        # Positive-class probability: ROC AUC needs a ranking score, not the
        # argmax labels.
        test_preds = torch.softmax(my_net(X_test_1), dim=1)[:, 1]
    print(roc_auc_score(y_test_1.numpy(), test_preds.numpy()))

In [None]:
# Continue training the same network on all (scaled) training rows for 1000
# epochs, printing the epoch number and the training ROC AUC after each one.
batch_size = 2048

# as_tensor accepts both NumPy arrays and tensors, so this cell can be re-run
# after X_train / X_test have already been converted.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
X_test = torch.as_tensor(X_test, dtype=torch.float32)
if not torch.is_tensor(y_train):
    # Go through NumPy explicitly instead of handing a pandas Series to the
    # tensor constructor; astype(int) also covers string-encoded labels.
    y_train = torch.as_tensor(np.asarray(y_train).astype(int), dtype=torch.long)

for epoch in range(1000):
    # Training mode: dropout must be active during the optimisation steps.
    my_net.train()
    order = np.random.permutation(len(X_train))
    for start_index in range(0, len(X_train), batch_size):
        optimizer.zero_grad()

        batch_indexes = order[start_index:start_index + batch_size]
        x_batch = X_train[batch_indexes]
        y_batch = y_train[batch_indexes]

        preds = my_net(x_batch)
        loss_value = loss(preds, y_batch)
        loss_value.backward()

        optimizer.step()

    # Score in eval mode (dropout off), without autograd, and in chunks so the
    # whole training set is never pushed through the network in one pass.
    my_net.eval()
    with torch.no_grad():
        train_preds = torch.cat([
            torch.softmax(my_net(chunk), dim=1)[:, 1]
            for chunk in torch.split(X_train, batch_size)
        ])
    print(epoch + 1)
    # Positive-class probability: ROC AUC needs a ranking score, not argmax labels.
    print(roc_auc_score(y_train.numpy(), train_preds.numpy()))

In [None]:

# Predict on the scaled test features with the trained network.
# eval() disables dropout so predictions are deterministic; no_grad skips
# autograd bookkeeping.
my_net.eval()
with torch.no_grad():
    # as_tensor works whether X_test is still a NumPy array or already a tensor.
    nn_test_features = torch.as_tensor(X_test, dtype=torch.float32)
    # Positive-class probability instead of argmax labels (models here are
    # scored by ROC AUC).
    nn_test_probs = torch.softmax(my_net(nn_test_features), dim=1)[:, 1]
# Hand pandas a plain NumPy array rather than a torch tensor.
submit['target'] = nn_test_probs.numpy()
submit

In [None]:
# Write the network's submission table to disk without the index column.
submit.to_csv(path_or_buf='nn_3_layers.csv', index=False)