In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from matplotlib import pyplot as plt

from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder

from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from tensorflow.keras.utils import to_categorical
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory


# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read train.csv of the tabular-playground-series-feb-2022 dataset from the
# /kaggle/input directory into the working frame `df`.
df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/tabular-playground-series-feb-2022/train.csv'
)

In [None]:
# Display the (rows, columns) dimensions of the loaded frame.
df.shape

In [None]:
# Preview the first rows of the frame as loaded from the CSV.
df.head()

In [None]:
# List the columns whose dtype is 'category' (shown as a column -> dtype Series).
# pd.read_csv does not produce 'category' columns unless asked to, so this is
# expected to be empty until a column has been cast explicitly.
df.select_dtypes(include='category').dtypes

In [None]:
# Remove the row identifier (it is not a feature) and store the label as a
# categorical column.
# errors='ignore' keeps this cell re-runnable: on a second execution 'row_id'
# is already gone and a plain drop would raise KeyError.
# assign() builds a new frame rather than mutating the existing one in place.
df = (
    df
    .drop(columns='row_id', errors='ignore')
    .assign(target=lambda frame: frame['target'].astype('category'))
)

In [None]:
# Preview the frame again after 'row_id' was dropped and 'target' was cast.
df.head()

In [None]:
# Total number of missing values in the whole frame: the first sum() counts
# NaNs per column, the second adds those per-column counts together.
df.isna().sum().sum()

In [None]:
# Bar chart of the number of rows per target class.
# An explicit Axes is used so the figure can carry a title and axis labels and
# still be understood when the notebook is only skimmed.
fig, ax = plt.subplots()
df['target'].value_counts().plot(kind='bar', ax=ax)
# The trailing semicolon suppresses the textual repr of the return value.
ax.set(title='Target class distribution', xlabel='target', ylabel='Number of rows');

In [None]:
def get_data(df, scale = False):
    """Split a frame into a feature matrix and the 'target' column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding a 'target' column; every other column is treated as a
        feature. The frame itself is not modified.
    scale : bool, default False
        When true, the features are rescaled with a ``MinMaxScaler`` that is
        fitted on every row of ``df``.

    Returns
    -------
    X : pandas.DataFrame or array
        The feature columns as a DataFrame when ``scale`` is false, otherwise
        the array returned by the scaler.
    y : pandas.Series
        An independent copy of ``df['target']``.

    Raises
    ------
    KeyError
        If ``df`` has no 'target' column.
    """
    # drop() already returns a new frame, so the full df.copy() calls that
    # used to precede it only duplicated the whole data set in memory.
    X = df.drop(columns='target')
    y = df['target'].copy()

    if scale:
        # NOTE: the scaler sees every row passed in. If the result is later
        # split for validation, the held-out rows have already influenced the
        # min/max used for scaling.
        X = preprocessing.MinMaxScaler().fit_transform(X)

    return X, y


def get_data_nn(df, scale = False):
    """Prepare features and labels in the form the Keras network consumes.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding a 'target' column; every other column is treated as a
        feature. The frame itself is not modified.
    scale : bool, default False
        When true, the features are rescaled with a ``MinMaxScaler`` that is
        fitted on every row of ``df``.

    Returns
    -------
    X : array
        Feature matrix: ``X.values`` when ``scale`` is false, otherwise the
        array returned by the scaler.
    y : array
        Integer class codes produced by ``LabelEncoder`` from ``df['target']``.
    dummy_y : array
        One-hot encoding of ``y`` produced by ``to_categorical``.

    Raises
    ------
    KeyError
        If ``df`` has no 'target' column.
    """
    # drop() returns a new frame, so no extra df.copy() is needed.
    X = df.drop(columns='target')

    # Labels -> integer codes -> one-hot rows (the network is compiled with a
    # categorical cross-entropy loss, which expects one-hot targets).
    y = LabelEncoder().fit_transform(df['target'])

    # Use the to_categorical already imported from tensorflow.keras.utils
    # rather than reaching into the legacy keras.utils.np_utils module.
    dummy_y = to_categorical(y)

    if scale:
        # NOTE: the scaler sees every row passed in. If the result is later
        # split for validation, the held-out rows have already influenced the
        # min/max used for scaling.
        X = preprocessing.MinMaxScaler().fit_transform(X)
    else:
        X = X.values

    return X, y, dummy_y


In [None]:
# Baseline: cross-validated logistic regression on the unscaled features.
X, y = get_data(df)

model = LogisticRegression(solver='liblinear')

# No cv argument is given, so cross_val_score applies its default splitting.
result = cross_val_score(estimator=model, X=X, y=y)

# Mean of the per-fold scores.
print(f'Accuracy : {result.mean()}')

In [None]:

def create_nn(input_dim, output_dim):
    """Build and compile a fully connected softmax classifier.

    The network is Dense(256, relu) -> Dropout(0.2) -> Dense(512, relu) ->
    Dropout(0.2) -> Dense(output_dim, softmax), compiled with categorical
    cross-entropy, an Adam optimizer (learning rate 0.0001) and the
    'accuracy' metric.

    Parameters
    ----------
    input_dim : int
        Number of input features, passed as ``input_dim`` to the first layer.
    output_dim : int
        Number of units in the softmax output layer.

    Returns
    -------
    The compiled ``Sequential`` model; it has not been trained.
    """
    stack = [
        Dense(256, input_dim=input_dim, activation='relu'),
        Dropout(0.2),
        Dense(512, activation='relu'),
        Dropout(0.2),
        Dense(output_dim, activation='softmax'),
    ]
    network = Sequential(stack)

    network.compile(
        loss='categorical_crossentropy',
        optimizer=keras.optimizers.Adam(learning_rate=0.0001),
        metrics=['accuracy'],
    )
    return network

In [None]:
# Cross-validate the neural network on the unscaled features.
X, y, dummy_y = get_data_nn(df)

# Take both layer sizes from the data instead of hard-coding the class count:
# one input per feature column, one output per one-hot column.
input_dim = X.shape[1]
output_dim = dummy_y.shape[1]

kfold = StratifiedKFold()
result1 = []

# The split is stratified on the integer labels y; the one-hot rows dummy_y
# are only used as the training/evaluation targets.
for idx_train, idx_test in kfold.split(X, y):
    # A fresh, untrained network for every fold.
    model_nn = create_nn(input_dim, output_dim)
    history_model_nn = model_nn.fit(
        X[idx_train],
        dummy_y[idx_train],
        epochs=20,
        batch_size=256,
        verbose=0,
    )

    # The model is compiled with metrics=['accuracy'], so evaluate() yields
    # [loss, accuracy]; the accuracy is stored as a percentage.
    score = model_nn.evaluate(X[idx_test], dummy_y[idx_test], verbose=0)
    result1.append(score[1]*100)

# Mean fold accuracy, in percent (each entry was multiplied by 100 above).
print('Accuracy : {}'.format(np.array(result1).mean()))

In [None]:
# Cross-validated logistic regression on min-max scaled features.
# The features are fetched unscaled and the scaler is placed in a pipeline, so
# cross_val_score refits it on the training part of every fold. Scaling the
# whole data set up front would let the held-out rows influence the min/max
# used for training (data leakage).
X, y = get_data(df)

model2 = LogisticRegression(solver='liblinear')
scaled_model2 = make_pipeline(preprocessing.MinMaxScaler(), model2)

# No cv argument is given, so cross_val_score applies its default splitting.
result = cross_val_score(scaled_model2, X, y)

# Mean of the per-fold scores.
print('Accuracy : {}'.format(result.mean()))

In [None]:
# Cross-validate the neural network on min-max scaled features.
# The features are fetched unscaled; the scaler is fitted inside the loop on
# the training rows of each fold only, so the held-out rows never influence
# the min/max used for training (no data leakage).
X, y, dummy_y = get_data_nn(df)

# Take both layer sizes from the data instead of hard-coding the class count:
# one input per feature column, one output per one-hot column.
input_dim = X.shape[1]
output_dim = dummy_y.shape[1]

kfold = StratifiedKFold()
result2 = []

# The split is stratified on the integer labels y; the one-hot rows dummy_y
# are only used as the training/evaluation targets.
for idx_train, idx_test in kfold.split(X, y):
    # Fit on the training rows, then apply the same transform to both parts.
    fold_scaler = preprocessing.MinMaxScaler().fit(X[idx_train])
    X_train = fold_scaler.transform(X[idx_train])
    X_test = fold_scaler.transform(X[idx_test])

    # A fresh, untrained network for every fold.
    model2_nn = create_nn(input_dim, output_dim)
    history_model2_nn = model2_nn.fit(
        X_train,
        dummy_y[idx_train],
        epochs=20,
        batch_size=256,
        verbose=0,
    )

    # The model is compiled with metrics=['accuracy'], so evaluate() yields
    # [loss, accuracy]; the accuracy is stored as a percentage.
    score = model2_nn.evaluate(X_test, dummy_y[idx_test], verbose=0)
    result2.append(score[1]*100)

# Mean fold accuracy, in percent (each entry was multiplied by 100 above).
print('Accuracy : {}'.format(np.array(result2).mean()))