In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# 전처리를 위한 라이브러리
from sklearn.preprocessing import OrdinalEncoder , StandardScaler
from sklearn.model_selection import train_test_split

# 모델 제작을 위한 라이브러리
import tensorflow as tf
from tensorflow.keras import layers , regularizers, callbacks

In [8]:
# 데이터 입력
tr_dat = pd.read_csv('/content/drive/MyDrive/Kaggle/data/train.csv')
te_dat = pd.read_csv('/content/drive/MyDrive/Kaggle/data/test.csv')
testid = te_dat['id']

# 전처리1: Encoding
def cate_dat(df , encoder=OrdinalEncoder()):
    cols = df.select_dtypes(exclude=[np.number])
    for i in cols:
        df[i] = encoder.fit_transform(df[[i]])
    return df
n_traindat = cate_dat(tr_dat)
ntestdata = cate_dat(te_dat)

# 전처리2: 입력값 정규화
xtest = StandardScaler().fit_transform(ntestdata)
target_values = n_traindat['loan_status']
xtr_dat = StandardScaler().fit_transform(n_traindat.drop('loan_status' , axis=1))

# 전처리3: split
xtrain , xvalid , ytrain , yvalid = train_test_split(xtr_dat , target_values ,train_size=0.7 ,test_size=0.3 , random_state=99)

In [9]:
# 모델 생성: Regularization, Batchnormalization, EarlyStop
tf.random.set_seed(1234)

nn = tf.keras.Sequential([
    tf.keras.Input(shape=(12,)),                  ## Input layer with no, of features as units(neurons)
    layers.Dense(12  , activation='relu' , kernel_regularizer=regularizers.l1(0.01)),   ## 1 Hidden Layer
    layers.BatchNormalization(), ## Not a layer just to normalize the neural network
    layers.Dense(1 , activation='sigmoid')   ## Output layer
])
nn.summary()

nn.compile(optimizer=tf.keras.optimizers.Adam(), loss=tf.keras.losses.BinaryCrossentropy(from_logits=False) ,  metrics=['auc'])

earlystop1 = callbacks.EarlyStopping(monitor='val_loss', mode='auto')
earlystop2 = callbacks.EarlyStopping(monitor='val_auc' , mode='auto')

# 학습된 모델
train_nn = nn.fit(xtrain , ytrain , validation_data=(xvalid , yvalid) ,epochs=10 , callbacks=[earlystop1 , earlystop2])

Epoch 1/10
[1m1283/1283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - auc: 0.7587 - loss: 0.6977 - val_auc: 0.8993 - val_loss: 0.3064
Epoch 2/10
[1m1283/1283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - auc: 0.9038 - loss: 0.2716 - val_auc: 0.9055 - val_loss: 0.2642
Epoch 3/10
[1m1283/1283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - auc: 0.9095 - loss: 0.2430 - val_auc: 0.9078 - val_loss: 0.2525
Epoch 4/10
[1m1283/1283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - auc: 0.9108 - loss: 0.2351 - val_auc: 0.9090 - val_loss: 0.2486
Epoch 5/10
[1m1283/1283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - auc: 0.9115 - loss: 0.2326 - val_auc: 0.9090 - val_loss: 0.2467
Epoch 6/10
[1m1283/1283[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - auc: 0.9121 - loss: 0.2303 - val_auc: 0.9087 - val_loss: 0.2444


In [16]:
# Predictions of Neural Networks
# (n, 1)의 데이터 형식을 (1, n)으로 변경
def test_predictions(neural_net , testdf):
    neural_predicts = neural_net.predict(x=testdf)
    predi = (neural_predicts.reshape(neural_predicts.shape[1] , neural_predicts.shape[0])).flatten()
    print(predi.shape)
    return predi

predictnn1 = test_predictions(nn , xtest)

# 데이터 프레임으로 변환
def create_df(data=[] , col_names=[]):
    sub_df = pd.DataFrame(data).T
    sub_df.columns = col_names
    return sub_df


df_nn1 = create_df([testid , predictnn1] , ['id' , 'loan_status'])

[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
(39098,)
[1m1222/1222[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
(39098, 1)
[[0.9998184  0.03186182 0.8583336  ... 0.03987404 0.38305828 0.99923396]]
[0.9998184  0.03186182 0.8583336  ... 0.03987404 0.38305828 0.99923396]


In [11]:
df_nn1.to_csv('/content/drive/MyDrive/Kaggle/submission.csv' , index=False)