In [1]:
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

import numpy as np
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import pandas as pd

# 데이터 전처리

In [2]:
np.random.seed(3)
tf.random.set_seed(3)

df = pd.read_csv('creditcard.csv')
amount = df['Amount']

print(f'{amount} \n\n {type(amount)} \n\n {amount.shape}')


0         149.62
1           2.69
2         378.66
3         123.50
4          69.99
           ...  
284802      0.77
284803     24.79
284804     67.88
284805     10.00
284806    217.00
Name: Amount, Length: 284807, dtype: float64 

 <class 'pandas.core.series.Series'> 

 (284807,)


In [3]:
amount_val = amount.values # 시리즈의 value값만 가져옴
amount = amount_val.reshape(-1,1) #amount.shape가 [1]이 없기때문에 reshape하여서 빈곳채움

print(f'{amount} \n\n {type(amount)} \n\n {amount.shape}')

[[149.62]
 [  2.69]
 [378.66]
 ...
 [ 67.88]
 [ 10.  ]
 [217.  ]] 

 <class 'numpy.ndarray'> 

 (284807, 1)


In [4]:
scaler = StandardScaler() #가져온거임

scaler.fit(amount)
amount_scaled = scaler.transform(amount)
amount_scaled


array([[ 0.24496426],
       [-0.34247454],
       [ 1.16068593],
       ...,
       [-0.0818393 ],
       [-0.31324853],
       [ 0.51435531]])

In [5]:
df_amount_scaled = pd.DataFrame(amount_scaled) #Array => Dataframe
display(df_amount_scaled) #이래만들면 컬럼명 0으로 나옴

df_amount_scaled.columns = ['normalAmount']
display(df_amount_scaled) #컬럼명 0을 ''로 설정

df_amount_scaled.rename(columns = {'0' : 'normalAmount'}, inplace = True)
display(df_amount_scaled) #inplace하여 완벽하게 FIX

Unnamed: 0,0
0,0.244964
1,-0.342475
2,1.160686
3,0.140534
4,-0.073403
...,...
284802,-0.350151
284803,-0.254117
284804,-0.081839
284805,-0.313249


Unnamed: 0,normalAmount
0,0.244964
1,-0.342475
2,1.160686
3,0.140534
4,-0.073403
...,...
284802,-0.350151
284803,-0.254117
284804,-0.081839
284805,-0.313249


Unnamed: 0,normalAmount
0,0.244964
1,-0.342475
2,1.160686
3,0.140534
4,-0.073403
...,...
284802,-0.350151
284803,-0.254117
284804,-0.081839
284805,-0.313249


In [6]:
#데이터셋 확인
df

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,0
284803,172787.0,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,0
284804,172788.0,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,0
284805,172788.0,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00,0


In [7]:
df.shape

(284807, 31)

In [8]:
df_y = df['Class']


In [9]:
df_x = df.drop('Class',axis = 1).copy()
df_x

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,0.251412,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62
1,0.0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.069083,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69
2,1.0,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.524980,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.208038,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,0.408542,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,1.475829,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77
284803,172787.0,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,...,0.059616,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79
284804,172788.0,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,...,0.001396,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88
284805,172788.0,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,...,0.127434,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00


In [10]:
df_x = pd.concat([df_x, df_amount_scaled],axis=1)
df_x

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,normalAmount
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0.244964
1,0.0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,-0.342475
2,1.0,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,1.160686
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0.140534
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,-0.073403
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,0.77,-0.350151
284803,172787.0,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,24.79,-0.254117
284804,172788.0,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,67.88,-0.081839
284805,172788.0,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,10.00,-0.313249


In [11]:
df = pd.concat([df_x, df_y],axis=1)
df = df.drop('Amount',axis = 1)
df

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,normalAmount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,0.244964,0
1,0.0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,-0.342475,0
2,1.0,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,1.160686,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,0.140534,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,-0.073403,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284802,172786.0,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,0.213454,0.111864,1.014480,-0.509348,1.436807,0.250034,0.943651,0.823731,-0.350151,0
284803,172787.0,-0.732789,-0.055080,2.035030,-0.738589,0.868229,1.058415,0.024330,0.294869,0.584800,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,-0.254117,0
284804,172788.0,1.919565,-0.301254,-3.249640,-0.557828,2.630515,3.031260,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,-0.081839,0
284805,172788.0,-0.240440,0.530483,0.702510,0.689799,-0.377961,0.623708,-0.686180,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,-0.313249,0


In [12]:
df.to_csv('credit_test.csv')
df = pd.read_csv('credit_test.csv', index_col=0)

In [13]:
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,normalAmount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,0.244964,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,-0.342475,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,1.160686,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,0.140534,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,-0.073403,0


-----

# Train set : Test set = 85 : 15 (4번)

In [21]:
X =df.values[:,0:30] #독립변수
Y =df.values[:,30] #종속변수 , 30만
X_train, X_test, Y_train, Y_test= train_test_split(X,Y, test_size = 0.15) #85 : 15니까
print(X_train.shape) # 85프로
print(X_test.shape) #15프로

(242085, 30)
(42722, 30)


# DNN 학습 및 저장 / 정확도, Loss 그래프

In [22]:
#모델 설정
model = Sequential()
model.add(Dense(30, activation='relu', input_dim=X.shape[1]))
# model.add(Dense(64, activation='relu'))
# model.add(Dense(64, activation='relu'))
# model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

#모델 컴파일
model.compile(loss='binary_crossentropy',
             optimizer='adam',
             metrics= ['accuracy'])

# #모델 저장 폴더 지정
MODEL_DIR='./credit/'
if not os.path.exists(MODEL_DIR):
    os.mkdir(MODEL_DIR)

# #모델 저장 방법
modelpath = './credit/{epoch:02d}-{val_loss:.4f}.hdf5'
checkpointer =  ModelCheckpoint(filepath=modelpath, monitor='val_loss', verbose=1, save_best_only=True)
    
# 학습 조기 종료 , callbacks=[checkpointer, early_stopping_callback]
early_stopping_callback= EarlyStopping(monitor='val_loss', patience=100)


#모델 학습
history = model.fit(X_train, Y_train ,validation_split=0.33, epochs=1000, batch_size=1000,
                    callbacks=[checkpointer, early_stopping_callback])
model.summary()

Epoch 1/1000

Epoch 00001: val_loss improved from inf to 2.98628, saving model to ./credit\01-2.9863.hdf5
Epoch 2/1000

Epoch 00002: val_loss improved from 2.98628 to 1.54213, saving model to ./credit\02-1.5421.hdf5
Epoch 3/1000

Epoch 00003: val_loss improved from 1.54213 to 0.03655, saving model to ./credit\03-0.0366.hdf5
Epoch 4/1000

Epoch 00004: val_loss improved from 0.03655 to 0.02471, saving model to ./credit\04-0.0247.hdf5
Epoch 5/1000

Epoch 00005: val_loss improved from 0.02471 to 0.02129, saving model to ./credit\05-0.0213.hdf5
Epoch 6/1000

Epoch 00006: val_loss improved from 0.02129 to 0.01614, saving model to ./credit\06-0.0161.hdf5
Epoch 7/1000

Epoch 00007: val_loss did not improve from 0.01614
Epoch 8/1000

Epoch 00008: val_loss did not improve from 0.01614
Epoch 9/1000

Epoch 00009: val_loss did not improve from 0.01614
Epoch 10/1000

Epoch 00010: val_loss did not improve from 0.01614
Epoch 11/1000

Epoch 00011: val_loss did not improve from 0.01614
Epoch 12/1000

Ep


Epoch 00040: val_loss did not improve from 0.01614
Epoch 41/1000

Epoch 00041: val_loss did not improve from 0.01614
Epoch 42/1000

Epoch 00042: val_loss did not improve from 0.01614
Epoch 43/1000

Epoch 00043: val_loss did not improve from 0.01614
Epoch 44/1000

Epoch 00044: val_loss did not improve from 0.01614
Epoch 45/1000

Epoch 00045: val_loss did not improve from 0.01614
Epoch 46/1000

Epoch 00046: val_loss did not improve from 0.01614
Epoch 47/1000

Epoch 00047: val_loss did not improve from 0.01614
Epoch 48/1000

Epoch 00048: val_loss did not improve from 0.01614
Epoch 49/1000

Epoch 00049: val_loss did not improve from 0.01614
Epoch 50/1000

Epoch 00050: val_loss did not improve from 0.01614
Epoch 51/1000

Epoch 00051: val_loss did not improve from 0.01614
Epoch 52/1000

Epoch 00052: val_loss did not improve from 0.01614
Epoch 53/1000

Epoch 00053: val_loss did not improve from 0.01614
Epoch 54/1000

Epoch 00054: val_loss did not improve from 0.01614
Epoch 55/1000

Epoch 000


Epoch 00082: val_loss did not improve from 0.01614
Epoch 83/1000

Epoch 00083: val_loss did not improve from 0.01614
Epoch 84/1000

Epoch 00084: val_loss did not improve from 0.01614
Epoch 85/1000

Epoch 00085: val_loss did not improve from 0.01614
Epoch 86/1000

Epoch 00086: val_loss did not improve from 0.01614
Epoch 87/1000

Epoch 00087: val_loss did not improve from 0.01614
Epoch 88/1000

Epoch 00088: val_loss did not improve from 0.01614
Epoch 89/1000

Epoch 00089: val_loss did not improve from 0.01614
Epoch 90/1000

Epoch 00090: val_loss did not improve from 0.01614
Epoch 91/1000

Epoch 00091: val_loss did not improve from 0.01614
Epoch 92/1000

Epoch 00092: val_loss did not improve from 0.01614
Epoch 93/1000

Epoch 00093: val_loss did not improve from 0.01614
Epoch 94/1000

Epoch 00094: val_loss did not improve from 0.01614
Epoch 95/1000

Epoch 00095: val_loss did not improve from 0.01614
Epoch 96/1000

Epoch 00096: val_loss did not improve from 0.01614
Epoch 97/1000

Epoch 000

In [None]:
# 모델 반복횟수에 따른 정확도 및 loss를 그래프로 확인
y_vloss = history.history['val_loss']
y_acc = history.history['accuracy']

x_len = np.arange(len(y_acc))


plt.plot(x_len, y_vloss, 'o', c='red',markersize=3)
plt.plot(x_len, y_acc, 'o', c='blue',markersize=3)
plt.show()

In [25]:
#학습기 불러오기
from keras.models import load_model
model= load_model('./credit/06-0.0161.hdf5')


#평가
print(model.evaluate(X_test,Y_test))

#예측
prediction = model.predict(X_test)
# print(prediction)
부정자용자 =int(input('가져올 데이터 번호'))
부정사용 = prediction[부정자용자]
p=부정사용[0]*100
print(f'신용카드를 부정사용 확률은 {p:.4f}%입니다.')

[0.020913423970341682, 0.9983848929405212]
가져올 데이터 번호14
신용카드를 부정사용 확률은 0.0006%입니다.


-----

# 0 : 492개, 1: 492개

In [None]:
df['Class'].value_counts()
# 출력층 몇개 있는지 확인하려고 클래스값 확인

In [None]:
df_class_0 = df[df['Class'] == 0].sample(492)
df_class_0 

In [None]:
df_class_0['Class'].value_counts()

In [None]:
df_class_1 = df[df['Class'] == 1]
df_class_1

In [None]:
df_class_1['Class'].value_counts()

In [None]:
df_sample = pd.concat([df_class_0,df_class_1])
df_sample

In [None]:
X =df_sample.values[:,0:30] #독립변수
Y =df_sample.values[:,30] #종속변수 , 31만
X_train, X_test, Y_train, Y_test= train_test_split(X,Y, test_size = 0.3) #85 : 15니까
print(X_train.shape) # 85프로
print(X_test.shape) #15프로

In [None]:
#모델 설정
model_1 = Sequential()
model_1.add(Dense(30, activation='relu', input_dim=X.shape[1]))
model_1.add(Dense(12, activation='relu'))
model_1.add(Dense(8, activation='relu'))
model_1.add(Dense(1, activation='sigmoid'))

#모델 컴파일
model_1.compile(loss='binary_crossentropy',
             optimizer='adam',
             metrics= ['accuracy'])

# #모델 저장 폴더 지정
# MODEL_DIR='./credit/'
# if not os.path.exists(MODEL_DIR):
#     os.mkdir(MODEL_DIR)

# #모델 저장 방법
# modelpath = './credit/{epoch:02d}-{val_loss:.4f}.hdf5'
# checkpointer =  ModelCheckpoint(filepath=modelpath, monitor='val_loss', verbose=1, save_best_only=True)
    
# 학습 조기 종료 , callbacks=[checkpointer, early_stopping_callback]
# early_stopping_callback= EarlyStopping(monitor='val_loss', patience=10)


#모델 학습
history = model_1.fit(X_train, Y_train , epochs=1000, batch_size=100)
model.summary()