# 개정 3판. 피마 인디언의 당뇨병 예측 실행

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np
import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

!git clone https://github.com/taehojo/data.git   

df = pd.read_csv('./data/pima-indians-diabetes3.csv')

X = df.iloc[:, 0:8] # 세부 정보
Y = df.iloc[:, 8]   # 당뇨병 여부

model = Sequential()
model.add(Dense(12, input_dim=8, activation='relu', name='Dense_1'))
model.add(Dense(8, activation='relu', name='Dense_2'))
model.add(Dense(1, activation='sigmoid',name='Dense_3'))
# model.summary()

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
history=model.fit(X, Y, epochs=200, batch_size=10)

print(f"Accuracy: {model.evaluate(X, Y)[1]:4f}%")

# 다중 분류: 아이리스 품종 예측

In [None]:
from tensorflow.python import metrics
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

np.random.seed(3)
tf.random.set_seed(3)

# 깃허브에 준비된 데이터를 가져옵니다.
!git clone https://github.com/taehojo/data.git

# 아이리스 데이터를 불러옵니다.
df = pd.read_csv('./data/iris3.csv')

X = df.iloc[:, 0:4]
Y = df.iloc[:, 4]

# one-hot encoding
y = pd.get_dummies(Y)

model = Sequential()
model.add(Dense(12, input_dim=4, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(3, activation='softmax'))
model.summary()

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

history=model.fit(X, y, epochs=30, batch_size=5)

# 학습셋과 테스트셋

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split

import pandas as pd

!git clone https://github.com/taehojo/data.git

df = pd.read_csv("./data/sonar3.csv", header=None)

X = df.iloc[:, 0:60] # 음파 관련 속성
Y = df.iloc[:, 60]   # 광물의 종류

# 학습셋과 테스트셋 랜덤 분리
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, shuffle=True)

model = Sequential()
model.add(Dense(24, input_dim=60, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

history = model.fit(X_train, Y_train, epochs=200, batch_size=10)

# 모델을 테스트 셋에 적용해 정확도를 구한다.
score = model.evaluate(X_test, Y_test)
print("Test accuracy: ", score[1])

model.save("./data/model/my_model.hdf5")

from tensorflow.keras.models import Sequential, load_model
del model

model = load_model('./data/model/my_model.hdf5')

# 테스트 셋을 적용하여 정확도 산출
score = model.evaluate(X_test, Y_test)
print("Test accuracy: ", score[1])

# K겹 교차 검증

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold

import pandas as pd

!git clone https://github.com/taehojo/data.git

df = pd.read_csv('./data/sonar3.csv', header=None)

# 음파 관련 속성을 X로, 광물의 종류를 y로 저장합니다.
X = df.iloc[:,0:60]
Y = df.iloc[:,60]

n_fold = 10
skf = StratifiedKFold(n_splits=n_fold, shuffle=True)

acc_score = []

def model_fn():
    """Build a new, uncompiled binary classifier for 60 input features.

    Returns:
        A Sequential model with layers Dense(24, relu) -> Dense(10, relu)
        -> Dense(1, sigmoid).
    """
    return Sequential([
        Dense(24, input_dim=60, activation='relu'),
        Dense(10, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])

# Train and score a freshly built model on every fold.
for train_index, test_index in skf.split(X, Y):
    X_train = X.iloc[train_index, :]
    X_test = X.iloc[test_index, :]
    Y_train = Y.iloc[train_index]
    Y_test = Y.iloc[test_index]

    model = model_fn()
    model.compile(
        loss="binary_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )
    history = model.fit(
        X_train,
        Y_train,
        epochs=200,
        batch_size=10,
        verbose=0,
    )

    # evaluate() returns [loss, accuracy]; keep only the accuracy.
    accuracy = model.evaluate(X_test, Y_test)[1]
    acc_score.append(accuracy)

# Mean accuracy over the n_fold folds.
avg_acc_score = sum(acc_score) / n_fold

print('정확도:', acc_score)
print('정확도 평균:', avg_acc_score)

# 모델 업데이트하기

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint
import pandas as pd

# 깃허브에 준비된 데이터를 가져옵니다.
!git clone https://github.com/taehojo/data.git

# 와인 데이터를 불러옵니다.
df = pd.read_csv('./data/wine.csv', header=None)

X = df.iloc[:, 0:12]
Y = df.iloc[:, 12]

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, shuffle=True)

model = Sequential()
model.add(Dense(30, input_dim=12, activation="relu"))
model.add(Dense(12, activation="relu"))
model.add(Dense(8, activation="relu"))
model.add(Dense(1, activation="sigmoid"))
model.summary()

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
modelpath = "./data/model/{epoch:02d}-{val_loss:.4f}.hdf5"
checkpointer = ModelCheckpoint(
    filepath = modelpath,
    monitor = "val_loss",
    verbose=1,
    save_best_only=True
)

model.fit(X, Y, validation_split=0.2, epochs=200, batch_size=200, verbose=0, callbacks=[checkpointer])

# 학습 자동 중단

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import os
import pandas as pd

!git clone https://github.com/taehojo/data.git

df = pd.read_csv('./data/wine.csv', header=None)

X = df.iloc[:,0:12]
y = df.iloc[:,12]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)

model = Sequential()
model.add(Dense(30,  input_dim=12, activation='relu'))
model.add(Dense(12, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.summary()

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

early_stopping_callback = EarlyStopping(monitor="val_loss", patience=20) # 과적합 방지, 향상된 정보가 20개 이상 없으면 중지

modelpath="./data/model/Ch14-4-bestmodel.hdf5"

checkpointer = ModelCheckpoint(filepath=modelpath, monitor="val_loss", verbose=0, save_best_only=True) # 더 나은 결과에 대해서만 저장

history=model.fit(X_train, y_train, epochs=2000, batch_size=500, validation_split=0.25, verbose=1, callbacks=[early_stopping_callback,checkpointer])

score = model.evaluate(X_test, y_test)
print("Test accuracy = ", score[1])

