In [1]:
# %autosave 0
import warnings
warnings.filterwarnings(action='ignore')

import numpy as np
import pandas as pd

from tensorflow.keras.utils import to_categorical  # one-hot 엔코딩
from sklearn.preprocessing import LabelEncoder     # 명목형 척도 정수화 
from tensorflow.keras.models import load_model     # model 사용

In [2]:
model = load_model('./Iris.h5')

In [3]:
df = pd.read_csv('./iris_use.csv', names = ["sepal_length", "sepal_width", "petal_length", "petal_width", "species"])
print(df.head())

   sepal_length  sepal_width  petal_length  petal_width      species
0           5.0          3.5           1.3          0.3  Iris-setosa
1           4.5          2.3           1.3          0.3  Iris-setosa
2           4.4          3.2           1.3          0.2  Iris-setosa
3           5.0          3.5           1.6          0.6  Iris-setosa
4           5.1          3.8           1.9          0.4  Iris-setosa


In [4]:
data = df.values
print(type(data))
print(len(data))
X = data[:, 0:4].astype(float)  # 독립 변수, 정의역
Y_str = data[:, 4]              # 종속 변수, 치역 
print(Y_str)

<class 'numpy.ndarray'>
30
['Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa' 'Iris-setosa'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor' 'Iris-versicolor'
 'Iris-versicolor' 'Iris-versicolor' 'Iris-virginica' 'Iris-virginica'
 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica'
 'Iris-virginica' 'Iris-virginica' 'Iris-virginica' 'Iris-virginica']


In [5]:
# 문자 레이블을 숫자화, 문자열을 알파벳순으로 그룹화한후 0부터 숫자로 변환
encoder = LabelEncoder()
Y = encoder.fit_transform(Y_str)
print(Y)

Y_encoded = to_categorical(Y) # one-hot-encoding, 해당 index만 1로 변경
print(Y_encoded[0])
print(Y_encoded[10])
print(Y_encoded[20])

# LabelEncoder() 엔코더 사용시 모든 그룹의 데이터를 통합하여 진행 권장
# CSV 훈련: A, B, C  CSV 테스트: A, B, D
#           0  1  2              0  1  ?  
# CSV A, B, C, D 모두 통합후 -> 엔코더 적용 -> 데이터 훈련, 테스트로 분할
#     1  2  3  4

[0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2]
[1. 0. 0.]
[0. 1. 0.]
[0. 0. 1.]


In [6]:
print(encoder.inverse_transform([0, 1, 2])) # Label 확인

['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


In [8]:
# Iris-setosa: 0, Iris-versicolor: 1, Iris-virginica: 2
p = model.predict(X)  # 테스트 데이터 30건
print('p.shape:', p.shape)
print('데이터:', X[0]) # 첫번째 데이터행

p.shape: (30, 3)
데이터: [5.  3.5 1.3 0.3]


In [9]:
# 확률 0 ~ 1사이의 실수값
print('예측값:', p[0]) 
print('예측값: {0:.2f}% {1:.2f}% {2:.2f}%'.format((p[0][0]*100), (p[0][1]*100), (p[0][2]*100))) 
print('LabelEncoder: ', Y[0])
print('One-hot-encoding: ', Y_encoded[0])
print(np.argmax(p[0]))  # 가장 큰값의 index
print(Y_str[0])         # 첫번째 품종

예측값: [0.9504712  0.00506847 0.04446027]
예측값: 95.05% 0.51% 4.45%
LabelEncoder:  0
One-hot-encoding:  [1. 0. 0.]
0
Iris-setosa


In [10]:
for item in p: # 2차원 배열에서 1행씩 산출
    print(item)
    print('최대값:', np.max(item), '-> index:', np.argmax(item))
    # 최대값 -> 최대값이 있는 index

[0.9504712  0.00506847 0.04446027]
최대값: 0.9504712 -> index: 0
[0.8266243  0.03801839 0.13535729]
최대값: 0.8266243 -> index: 0
[0.92875457 0.00970859 0.06153683]
최대값: 0.92875457 -> index: 0
[0.918645   0.01277162 0.06858333]
최대값: 0.918645 -> index: 0
[0.92751247 0.0110701  0.06141748]
최대값: 0.92751247 -> index: 0
[0.9112058  0.01427844 0.07451574]
최대값: 0.9112058 -> index: 0
[0.95294976 0.0048865  0.04216376]
최대값: 0.95294976 -> index: 0
[0.92608577 0.01035208 0.06356215]
최대값: 0.92608577 -> index: 0
[0.9564501  0.00409422 0.03945557]
최대값: 0.9564501 -> index: 0
[0.93941724 0.00727605 0.05330673]
최대값: 0.93941724 -> index: 0
[0.02595438 0.5814452  0.39260045]
최대값: 0.5814452 -> index: 1
[0.02604049 0.5653677  0.40859184]
최대값: 0.5653677 -> index: 1
[0.02099667 0.52875316 0.45025015]
최대값: 0.52875316 -> index: 1
[0.0333773  0.48543406 0.48118863]
최대값: 0.48543406 -> index: 1
[0.023431   0.50604266 0.4705264 ]
최대값: 0.50604266 -> index: 1
[0.02705857 0.4992689  0.47367263]
최대값: 0.4992689 -> index: 1
[

In [11]:
# softmax 함수의 값이 1이되는지 확인
# 컴퓨터는 내부적으로 정수 연산이라 미세한 오차 발생
for item in p:
    print(item, ' 합계: {0:0.0f}'.format(np.sum(item)))

[0.9504712  0.00506847 0.04446027]  합계: 1
[0.8266243  0.03801839 0.13535729]  합계: 1
[0.92875457 0.00970859 0.06153683]  합계: 1
[0.918645   0.01277162 0.06858333]  합계: 1
[0.92751247 0.0110701  0.06141748]  합계: 1
[0.9112058  0.01427844 0.07451574]  합계: 1
[0.95294976 0.0048865  0.04216376]  합계: 1
[0.92608577 0.01035208 0.06356215]  합계: 1
[0.9564501  0.00409422 0.03945557]  합계: 1
[0.93941724 0.00727605 0.05330673]  합계: 1
[0.02595438 0.5814452  0.39260045]  합계: 1
[0.02604049 0.5653677  0.40859184]  합계: 1
[0.02099667 0.52875316 0.45025015]  합계: 1
[0.0333773  0.48543406 0.48118863]  합계: 1
[0.023431   0.50604266 0.4705264 ]  합계: 1
[0.02705857 0.4992689  0.47367263]  합계: 1
[0.02186903 0.5371607  0.4409703 ]  합계: 1
[0.06487209 0.5668951  0.3682328 ]  합계: 1
[0.02734312 0.5469793  0.42567754]  합계: 1
[0.03440628 0.52282315 0.4427705 ]  합계: 1
[0.00599654 0.3110814  0.68292207]  합계: 1
[0.01223662 0.35450733 0.6332561 ]  합계: 1
[0.00702549 0.32907608 0.66389847]  합계: 1
[0.01001028 0.34976918 0.6402206 ]

In [12]:
# Iris-setosa: 0, Iris-versicolor: 1, Iris-virginica: 2
def decode(data):  # data: 1차원 배열, predict는 2차원 배열을 리턴함
    index = np.argmax(data) # 최대값의 index를 찾아줌
    if index == 0:
        label = 'Iris-setosa'
    elif index == 1:
        label = 'Iris-versicolor'
    elif index == 2:
        label = 'Iris-virginica'
        
    return label

In [14]:
print(len(p))
for i in range(len(p)):
    print('Target(실제값):', Y_str[i], ' Y hat(예측값):', decode(p[i]))

30
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-setosa  Y hat(예측값): Iris-setosa
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y hat(예측값): Iris-versicolor
Target(실제값): Iris-versicolor  Y h

In [15]:
# input 함수의 사용
# 5.1,3.5,1.4,0.2
src = input('꽃받침 길이, 꽃받침 넓이, 꽃잎 길이, 꽃잎 넓이를 입력하세요.')
print(type(src))
print(src)

꽃받침 길이, 꽃받침 넓이, 꽃잎 길이, 꽃잎 넓이를 입력하세요.5.1,3.5,1.4,0.2
<class 'str'>
5.1,3.5,1.4,0.2


In [16]:
data = np.array(src.split(','), dtype=float) # 1차원 텐서
print(data.shape)
print(data)

(4,)
[5.1 3.5 1.4 0.2]


In [17]:
X = np.array([data])  # 2차원 텐서(배열)로 변환
print(X.shape)
print(X)
p = model.predict(X)  # 2차원 배열 대입
print('p:', p)

for i in range(len(p)):
    print('예측값:', decode(p[i])) # [[0.6227252  0.23900516 0.13826966]]

(1, 4)
[[5.1 3.5 1.4 0.2]]
p: [[0.95041513 0.00509432 0.04449051]]
예측값: Iris-setosa
