In [1]:
from tensorflow import keras
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("./data/boston.csv")

- 보스턴 집값 데이터 독립변수
    - CRIM : 인구 1명 당 범죄 발생 수
    - ZN : 25000평방 피트 이상의 주거 구역 비중
    - INDUS : 소매업 외 상업이 차지하는 면적 비율
    - CHAS : 찰스강 위치 변수 (1 : 강 주변, 0 : 이외)
    - NOX : 일산화질소 농도
    - RM : 집의 평균 방 수
    - AGE : 1940년 이전에 지어진 비율
    - DIS : 5가지 보스턴 시 고용 시설까지의 거리
    - RAD : 순환고속도로의 접근 용이성
    - TAX : $10,000당 무동산 세율 총계
    - PTRATIO : 지역별 학생과 교사 비율
    - B : 흑인 비율
    - LSTAT : 급여가 낮은 직업에 종사하는 인구 비율(%)
    
- 종속변수
    - PRICE : 가격 (단위 : $1,000)

In [3]:
x = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [4]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 12)
x_sub, x_val, y_sub, y_val = train_test_split(x_train, y_train, test_size = 0.2, random_state = 12)

### 모델 설계

In [5]:
model = keras.Sequential()

In [6]:
# 은닉층1
model.add(keras.layers.Dense(30, activation = "relu", input_shape = (13,)))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
# 은닉층2
model.add(keras.layers.Dense(6, activation = "relu"))

In [8]:
# 출력층
model.add(keras.layers.Dense(1))

In [9]:
early_stopping_cb = keras.callbacks.EarlyStopping(patience = 16, restore_best_weights = True)

In [10]:
model.compile(optimizer = "adam", loss = "mean_squared_error", metrics = ["mae"])

In [11]:
model.summary()

In [12]:
history = model.fit(x_sub, y_sub, epochs = 200, validation_data = (x_val, y_val), callbacks = [early_stopping_cb])

Epoch 1/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - loss: 474.2788 - mae: 17.3160 - val_loss: 174.6510 - val_mae: 11.0043
Epoch 2/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 192.7597 - mae: 11.4647 - val_loss: 112.4969 - val_mae: 8.2109
Epoch 3/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 135.2487 - mae: 8.4707 - val_loss: 81.7569 - val_mae: 6.7116
Epoch 4/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 117.3175 - mae: 8.2131 - val_loss: 72.1885 - val_mae: 6.1958
Epoch 5/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 108.2228 - mae: 7.5075 - val_loss: 68.4509 - val_mae: 6.0087
Epoch 6/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 126.0150 - mae: 8.0650 - val_loss: 66.6345 - val_mae: 5.8582
Epoch 7/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 41.6181 - mae: 4.5421 - val_loss: 36.4931 - val_mae: 4.2164
Epoch 55/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 46.1366 - mae: 4.4208 - val_loss: 34.4891 - val_mae: 4.1799
Epoch 56/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 45.0579 - mae: 4.3640 - val_loss: 34.7659 - val_mae: 4.2629
Epoch 57/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 33.3737 - mae: 3.8626 - val_loss: 34.4543 - val_mae: 4.4338
Epoch 58/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 37.7968 - mae: 4.1415 - val_loss: 34.4649 - val_mae: 4.6929
Epoch 59/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 41.6983 - mae: 4.7256 - val_loss: 39.4047 - val_mae: 4.8609
Epoch 60/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - l

[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 36.2753 - mae: 4.2204 - val_loss: 29.6020 - val_mae: 3.9503
Epoch 108/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 32.8978 - mae: 4.4704 - val_loss: 28.4352 - val_mae: 3.8711
Epoch 109/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 30.9664 - mae: 4.0568 - val_loss: 26.0914 - val_mae: 3.6735
Epoch 110/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 33.3563 - mae: 3.8568 - val_loss: 25.6004 - val_mae: 3.6618
Epoch 111/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 30.8967 - mae: 3.9973 - val_loss: 26.4714 - val_mae: 3.6639
Epoch 112/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 29.4791 - mae: 3.8985 - val_loss: 28.3268 - val_mae: 3.8273
Epoch 113/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/st

Epoch 160/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 27.0999 - mae: 3.6946 - val_loss: 26.5792 - val_mae: 3.7395
Epoch 161/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 26.0830 - mae: 3.6988 - val_loss: 23.6281 - val_mae: 3.5559
Epoch 162/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 24.9254 - mae: 3.6168 - val_loss: 25.2158 - val_mae: 3.7147
Epoch 163/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 23.7294 - mae: 3.7286 - val_loss: 24.6027 - val_mae: 3.6777
Epoch 164/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 30.4795 - mae: 4.1468 - val_loss: 27.3666 - val_mae: 3.8155
Epoch 165/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 23.5411 - mae: 3.6905 - val_loss: 24.0147 - val_mae: 3.6552
Epoch 166/200
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

### 모델 성능 평가

In [13]:
model.evaluate(x_test, y_test)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 28.2282 - mae: 4.0959 


[29.157119750976562, 4.206006050109863]

In [14]:
y_pred = model.predict(x_test).flatten()

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 


In [15]:
for i in range(10):
    label = y_test.iloc[i]
    prediction = y_pred[i]
    print(f"실제가격 : {label:.3f}, 예상가격 : {prediction:.3f}")

실제가격 : 20.700, 예상가격 : 25.814
실제가격 : 12.700, 예상가격 : 19.576
실제가격 : 8.500, 예상가격 : 4.613
실제가격 : 25.100, 예상가격 : 29.109
실제가격 : 28.200, 예상가격 : 33.962
실제가격 : 22.500, 예상가격 : 20.702
실제가격 : 18.200, 예상가격 : 22.366
실제가격 : 43.500, 예상가격 : 34.099
실제가격 : 36.100, 예상가격 : 31.813
실제가격 : 23.800, 예상가격 : 24.776
