In [20]:
from sklearn.datasets import fetch_california_housing

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
# Fetch the California housing dataset through scikit-learn's dataset loader.
dataset = fetch_california_housing()

In [21]:
# Pull the feature matrix and the regression target out of the dataset object,
# then show both shapes (one per line).
x, y = dataset.data, dataset.target
print(x.shape, y.shape, sep="\n")

(20640, 8)
(20640,)


In [22]:
# scikit-learn's column lookup: list the feature names, then how many there are.
print(dataset.feature_names, len(dataset.feature_names), sep="\n")


['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
8


In [23]:
# scikit-learn's dataset summary: print the description text attached to the dataset.
print(dataset.DESCR)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block group
        - HouseAge      median house age in block group
        - AveRooms      average number of rooms per household
        - AveBedrms     average number of bedrooms per household
        - Population    block group population
        - AveOccup      average number of household members
        - Latitude      block group latitude
        - Longitude     block group longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived

In [24]:
# 3. Model setup
# Exercise: keep the training share at 0.7 or more.
# Metric goal: try to reach R2 >= 0.8 / use RMSE.

x_train, x_test, y_train, y_test = train_test_split(
    x, y, train_size=0.7, shuffle=True, random_state=123
)

# Conv2D consumes 4-D input, so every row of features becomes a
# (n_features, 1, 1) block: samples x features x 1 x 1.
x_train = x_train.reshape(*x_train.shape, 1, 1)
x_test = x_test.reshape(*x_test.shape, 1, 1)
print(x_train.shape, x_test.shape)

(14447, 8, 1, 1) (6193, 8, 1, 1)


In [29]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input,Conv2D,MaxPooling2D,Flatten

# Model
rate = 0.2
model = Sequential()

# Input is one sample reshaped to 8 x 1 with a single channel.
# `filters` is the number of output channels produced by the convolution.
# The kernel is 2 x 1, so it slides along the feature axis only.
model.add(
    Conv2D(filters=64, kernel_size=(2, 1), input_shape=(8, 1, 1), activation="relu")
)

# Flatten the convolution output and regress to a single value.
model.add(Flatten())
model.add(Dense(16, activation="relu"))
model.add(Dense(1, activation="linear"))

model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 7, 1, 64)          192       
                                                                 
 flatten_3 (Flatten)         (None, 448)               0         
                                                                 
 dense_26 (Dense)            (None, 16)                7184      
                                                                 
 dense_27 (Dense)            (None, 1)                 17        
                                                                 
Total params: 7,393
Trainable params: 7,393
Non-trainable params: 0
_________________________________________________________________


In [26]:
from datetime import datetime
from sklearn.metrics import r2_score
now = datetime.now()

# 4. Compile the model
model.compile(loss="mae", optimizer="adam")

# Train in rounds of 100 epochs until the R2 on the test split reaches 0.55,
# appending one "<timestamp> r2=<score>" line to the log file per round.
#
# NOTE(review): the loop has no upper bound, and the stop criterion is computed
# on x_test/y_test, so the final score is not an unbiased hold-out estimate.
#
# The raw string keeps the Windows backslashes literal ("\s", "\k" and "\c" are
# not valid escape sequences in a normal string literal). The `with` block
# closes the log even if training raises or the kernel is interrupted.
with open(r"C:\study\keras\califonia.txt", "a") as f:
    while True:
        model.fit(x_train, y_train, epochs=100, batch_size=4, steps_per_epoch=100)
        loss = model.evaluate(x_test, y_test)
        print("loss : ", loss)
        y_predict = model.predict(x_test)
        print("=================")
        print(y_test)
        print(y_predict)
        print("=================")
        r2 = r2_score(y_test, y_predict)
        print("R2 : ", r2)

        # Separate timestamp and score so a log line can be parsed back
        # unambiguously; each round is logged exactly once.
        f.write(f"{datetime.now()} r2={r2}\n")

        if r2 >= 0.55:
            model.save("califonia.h5")
            break

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [27]:
# Print the R2 score of the saved model
from tensorflow.keras.models import load_model

# Bind the restored model to its own name so the imported `load_model`
# function is not overwritten and stays callable afterwards.
loaded_model = load_model('califonia.h5')

loss = loaded_model.evaluate(x_test, y_test)
print("loss : ", loss)
y_predict = loaded_model.predict(x_test)
print("=================")
print(y_test)
print(y_predict)
print("=================")
r2 = r2_score(y_test, y_predict)
print("R2 : ", r2)

loss :  0.5451908707618713
[1.516 0.992 1.345 ... 4.869 2.362 1.042]
[[2.1056683]
 [1.1750606]
 [1.1420069]
 ...
 [2.758371 ]
 [1.9405922]
 [1.0206953]]
R2 :  0.5605969259387469
