In [None]:
!pip install -q tensorflow-gpu==2.0.0-rc1

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import pandas as pd
import numpy as np
import seaborn as sns

In [None]:
# Show which TensorFlow version was actually imported by this kernel.
print(tf.__version__)

In [None]:
# Fetch the Auto MPG data file through keras.utils.get_file and display the
# local path it returns.
AUTO_MPG_URL = "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data"
dataset_path = keras.utils.get_file(fname="auto-mpg.data", origin=AUTO_MPG_URL)
dataset_path

In [None]:
# Column names are supplied explicitly, in the order the fields appear in the file.
columns = [
    'mpg', 'cylinders', 'displacement', 'horsepower',
    'weight', 'acceleration', 'model year', 'origin',
]

# Fields are separated by spaces (runs of leading spaces are skipped); a tab
# starts a comment, so the rest of the line after it is not parsed.
read_options = dict(names=columns, sep=" ", comment='\t', skipinitialspace=True)
raw_dataset = pd.read_csv(dataset_path, **read_options)
raw_dataset.head()

In [None]:
# Work on a copy so the cleaning steps below leave raw_dataset untouched.
dataset = raw_dataset.copy(deep=True)
dataset.tail(5)

In [None]:
# Distinct codes that occur in the origin column.
dataset.loc[:, 'origin'].unique()

In [None]:
# Check for missing values: number of null entries in each column.
dataset.isnull().sum(axis=0)

In [None]:
# Distinct raw values in the horsepower column (inspected before cleaning).
dataset.loc[:, 'horsepower'].unique()

In [None]:
# Data cleaning
# Treat "?" entries in horsepower as missing by turning them into NaN.
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on the selected Series: that form mutates an
# intermediate object and does not update `dataset` when pandas
# Copy-on-Write is active.
dataset['horsepower'] = dataset['horsepower'].replace("?", np.nan)

In [None]:
# Remove the rows whose horsepower value is missing.
dataset.dropna(axis='index', subset=['horsepower'], inplace=True)

In [None]:
# Convert the horsepower column to floating point.
horsepower_as_float = dataset['horsepower'].astype(float)
dataset['horsepower'] = horsepower_as_float

In [None]:
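# `origin` is a categorical code, not a numeric quantity, so it has to be converted with one-hot encoding.
# 1 == USA , 2 == Europe , 3 == Korea
# NOTE(review): origin code 3 of this dataset is usually documented as Japan — confirm the label.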

In [None]:
# One-hot encode the categorical origin code: each indicator column holds 1.0
# for rows of that origin and 0.0 for all other rows. Every column uses the
# same 1.0 scale; multiplying by the code value (2.0, 3.0) would no longer be
# a one-hot encoding.
# NOTE(review): code 3 is usually documented as Japan for this dataset; the
# 'Korea' column name is kept as the notebook had it — confirm.
origin = dataset.pop('origin')
for code, label in ((1, 'USA'), (2, 'Europe'), (3, 'Korea')):
    dataset[label] = (origin == code) * 1.0

In [None]:
# Preview the encoded frame; a few rows are enough to check the new columns.
dataset.head()

In [None]:
# Split into sets: a reproducible 80% random sample is the training set,
# and every row not drawn into it forms the test set.
train_set = dataset.sample(frac=0.8, random_state=0)
test_set = dataset.drop(index=train_set.index)

In [None]:
# Pairwise scatter plots of the target and three features, with KDE curves on the diagonal.
pairplot_columns = ['mpg', 'cylinders', 'displacement', 'weight']
sns.pairplot(train_set[pairplot_columns], diag_kind='kde')

In [None]:
# Summary statistics of the training features: the label column is removed
# and the table is transposed so each feature becomes a row.
train_stats = train_set.describe()
del train_stats['mpg']
train_stats = train_stats.T

In [None]:
# pop() removes the 'mpg' column from each frame and returns it, which
# separates the labels from the features.
y_train, y_test = train_set.pop('mpg'), test_set.pop('mpg')

In [None]:
# Data normalization
def norm(x):
    """Standardize x column by column using the training-set statistics.

    Subtracts train_stats['mean'] and divides by train_stats['std'], so the
    same training statistics are applied to whatever frame is passed in.
    """
    centered = x - train_stats['mean']
    return centered / train_stats['std']
norm_train_set, norm_test_set = norm(train_set), norm(test_set)

In [None]:
# Preview the normalized test features instead of dumping the whole frame.
norm_test_set.head()

In [None]:
# Build the model: two 50-unit ReLU hidden layers followed by a Dense(1)
# output layer with no activation specified. The input width is the number
# of feature columns in train_set.
n_features = len(train_set.columns)
network_layers = [
    layers.Dense(50, activation='relu', input_shape=[n_features]),
    layers.Dense(50, activation='relu'),
    layers.Dense(1),
]
model = keras.Sequential(network_layers)

# Optimizer: RMSprop constructed with its default arguments
optimizer = keras.optimizers.RMSprop()

# Compile for training: MSE is the loss, MAE and MSE are reported as metrics
model.compile(optimizer=optimizer, loss='mse', metrics=['mae', 'mse'])

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Sanity check before training: run the model on the first 10 normalized
# training rows and display the predictions.
sample_batch = norm_train_set.iloc[:10]
sample_result = model.predict(sample_batch)
sample_result

In [None]:
class PrintDot(keras.callbacks.Callback):
  """Compact progress indicator: one dot per epoch, a new line every 100 epochs."""
  def on_epoch_end(self, epoch, logs=None):
    # `logs` is unused; it defaults to None so the hook can also be called without it.
    if epoch % 100 == 0: print('')
    print('.', end='')

# Train the model; `history` records the training and validation metrics per epoch.
# validation_split must be a fraction between 0 and 1: hold out 20% of the
# training data for validation.
history=model.fit(norm_train_set,y_train,
                  epochs=1000,validation_split=0.2,verbose=0,
                  callbacks=[PrintDot()])
history

In [None]:
# Collect the per-epoch metrics into a DataFrame and show only the last few
# epochs rather than dumping the whole table.
hist = pd.DataFrame(history.history)
hist.tail()

In [None]:
# Evaluate on the held-out test set; evaluate() returns the loss followed by
# the compiled metrics (MAE, MSE). Report the mean absolute error.
test_metrics = model.evaluate(norm_test_set, y_test, verbose=1)
loss, mae, mse = test_metrics
print('평균 절대 오차:', mae)

In [None]:
import matplotlib.pyplot as plt
# Visualization: predicted vs. true mpg on the test set.
y_pred = model.predict(norm_test_set).flatten()
plt.scatter(y_test, y_pred, c='pink')
plt.xlabel('True values [mpg]')
plt.ylabel('Predictions [mpg]')
# Start both axes at 0 while keeping the automatically chosen upper limits.
plt.xlim([0, plt.xlim()[1]])
plt.ylim([0, plt.ylim()[1]])
# Reference diagonal: points on this line are perfect predictions.
_ = plt.plot([-100, 100], [-100, 100])
# show must be called; a bare `plt.show` only references the function.
plt.show()