In [None]:
# 다중선형회귀 - 자동차 연비 예측 - Network 구성 함수작성, 조기종료
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, Concatenate
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Load the auto-mpg CSV; '?' entries are read as NaN (na_values='?').
dataset = pd.read_csv('https://raw.githubusercontent.com/pykwon/python/refs/heads/master/testdata_utf8/auto-mpg.csv', na_values='?')
print(dataset.head(2))

# Remove the 'car name' column, then the columns that are not used further on.
# Rebinding instead of mutating in place keeps each step a plain expression.
dataset = dataset.drop(columns=['car name'])
print(dataset.head(2))
dataset = dataset.drop(columns=['cylinders', 'acceleration', 'model year', 'origin'])
print(dataset.corr())

# DataFrame.info() writes its report itself and returns None, so it must not
# be wrapped in print() (that emitted an extra "None" line).
dataset.info()

# Discard rows containing missing values and confirm that none remain.
dataset = dataset.dropna()
print(dataset.isna().sum())

# Pairwise scatter plots with KDE curves on the diagonal.
sns.pairplot(dataset[['mpg', 'displacement','horsepower', 'weight']], diag_kind='kde')
plt.show()


In [None]:
# Train / test split: sample 70% of the rows for training (fixed seed) and
# keep every row that was not sampled as the test set.
train_dataset = dataset.sample(frac=0.7, random_state=123)
test_dataset = dataset.drop(index=train_dataset.index)
print(train_dataset[:2], train_dataset.shape)   # (274, 4)
print(test_dataset[:2], test_dataset.shape)     # (118, 4)

# Standardisation formula: (observation - mean) / standard deviation.
# Summarise the training split, then keep only the non-label columns and
# transpose so that every feature is a row with 'mean' / 'std' columns.
train_stat = train_dataset.describe()
print(train_stat)
train_stat = train_stat.drop(columns=['mpg']).T
print(train_stat)

def std_func(x, stats=None):
    """Standardise ``x`` as ``(x - mean) / std``.

    Args:
        x: pandas object (DataFrame or Series) holding the observations.
        stats: optional table exposing ``'mean'`` and ``'std'`` entries that
            are aligned against ``x`` by label. When omitted, the
            module-level ``train_stat`` table is used, which is the original
            behaviour of this function.

    Returns:
        The standardised values. Labels of ``x`` that have no entry in the
        statistics table come out as NaN because pandas aligns by label.
    """
    if stats is None:
        # Looked up at call time so the function follows later reassignments
        # of the global table, exactly as the original did.
        stats = train_stat
    return (x - stats['mean']) / stats['std']

# print(std_func(train_dataset[:3]))
# Standardise both splits with std_func and discard the 'mpg' column from the
# result so that only the predictor columns remain.
st_train_data = std_func(train_dataset).drop(columns=['mpg'])
print(st_train_data[:2])
st_test_data = std_func(test_dataset).drop(columns=['mpg'])
print(st_test_data[:2])

# Detach the target column from each split; pop() also removes it from the
# source frame.
train_label = train_dataset.pop('mpg')
print(train_label[:2])
test_label = test_dataset.pop('mpg')
print(test_label[:2])



In [None]:
# NOTE(review): neither `optimize` nor `metric` is referenced in the visible
# cells; these look like accidental editor auto-imports. Confirm and remove —
# a missing streamlit installation would make this cell fail on a fresh run.
from scipy import optimize
from streamlit import metric


def build_model():
    """Create and compile the regression network.

    The model takes 3 input features, passes them through two 32-unit ReLU
    hidden layers and ends in a single linear output unit. It is compiled
    with Adam (learning rate 0.01), mean squared error as the loss, and mean
    squared error plus mean absolute error as reported metrics.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Assemble the layer stack first, in the same order it is applied.
    stack = [Input(shape=(3,))]
    stack += [Dense(units=32, activation='relu') for _ in range(2)]
    stack.append(Dense(units=1, activation='linear'))
    network = Sequential(stack)

    network.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
        loss='mean_squared_error',
        metrics=['mean_squared_error', 'mean_absolute_error'],
    )
    return network

# Build the network and show its layer-by-layer summary.
# Model.summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that appended a stray "None" line).
model = build_model()
model.summary()


In [None]:
# NOTE(review): none of these three names is used as a variable in the visible
# cells (`callbacks=` and `verbose=` in the fit call are keyword arguments, and
# `History` is never referenced). They look like accidental editor
# auto-imports; confirm and remove — a missing libmambapy installation would
# make this cell fail on a fresh run.
from gc import callbacks
from tabnanny import verbose
from libmambapy import History


# Upper bound on the number of epochs; early stopping is expected to end
# training well before this.
epochs = 5000

# Early stopping: halt once val_loss has not improved for 5 consecutive epochs
# and roll the model back to the weights of the best epoch.
# No `baseline` is set on purpose: with baseline=0.03 the patience counter is
# only reset when val_loss (a mean squared error in mpg^2) beats 0.03, which
# is not a realistic target here, so training was cut off after `patience`
# epochs no matter how much the model was still improving.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# The last 20% of the training data is held out for validation by Keras.
history = model.fit(st_train_data, train_label, batch_size=32, epochs=epochs, validation_split=0.2, verbose=2, callbacks=[early_stop])

# Per-epoch loss/metric values as a table.
df = pd.DataFrame(history.history)
print(df.head(3))

In [None]:
# 모델 학습 정보 시각화
def plot_history(history):
    """Plot training and validation error curves of a training run.

    Prints the first three rows of the per-epoch metrics table, then draws two
    stacked panels: mean squared error on top and mean absolute error below,
    each showing the training and the validation curve against the epoch.

    Args:
        history: object exposing ``history`` (mapping of metric name to
            per-epoch values; must contain 'mean_squared_error',
            'val_mean_squared_error', 'mean_absolute_error' and
            'val_mean_absolute_error') and ``epoch`` (the epoch indices).
    """
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch
    print(hist.head(3))

    _, (ax_mse, ax_mae) = plt.subplots(2, 1, figsize=(8, 12))

    # Squared error is measured in squared target units (mpg^2).
    ax_mse.set_xlabel('epoch')
    ax_mse.set_ylabel('mean squared error [$mpg^2$]')
    ax_mse.plot(hist['epoch'], hist['mean_squared_error'], label='train err')
    ax_mse.plot(hist['epoch'], hist['val_mean_squared_error'], label='validation err')
    # Each panel needs its own legend; a single trailing plt.legend() only
    # annotates the panel that was drawn last.
    ax_mse.legend()

    # Absolute error is in the target's own units (mpg).
    ax_mae.set_xlabel('epoch')
    ax_mae.set_ylabel('mean absolute error [mpg]')
    ax_mae.plot(hist['epoch'], hist['mean_absolute_error'], label='train abs err')
    ax_mae.plot(hist['epoch'], hist['val_mean_absolute_error'], label='validation abs err')
    ax_mae.legend()

    plt.show()

# Draw the error curves for the training run stored in `history`.
plot_history(history)

In [None]:
# Score the model on the standardised test split; the call is unpacked into
# the loss followed by the two metric values.
loss, mse, mae = model.evaluate(st_test_data, test_label)
for label, score in (('loss', loss), ('mse', mse), ('mae', mae)):
    print(f'test dataset으로 평가 {label} : {score:5.3f}')