In [4]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from keras import layers,models,losses

In [5]:
# 在线下载汽车效能数据集
dataset_path = keras.utils.get_file("auto-mpg.data","http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
# 利用 pandas 读取数据集，字段有效能（公里数每加仑），气缸数，排量，马力，重量
# 加速度，型号年份，产地
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight',
 'Acceleration', 'Model Year', 'Origin']
raw_dataset = pd.read_csv(dataset_path, names=column_names,na_values = "?", comment='\t',sep=" ", skipinitialspace=True)
dataset = raw_dataset.copy()
# 查看部分数据
dataset.head()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1


In [6]:
dataset.isna().sum() # 统计空白数据
dataset = dataset.dropna() # 删除空白数据项

In [7]:
dataset.isna().sum() # 再次统计空白数据

MPG             0
Cylinders       0
Displacement    0
Horsepower      0
Weight          0
Acceleration    0
Model Year      0
Origin          0
dtype: int64

In [8]:
# 处理类别型数据，其中 origin 列代表了类别 1,2,3,分布代表产地：美国、欧洲、日本
# 先弹出(删除并返回)origin 这一列
origin = dataset.pop('Origin')
# 根据 origin 列来写入新的 3 个列
dataset['USA'] = (origin == 1)*1.0
dataset['Europe'] = (origin == 2)*1.0
dataset['Japan'] = (origin == 3)*1.0
dataset.tail() # 查看新表格的后几项

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,USA,Europe,Japan
393,27.0,4,140.0,86.0,2790.0,15.6,82,1.0,0.0,0.0
394,44.0,4,97.0,52.0,2130.0,24.6,82,0.0,1.0,0.0
395,32.0,4,135.0,84.0,2295.0,11.6,82,1.0,0.0,0.0
396,28.0,4,120.0,79.0,2625.0,18.6,82,1.0,0.0,0.0
397,31.0,4,119.0,82.0,2720.0,19.4,82,1.0,0.0,0.0


In [9]:
# 切分为训练集和测试集
train_dataset = dataset.sample(frac=0.8,random_state=0)
test_dataset = dataset.drop(train_dataset.index)

In [10]:
# 移动 MPG 油耗效能这一列为真实标签 Y
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')

In [11]:
# 查看训练集的输入 X 的统计数据
train_stats = train_dataset.describe()

In [12]:

train_stats = train_stats.transpose() # 转置

In [13]:
train_stats

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Cylinders,314.0,5.477707,1.699788,3.0,4.0,4.0,8.0,8.0
Displacement,314.0,195.318471,104.331589,68.0,105.5,151.0,265.75,455.0
Horsepower,314.0,104.869427,38.096214,46.0,76.25,94.5,128.0,225.0
Weight,314.0,2990.251592,843.898596,1649.0,2256.5,2822.5,3608.0,5140.0
Acceleration,314.0,15.559236,2.78923,8.0,13.8,15.5,17.2,24.8
Model Year,314.0,75.898089,3.675642,70.0,73.0,76.0,79.0,82.0
USA,314.0,0.624204,0.485101,0.0,0.0,1.0,1.0,1.0
Europe,314.0,0.178344,0.383413,0.0,0.0,0.0,0.0,1.0
Japan,314.0,0.197452,0.398712,0.0,0.0,0.0,0.0,1.0


In [14]:
# 标准化数据
def norm(x): # 减去每个字段的均值，并除以标准差
     return (x - train_stats['mean']) / train_stats['std']


In [15]:
normed_train_data = norm(train_dataset) # 标准化训练集
normed_test_data = norm(test_dataset) # 标准化测试集


In [16]:
print(normed_train_data.shape,train_labels.shape)

(314, 9) (314,)


In [17]:
print(normed_test_data.shape, test_labels.shape)

(78, 9) (78,)


In [18]:
# 构建Dataset对象
train_db = tf.data.Dataset.from_tensor_slices((normed_train_data.values,train_labels.values))
# 随机打乱，批量batch
train_db = train_db.shuffle(100).batch(32)

In [19]:
class Network(models.Model):
    # 回归网络模型
    def __init__(self):
        super(Network, self).__init__()
        # 创建 3 个全连接层
        self.fc1 = layers.Dense(64, activation='relu')
        self.fc2 = layers.Dense(64, activation='relu')
        self.fc3 = layers.Dense(1)

    def call(self, inputs, training=None, mask=None):
        # 依次通过三个网络层
        x = self.fc1(inputs)
        x = self.fc2(x)
        x = self.fc3(x)

        return x

In [20]:
# 创建网络类实例
model = Network()

In [21]:
# build构建
model.build(input_shape=(4,9))
model.summary()

Model: "network"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               multiple                  640       
                                                                 
 dense_1 (Dense)             multiple                  4160      
                                                                 
 dense_2 (Dense)             multiple                  65        
                                                                 
Total params: 4,865
Trainable params: 4,865
Non-trainable params: 0
_________________________________________________________________


In [29]:
# 创建优化器
optimizer = keras.optimizers.RMSprop(0.01)


In [30]:
# 200次epoch
for epoch in range(200):
    for step,(x,y) in enumerate(train_db):
        # 梯度记录器
        with tf.GradientTape() as tape:
            out = model(x)
            loss =tf.reduce_mean(losses.MSE(y,out))
            mae = tf.reduce_mean(losses.MAE(y,out))
        if epoch%10 == 0:
            print("Epoch: ",epoch," step: ",step," loss: ",float(loss))
        # 计算梯度并更新
        grads = tape.gradient(loss,model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

Epoch:  0  step:  0  loss:  50.92118835449219
Epoch:  0  step:  1  loss:  267.1707458496094
Epoch:  0  step:  2  loss:  66.46539306640625
Epoch:  0  step:  3  loss:  84.44633483886719
Epoch:  0  step:  4  loss:  47.584228515625
Epoch:  0  step:  5  loss:  94.08747863769531
Epoch:  0  step:  6  loss:  47.119842529296875
Epoch:  0  step:  7  loss:  63.98382568359375
Epoch:  0  step:  8  loss:  85.45479583740234
Epoch:  0  step:  9  loss:  54.23306655883789
Epoch:  10  step:  0  loss:  66.8530502319336
Epoch:  10  step:  1  loss:  47.29558563232422
Epoch:  10  step:  2  loss:  81.4603042602539
Epoch:  10  step:  3  loss:  69.46524047851562
Epoch:  10  step:  4  loss:  82.9395523071289
Epoch:  10  step:  5  loss:  44.67466354370117
Epoch:  10  step:  6  loss:  80.58865356445312
Epoch:  10  step:  7  loss:  51.95793533325195
Epoch:  10  step:  8  loss:  80.4049072265625
Epoch:  10  step:  9  loss:  60.196815490722656
Epoch:  20  step:  0  loss:  73.00391387939453
Epoch:  20  step:  1  loss: