In [43]:
import datetime

import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers

In [1]:
# Load only the columns this notebook uses from the temperature CSV
features = pd.read_csv(
    '../0_resources/temps.csv',
    usecols=[
        'year', 'month', 'day', 'week',
        'temp_2', 'temp_1', 'average',
        'actual', 'friend',
    ],
)
# Quick sanity check: (rows, columns), then per-column dtype / non-null summary
print(features.shape)
features.info()

NameError: name 'pd' is not defined

In [None]:
# Process the date information.
# Pull the year / month / day columns out of the loaded frame.
years = features['year']
months = features['month']
days = features['day']

# Build datetime objects straight from the numeric components rather than
# formatting a 'Y-M-D' string and parsing it back. The int() casts keep this
# working even if a component arrives as a float such as 2016.0.
dates = [datetime.datetime(int(year), int(month), int(day))
         for year, month, day in zip(years, months, days)]
dates[:5]

In [None]:
## Plot the raw series
# Select the default plotting style
plt.style.use('fivethirtyeight')

# Lay out a 2x2 grid of axes and rotate the date tick labels
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(10, 10))
fig.autofmt_xdate(rotation=45)

# One (axes, column, title) entry per panel; every panel is drawn the same way,
# so a single loop replaces four copy-pasted stanzas.
panels = [
    (ax1, 'actual', 'Actual'),                 # label values
    (ax2, 'temp_1', 'Yesterday Temperature'),  # yesterday
    (ax3, 'temp_2', '2 Days Temperature'),     # the day before yesterday
    (ax4, 'friend', 'My Friend Temperature'),  # a friend's guess
]
for ax, column, title in panels:
    ax.plot(dates, features[column])
    ax.set_xlabel('')
    ax.set_ylabel('Temperature')
    ax.set_title(title)


In [None]:
# One-hot encode the day-of-week column.
# No column list is passed, so get_dummies picks the columns to expand itself.
features = pd.get_dummies(data=features)
features.head(n=5)

In [None]:
# Target values: the 'actual' column as an ndarray
labels = np.array(features['actual'])

# Everything except the target stays as a model input
features = features.drop(columns=['actual'])

# Remember the input column names; the array below no longer carries them
features_column = features.columns.tolist()

# Convert the inputs from a DataFrame to an ndarray.
# NOTE: `features` is rebound from DataFrame to ndarray here, so this cell
# cannot be re-run without first re-running the cells that build the frame.
features = np.array(features)

In [None]:
# Show the (rows, columns) shape of the feature array
features.shape

In [None]:
## Standardize the feature data
# StandardScaler is imported in the notebook's top import cell.
# NOTE(review): the scaler is fitted on every row, including the rows Keras
# later holds out through validation_split -- confirm this is acceptable.
ss = StandardScaler()
input_features = ss.fit_transform(features)

# Peek at the first standardized row
input_features[0]

In [None]:
### Build the network model

In [None]:
# Build the network as a sequential stack of layers.
# No activation argument is passed to any layer in this baseline model.
model = tf.keras.models.Sequential([
    layers.Dense(16),
    layers.Dense(32),
    layers.Dense(1),
])

In [None]:
# Configure training: SGD optimizer and mean-squared-error loss
model.compile(
    loss='mean_squared_error',
    optimizer=tf.keras.optimizers.SGD(0.001),
)

In [None]:
# Train the model, holding out a quarter of the rows for validation
model.fit(
    input_features,
    labels,
    batch_size=64,
    epochs=50,
    validation_split=0.25,
)

In [None]:
# Display the model's network structure
model.summary()

In [None]:
# Model improvement 1: add an activation function.
# Same layer sizes as the baseline, with ReLU on the 32-unit layer.
model2 = tf.keras.models.Sequential([
    layers.Dense(16),
    layers.Dense(32, activation='relu'),
    layers.Dense(1),
])

# Configure the optimizer and the loss function
model2.compile(
    loss='mean_squared_error',
    optimizer=tf.keras.optimizers.SGD(0.001),
)

# Train the model
model2.fit(
    input_features,
    labels,
    batch_size=64,
    epochs=50,
    validation_split=0.25,
)

In [None]:
# Model improvement 2: change how the weights are initialized.
# Every layer's kernel now uses the 'random_normal' initializer.
model3 = tf.keras.models.Sequential([
    layers.Dense(16, kernel_initializer='random_normal'),
    layers.Dense(32, activation='relu', kernel_initializer='random_normal'),
    layers.Dense(1, kernel_initializer='random_normal'),
])

# Configure the optimizer and the loss function
model3.compile(
    loss='mean_squared_error',
    optimizer=tf.keras.optimizers.SGD(0.001),
)

# Train the model
model3.fit(
    input_features,
    labels,
    batch_size=64,
    epochs=50,
    validation_split=0.25,
)

In [None]:
# Model improvement 3: add a regularization penalty.
# Each layer keeps the 'random_normal' initializer and gains an L2 kernel
# regularizer with factor 0.03.
model4 = tf.keras.models.Sequential([
    layers.Dense(
        16,
        kernel_initializer='random_normal',
        kernel_regularizer=tf.keras.regularizers.l2(0.03),
    ),
    layers.Dense(
        32,
        activation='relu',
        kernel_initializer='random_normal',
        kernel_regularizer=tf.keras.regularizers.l2(0.03),
    ),
    layers.Dense(
        1,
        kernel_initializer='random_normal',
        kernel_regularizer=tf.keras.regularizers.l2(0.03),
    ),
])

# Configure the optimizer and the loss function
model4.compile(
    loss='mean_squared_error',
    optimizer=tf.keras.optimizers.SGD(0.001),
)

# Train the model
model4.fit(
    input_features,
    labels,
    batch_size=64,
    epochs=50,
    validation_split=0.25,
)

In [None]:
### Predict with the trained model

In [None]:
# Run model4 over the full standardized feature matrix
predict = model4.predict(input_features)

In [None]:
# Show the shape of the prediction array
predict.shape

In [None]:
# Assemble the data for the comparison plot.
# Read year / month / day back out of the unscaled feature matrix. The values
# may be floats there (e.g. 2016.0), and '2016.0-1.0-1.0' does not parse with
# '%Y-%m-%d', so each component is cast to int and the datetime is built
# directly. Taking all three from `features` also means this cell no longer
# depends on `years` / `months` / `days` left over from an earlier cell, so it
# can be re-run safely.
years = features[:, features_column.index('year')]
months = features[:, features_column.index('month')]
days = features[:, features_column.index('day')]
dates = [datetime.datetime(int(year), int(month), int(day))
         for year, month, day in zip(years, months, days)]

# Table of each date with its true label value
true_data = pd.DataFrame({'date': dates, 'actual': labels})

# Table of each date with the model's prediction. Predictions were made on the
# same rows in the same order, so the dates are identical.
test_dates = list(dates)
predictions_data = pd.DataFrame({'date': test_dates, 'prediction': predict.reshape(-1)})

In [None]:
# Compare the true values with the model's predictions over time
plt.figure(figsize=(10, 10), dpi=100)

# True values: blue line
plt.plot(true_data['date'], true_data['actual'], 'b-', label='actual')

# Predicted values: red dots
plt.plot(predictions_data['date'], predictions_data['prediction'], 'ro', label='prediction')

# Label the figure so it can be read on its own
plt.xticks(rotation=45)
plt.xlabel('Date')
plt.ylabel('Temperature')
plt.title('Actual and Predicted Values')
# Trailing semicolon keeps the Legend repr out of the cell output
plt.legend();