In [441]:
import pandas as pd
import numpy as np
import os
from datetime import datetime
import pyecharts
from pyecharts import options as opts
from datetime import datetime
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
import keras
import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense
from keras.models import Model
from keras import regularizers
from keras.callbacks import TensorBoard

In [2]:
from pyecharts.charts import Line, Page

def line_with_constant_benchmark(data, title, benchmark=None, ratio=None, axis_offset=0, zoom=True, range_=None) -> Line:
    """Build a line chart of ``data[title]`` with optional constant reference lines.

    Args:
        data: table-like object; ``data[title].values`` supplies the y values.
        title: column to plot; also used as the series name.
        benchmark: y value of a horizontal "Benchmark" mark line. Falsy values
            (``None``, ``0``) disable all horizontal mark lines.
        ratio: relative band around ``benchmark``; when truthy, "Upper" and
            "Lower" lines are added at ``benchmark * (1 +/- ratio)``.
        axis_offset: padding subtracted from / added to the y-axis min / max.
        zoom: whether a data-zoom control is attached.
        range_: optional pair of x positions marked as "Start" and "End".

    Returns:
        The configured ``Line`` chart.
    """
    marklines = []
    if benchmark:
        marklines.append(opts.MarkLineItem(y=benchmark, name="Benchmark"))
        if ratio:
            marklines.append(opts.MarkLineItem(y=benchmark * (1 + ratio), name="Upper"))
            marklines.append(opts.MarkLineItem(y=benchmark * (1 - ratio), name="Lower"))
    if range_ is not None:
        marklines.append(opts.MarkLineItem(x=range_[0], name="Start"))
        marklines.append(opts.MarkLineItem(x=range_[1], name="End"))

    y_values = data[title].values
    hidden_label = opts.LabelOpts(is_show=False)

    chart = Line(init_opts=opts.InitOpts(animation_opts=opts.AnimationOpts(animation=False)))
    chart.add_xaxis(range(len(data)))
    chart.add_yaxis(title, y_values, label_opts=hidden_label)
    chart.set_global_opts(
        datazoom_opts=opts.DataZoomOpts() if zoom else None,
        # The y axis is fitted to the plotted series, widened by axis_offset on both sides.
        yaxis_opts=opts.AxisOpts(
            min_=min(y_values) - axis_offset,
            max_=max(y_values) + axis_offset,
        ),
    )
    chart.set_series_opts(
        label_opts=opts.LabelOpts(is_show=False),
        markline_opts=opts.MarkLineOpts(data=marklines),
    )
    return chart

def line_with_variated_benchmark(data, title, benchmark_title, axis_offset=0, zoom=True, range_=None) -> Line:
    """Build a line chart that overlays ``data[title]`` and ``data[benchmark_title]``.

    Args:
        data: table-like object providing both columns.
        title: column with the measured series (first y series).
        benchmark_title: column with the time-varying reference (second y series).
        axis_offset: padding applied to the y-axis range.
        zoom: whether a data-zoom control is attached.
        range_: optional pair of x positions marked as "Start" and "End".

    Returns:
        The configured ``Line`` chart.
    """
    marklines = []
    if range_ is not None:
        marklines.append(opts.MarkLineItem(x=range_[0], name="Start"))
        marklines.append(opts.MarkLineItem(x=range_[1], name="End"))

    measured = data[title].values

    chart = Line(init_opts=opts.InitOpts(animation_opts=opts.AnimationOpts(animation=False)))
    chart.add_xaxis(range(len(data)))
    chart.add_yaxis(title, measured, label_opts=opts.LabelOpts(is_show=False))
    chart.add_yaxis(benchmark_title, data[benchmark_title].values, label_opts=opts.LabelOpts(is_show=False))
    chart.set_global_opts(
        datazoom_opts=opts.DataZoomOpts() if zoom else None,
        # The axis range follows the measured series only, not the benchmark series.
        yaxis_opts=opts.AxisOpts(
            min_=min(measured) - axis_offset,
            max_=max(measured) + axis_offset,
        ),
    )
    chart.set_series_opts(markline_opts=opts.MarkLineOpts(data=marklines))
    return chart
    
    
def draw_split_data(split_data_item, title, zoom=True, range_=None, dir_=None):
    """Render one HTML page with six line charts for a data segment.

    Charts, in order: pre-drying flow, outlet moisture, inlet moisture, and the
    actual-vs-setpoint curves for hot-air speed and the two drum-wall zones.

    Args:
        split_data_item: table-like segment containing the plotted columns.
        title: file name (without extension) of the rendered page.
        zoom: forwarded to every chart to toggle the data-zoom control.
        range_: optional pair of x positions marked as "Start"/"End" on the
            outlet-moisture chart and on the three setpoint charts.
        dir_: optional sub-folder below ``./plot``.

    The page is written to ``./plot/<dir_>/<title>.html`` (or
    ``./plot/<title>.html`` when ``dir_`` is falsy). The target folder is
    created if missing in both cases; previously only the ``dir_`` branch
    created it, so the default path failed when ``./plot`` did not exist.

    Relies on the module-level ``BIAS_ERROR_RATIO``, which is defined in a
    later cell and must exist before this function is called.
    """
    page = Page()
    page.add(line_with_constant_benchmark(
        split_data_item,
        "烘前叶丝流量",
        axis_offset=5,
        zoom=zoom
    ))
    # No benchmark value is passed here, so line_with_constant_benchmark draws
    # no horizontal mark lines even though a ratio is supplied.
    page.add(line_with_constant_benchmark(
        split_data_item,
        "出口水分",
        ratio=BIAS_ERROR_RATIO,
        zoom=zoom,
        range_=range_
    ))
    page.add(line_with_constant_benchmark(
        split_data_item,
        "入口水分",
        zoom=zoom
    ))

    # (actual column, setpoint column, y-axis padding)
    setpoint_charts = (
        ("热风速度实际值", '热风速度设定值', 0.01),
        ("筒壁1区温度实际值", '筒壁1区温度设定值', 0.5),
        ("筒壁2区温度实际值", "筒壁2区温度设定值", 0.5),
    )
    for actual_column, setpoint_column, padding in setpoint_charts:
        page.add(line_with_variated_benchmark(
            split_data_item,
            actual_column,
            setpoint_column,
            padding,
            zoom=zoom,
            range_=range_
        ))

    out_dir = './plot/' + dir_ if dir_ else './plot'
    os.makedirs(out_dir, exist_ok=True)
    page.render(out_dir + '/' + title + '.html')

def make_dir(path):
    """Create ``path`` (including missing parents); do nothing if it already exists.

    ``exist_ok=True`` replaces the former exists-then-create sequence, which
    could raise if the directory appeared between the check and the creation.
    """
    os.makedirs(path, exist_ok=True)


## Read original Data

In [4]:
# Load the raw export; the file is decoded with the GBK codec.
# NOTE(review): the captured output below looks like the tail of a pandas warning
# (possibly a mixed-dtype column) — confirm and consider passing explicit dtypes.
original = pd.read_csv('./data.csv', encoding='gbk')
# Display the available column names.
original.columns.values

  interactivity=interactivity, compiler=compiler, result=result)


array(['ID', '批次', '牌号', '时间', '生产班次', '生产班别', '设备状态', '烘前叶丝流量设定值',
       '烘前叶丝流量', '烘前叶丝流量累积量', 'SIROX蒸汽流量', '热风温度', '筒壁1区温度设定值',
       '筒壁1区温度实际值', '筒壁2区温度设定值', '筒壁2区温度实际值', '脱水量', '排潮风门开度', '罩压力',
       '热风速度设定值', '热风速度实际值', '出口温度', 'SIROX水分增加', '入口水分', '出口水分设定值',
       '出口水分', '冷凝水温度1区', '冷凝水温度2区', '滚筒转速', '蒸汽压力', '区域1预热阶段滚筒温度额定值',
       '区域2预热阶段滚筒温度额定值', '工作点脱水', '区域1滚筒温度标准工作点', '区域2滚筒温度标准工作点',
       '区域1筒壁蒸汽压力', '区域2筒壁蒸汽压力', '罩压力设定值'], dtype=object)

In [5]:
# Columns kept for the analysis: time, brand, device status, inlet/outlet
# moisture, and actual/setpoint pairs for hot-air speed, flow and both
# drum-wall temperature zones.
columns = [
    '时间', '牌号', '设备状态',
    '入口水分', '出口水分', '出口水分设定值',
    '热风速度设定值', '热风速度实际值',
    '烘前叶丝流量设定值', '烘前叶丝流量',
    '筒壁1区温度设定值', '筒壁1区温度实际值',
    '筒壁2区温度实际值', '筒壁2区温度设定值',
]

# Take an explicit copy: adding a column to a plain column selection of
# `original` triggers pandas' SettingWithCopyWarning.
data = original[columns].copy()
# Deviation of the outlet moisture from its setpoint.
data['出口水分差值'] = data['出口水分'] - data['出口水分设定值']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()


# 对总的数据进行预处理

In [6]:
# Remove rows that contain any missing value.
data = data.dropna()

# Some rows hold a timestamp-like string (starting with '2019') in the brand
# column instead of a brand code; discard those rows.
brand_is_timestamp = np.array(
    [isinstance(brand, str) and brand.startswith('2019') for brand in data['牌号']],
    dtype=bool,
)
data = data[~brand_is_timestamp]

# A pre-drying flow of exactly 0 means the device was not running.
data = data[data['烘前叶丝流量'] != 0]

# Timestamps are still strings at this point (they are parsed later, per
# sample, with format_time); order the rows chronologically.
data = data.sort_values('时间')


In [597]:
# Row count per device status.
for status in data['设备状态'].unique():
    print(status, ' ', (data['设备状态'] == status).sum())

# Row count per brand.
for number in data['牌号'].unique():
    print(number, ' ', (data['牌号'] == number).sum())

# 取得Sample Data + 预处理

In [7]:
# Keep only rows recorded while the device status is '生产' (production).
data = data[data['设备状态'] == '生产']
sample_list = []
sample_list_name = []
# Maximum relative deviation of the flow from its mean setpoint.
FLOW_BIAS_DROP_RATE = 0.003

for number in data['牌号'].unique():
    brand_rows = data[data['牌号'] == number]
    flow_setpoint_mean = brand_rows['烘前叶丝流量设定值'].mean()
    flow_deviation = (brand_rows['烘前叶丝流量'] - flow_setpoint_mean).abs()
    # Drop rows whose flow strays too far from the brand's mean setpoint,
    # then renumber the remaining rows 0..n-1.
    sample_ = brand_rows[flow_deviation < FLOW_BIAS_DROP_RATE * flow_setpoint_mean]
    sample_ = sample_.reset_index(drop=True)
    print(number, sample_.shape)
    # Brands with 1000 rows or fewer are not kept.
    if len(sample_) > 1000:
        sample_list.append(sample_)
        sample_list_name.append(number)
    

TG####A (320338, 15)
HSX### (133907, 15)
Txy### (101676, 15)
TH####A (251547, 15)
KPH### (4, 15)
HsxY## (187457, 15)
DQMr## (16128, 15)
ThQD##A (1966, 15)


In [21]:
# Summary statistics of the main process variables for the retained sample at index 1.
sample_list[1][['入口水分', '出口水分', '热风速度实际值', '烘前叶丝流量', '筒壁1区温度实际值', '筒壁2区温度实际值']].describe()

Unnamed: 0,入口水分,出口水分,热风速度实际值,烘前叶丝流量,筒壁1区温度实际值,筒壁2区温度实际值
count,133907.0,133907.0,133907.0,133907.0,133907.0,133907.0
mean,21.724467,13.803235,0.38808,4799.990748,136.546417,136.550413
std,0.3496,0.0783,0.033596,4.100291,2.349876,2.350484
min,20.17289,13.29,0.29622,4785.619,130.4426,130.6654
25%,21.468455,13.75,0.360193,4797.401,134.8074,134.8128
50%,21.62588,13.8,0.394065,4799.989,135.7463,135.7502
75%,22.01655,13.85,0.40971,4802.579,138.8779,138.88475
max,22.7035,14.16,0.467372,4814.381,143.1458,143.224


In [216]:
# Last 20 rows of the retained sample at index 1.
sample_list[1].tail(20)

Unnamed: 0,时间,牌号,设备状态,入口水分,出口水分,出口水分设定值,热风速度设定值,热风速度实际值,烘前叶丝流量设定值,烘前叶丝流量,筒壁1区温度设定值,筒壁1区温度实际值,筒壁2区温度实际值,筒壁2区温度设定值,出口水分差值
133887,2019-09-05 03:36:52,HSX###,生产,21.33688,13.69,13.8,0.44,0.43835,4800,4802.782,134.5552,134.0763,134.0882,134.5557,-0.11
133888,2019-09-05 03:36:54,HSX###,生产,21.3544,13.72,13.8,0.44,0.437822,4800,4805.456,134.5843,134.0629,134.0845,134.6058,-0.08
133889,2019-09-05 03:36:56,HSX###,生产,21.35762,13.7,13.8,0.44,0.438635,4800,4806.367,134.6292,134.0548,134.082,134.6673,-0.1
133890,2019-09-05 03:36:58,HSX###,生产,21.35528,13.66,13.8,0.44,0.439288,4800,4807.084,134.6719,134.0463,134.0841,134.7219,-0.14
133891,2019-09-05 03:37:00,HSX###,生产,21.35578,13.64,13.8,0.44,0.439629,4800,4809.135,134.6709,134.0417,134.0886,134.7208,-0.16
133892,2019-09-05 03:37:02,HSX###,生产,21.3593,13.65,13.8,0.44,0.439927,4800,4808.049,134.6359,134.0424,134.0965,134.6937,-0.15
133893,2019-09-05 03:37:04,HSX###,生产,21.37075,13.65,13.8,0.44,0.440603,4800,4809.277,134.6043,134.0411,134.1073,134.6688,-0.15
133894,2019-09-05 03:37:06,HSX###,生产,21.38248,13.6,13.8,0.44,0.440897,4800,4806.968,134.5871,134.0405,134.1179,134.6604,-0.2
133895,2019-09-05 03:37:08,HSX###,生产,21.37599,13.64,13.8,0.44,0.440865,4800,4806.898,134.5182,134.0411,134.1295,134.6032,-0.16
133896,2019-09-05 03:37:10,HSX###,生产,21.37023,13.61,13.8,0.44,0.439706,4800,4804.588,134.4811,134.0381,134.1433,134.5818,-0.19


# 按时间进行分割

In [8]:
def format_time(time_str):
    """Parse a timestamp string into a ``datetime``.

    Accepts ``'YYYY-MM-DD HH:MM:SS'`` and, as a fallback, the date-only form
    ``'YYYY-MM-DD'``. A ``ValueError`` is raised when neither format matches.
    """
    full_format = '%Y-%m-%d %H:%M:%S'
    date_only_format = '%Y-%m-%d'
    try:
        parsed = datetime.strptime(time_str, full_format)
    except ValueError:
        # No time-of-day part: fall back to the date-only layout.
        parsed = datetime.strptime(time_str, date_only_format)
    return parsed

# Replace the string timestamps of every retained sample with parsed datetimes.
# Each `sample` is the very DataFrame stored in sample_list, so the column is
# updated in place.
for sample in sample_list:
    sample['时间'] = sample['时间'].map(format_time)


In [11]:
# Start a new segment wherever two consecutive rows are more than
# SPLIT_INTERVAL seconds apart.
SPLIT_INTERVAL = 300
split_data_per_number = []

# NOTE: TIME_LAG_2 (minimum segment length, in rows) is defined in the
# configuration cell further down; that cell must have been run first.
for sample in sample_list:
    # total_seconds() is used instead of `.seconds`: the latter only holds the
    # sub-day remainder of a Timedelta, so a gap of a day or more could look
    # shorter than SPLIT_INTERVAL and be missed.
    gap_seconds = sample['时间'].diff().iloc[1:].dt.total_seconds()

    # Samples carry a fresh 0..n-1 index (reset_index when they were built),
    # so the index labels of the gaps are also row positions.
    gap_positions = list(gap_seconds[gap_seconds > SPLIT_INTERVAL].index)
    boundaries = [0] + gap_positions + [len(sample)]

    # Keep only segments longer than TIME_LAG_2 rows.
    split_data = [
        sample.iloc[start:stop]
        for start, stop in zip(boundaries[:-1], boundaries[1:])
        if stop - start > TIME_LAG_2
    ]
    split_data_per_number.append(split_data)


# 画图

In [182]:
# Render the charts for segment 54 of the first brand to ./plot/54.html.
draw_split_data(split_data_per_number[0][54], '54')

# 抽取训练数据
选取出口水分在STABLE_WINDOWS_SIZE内都是稳定的一段时间，然后向前找到 T-TIME_LAG_1_START 到 T-TIME_LAG_1_END 时间内的所有特征，来预测 T-TIME_LAG_1_END 时刻需要调整的参数值

Label即 T-TIME_LAG_1_END 到 T 时刻的各种参数的差值

In [428]:
# Columns whose window statistics form the model input (see calc_feature).
feature_column = ['出口水分差值', '热风速度实际值', '筒壁1区温度实际值', '筒壁2区温度实际值']
# Setpoint columns whose change is the regression target (see calc_lable).
label_column = ['热风速度设定值', '筒壁1区温度设定值', '筒壁2区温度设定值']

# 5   3   50   6
# NOTE(review): presumably an earlier setting of the four constants below — confirm.
# All lags and window sizes are counted in rows.
# Step by which the feature-window start is shortened when generating samples.
TIME_LAG_STEP = 5
# The feature window ends this many rows before the anchor row.
TIME_LAG_1_END = 3
# The (longest) feature window starts this many rows before the anchor row.
TIME_LAG_1_START = 25
# Number of consecutive rows the outlet moisture must stay within tolerance.
STABLE_WINDOWS_SIZE = 5

# Extra offset applied to the inlet-moisture window; also the minimum segment length.
TIME_LAG_2 = 100
# Relative tolerance of the outlet moisture around its setpoint.
BIAS_ERROR_RATIO = 0.002
# Only referenced from commented-out code in the visible cells.
LABLE_WINDOWS_SIZE = 1

In [429]:
def calc_feature(item_,
                 stable_start: int,
                 time_lag_2=TIME_LAG_2,
                 time_lag_1_start=TIME_LAG_1_START,
                 time_lag_1_end=TIME_LAG_1_END) -> np.ndarray:
    """Build the feature vector for the anchor row ``stable_start`` of a segment.

    Two positional windows are read from ``item_``:

    * inlet moisture over
      ``[stable_start - time_lag_2 - time_lag_1_start, stable_start - time_lag_2 - time_lag_1_end)``
    * the ``feature_column`` columns over
      ``[stable_start - time_lag_1_start, stable_start - time_lag_1_end)``

    Layout of the result: integral, std, skew and kurtosis of the inlet
    moisture; then integral, mean, std, skew and kurtosis of every feature
    column (each statistic as one group); finally the window length
    ``time_lag_1_start - time_lag_1_end``.

    Args:
        item_: DataFrame segment, indexed positionally.
        stable_start: anchor row position inside ``item_``.
        time_lag_2: additional look-back applied to the inlet-moisture window.
        time_lag_1_start: distance from the anchor to the window start.
        time_lag_1_end: distance from the anchor to the window end.

    Returns:
        1-D array with ``4 + 5 * len(feature_column) + 1`` values.

    Raises:
        ValueError: if the inlet-moisture window would start before row 0.
            A negative start would otherwise wrap around and silently read
            rows from the end of the segment.
    """
    inlet_start = stable_start - time_lag_2 - time_lag_1_start
    if inlet_start < 0:
        raise ValueError(
            'stable_start={} is too small for time_lag_2={} and time_lag_1_start={}'.format(
                stable_start, time_lag_2, time_lag_1_start))
    inlet_end = stable_start - time_lag_2 - time_lag_1_end

    input_humidity = item_['入口水分'].iloc[inlet_start: inlet_end]
    feature_slice = item_[feature_column].iloc[stable_start - time_lag_1_start: stable_start - time_lag_1_end]

    inlet_stats = [
        calc_integral(input_humidity.values),
        input_humidity.std(),
        input_humidity.skew(),
        input_humidity.kurtosis(),
    ]
    return np.concatenate([
        inlet_stats,
        calc_integral(feature_slice.values),
        feature_slice.mean().values,
        feature_slice.std().values,
        feature_slice.skew().values,
        feature_slice.kurtosis().values,
        [time_lag_1_start - time_lag_1_end]
    ])
    

def calc_integral(data):
    """Trapezoidal sum of ``data`` with unit spacing.

    Computed as the plain sum minus half of the first and last elements.
    Sequences with fewer than two elements yield ``0``. For a 2-D array the
    sum runs over rows, giving one value per column.
    """
    count = len(data)
    if count < 2:
        return 0
    half_of_ends = (data[0] + data[count - 1]) / 2
    return sum(data) - half_of_ends

def calc_lable(item_, end: int) -> []:
    """Compute the regression target for the anchor row ``end``.

    The target is, per column of ``label_column``, the mean setpoint over the
    rows ``[end - TIME_LAG_1_END, end)`` minus the setpoint at row
    ``end - TIME_LAG_1_END``.
    """
    window_start = end - TIME_LAG_1_END
    setpoints = item_[label_column]

    window_mean = np.mean(setpoints.iloc[window_start: end].values, axis=0)
    setpoint_at_start = setpoints.iloc[window_start]
    return window_mean - setpoint_at_start

In [430]:
def generate_data(split_data, split_data_index):
    """Generate training samples from all segments of one brand.

    An anchor row is accepted in two situations:

    1. The outlet moisture stays within ``BIAS_ERROR_RATIO`` of the setpoint
       for ``STABLE_WINDOWS_SIZE`` consecutive rows starting at the anchor.
    2. The hot-air speed setpoint changes at a row and the outlet moisture is
       within tolerance ``TIME_LAG_1_END`` rows later; the anchor is that
       later row.

    For every accepted anchor one sample is emitted per feature-window start in
    ``range(TIME_LAG_1_START, TIME_LAG_1_END + 15, -TIME_LAG_STEP)``.

    Args:
        split_data: list of DataFrame segments of one brand.
        split_data_index: index of the brand, stored with every sample.

    Returns:
        ``(features, labels, windows)`` as arrays; every row of ``windows`` is
        ``[split_data_index, segment index, anchor row]``.
    """
    # The setpoint is taken as the mean over the first segment only.
    setting = np.mean(split_data[0]['出口水分设定值'])
    tolerance = setting * BIAS_ERROR_RATIO
    first_anchor = TIME_LAG_2 + TIME_LAG_1_START
    window_starts = range(TIME_LAG_1_START, TIME_LAG_1_END + 15, -TIME_LAG_STEP)

    sample_train_dataset = []
    sample_train_label = []
    sample_data_windows = []

    def add_samples(item, segment_index, anchor):
        # The label depends only on the anchor, so compute it once per anchor.
        label = calc_lable(item, anchor)
        for time_lag_1_start_step in window_starts:
            sample_data_windows.append([split_data_index, segment_index, anchor])
            sample_train_dataset.append(
                calc_feature(item, anchor, TIME_LAG_2, time_lag_1_start_step, TIME_LAG_1_END))
            sample_train_label.append(label)

    for index, item in enumerate(split_data):
        length = len(item)
        # Evaluate the tolerance test once per segment instead of recomputing
        # the whole-series deviation inside the loops.
        within_tolerance = (np.abs(item['出口水分'] - setting) < tolerance).values
        wind_speed = item['热风速度设定值'].values

        # Case 1: outlet moisture stays within tolerance for a whole window.
        for stable_start in range(first_anchor, length - STABLE_WINDOWS_SIZE):
            stable_end = stable_start + STABLE_WINDOWS_SIZE
            if within_tolerance[stable_start: stable_end].all():
                add_samples(item, index, stable_start)

        # Case 2: outlet moisture is within tolerance after a hot-air speed adjustment.
        for adjust_start in range(first_anchor, length - TIME_LAG_1_END):
            anchor = adjust_start + TIME_LAG_1_END
            speed_changed = np.abs(wind_speed[adjust_start - 1] - wind_speed[adjust_start]) > 0
            if speed_changed and within_tolerance[anchor]:
                add_samples(item, index, anchor)

    sample_train_dataset = np.array(sample_train_dataset)
    sample_train_label = np.array(sample_train_label)
    sample_data_windows = np.array(sample_data_windows)

    return sample_train_dataset, sample_train_label, sample_data_windows

In [431]:
def inconsistent_symbol(label_):
    """Flag rows whose three label columns do not all point in the same direction.

    A row is flagged when the product of any pair of its first three columns
    is negative. Zeros never cause a flag.

    Args:
        label_: 2-D array with at least three columns.

    Returns:
        Boolean array with one entry per row.
    """
    first, second, third = label_[:, 0], label_[:, 1], label_[:, 2]
    flagged = np.array(first * second < 0)
    flagged = flagged | np.array(first * third < 0)
    flagged = flagged | np.array(second * third < 0)
    return flagged
    
def remove_inconsistent(sample_train_dataset, sample_train_label, sample_data_windows):
    """Drop samples whose label components have conflicting signs.

    Label values with magnitude below ``1e-8`` are first set to exactly 0.
    Rows flagged by ``inconsistent_symbol`` are then removed from all three
    arrays.

    The labels are processed on a copy; earlier versions zeroed the small
    values directly in the caller's array.

    Args:
        sample_train_dataset: feature array, one row per sample.
        sample_train_label: label array, one row per sample.
        sample_data_windows: bookkeeping array, one row per sample.

    Returns:
        The filtered ``(dataset, label, windows)`` arrays.
    """
    labels = np.array(sample_train_label, copy=True)
    # Treat numerically negligible adjustments as "no change".
    labels[np.abs(labels) < 1e-8] = 0

    keep = ~inconsistent_symbol(labels)
    return sample_train_dataset[keep], labels[keep], sample_data_windows[keep]

# sample_train_dataset, sample_train_label, sample_data_windows = remove_inconsistent(sample_train_dataset, sample_train_label, sample_data_windows)

In [432]:
# Optional: reload previously generated samples instead of regenerating them.
# train_dataset_list = np.load('./npy/train_dataset_list.npy', allow_pickle=True)
# train_label_list = np.load('./npy/train_label_list.npy', allow_pickle=True)
# data_windows_list = np.load('./npy/data_windows_list.npy', allow_pickle=True)

# Optional: persist the generated samples.
# np.save('./npy/train_dataset_list_1.npy', arr=train_dataset_list)
# np.save('./npy/train_label_list_1.npy', arr=train_label_list)
# np.save('./npy/data_windows_list_1.npy', arr=data_windows_list)

# Per-brand accumulators filled by the generation loop in the next cell.
# Re-run this cell before re-running that loop, otherwise samples are appended twice.
train_dataset_list = []
train_label_list = []
data_windows_list = []


In [None]:
# Generate and filter the samples brand by brand, reporting count and duration.
for split_data_index, split_data in enumerate(split_data_per_number):
    started_at = datetime.now()

    sample_train_dataset, sample_train_label, sample_data_windows = remove_inconsistent(
        *generate_data(split_data, split_data_index)
    )

    train_dataset_list.append(sample_train_dataset)
    train_label_list.append(sample_train_label)
    data_windows_list.append(sample_data_windows)

    print(split_data_index, ':', len(sample_train_dataset))
    print('time:', datetime.now() - started_at)


In [434]:
# Stack the per-brand arrays into single arrays. One concatenate call per
# list avoids re-copying the growing result on every loop iteration.
sample_train_dataset = np.concatenate(train_dataset_list, axis=0)
sample_train_label = np.concatenate(train_label_list, axis=0)
sample_data_windows = np.concatenate(data_windows_list, axis=0)


In [435]:
from sklearn.model_selection import train_test_split

# Random 90/10 split; the bookkeeping windows are split alongside so every
# test row can be traced back to its brand / segment / anchor.
# NOTE(review): samples from neighbouring anchors share most of their feature
# window, so a random split probably puts near-duplicates on both sides —
# consider splitting by segment instead.
X_train, X_test, y_train, y_test, index_train, index_test = train_test_split(sample_train_dataset, sample_train_label, sample_data_windows, test_size=0.1, random_state=42)

print('Train set: ', len(X_train))
print('Val set: ', len(X_test))

Train set:  156564
Val set:  17396


In [508]:
pd.DataFrame(sample_train_dataset).corr()
# Feature layout (see calc_feature):
# source columns: ['入口水分', '出口水分差值', '热风速度实际值', '筒壁1区温度实际值', '筒壁2区温度实际值']
# inlet moisture statistics: [integral, std, skew, kurtosis]
# other columns' statistics: [integral, mean, std, skew, kurtosis]
# the last feature is the window length (time_lag_1_start - time_lag_1_end)

# 定义模型

In [436]:
from sklearn.linear_model import LinearRegression

# Baseline: ordinary least squares on the window features; its coefficients
# are reused further down to initialise the Keras layer.
clf = LinearRegression()
clf.fit(X_train, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [453]:
# Evaluate the linear baseline; predictions with magnitude below 1e-4 count as "no adjustment".
pred = clf.predict(X_test)
pred[np.abs(pred) < 1e-4] = 0
for metric_label, metric_fn in (('mae: ', mean_absolute_error), ('r2: ', r2_score)):
    print(metric_label, round(metric_fn(y_test, pred), 5))

mae:  0.01089
r2:  0.16322


In [438]:
# Warm-start values taken from the fitted linear regression `clf`.
# NOTE(review): presumably coef_ is (n_targets, n_features) and is transposed
# here to match the Dense kernel layout — confirm.
kernel_ = np.array(clf.coef_).T
bias_ = np.array(clf.intercept_)
def kernel_initializer(shape, dtype=None):
    # Ignores the requested shape/dtype and returns the regression weights as-is.
    return kernel_

def bias_initializer(shape, dtype=None):
    # Ignores the requested shape/dtype and returns the regression intercepts as-is.
    return bias_

def loss(y_true, y_pred):
    """Relative-error loss weighted by a penalty for predicting the wrong sign.

    Column 0 (hot-air speed) and columns 1+ (temperatures) are averaged
    separately and the two terms are added. The weight is
    ``clip(max(-y_pred * y_true, 0), eps) * 1e7``: it stays at ``eps * 1e7``
    while prediction and target agree in sign and grows with the product of
    their magnitudes when they disagree.
    """
    eps = K.epsilon()
    relative_error = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), eps, None))

    def sign_penalty(pred_part, true_part):
        return K.clip(K.maximum(-pred_part * true_part, 0), eps, None) * 1e7

    wind_term = K.mean(
        relative_error[:, 0] * sign_penalty(y_pred[:, 0], y_true[:, 0]), axis=-1)
    temperature_term = K.mean(
        relative_error[:, 1:] * sign_penalty(y_pred[:, 1:], y_true[:, 1:]), axis=-1)
    return wind_term + temperature_term


In [455]:
def compute_loss(y_true, y_pred):
    """NumPy counterpart of the Keras ``loss`` for checking values by hand.

    The relative error of each column is multiplied by the sign-mismatch
    weight, exactly as in ``loss``. The previous version added the weight
    instead, so it did not reproduce the training loss.

    Args:
        y_true: array of shape (n_samples, n_outputs); column 0 is the hot-air
            speed, the remaining columns are temperatures.
        y_pred: array of the same shape.

    Returns:
        Array with one loss value per sample.
    """
    eps = 1e-7  # same value the Keras version obtains from K.epsilon() by default
    diff = np.abs((y_true - y_pred) / np.clip(np.abs(y_true), eps, None))
    wind_coef = np.clip(np.maximum(-y_pred[:, 0] * y_true[:, 0], 0), eps, None) * 1e7
    temperature_coef = np.clip(np.maximum(-y_pred[:, 1:] * y_true[:, 1:], 0), eps, None) * 1e7
    return np.mean(diff[:, 0] * wind_coef, axis=-1) + np.mean(diff[:, 1:] * temperature_coef, axis=-1)

# Evaluate compute_loss on a single hand-written sample.
y_true = np.array([[0, 0.0046, 0.0044]])
y_pred = np.array([[0.000, 0.1678, 0.0166 ]])
compute_loss(y_true, y_pred)

array([19.12551407])

In [None]:
# Single dense layer (a linear model) initialised from the fitted linear regression.
# input_dim=25 matches calc_feature: 4 inlet statistics + 5 statistics x 4
# feature columns + 1 window length. units=3 matches the three label columns.
model = Sequential()
model.add(Dense(input_dim=25, units=3, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer))

model.compile(optimizer=keras.optimizers.Adam(), loss=loss, metrics=['mae'])
# model.summary()

# Checkpoints go to a folder named after the current timestamp.
now_ = str(datetime.now())
if not os.path.exists('./ckpt/' + now_):
        os.makedirs('./ckpt/' + now_) 

model.fit(X_train, 
        y_train, 
        batch_size=2048 * 2, 
        epochs=1100,
        verbose=1,
        validation_data=(X_test, y_test),
        callbacks = [
            # Halve the learning rate after 10 epochs without val_loss improvement.
            keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=1e-10),
            # Stop after 30 epochs without val_loss improvement.
            keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=30, verbose=0, mode='min'),
            # NOTE(review): absolute, user-specific log path — consider a relative/configurable directory.
            TensorBoard(log_dir='/home/qihang/qingdao/tensorboard/' + "{0:%Y-%m-%d %H:%M:%S/}".format(datetime.now())),
            # NOTE(review): the file name embeds the training loss while the best model is chosen by val_loss.
            keras.callbacks.ModelCheckpoint(filepath='./ckpt/' + now_ + '/model.{epoch:03d}-{loss:.4f}.h5', monitor='val_loss', save_best_only=True, verbose=0)
        ])

In [None]:
# Evaluate the trained network; predictions with magnitude below 1e-4 count as "no adjustment".
pred = model.predict(X_test)
pred[np.abs(pred) < 1e-4] = 0
for metric_label, metric_fn in (('mae: ', mean_absolute_error), ('r2: ', r2_score)):
    print(metric_label, round(metric_fn(y_test, pred), 5))

# 验证集阶段

In [385]:
# Print small floats in fixed-point rather than scientific notation.
np.set_printoptions(suppress=True)

def print_result(range_=100, dir_=None, save_result=False):
    """Show (or save) predictions next to the true labels for the first test rows.

    Reads the module-level ``index_test``, ``pred`` and ``y_test``.

    Args:
        range_: number of test rows to report; capped at the number of rows
            available in ``index_test``.
        dir_: sub-folder below ``./plot`` for the result file. Only used when
            ``save_result`` is true; falsy values write directly into ``./plot``.
        save_result: when false, every row is printed and rows whose predicted
            temperature adjustment has the opposite sign of the true one are
            marked with ``*``. When true, the rows are written to
            ``result.txt`` instead of being printed.

    To plot a reported row, pass the matching slice of
    ``split_data_per_number[brand][segment]`` to ``draw_split_data``.
    """
    row_count = min(range_, len(index_test))
    lines = []
    for i in range(row_count):
        brand_idx, segment_idx, anchor = index_test[i][0], index_test[i][1], index_test[i][2]
        rounded_pred = np.round(pred[i], 4)
        rounded_true = np.round(y_test[i], 4)

        if save_result:
            lines.append(str(brand_idx) + '-' + str(segment_idx) + ': [' + ' '.join([str(x) for x in rounded_pred]) + '], [' + ' '.join([str(x) for x in rounded_true]) + ']')
            continue

        row = '{}-{}-{}: \t {}, \t{}'.format(brand_idx, segment_idx, anchor, rounded_pred, rounded_true)
        # Mark rows where either temperature prediction points the wrong way.
        if pred[i, 1] * y_test[i, 1] < 0 or pred[i, 2] * y_test[i, 2] < 0:
            row += ' \t *'
        print(row)

    if save_result:
        out_dir = './plot/' + dir_ if dir_ else './plot'
        os.makedirs(out_dir, exist_ok=True)
        # The context manager closes (and flushes) the file even if a write fails.
        with open(out_dir + '/result.txt', 'w') as fw:
            for line in lines:
                fw.write(line)
                fw.write("\n")

# Print the first 100 validation rows; dir_ has no effect here because save_result is left at False.
print_result(dir_='val/' + str(datetime.now()))

3-115-241: 	 [0.     0.0157 0.0155], 	[0.     0.1223 0.1258]
0-104-429: 	 [0.     0.0161 0.0163], 	[0.     0.0009 0.0088]
2-13-841: 	 [0.     0.023  0.0225], 	[0.     0.0335 0.0975]
2-45-1274: 	 [ 0.     -0.0039 -0.0047], 	[ 0.     -0.0145 -0.0429]
2-36-519: 	 [ 0.     -0.0062 -0.0069], 	[0.     0.0178 0.0235] 	 *
4-63-407: 	 [ 0.      0.0012 -0.0001], 	[ 0.     -0.0856 -0.0705] 	 *
4-21-752: 	 [ 0.     -0.009  -0.0097], 	[ 0.     -0.0754 -0.023 ]
0-135-1560: 	 [ 0.     -0.015  -0.0142], 	[ 0.     -0.2308 -0.2357]
0-90-285: 	 [ 0.     -0.0422 -0.0432], 	[ 0.     -0.1256 -0.1308]
0-95-1417: 	 [0.     0.0079 0.0082], 	[ 0.     -0.0355 -0.0333] 	 *
3-78-325: 	 [0.     0.0142 0.0137], 	[0.     0.0782 0.0614]
0-107-1293: 	 [0.     0.0018 0.0009], 	[0.     0.0087 0.0332]
0-19-241: 	 [0.     0.0071 0.0062], 	[0.     0.0269 0.0411]
0-73-164: 	 [ 0.     -0.0093 -0.0097], 	[ 0.     -0.0709 -0.0433]
1-29-1823: 	 [ 0.     -0.0053 -0.005 ], 	[ 0.     -0.0027 -0.0391]
0-87-526: 	 [0.     0.0288 0.02

# 测试集测试
滑动窗口逐行向后滑动，在每个窗口位置提取特征并进行预测

In [421]:
def generate_real_test_data(item):
    """Compute features for every admissible anchor row of one segment.

    Anchors run from ``TIME_LAG_2 + TIME_LAG_1_START`` up to (excluding)
    ``len(item) - STABLE_WINDOWS_SIZE``; features use calc_feature's default lags.

    Returns:
        ``(features, anchors)`` as arrays, one entry per anchor.
    """
    anchors = list(range(TIME_LAG_2 + TIME_LAG_1_START, len(item) - STABLE_WINDOWS_SIZE, 1))
    features = [calc_feature(item, anchor) for anchor in anchors]
    return np.array(features), np.array(anchors)

# Slide over segment 5 of the first brand and predict at every anchor.
final_test_data = split_data_per_number[0][5]
final_X_test, final_X_index = generate_real_test_data(final_test_data)
pred = model.predict(final_X_test)

dir_ = 'final/' + str(datetime.now())

# Plot: one page per anchor, covering TIME_LAG_2 rows before and 20 rows after it.
for item_ in final_X_index:
    draw_split_data(
        final_test_data[item_ - TIME_LAG_2: item_ + 20],
        title=str(item_),
        zoom=False,
        range_=[TIME_LAG_2 - TIME_LAG_1_START, TIME_LAG_2 - TIME_LAG_1_END],
        dir_=dir_,
    )

# Compare each prediction with the one-row setpoint change at the end of the feature window.
setpoint_columns = ['热风速度设定值', '筒壁1区温度设定值', '筒壁2区温度设定值']
for index_, item_ in enumerate(final_X_index):
    row_pos = index_ + TIME_LAG_2 + TIME_LAG_1_START - TIME_LAG_1_END
    next_setpoints = final_test_data.iloc[row_pos + 1][setpoint_columns]
    current_setpoints = final_test_data.iloc[row_pos][setpoint_columns]
    diff = (next_setpoints - current_setpoints).values
    print('{}, {} \t {}'.format(item_, np.round(pred[index_], 4), np.round(diff.astype(np.double), 4)) )
    
    
    

145, [-0.      0.0019  0.0021] 	 [0.     0.0107 0.0198]
146, [-0.      0.0019  0.0021] 	 [0.     0.0083 0.0155]
147, [-0.      0.0024  0.0027] 	 [0.     0.0092 0.0121]
148, [-0.      0.0025  0.0027] 	 [0.     0.0049 0.0037]
149, [-0.      0.0026  0.0028] 	 [0.     0.0338 0.0322]
150, [-0.      0.0019  0.002 ] 	 [0.     0.0554 0.0554]
151, [-0.      0.0008  0.0008] 	 [0.     0.0278 0.027 ]
152, [-0.      0.0015  0.0016] 	 [0.     0.0329 0.0357]
153, [-0.      0.0012  0.0014] 	 [0.     0.0452 0.0526]
154, [-0.      0.0012  0.0015] 	 [0.     0.0289 0.0414]
155, [-0.      0.0017  0.002 ] 	 [0.     0.0262 0.0438]
156, [-0.      0.0018  0.002 ] 	 [0.     0.0419 0.0418]
157, [-0.      0.0014  0.0016] 	 [0.     0.018  0.0363]
158, [-0.      0.001   0.0013] 	 [ 0.     -0.0032  0.0129]
159, [-0.      0.0012  0.0016] 	 [0.     0.0012 0.012 ]
160, [-0.      0.0012  0.0016] 	 [ 0.     -0.0025  0.0105]
161, [-0.      0.0014  0.0019] 	 [0.     0.0167 0.0167]
162, [-0.      0.0018  0.0023] 	 [0.     0

625, [-0.     -0.0078 -0.0086] 	 [ 0.     -0.0046 -0.0032]
626, [-0.     -0.0078 -0.0086] 	 [ 0.     -0.0162 -0.0131]
627, [-0.     -0.0076 -0.0084] 	 [ 0.     -0.0216 -0.0222]
628, [-0.     -0.0067 -0.0073] 	 [0.     0.0028 0.0061]
629, [-0.     -0.0039 -0.0035] 	 [0.     0.0153 0.0186]
630, [-0.      0.0003  0.0021] 	 [0.     0.0299 0.0358]
631, [-0.      0.0001  0.0016] 	 [0.     0.009  0.0163]
632, [-0.      0.0003  0.0012] 	 [ 0.     -0.004   0.0037]
633, [-0.      0.0011  0.0015] 	 [ 0.     -0.002   0.0026]
634, [-0.      0.0019  0.002 ] 	 [ 0.     -0.0045 -0.0026]
635, [-0.      0.0025  0.0026] 	 [ 0.     -0.0176 -0.0146]
636, [-0.      0.0032  0.0032] 	 [ 0.     -0.0125 -0.0121]
637, [-0.      0.0032  0.0033] 	 [0.     0.0089 0.0081]
638, [-0.      0.0028  0.003 ] 	 [0.     0.0355 0.0329]
639, [-0.      0.0025  0.0029] 	 [0.     0.0256 0.0243]
640, [-0.      0.0022  0.0027] 	 [0.     0.0041 0.0008]
641, [-0.      0.0022  0.0028] 	 [ 0.     -0.0007 -0.0063]
642, [-0.      0.0021

1114, [-0.      0.0005  0.0012] 	 [0.     0.0404 0.0231]
1115, [-0.      0.0007  0.0014] 	 [0.     0.0235 0.0279]
1116, [-0.      0.0006  0.0013] 	 [0.     0.0193 0.0086]
1117, [-0.      0.001   0.0015] 	 [0.     0.0067 0.0198]
1118, [-0.      0.0011  0.0015] 	 [0.     0.0119 0.0024]
1119, [0.     0.0003 0.0004] 	 [ 0.     -0.0012  0.0035]
1120, [ 0.     -0.0005 -0.0007] 	 [ 0.      0.0012 -0.0002]
1121, [ 0.     -0.001  -0.0011] 	 [ 0.     -0.0117 -0.026 ]
1122, [ 0.     -0.0003 -0.0004] 	 [ 0.     -0.0378 -0.0184]
1123, [0.     0.0003 0.0003] 	 [ 0.     -0.0351 -0.0547]
1124, [ 0.     -0.0004 -0.0003] 	 [ 0.     -0.0378 -0.0182]
1125, [0.     0.0002 0.0005] 	 [ 0.     -0.0259 -0.0257]
1126, [0.     0.0007 0.0012] 	 [ 0.     -0.08   -0.0799]
1127, [ 0.     -0.0003  0.0002] 	 [ 0.     -0.0757 -0.0757]
1128, [-0.     -0.0007 -0.0001] 	 [ 0.     -0.0166 -0.0155]
1129, [-0.     -0.0003  0.0003] 	 [ 0.0001 -0.0204 -0.0183]
1130, [-0.     -0.0004  0.0001] 	 [ 0.     -0.01   -0.0093]
1131, [

1264, [0.     0.0018 0.0009] 	 [0.0003 0.0437 0.0487]
1265, [0.    0.002 0.001] 	 [0.     0.0375 0.0407]
1266, [0.     0.0016 0.0007] 	 [0.     0.0233 0.0243]
1267, [0.     0.0012 0.0003] 	 [0.0002 0.0019 0.    ]
1268, [0.     0.0011 0.0003] 	 [ 0.     -0.0128 -0.0073]
1269, [ 0.      0.0006 -0.0001] 	 [ 0.     -0.0208 -0.0288]
1270, [0.     0.0004 0.0001] 	 [ 0.0002 -0.0193 -0.0205]
1271, [ 0.     -0.0006 -0.0007] 	 [ 0.     -0.006  -0.0037]
1272, [ 0.     -0.0019 -0.0017] 	 [0.0002 0.009  0.0157]
1273, [ 0.     -0.0029 -0.0025] 	 [0.     0.0171 0.025 ]
1274, [ 0.     -0.0034 -0.0029] 	 [0.     0.0216 0.0323]
1275, [ 0.     -0.0034 -0.0029] 	 [0.0002 0.0123 0.008 ]
1276, [ 0.     -0.0033 -0.003 ] 	 [0.     0.013  0.0198]
1277, [ 0.     -0.0029 -0.0027] 	 [0.     0.0178 0.0237]
1278, [ 0.     -0.0029 -0.0029] 	 [0.0002 0.0021 0.0053]
1279, [ 0.     -0.0025 -0.0026] 	 [ 0.     -0.0108 -0.0103]
1280, [ 0.     -0.0016 -0.0019] 	 [ 0.0002 -0.0188 -0.0131]
1281, [ 0.     -0.0016 -0.0019] 	 

In [423]:
# NOTE(review): this repeats the definition from the previous test cell; keeping one copy would be enough.
def generate_real_test_data(item):
    """Compute features for every admissible anchor row of one segment.

    Anchors run from ``TIME_LAG_2 + TIME_LAG_1_START`` up to (excluding)
    ``len(item) - STABLE_WINDOWS_SIZE``; features use calc_feature's default lags.

    Returns:
        ``(features, anchors)`` as arrays, one entry per anchor.
    """
    anchors = list(range(TIME_LAG_2 + TIME_LAG_1_START, len(item) - STABLE_WINDOWS_SIZE, 1))
    features = [calc_feature(item, anchor) for anchor in anchors]
    return np.array(features), np.array(anchors)

# Slide over segment 5 of the second brand and predict at every anchor.
final_test_data = split_data_per_number[1][5]
final_X_test, final_X_index = generate_real_test_data(final_test_data)
pred = model.predict(final_X_test)

dir_ = 'final/' + str(datetime.now())

# Plot: one page per anchor, covering TIME_LAG_2 rows before and 20 rows after it.
for item_ in final_X_index:
    draw_split_data(
        final_test_data[item_ - TIME_LAG_2: item_ + 20],
        title=str(item_),
        zoom=False,
        range_=[TIME_LAG_2 - TIME_LAG_1_START, TIME_LAG_2 - TIME_LAG_1_END],
        dir_=dir_,
    )

# Compare each prediction with the one-row setpoint change at the end of the feature window.
setpoint_columns = ['热风速度设定值', '筒壁1区温度设定值', '筒壁2区温度设定值']
for index_, item_ in enumerate(final_X_index):
    row_pos = index_ + TIME_LAG_2 + TIME_LAG_1_START - TIME_LAG_1_END
    next_setpoints = final_test_data.iloc[row_pos + 1][setpoint_columns]
    current_setpoints = final_test_data.iloc[row_pos][setpoint_columns]
    diff = (next_setpoints - current_setpoints).values
    print('{}, {} \t {}'.format(item_, np.round(pred[index_], 4), np.round(diff.astype(np.double), 4)) )
    
    
    

145, [-0.     -0.008  -0.0071] 	 [ 0.     -0.1261 -0.1151]
146, [-0.     -0.0042 -0.0033] 	 [ 0.     -0.071  -0.0711]
147, [-0.     -0.005  -0.0041] 	 [0.     0.0021 0.0144]
148, [-0.     -0.0063 -0.0054] 	 [ 0.     -0.1118 -0.1003]
149, [-0.     -0.0056 -0.0048] 	 [ 0.     -0.0631 -0.0531]
150, [-0.     -0.0058 -0.0051] 	 [ 0.     -0.0509 -0.0406]
151, [-0.     -0.0055 -0.0047] 	 [0.     0.0172 0.029 ]
152, [-0.     -0.0068 -0.0061] 	 [0.     0.0443 0.0443]
153, [-0.     -0.0076 -0.0069] 	 [0.     0.0661 0.077 ]
154, [-0.     -0.008  -0.0069] 	 [ 0.     -0.018  -0.0083]
155, [-0.     -0.0007  0.0007] 	 [ 0.      0.2934 -0.0127]
156, [-0.      0.0012  0.0032] 	 [ 0.     -0.0168  0.3066]
157, [-0.      0.0018  0.0042] 	 [0.     0.0152 0.0239]
158, [-0.      0.0015  0.0043] 	 [ 0.     -0.1624 -0.1624]
159, [-0.      0.0024  0.0054] 	 [ 0.     -0.0571 -0.0486]
160, [-0.      0.0005  0.0036] 	 [0.     0.0134 0.0309]
161, [-0.      0.0033  0.0065] 	 [0.     0.0495 0.0457]
162, [-0.      0.0

577, [-0.     -0.0091 -0.0098] 	 [ 0.     -0.0339 -0.0189]
578, [-0.     -0.0087 -0.0096] 	 [ 0.     -0.0174  0.001 ]
579, [-0.     -0.0084 -0.0093] 	 [ 0.     -0.04   -0.0191]
580, [-0.     -0.0075 -0.0086] 	 [ 0.     -0.0365 -0.0159]
581, [-0.     -0.0066 -0.0077] 	 [0.     0.0003 0.0008]
582, [ 0.     -0.0065 -0.0077] 	 [ 0.     -0.0276 -0.0081]
583, [ 0.     -0.0051 -0.0065] 	 [ 0.     -0.0268  0.0077]
584, [ 0.     -0.0042 -0.0053] 	 [0.     0.001  0.0009]
585, [-0.     -0.0024 -0.0031] 	 [ 0.     -0.0251 -0.0085]
586, [-0.     -0.0013 -0.0018] 	 [ 0.     -0.022  -0.0075]
587, [-0.     -0.0006 -0.001 ] 	 [ 0.     -0.0045 -0.0057]
588, [-0.      0.0003 -0.0001] 	 [ 0.     -0.0169 -0.0034]
589, [-0.      0.0009  0.0004] 	 [ 0.     -0.0159 -0.0036]
590, [-0.      0.0012  0.0006] 	 [ 0.     -0.0254 -0.0154]
591, [-0.      0.0017  0.0012] 	 [ 0.     -0.0509 -0.0426]
592, [-0.      0.0015  0.0011] 	 [ 0.     -0.0418 -0.036 ]
593, [-0.      0.0009  0.0006] 	 [ 0.     -0.0354 -0.0412]
594

900, [-0.      0.0002 -0.0006] 	 [ 0.     -0.0146 -0.0139]
901, [-0.     -0.0002 -0.0011] 	 [ 0.     -0.0492 -0.0491]
902, [-0.     -0.0012 -0.0021] 	 [ 0.     -0.0532 -0.054 ]
903, [-0.     -0.0021 -0.003 ] 	 [ 0.     -0.0733 -0.0746]
904, [-0.     -0.0051 -0.0059] 	 [ 0.     -0.0512 -0.0515]
905, [-0.     -0.0061 -0.0067] 	 [ 0.     -0.0101 -0.0085]
906, [-0.     -0.0056 -0.0061] 	 [0.     0.0183 0.0211]
907, [-0.     -0.0065 -0.007 ] 	 [0.     0.0198 0.0198]
908, [-0.     -0.0067 -0.0073] 	 [0.     0.0204 0.0254]
909, [-0.     -0.005  -0.0059] 	 [0.     0.0112 0.0156]
910, [-0.     -0.0041 -0.0051] 	 [ 0.     -0.0005  0.0007]
911, [-0.     -0.0043 -0.0054] 	 [ 0.     -0.0336 -0.034 ]
912, [-0.     -0.0038 -0.0051] 	 [ 0.     -0.0222 -0.0231]
913, [-0.     -0.004  -0.0056] 	 [ 0.     -0.0031 -0.003 ]
914, [-0.     -0.0038 -0.0054] 	 [ 0.     -0.0342 -0.0351]
915, [-0.     -0.0029 -0.0044] 	 [ 0.     -0.0375 -0.0406]
916, [-0.     -0.0023 -0.0037] 	 [ 0.    -0.026 -0.032]
917, [-0.   

1600, [ 0.      0.0004 -0.0004] 	 [ 0.     -0.0216 -0.0305]
1601, [ 0.     -0.0003 -0.0011] 	 [ 0.     -0.002  -0.0101]
1602, [ 0.     -0.0006 -0.0014] 	 [0.     0.0408 0.0463]
1603, [ 0.     -0.0012 -0.0019] 	 [0.     0.0242 0.0003]
1604, [-0.      0.0003 -0.0003] 	 [0.     0.0022 0.0021]
1605, [-0.      0.0002 -0.0002] 	 [ 0.     -0.0144 -0.0254]
1606, [-0.      0.001   0.0007] 	 [ 0.     -0.0015 -0.0152]
1607, [-0.     0.001  0.001] 	 [ 0.     -0.0033 -0.0164]
1608, [-0.      0.0008  0.0009] 	 [ 0.     -0.0192 -0.0226]
1609, [-0.      0.0009  0.0012] 	 [ 0.     -0.0115 -0.0299]
1610, [-0.      0.0014  0.0019] 	 [ 0.      0.0107 -0.0061]
1611, [-0.      0.0015  0.0021] 	 [0.     0.0466 0.0317]
1612, [-0.      0.0008  0.0014] 	 [ 0.      0.013  -0.0011]
1613, [-0.      0.0009  0.0015] 	 [0.     0.0238 0.0162]
1614, [-0.      0.0005  0.0012] 	 [0.     0.0278 0.0127]
1615, [-0.      0.0006  0.0014] 	 [0.     0.0106 0.0002]
1616, [-0.      0.0009  0.0019] 	 [0.     0.0199 0.0198]
1617, [

1965, [-0.      0.0029  0.0039] 	 [ 0.     -0.0582 -0.0533]
1966, [-0.      0.003   0.0041] 	 [ 0.     -0.0755 -0.0756]
1967, [-0.      0.0039  0.005 ] 	 [ 0.     -0.0444 -0.0752]
1968, [-0.      0.0034  0.0045] 	 [ 0.     -0.0831 -0.0558]
1969, [-0.      0.0036  0.0049] 	 [ 0.     -0.0856 -0.0905]
1970, [-0.      0.0025  0.0037] 	 [ 0.     -0.0775 -0.0847]
1971, [-0.      0.0013  0.0023] 	 [ 0.     -0.069  -0.0812]
1972, [-0.      0.0008  0.0015] 	 [ 0.     -0.0435 -0.0614]
1973, [-0.      0.0014  0.002 ] 	 [ 0.      0.0431 -0.0028]
1974, [-0.     -0.0003  0.0002] 	 [ 0.     -0.0027  0.0191]
1975, [-0.     -0.0003  0.0002] 	 [ 0.     -0.0067 -0.0376]
1976, [-0.     -0.      0.0006] 	 [ 0.     -0.0104 -0.045 ]
1977, [-0.      0.      0.0008] 	 [ 0.      0.0027 -0.036 ]
1978, [-0.      0.0011  0.002 ] 	 [ 0.      0.0364 -0.0061]
1979, [-0.      0.0011  0.0021] 	 [ 0.      0.0127 -0.039 ]
1980, [-0.      0.0014  0.0025] 	 [0.     0.0012 0.0066]
1981, [-0.      0.0015  0.0026] 	 [ 0.     