# 水质预测模型代码节选

In [None]:
# -*- coding: utf-8 -*-
"""
@author:wu.xianshuang@zeei.com.cn
"""
import os
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor
from matplotlib import pyplot as plt
import matplotlib as mpl

# Global matplotlib defaults, applied once at import time.
mpl.rcParams.update({
    # Use the KaiTi typeface for both font families
    # (presumably so Chinese plot text renders -- confirm the font is installed).
    'font.sans-serif': ['KaiTi'],
    'font.serif': ['KaiTi'],
    # Draw negative signs with the ASCII hyphen instead of the Unicode minus.
    'axes.unicode_minus': False,
    'font.size': 8,
})

import WQUtilities as u


class MyMLPRegressor():
    def __init__(self, argv_dic):
        """读取并生成参数"""
        self.point_code = str(argv_dic.get("pointCode"))
        self.model_name = argv_dic.get("modelName")
        self.data_type = argv_dic.get("dataType")
        self.input_data_num = 3  # 前n条数据作为输入 todo
        self.data_path = argv_dic.get("dataPath")
        self.model_type = argv_dic.get("modelType")
        self.pollution_WP = argv_dic.get("pollutionWP")
        self.pollution_list = list(self.pollution_WP.keys())
        self.model_params = argv_dic.get("modelParams")
        self.time_period = argv_dic.get("timePeriod")
        self.last_train_time = pd.datetime.now().strftime("%Y-%m-%d")
        self.train_data = pd.DataFrame()
        self.train_label = pd.DataFrame()
        self.train_pred = pd.DataFrame()
        self.train_data_MAE = None
        self.train_data_RMSE = None
        self.train_data_R2 = None

        self.train_mean = None
        self.initial_max = None
        self.train_img_path = os.path.join(argv_dic.get("rootPath"), self.point_code, f"{self.model_name}_TrainImg.png")
        self.model_path = os.path.join(argv_dic.get("rootPath"), self.point_code, f"{self.model_name}.pickle")

        self.mlp = None
        self.code_to_name = {}
        for i in argv_dic.get("poltCodeRelName"):
            self.code_to_name[i["polluteCode"]] = i.get("polluteName")

    def create_mlp(self):
        """
        Create the MLPRegressor and apply the configured parameters.

        ``warm_start`` is always forced to True; the flag is written into
        ``self.model_params`` itself, so that dict is mutated in place.
        :return: None; the estimator is stored on ``self.mlp``.
        """
        estimator = MLPRegressor()
        self.mlp = estimator
        hyper_params = self.model_params
        hyper_params['warm_start'] = True
        estimator.set_params(**hyper_params)

    def fit_model(self, *args):
        """
        训练模型
        :param args:如果是继续训练模型，输入继续训练的数据pd.DataFrame ,与 生成训练结果图存储的临时路径
        :return:
        """
        # 更新训练时间
        self.last_train_time = pd.datetime.now().strftime("%Y-%m-%d")
        # 判断是新建模型，还是继续训练的模型。新模型读取csv文件，分x，y;继续训练 将输入的数据pd.DataFrame 分x，y
        if self.train_data.empty:
            raw_dataset = u.deal_traindata(self.data_path, self.time_period, self.data_type)
            train_dataset = raw_dataset[self.pollution_list]
            train_dataset.sort_index(inplace=True)
            initial_max = train_dataset.max().to_dict()
            train_dataset_normed = u.normed_data(train_dataset, initial_max, self.pollution_WP)
            X_normed, Y_normed = u.split_x_y(train_dataset_normed, self.data_type, self.input_data_num)
            img_path = self.train_img_path
            train_label = train_dataset.loc[Y_normed.index]
        else:
            add_data = args[0][self.pollution_list]
            initial_max = self.initial_max
            add_data_normed = u.normed_data(add_data, initial_max, self.pollution_WP)
            X_normed, Y_normed = u.split_x_y(add_data_normed, self.data_type, self.input_data_num)
            img_path = args[1]
            train_label = add_data.loc[Y_normed.index]

