In [None]:
#! /usr/bin/python
# -*- coding: utf-8 -*-
# @author izhangxm
# @date 2021/10/12
# @fileName train.py
# Copyright 2017 izhangxm@gmail.com. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

In [1]:
import arviz as az
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pymc as pm
from itertools import product
%config InlineBackend.figure_format = 'retina'
az.style.use("arviz-darkgrid")

In [49]:
class MyDataset(object):
    
    def __init__(self, dataset_path):
        
        self.dataset_path = dataset_path
        
        # print(f"loading dataset from {self.dataset_path}....")
        df = pd.read_csv(dataset_path)
        # 数据初步处理
        # 计算反应速率rate，初始速率固定设置为0

        for c_i, (col_name, col_sr) in enumerate(df.items()):
            if "error" in col_name or "time" in col_name or "rate" in col_name:
                continue
            rate_col_name = f"{col_name}_rate"
            rates = []
            pre_t = None
            pre_v = None
            for th, (index, value) in zip(df['time'],col_sr.items()):
                if int(index) == 0:
                    rates.append(0.0)
                    pre_t = th
                    pre_v = value
                    continue

                delta_t = th-pre_t
                delta_value = value - pre_v
                # print(col_name, index, pre_t, th, pre_v ,value)
                rates.append(delta_value/delta_t)
                pre_t = th
                pre_v = value
            df[rate_col_name] = rates
        
        self.df = df
         # 准备输出值 Y
        self.cct_names = []
        for x in self.df.columns:
            if "time" in x or "error" in x or "rate" in x:
                continue
            self.cct_names.append(x)
        self.rates_names = [f"{x}_rate" for x in self.cct_names]
        self.error_names = [f"{x}-error" for x in self.cct_names]

        self.cct = self.df[self.cct_names].values
        self.rates = self.df[self.rates_names].values
        self.errors = self.df[self.error_names].values
        
        # print("done!")
        
    def get_rates(self):
        return self.rates
    
    def get_df(self):
        return self.df
    
    def get_errors(self):
        return self.errors
    
    def get_cct(self):
        return self.cct
    
    def get_var_col_names(self):
        return self.cct_names, self.rates_names, self.error_names
    


In [3]:
def get_target(ks, df, k_kinetics):
    def _swt_k_kinetics(k_i, sr_v):
        kk = k_kinetics[k_i]
        k = ks[k_i]
        if kk == 0:
            return kk
        elif kk ==1:
            return k * sr_v
        elif kk == 2:
            return k * sr_v ** 2
        raise Exception(f"unknown kinetics:{kk}")
    
    target = [] # 'xNH3', 'xNO3', 'xNO2', 'xNOrg', 'xN2', 'ANH3', 'ANO3', 'ANO2', 'ANOrg', 'AN2'
    for i in range(0,len(df)):
        sr = df.iloc[i]
        
        r1 = ks[0] * sr['xN2'] if k_kinetics[0] == 1 else ks[0]
        r2 = ks[1] * sr['xNH3'] if k_kinetics[1] == 1 else ks[1]
        r3 = ks[2] * sr['xNO2'] if k_kinetics[2] == 1 else ks[2]
        r4 = ks[3] * sr['xNO3'] if k_kinetics[3] == 1 else ks[3]
        r5 = ks[4] * sr['xNO2'] if k_kinetics[4] == 1 else ks[4]
        r6 = ks[5] * sr['xNO2'] * sr['xNO3'] if k_kinetics[5] == 1 else ks[5]
        r7 = ks[6] * sr['xNO3'] if k_kinetics[6] == 1 else ks[6]
        r8 = ks[7] * sr['xNO3'] if k_kinetics[7] == 1 else ks[7]
        r9 = ks[8] * sr['xNH3'] if k_kinetics[8] == 1 else ks[8]
        r10 = ks[9] * sr['xNOrg'] if k_kinetics[9] == 1 else ks[9]
        r11 = ks[10] * sr['xNOrg'] if k_kinetics[10] == 1 else ks[10]

        xNH3_rate =  2*r1 + r7 + r10 - r2 - r6 - r9
        xNO3_rate = r3 - r7 - r4 - r8 + r11
        xNO2_rate = r2 + r4 - r3 - r6 - 2*r5
        xNOrg_rate = r8 + r9 - r10 -r11
        xN2_rate = r5 + r6 - r1
        ANH3_rate = (2*r1*(sr['AN2'] - sr['ANH3']) + (sr['ANO3']-sr['ANH3'])*r7 + (sr['ANOrg']-sr['ANH3'])*r10 )/sr['xNH3']
        ANO3_rate = ( (sr['ANO2'] - sr['ANO3'])*r2 + (sr['ANOrg'] - sr['ANO3'])*r11 ) / sr['xNO3']
        ANO2_rate = ( (sr['ANH3']-sr['ANO2'] )*r2 + (sr['ANO3']-sr['ANO2'])*r4 ) / sr['xNO2']
        ANOrg_rate = ( (sr['ANO3']-sr['ANOrg'] )*r8 + (sr['ANH3']-sr['ANOrg'])*r9 ) / sr['xNOrg']
        AN2_rate = ( (sr['ANO2']-sr['AN2'] )*r5 + (sr['ANO2']*sr['ANH3'] - sr['AN2'])*r6 ) / sr['xN2']

        line_rate = [xNH3_rate, xNO3_rate,xNO2_rate, xNOrg_rate, xN2_rate, ANH3_rate,ANO3_rate, ANO2_rate,ANOrg_rate,AN2_rate]
        target.append(line_rate)
    
    target = np.array(target)
    
    return target


In [4]:
def r2_loss(pred, y):
    r2_loss = 1 - np.square(pred - y).sum() / np.square(y - np.mean(y)).sum()
    return r2_loss

def get_model(dataset, k_kinetics, k_sigma_priors = 0.01):
    # 定义参数优化模型
    mcmc_model = pm.Model()

    ## 参数个数
    params_n = 11

    # 参数K的先验分布参数
    mu_priors = 0

    ks = []
    with mcmc_model:
        for ki in range(1, params_n+1):
            p_dense = pm.HalfNormal(f"k{ki}", sigma=k_sigma_priors)
            # p_dense = pm.Normal(f"k{ki}",mu=0, sigma=sigma_priors)
            ks.append(p_dense)
    
    df = dataset.get_df()
    errors = dataset.get_errors()
    rates = dataset.get_rates()
    
    
    target= get_target(ks, df, k_kinetics)
    target = np.array(target)[1:].reshape(-1).tolist()
    sigma_Y = errors[1:].reshape(-1).tolist()
    rata_Y = rates[1:].reshape(-1).tolist()
    
    with mcmc_model:
        sigma = pm.HalfCauchy('sigma', beta=1, initval=0.1)
        y_obs = pm.Normal(f"rates", mu=target, sigma=sigma, observed=rata_Y, shape=len(rata_Y))
    
    return mcmc_model


In [5]:
def get_predict_ks(idata):
    parames_summary = az.summary(idata, round_to=10)
    ks_names = [f"k{x+1}" for x in range(11)]
    
    predict_ks = []
    for k_name in ks_names:
        k_v = parames_summary["mean"][k_name]
        predict_ks.append(k_v)
    return np.array(predict_ks)

In [6]:
def opt_model(dataset, k_kinetics, k_sigma_priors=0.01, draws=10000, tune=2000, chains=4, cores=4):

    mcmc_model = get_model(dataset, k_kinetics, k_sigma_priors=k_sigma_priors)
    idata = pm.sample(draws=draws,model=mcmc_model, chains=chains, cores=cores, tune=tune)
    
    return idata
    
def eval_model(idata, dataset):
    predict_ks = get_predict_ks(idata)
    predict = get_target(predict_ks, df, k_kinetics)
    rates_y  = dataset.get_rates()
    r2 = r2_loss(predict[1:],rates_y[1:])
    return r2


In [50]:
from scipy.optimize import leastsq
import time
def ltq_fit(dataset,k_kinetics):

    def _error_loss(ks, dataset):
        df = dataset.get_df()
        rates_y = dataset.get_rates()
        predict= get_target(ks, df, k_kinetics)

        r2 = r2_loss(predict[1:],rates_y[1:])
        res =  (rates_y[1:] - predict[1:]).reshape(-1)

        is_nagative = False
        for x in ks:
            if x<=0:
                is_nagative = True
                break

        # if is_nagative:
        #     res = res + 1000

        # print('step', ks,r2, res.shape)
        # time.sleep(1)
        return res

    ks_o = np.repeat(1,11).tolist()
    ks_res =leastsq(error_loss, ks_o, args=(dataset,))[0]
    return ks_res

dataset = MyDataset("dataset/data.csv")
k_kinetics = np.repeat(1,11).astype(np.uint8).tolist()


df = dataset.get_df()
ks_res = para

print(r2, para)


0.7767488693235725 [ 3.49616636e+00  3.55816345e-06  5.97588870e-02 -4.61434736e-07
  2.10964356e-05 -1.15459062e-06  2.98915719e-04 -7.51800489e-03
  2.38296376e-02  8.05773487e-05 -3.19028547e-04]


In [37]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import leastsq
 
def Fun(p,x):                        # 定义拟合函数形式
    a1,a2,a3 = p
    return a1*x**2+a2*x+a3


def error (p,x,y,c):                    # 拟合残差
    a1,a2,a3 = p
    res = a1*x**2+a2*x+a3 - y
    print(p, res.shape, y.shape)
    time.sleep(1)
    return res


def main():
    x = np.linspace(-10,10,100)  # 创建时间序列
    p_value = [-2,5,10] # 原始数据的参数
    noise = np.random.randn(len(x))  # 创建随机噪声
    y = Fun(p_value,x)+noise*2 # 加上噪声的序列
    p0 = [0.1,-0.01,100] # 拟合的初始参数设置
    
    c = 3
    
    para =leastsq(error, p0, args=(x,y,c)) # 进行拟合
    
    
    y_fitted = Fun (para[0],x) # 画出拟合后的曲线
 
    plt.figure
    plt.plot(x,y,'r', label = 'Original curve')
    plt.plot(x,y_fitted,'-b', label ='Fitted curve')
    plt.legend()
    plt.show()
    print (para[0])
main()

[ 1.e-01 -1.e-02  1.e+02] (100,) (100,)
[ 1.e-01 -1.e-02  1.e+02] (100,) (100,)
[ 1.e-01 -1.e-02  1.e+02] (100,) (100,)
[ 1.00000001e-01 -1.00000000e-02  1.00000000e+02] (100,) (100,)
[ 1.00000000e-01 -9.99999985e-03  1.00000000e+02] (100,) (100,)


KeyboardInterrupt: 

In [21]:
np.array([1,2,3]) ** 2

array([1, 4, 9])