In [2]:
from pyomo.environ import *
import pandas as pd ; import numpy as np ; import re
import warnings
warnings.filterwarnings("ignore")
def sbm(formula, dataframe, evaquery, refquery):
    """
    formula: 产出变量~投入变量，如“ Y   ~ K     L ”
    dataframe: 待评价决策单元的投入产出数据框，按照投入和产出排列
    evaquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"
    refquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"
    """
    thetax,thetay,obj = {},{}, {}        # 定义thetax,thetay,obj 用于存储计算结果，分别是slakx slacky，和obj
    indexlt = dataframe.query(evaquery).index
    indexltref = dataframe.query(refquery).index
    inputvars = formula.split('~')[1].strip(' ') 
    xcol = re.compile(' +').sub(' ',inputvars).split(' ')
    outputvars = formula.split('~')[0].strip(' ') 
    ycol = re.compile(' +').sub(' ',outputvars).split(' ')    
    data = dataframe.loc[indexlt,xcol+ycol]
    dataref = dataframe.loc[indexltref,xcol+ycol]
    xref=dataref.loc[:,xcol]
    yref=dataref.loc[:,ycol]
    for j in data.index:              # 在data的索引上循环
        x=data.loc[j,xcol]
        y=data.loc[j,ycol]
        model = ConcreteModel()
        model.I = Set(initialize = dataref.index) # 采用列表初始化技术参照决策单元个数的集合
        model.K = Set(initialize = range(len(xcol))) # 采用列表初始化投入变量个数的集合
        model.L = Set(initialize = range(len(ycol))) # 采用列表初始化产出变量个数的集合
        model.t = Var( domain =PositiveReals,  doc='CC trans')
        model.thetax = Var(model.K,bounds=(0.0, None), doc='slack x')
        model.thetay = Var(model.L,bounds=(0.0, None), doc='slack y')
        model.lamda = Var(model.I, bounds=(0.0, None), doc='intensity variables')
        def objective_rule(model):
            """Return the proper objective function"""
            return model.t-sum(model.thetax[k]/x[k] for k in model.K) / len(model.K)
        def cctrans_rule(model):
            """Return the cctrans  constraint"""
            return 1==model.t+ (sum(model.thetay[l]/y[l] for l in model.L) )/(len(model.L))
        def input_rule(model, k):
            """Return the proper input constraint"""
            return sum(model.lamda[i] * xref.loc[i,xcol[k]] for i in model.I
                    ) == x[k]*model.t-model.thetax[k]
        def output_rule(model, l):
            """Return the proper output constraint"""
            return sum(model.lamda[i] * yref.loc[i,ycol[l]] for i in model.I
                    ) == y[l] * model.t+model.thetay[l]
        model.obj = Objective(rule=objective_rule, sense=minimize, doc='objective function')
        model.cctrans = Constraint(rule= cctrans_rule , doc='cctrans')
        model.input = Constraint(model.K, rule= input_rule , doc='input constraint')
        model.output = Constraint(model.L, rule= output_rule , doc='output constraint')
        opt = SolverFactory('mosek') # 指定 mosek 作为求解器
        solution = opt.solve(model) # 调用求解器求解
        if solution.solver.termination_condition == "optimal":     # 终止条件 一般包括三种 optimal, feasible, infeasible
            thetax[j] = np.asarray(list(model.thetax[:].value)) /np.asarray(list(model.t[:].value)) # 提取决策变量thetax
            thetay[j] = np.asarray(list(model.thetay[:].value)) /np.asarray(list(model.t[:].value)) # 提取决策变量thetay
            obj[j]= value(model.obj) # 提取目标函数 
        objdf= pd.DataFrame(obj,index=["obj"]).T
        thetaxdf = pd.DataFrame(thetax,index=xcol).T
        thetaxdf.columns = thetaxdf.columns.map(lambda x : "slack "+ str(x) ) 
        thetaydf = pd.DataFrame(thetay,index=ycol).T
        thetaydf.columns = thetaydf.columns.map(lambda y : "slack "+ str(y) ) 
        thetadf=pd.concat([thetaxdf,thetaydf],axis=1)
        redata = pd.concat([objdf,thetadf],axis=1)
    return redata

In [3]:
import pandas as pd
ex4 = pd.read_stata(r"../../data/Ex4.dta")
sbmte=sbm("Y ~K L", ex4,"t==[1,2,3]","t==[1,2,3]" )
sbmte

Unnamed: 0,obj,slack K,slack L,slack Y
0,0.378488,22362.509650,3244.649581,0.0
1,0.361296,30369.722368,3184.872518,0.0
2,0.349632,39485.536601,3117.867636,0.0
3,0.590555,9597.138764,489.897017,0.0
4,0.606797,10335.156706,458.066471,0.0
...,...,...,...,...
85,0.587487,24170.128395,1766.326742,0.0
86,0.586013,31211.578107,1630.814147,0.0
87,0.417567,15204.936664,1081.654176,0.0
88,0.421375,18888.846688,1029.481861,0.0


In [4]:
from pyomo.environ import *
import pandas as pd ; import numpy as np ; import re
def sbm2( formula, dataframe, evaquery, refquery):
    """
    formula: 产出变量:非期望产出变量~投入变量，如“ Y :CO2  ~ K     L ”
    dataframe: 待评价决策单元的投入产出数据框，按照投入和产出排列
    evaquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"
    refquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"
    """
    thetax,thetay,thetab,obj = {},{}, {} ,{} # 定义thetax,thetay,thetab,obj 用于存储计算结果，分别是slakx slacky slackb，和obj
    indexlt = dataframe.query(evaquery).index
    indexltref = dataframe.query(refquery).index
    inputvars = formula.split('~')[1].strip(' ') 
    xcol = re.compile(' +').sub(' ',inputvars).split(' ')
    outputvars = formula.split('~')[0] .split(':')[0] .strip(' ') 
    ycol = re.compile(' +').sub(' ',outputvars) .split(' ')
    unoutputvars = formula.split('~')[0] .split(':')[1] .strip(' ') 
    bcol=re.compile(' +').sub(' ',unoutputvars) .split(' ')   
    data = dataframe.loc[indexlt,xcol+ycol+bcol]
    dataref = dataframe.loc[indexltref,xcol+ycol+bcol]
    xref=dataref.loc[:,xcol]
    yref=dataref.loc[:,ycol]
    bref=dataref.loc[:,bcol]
    for j in data.index:              # 在data的索引上循环
        x=data.loc[j,xcol]
        y=data.loc[j,ycol]
        b=data.loc[j,bcol]
        model = ConcreteModel()
        model.I = Set(initialize = dataref.index) # 采用列表初始化技术参照决策单元个数的集合
        model.K = Set(initialize = range(len(xcol))) # 采用列表初始化投入变量个数的集合
        model.L = Set(initialize = range(len(ycol))) # 采用列表初始化产出变量个数的集合
        model.M = Set(initialize = range(len(bcol))) # 采用列表初始化产出变量个数的集合
        model.t = Var( domain =PositiveReals,  doc='CC trans')
        model.thetax = Var(model.K,bounds=(0.0, None), doc='slack x')
        model.thetay = Var(model.L,bounds=(0.0, None), doc='slack y')
        model.thetab = Var(model.M,bounds=(0.0, None), doc='slack b')
        model.lamda = Var(model.I, bounds=(0.0, None), doc='intensity variables')
        def objective_rule(model):
            """Return the proper objective function"""
            return model.t-sum(model.thetax[k]/x[k] for k in model.K) / len(model.K)
        def cctrans_rule(model):
            """Return the cctrans  constraint"""
            return 1==model.t+ (sum(model.thetay[l]/y[l] for l in model.L
                    ) +sum(model.thetay[m]/b[m] for m in model.M))/(len(model.L)+len(model.M))        
        def input_rule(model, k):
            """Return the proper input constraint"""
            return sum(model.lamda[i] * xref.loc[i,xcol[k]] for i in model.I
                    ) == x[k]*model.t-model.thetax[k]
        def output_rule(model, l):
            """Return the proper output constraint"""
            return sum(model.lamda[i] * yref.loc[i,ycol[l]] for i in model.I
                    ) == y[l] * model.t+model.thetay[l]
        def undesirable_output_rule(model, m):
            """Return the proper undesirable output constraint"""
            return sum(model.lamda[i] * bref.loc[i,bcol[m]] for i in model.I
                    ) == b[m] * model.t-model.thetab[m]
        model.obj = Objective(rule=objective_rule, sense=minimize, doc='objective function')
        model.cctrans = Constraint(rule= cctrans_rule , doc='cctrans')
        model.input = Constraint(model.K, rule= input_rule , doc='input constraint')
        model.output = Constraint(model.L, rule= output_rule , doc='output constraint')
        model.undesirable_output = Constraint(model.M, rule= undesirable_output_rule , doc='undesirable output constraint')
        opt = SolverFactory('mosek') # 指定 mosek 作为求解器
        solution = opt.solve(model) # 调用求解器求解
        if solution.solver.termination_condition == "optimal":     # 终止条件 一般包括三种 optimal, feasible, infeasible
            thetax[j] = np.asarray(list(model.thetax[:].value)) /np.asarray(list(model.t[:].value)) # 提取决策变量thetax
            thetay[j] = np.asarray(list(model.thetay[:].value)) /np.asarray(list(model.t[:].value)) # 提取决策变量thetay
            thetab[j] = np.asarray(list(model.thetab[:].value)) /np.asarray(list(model.t[:].value)) # 提取决策变量thetab
            obj[j]= value(model.obj) # 提取目标函数 
        objdf= pd.DataFrame(obj,index=["obj"]).T
        thetaxdf = pd.DataFrame(thetax,index=xcol).T
        thetaxdf.columns = thetaxdf.columns.map(lambda x : "slack "+ str(x) ) 
        thetaydf = pd.DataFrame(thetay,index=ycol).T
        thetaydf.columns = thetaydf.columns.map(lambda y : "slack "+ str(y) ) 
        thetabdf = pd.DataFrame(thetab,index=bcol).T
        thetabdf.columns = thetabdf.columns.map(lambda b : "slack "+ str(b) ) 
        thetadf=pd.concat([thetaxdf,thetaydf],axis=1)
        thetadf=pd.concat([thetadf,thetabdf],axis=1)
        redata = pd.concat([objdf,thetadf],axis=1)
    return redata

In [5]:
import pandas as pd
ex4 = pd.read_stata(r"../../data/Ex4.dta")
sbmte=sbm2("Y :CO2  ~ K     L", ex4,"t==[1,2,3]","t==[1,2,3]" )
sbmte

Unnamed: 0,obj,slack K,slack L,slack Y,slack CO2
0,0.378488,22362.509650,3244.649581,0.0,2.181541e+08
1,0.361296,30369.722368,3184.872518,0.0,2.096511e+08
2,0.349632,39485.536601,3117.867636,0.0,1.941574e+08
3,0.710468,6377.964286,365.340353,0.0,0.000000e+00
4,0.827771,4130.092868,217.979439,0.0,0.000000e+00
...,...,...,...,...,...
85,0.587487,24170.128395,1766.326742,0.0,1.097282e+08
86,0.586013,31211.578107,1630.814147,0.0,9.232574e+07
87,0.417567,15204.936664,1081.654176,0.0,5.213541e+07
88,0.421375,18888.846688,1029.481861,0.0,4.608261e+07


In [6]:
from pyomo.environ import *
import pandas as pd ; import numpy as np ; import re
def ddf(formula, dataframe, gx=None , gy=None , gb=None , evaquery=None, refquery=None ):
    """ddf: Directional distance function
    	formula: 产出变量:非期望产出变量~投入变量，如“ Y :CO2  ~ K     L ”
        dataframe: 待评价决策单元的投入产出数据框，按照投入和产出排列
        gx (list, optional): 投入方向向量. 默认为 [-1].
        gy (list, optional): 合意产出方向向量. 默认为 [1].
        gb (list, optional): 非合意产出方向向量. 默认为[-1].
        evaquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"。默认为全部
        refquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]。默认为全部
    """
    obj = {}                # 定义obj 用于存储计算结果，是obj
    if type(evaquery)==type(None):
        indexlt = dataframe.index
    else:
        indexlt = dataframe.query(evaquery).index
    if type(refquery)==type(None):
        indexltref = dataframe. index
    else:
        indexltref = dataframe.query(refquery).index
    inputvars = formula.split('~')[1].strip(' ') 
    xcol = re.compile(' +').sub(' ',inputvars).split(' ')
    outputvars = formula.split('~')[0] .split(':')[0] .strip(' ') 
    ycol = re.compile(' +').sub(' ',outputvars) .split(' ')
    unoutputvars = formula.split('~')[0] .split(':')[1] .strip(' ') 
    bcol=re.compile(' +').sub(' ',unoutputvars) .split(' ')   
    data = dataframe.loc[indexlt,xcol+ycol+bcol]
    dataref = dataframe.loc[indexltref,xcol+ycol+bcol]
    xref=dataref.loc[:,xcol]
    yref=dataref.loc[:,ycol]
    bref=dataref.loc[:,bcol]
    if type(gx)==type(None):
        gx=[-1]*len(xcol)
    if type(gy)==type(None):
        gy=[1]*len(ycol)
    if type(gb)==type(None):
        gb=[-1]*len(bcol)
    for j in data.index:              # 在data的索引上循环
        x=data.loc[j,xcol]
        y=data.loc[j,ycol]
        b=data.loc[j,bcol]
        model = ConcreteModel()
        model.I = Set(initialize = dataref.index)    # 采用列表初始化技术参照决策单元个数的集合
        model.K = Set(initialize = range(len(xcol))) # 采用列表初始化投入变量个数的集合
        model.L = Set(initialize = range(len(ycol))) # 采用列表初始化产出变量个数的集合
        model.M = Set(initialize = range(len(bcol))) # 采用列表初始化产出变量个数的集合
        model.theta = Var(bounds=(None, None), within=Reals,doc='directional distance')
        model.lamda = Var(model.I , bounds=(0.0, None),within=Reals, doc='intensity variables')
        def objective_rule(model):
            """Return the proper objective function"""
            return model.theta *1  
        def input_rule(model, k):
            """Return the proper input constraint"""
            return sum(model.lamda[i] * xref.loc[i,xcol[k]] for i in model.I
                    ) - model.theta*gx[k]*x[k] <= x[k]
        def output_rule(model, l):
            """Return the proper output constraint"""
            return -sum(model.lamda[i] * yref.loc[i,ycol[l]] for i in model.I
                    ) + model.theta*gy[l] *y[l]<= -y[l]
        def undesirable_output_rule(model, m):
            """Return the proper undesirable output constraint"""
            return sum(model.lamda[i] * bref.loc[i,bcol[m]] for i in model.I
                    ) -model.theta*gb[m]*b[m]==  b[m]
        model.obj = Objective(rule=objective_rule, sense=maximize, doc='objective function')
        model.input = Constraint(model.K,  rule=input_rule, doc='input constraint')
        model.output = Constraint(model.L,  rule=output_rule, doc='output constraint')
        model.undesirable_output = Constraint(model.M, rule=undesirable_output_rule, doc='undesirable output constraint')
        opt = SolverFactory('mosek') # 指定 mosek 作为求解器
        solution = opt.solve(model) # 调用求解器求解
        if solution.solver.termination_condition == "optimal":     # 终止条件 一般包括三种 optimal, feasible, infeasible
            obj[j]= value(model.obj) # 提取目标函数 
        objdf= pd.DataFrame(obj,index=["te"]).T
    return objdf

In [7]:
import pandas as pd
ex4 = pd.read_stata(r"../../data/Ex4.dta")
ddfte=ddf("Y:    CO2 ~K     L  ",ex4,    )
ddfte

Unnamed: 0,te
0,0.295300
1,0.364385
2,0.373520
3,0.068456
4,0.042466
...,...
85,0.179605
86,0.173158
87,0.276945
88,0.268216


In [8]:
from pyomo.environ import *
import pandas as pd ; import numpy as np ; import re
def nddf(formula, dataframe, gx=None , gy=None, gb=None, weight =None, evaquery=None, refquery=None ):
    """nddf: Non-radial Directional distance function
    	formula: 产出变量:非期望产出变量~投入变量，如“ Y :CO2  ~ K     L ”
        dataframe: 待评价决策单元的投入产出数据框，按照投入和产出排列
        gx (list, optional): 投入方向向量. 默认为 [-1].
        gy (list, optional): 合意产出方向向量. 默认为 [1].
        gb (list, optional): 非合意产出方向向量. 默认为[-1].
        weight(list, optional): weght matrix
        evaquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"。默认为全部
        refquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]。默认为全部
    """
    thetax,thetay,thetab,obj = {},{},{},{}      # 定义thetax,thetay,thetab,obj 用于存储计算结果， 分别是betax,betay,betab,obj
    if type(evaquery)==type(None):
        indexlt = dataframe.index
    else:
        indexlt = dataframe.query(evaquery).index
    if type(refquery)==type(None):
        indexltref = dataframe. index
    else:
        indexltref = dataframe.query(refquery).index
    inputvars = formula.split('~')[1].strip(' ') 
    xcol = re.compile(' +').sub(' ',inputvars).split(' ')
    outputvars = formula.split('~')[0] .split(':')[0] .strip(' ') 
    ycol = re.compile(' +').sub(' ',outputvars) .split(' ')
    unoutputvars = formula.split('~')[0] .split(':')[1] .strip(' ') 
    bcol=re.compile(' +').sub(' ',unoutputvars) .split(' ')   
    data = dataframe.loc[indexlt,xcol+ycol+bcol]
    dataref = dataframe.loc[indexltref,xcol+ycol+bcol]
    xref=dataref.loc[:,xcol]
    yref=dataref.loc[:,ycol]
    bref=dataref.loc[:,bcol]
    if type(gx)==type(None):
        gx=[-1]*len(xcol)
    if type(gy)==type(None):
        gy=[1]*len(ycol)
    if type(gb)==type(None):
        gb=[-1]*len(bcol)
    if type(weight) == type(None):
        weight=[]
        fenmu = 1*int(gx[0]!=0) + 1*int(gy[0]!=0) + 1*int(gb[0]!=0)
        for _ in range(len(xcol)):
            weight.append(1/fenmu/len(xcol))
        for _ in range(len(ycol)):
            weight.append(1/fenmu/len(ycol))
        for _ in range(len(bcol)):
            weight.append(1/fenmu/len(bcol))  
    iweight = weight[0:len(xcol)]
    oweight = weight[len(xcol):len(xcol)+len(ycol)]
    bweight = weight[len(xcol)+len(ycol):len(xcol)+len(ycol)+len(bcol)]
    for j in data.index:              # 在data的索引上循环
        x=data.loc[j,xcol]
        y=data.loc[j,ycol]
        b=data.loc[j,bcol]
        model = ConcreteModel()
        model.I = Set(initialize = dataref.index) # 采用列表初始化技术参照决策单元个数的集合
        model.K = Set(initialize = range(len(xcol))) # 采用列表初始化投入变量个数的集合
        model.L = Set(initialize = range(len(ycol))) # 采用列表初始化产出变量个数的集合
        model.M = Set(initialize = range(len(bcol))) # 采用列表初始化产出变量个数的集合
        model.thetax = Var(model.K,bounds=(0.0, None),within=Reals, doc='scale factor x')
        model.thetay = Var(model.L,bounds=(0.0, None),within=Reals, doc='scale factor y')
        model.thetab = Var(model.M,bounds=(0.0, None),within=Reals, doc='scale factor b')
        model.lamda = Var(model.I , bounds=(0.0, None),within=Reals, doc='intensity variables')
        def objective_rule(model):
            """Return the proper objective function"""
            return -sum( iweight[k]*gx[k]* model.thetax[k] for k in model.K
                ) + sum(oweight[l]*gy[l]* model.thetay[l] for l in model.L
                ) - sum(bweight[m]*gb[m]* model.thetab[m] for m in model.M)
        def input_rule(model, k):
            """Return the proper input constraint"""
            return sum(model.lamda[i] * xref.loc[i,xcol[k]] for i in model.I
                ) - gx[k]*x[k]*model.thetax[k]<=  x[k]
        def output_rule(model, l):
            """Return the proper output constraint"""
            return -1* sum(model.lamda[i] * yref.loc[i,ycol[l]] for i in model.I
                ) + gy[l]*y[l]*model.thetay[l] <= -1*y[l]
        def undesirable_output_rule(model, m):
            """Return the proper undesirable output constraint"""
            return sum(model.lamda[i] * bref.loc[i,bcol[m]] for i in model.I
                ) - gb[m]*b[m]*model.thetab[m] == b[m]
        model.obj = Objective(rule=objective_rule, sense=maximize, doc='objective function')
        model.input = Constraint(model.K,  rule=input_rule, doc='input constraint')
        model.output = Constraint(model.L,  rule=output_rule, doc='output constraint')
        model.undesirable_output = Constraint(model.M, rule=undesirable_output_rule, doc='undesirable output constraint')
        opt = SolverFactory('mosek') # 指定 mosek 作为求解器
        solution = opt.solve(model) # 调用求解器求解
        if solution.solver.termination_condition == "optimal":     # 终止条件 一般包括三种 optimal, feasible, infeasible
            thetax[j] = np.asarray(list(model.thetax[:].value))  # 提取决策变量thetax
            thetay[j] = np.asarray(list(model.thetay[:].value))  # 提取决策变量thetay
            thetab[j] = np.asarray(list(model.thetab[:].value))   # 提取决策变量thetab
            obj[j]= value(model.obj) # 提取目标函数 
        objdf= pd.DataFrame(obj,index=["obj"]).T
        thetaxdf = pd.DataFrame(thetax,index=xcol).T
        thetaxdf.columns = thetaxdf.columns.map(lambda x : "scale "+ str(x) ) 
        thetaydf = pd.DataFrame(thetay,index=ycol).T
        thetaydf.columns = thetaydf.columns.map(lambda y : "scale "+ str(y) ) 
        thetabdf = pd.DataFrame(thetab,index=bcol).T
        thetabdf.columns = thetabdf.columns.map(lambda b : "scale "+ str(b) ) 
        thetadf=pd.concat([thetaxdf,thetaydf],axis=1)
        thetadf=pd.concat([thetadf,thetabdf],axis=1)
        redata = pd.concat([objdf,thetadf],axis=1)
    return redata

In [9]:
import pandas as pd
ex4 = pd.read_stata(r"../../data/Ex4.dta")
nddfte=nddf(" Y:    CO2   ~K     L  ",  ex4,  )
nddfte

Unnamed: 0,obj,scale K,scale L,scale Y,scale CO2
0,0.470815,0.000000,0.532420,0.938739,0.207496
1,0.476914,0.000000,0.433812,1.167459,0.046376
2,0.452563,0.000000,0.330494,1.192441,0.000000
3,0.101899,0.000000,0.103912,0.000000,0.253741
4,0.065225,0.000000,0.050296,0.000000,0.170526
...,...,...,...,...,...
85,0.230192,0.349458,0.475568,0.000000,0.278063
86,0.214955,0.391186,0.436788,0.000000,0.230879
87,0.318132,0.522366,0.642499,0.000000,0.371962
88,0.299767,0.550581,0.606670,0.000000,0.320674


In [10]:
from pyomo.environ import *
import pandas as pd ; import numpy as np ; import re
def dea8(formula, dataframe, rts, orient, evaquery=None, refquery=None):
    """
    formula: 产出变量 ~ 投入变量，如“ Y    ~ K     L ”
    data: 待评价决策单元的投入产出数据框，按照投入和产出排列
    rts: 传入表示可变规模报酬或不变规模报酬的字符串，"crs","vrs" 二选一
    orient: 传入表示产出方向或投入方向的字符串，"oo","io" 二选一
    evaquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"
    refquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"
    """
    thetalt = {}          # 定义thetalt 用于存储计算结果
    if type(evaquery)==type(None):
        indexlt = dataframe.index
    else:
        indexlt = dataframe.query(evaquery).index
    if type(refquery)==type(None):
        indexltref = dataframe. index
    else:
        indexltref = dataframe.query(refquery).index
    inputvars = formula.split('~')[1].strip(' ') 
    xcol = re.compile(' +').sub(' ',inputvars).split(' ')
    outputvars = formula.split('~')[0].strip(' ') 
    ycol = re.compile(' +').sub(' ',outputvars).split(' ')    
    data = dataframe.loc[indexlt,xcol+ycol]
    dataref = dataframe.loc[indexltref,xcol+ycol]
    xref=dataref.loc[:,xcol]
    yref=dataref.loc[:,ycol]
    for j in data.index:              # 在data的索引上循环
        x=data.loc[j,xcol]
        y=data.loc[j,ycol]
        model = ConcreteModel()
        model.I = Set(initialize = dataref.index) # 采用列表初始化技术参照决策单元个数的集合
        model.K = Set(initialize = range(len(xcol))) # 采用列表初始化投入变量个数的集合
        model.L = Set(initialize = range(len(ycol))) # 采用列表初始化产出变量个数的集合
        model.theta = Var(within=Reals,bounds=(None, None), doc='efficiency') # 定义决策变量theta
        model.lamda = Var(model.I, bounds=(0.0, None), doc='intensity variables') # 定义决策变量lamda
        def obj_rule(model):
            return model.theta 
            #return model.theta[0]*1  + sum(model.lamda[i] *0 for i in model.I)
        model.obj = Objective(rule = obj_rule, sense= (maximize if orient == "oo" else minimize),doc='objective function') # 定义目标函数
        def input_rule(model,k):
            """Return the proper input constraint"""
            if orient == "oo":
                return sum(model.lamda[i]*xref.loc[i,xcol[k]] for i in model.I) <= x[k]
            elif orient == "io":
                return sum(model.lamda[i]*xref.loc[i,xcol[k]] for i in model.I) <= model.theta*x[k]
        def output_rule(model,l):
            """Return the proper output constraint"""
            if orient == "oo":
                return sum(model.lamda[i]*yref.loc[i,ycol[l]] for i in model.I) >=model.theta*y[l]
            elif orient == "io":
                return sum(model.lamda[i]*yref.loc[i,ycol[l]] for i in model.I) >=y[l]
        def vrs_rule(model):
            """Return the various return to scale constraint"""
            return sum(model.lamda[i] for i in model.I) == 1
        model.input = Constraint(model.K, rule=input_rule, doc='input constraint') # 定义与投入变量有关的约束条件
        model.output =Constraint(model.L, rule=output_rule, doc='output constraint') # 定义与产出变量有关的约束条件
        if rts == "vrs":
            model.vrs = Constraint(rule=vrs_rule, doc='various return to scale rule') # 定义与可变规模报酬有关的约束条件
        opt = SolverFactory('mosek') # 指定 mosek 作为求解器
        solution = opt.solve(model) # 调用求解器求解
        theta = np.asarray(list(model.theta[:].value)) # 提取决策变量theta
        lamda = np.asarray(list(model.lamda[:].value)) # 提取决策变量lamda
        obj = value(model.obj) # 提取目标函数 
        #print("optimum theta: \n {} ".format(theta))
        #print("optimum lamda: \n {} ".format(lamda))
        #print("optimal objective: {}".format(obj))
        thetalt[j]=theta  
        thetadf =pd.DataFrame(thetalt,index=["theta"]).T
        thetadf["te"] = (1/thetadf["theta"] if orient == "oo" else  thetadf["theta"] )
    return thetadf

"""D^{t}(X_{t},Y_{t})""" 
D11_list = []
for t in range(1,4):
    D11 = dea8("Y~K L ", ex4, "crs", "oo", "t=={}".format(t), "t=={}".format(t))[["te"]]
    D11_list.append(D11)
D11df = pd.concat(D11_list)
D11df.rename(columns={"te":"D11"}, inplace=True)
"""D^{t+1}(X_{t+1},Y_{t+1})""" 
D22_list = []
for t in range(2,4):
    D22 = dea8("Y~K L ", ex4, "crs", "oo", "t=={}".format(t), "t=={}".format(t))[["te"]]
    D22_list.append(D22)
D22df = pd.concat(D22_list)
D22df.rename(columns={"te":"D22"}, inplace=True)
"""D^{t}(X_{t+1},Y_{t+1})""" 
D12_list = []
for t in range(2,4):
    D12 = dea8("Y~K L ", ex4, "crs", "oo", "t=={}".format(t), "t=={}".format(t-1))[["te"]]
    D12_list.append(D12)
D12df = pd.concat(D12_list)
D12df.rename(columns={"te":"D12"}, inplace=True)
"""D^{t+1}(X_{t},Y_{t})""" 
D21_list = []
for t in range(2,4):
    D21 = dea8("Y~K L ", ex4, "crs", "oo", "t=={}".format(t-1), "t=={}".format(t))[["te"]]
    D21_list.append(D21)
D21df = pd.concat(D21_list)
D21df.rename(columns={"te":"D21"}, inplace=True)

df = pd.concat([D11df, D22df, D12df, D21df], axis=1)
ex42 = pd.merge(ex4, df, left_index=True, right_index=True, how="left")
ex42["mpi"] = ex42["D12"] / ex42["D11"].shift(1) * ex42["D11"] / ex42["D21"].shift(1)

In [11]:
from pyomo.environ import *
import pandas as pd ; import numpy as np ; import re
def dea8(formula, dataframe, rts, orient, evaquery=None, refquery=None):
    """
    formula: 产出变量 ~ 投入变量，如“ Y    ~ K     L ”
    data: 待评价决策单元的投入产出数据框，按照投入和产出排列
    rts: 传入表示可变规模报酬或不变规模报酬的字符串，"crs","vrs" 二选一
    orient: 传入表示产出方向或投入方向的字符串，"oo","io" 二选一
    evaquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"
    refquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"
    """
    thetalt = {}          # 定义thetalt 用于存储计算结果
    if type(evaquery)==type(None):
        indexlt = dataframe.index
    else:
        indexlt = dataframe.query(evaquery).index
    if type(refquery)==type(None):
        indexltref = dataframe. index
    else:
        indexltref = dataframe.query(refquery).index
    inputvars = formula.split('~')[1].strip(' ') 
    xcol = re.compile(' +').sub(' ',inputvars).split(' ')
    outputvars = formula.split('~')[0].strip(' ') 
    ycol = re.compile(' +').sub(' ',outputvars).split(' ')    
    data = dataframe.loc[indexlt,xcol+ycol]
    dataref = dataframe.loc[indexltref,xcol+ycol]
    xref=dataref.loc[:,xcol]
    yref=dataref.loc[:,ycol]
    for j in data.index:              # 在data的索引上循环
        x=data.loc[j,xcol]
        y=data.loc[j,ycol]
        model = ConcreteModel()
        model.I = Set(initialize = dataref.index) # 采用列表初始化技术参照决策单元个数的集合
        model.K = Set(initialize = range(len(xcol))) # 采用列表初始化投入变量个数的集合
        model.L = Set(initialize = range(len(ycol))) # 采用列表初始化产出变量个数的集合
        model.theta = Var(within=Reals,bounds=(None, None), doc='efficiency') # 定义决策变量theta
        model.lamda = Var(model.I, bounds=(0.0, None), doc='intensity variables') # 定义决策变量lamda
        def obj_rule(model):
            return model.theta 
            #return model.theta[0]*1  + sum(model.lamda[i] *0 for i in model.I)
        model.obj = Objective(rule = obj_rule, sense= (maximize if orient == "oo" else minimize),doc='objective function') # 定义目标函数
        def input_rule(model,k):
            """Return the proper input constraint"""
            if orient == "oo":
                return sum(model.lamda[i]*xref.loc[i,xcol[k]] for i in model.I) <= x[k]
            elif orient == "io":
                return sum(model.lamda[i]*xref.loc[i,xcol[k]] for i in model.I) <= model.theta*x[k]
        def output_rule(model,l):
            """Return the proper output constraint"""
            if orient == "oo":
                return sum(model.lamda[i]*yref.loc[i,ycol[l]] for i in model.I) >=model.theta*y[l]
            elif orient == "io":
                return sum(model.lamda[i]*yref.loc[i,ycol[l]] for i in model.I) >=y[l]
        def vrs_rule(model):
            """Return the various return to scale constraint"""
            return sum(model.lamda[i] for i in model.I) == 1
        model.input = Constraint(model.K, rule=input_rule, doc='input constraint') # 定义与投入变量有关的约束条件
        model.output =Constraint(model.L, rule=output_rule, doc='output constraint') # 定义与产出变量有关的约束条件
        if rts == "vrs":
            model.vrs = Constraint(rule=vrs_rule, doc='various return to scale rule') # 定义与可变规模报酬有关的约束条件
        opt = SolverFactory('mosek') # 指定 mosek 作为求解器
        solution = opt.solve(model) # 调用求解器求解
        theta = np.asarray(list(model.theta[:].value)) # 提取决策变量theta
        lamda = np.asarray(list(model.lamda[:].value)) # 提取决策变量lamda
        obj = value(model.obj) # 提取目标函数 
        #print("optimum theta: \n {} ".format(theta))
        #print("optimum lamda: \n {} ".format(lamda))
        #print("optimal objective: {}".format(obj))
        thetalt[j]=theta  
        thetadf =pd.DataFrame(thetalt,index=["theta"]).T
        thetadf["te"] = (1/thetadf["theta"] if orient == "oo" else  thetadf["theta"] )
    return thetadf
def mpi( formula ,data, id, t):
    """
    formula: 产出变量~投入变量 ，如“Y~K L ”
    data: 待评价决策单元的投入产出数据框，按照投入和产出排列
    id: 个体变量
    t: 时间变量
    """
    tlt = pd.Series(data[t]).drop_duplicates().sort_values() 
    D11_list = []
    for tindex in tlt.index:
        print(t, tlt.iloc[tindex])
        D11 = dea8(
            formula, data, "crs", "oo",
            "{}=={}".format(t, tlt.iloc[tindex]),
            "{}=={}".format(t, tlt.iloc[tindex])
        )[["te"]]
        D11_list.append(D11)
    D11df = pd.concat(D11_list)
    D11df.rename(columns={"te": "D11"}, inplace=True)
    """D^{t}(X_{t+1},Y_{t+1})""" 
    D12_list = []
    for tindex in tlt.index[1:]:
        D12 = dea8(
            formula, data, "crs", "oo",  # 关键修复：ex4 -> data
            "{}=={}".format(t, tlt.iloc[tindex]),
            "{}=={}".format(t, tlt.iloc[tindex-1])
        )[["te"]]
        D12_list.append(D12)
    D12df = pd.concat(D12_list)
    D12df.rename(columns={"te": "D12"}, inplace=True)
    """D^{t+1}(X_{t},Y_{t})""" 
    D21_list = []
    for tindex in tlt.index[1:]:
        D21 = dea8(
            formula, data, "crs", "oo",  # 关键修复：ex4 -> data
            "{}=={}".format(t, tlt.iloc[tindex-1]),
            "{}=={}".format(t, tlt.iloc[tindex])
        )[["te"]]
        D21_list.append(D21)
    D21df = pd.concat(D21_list)
    D21df.rename(columns={"te": "D21"}, inplace=True)
    df = pd.concat([D11df,D12df],axis=1)
    df = pd.concat([df,D21df],axis=1)
    data2 = pd.merge(data,df,left_index=True,right_index=True,how="left")
    data2["mpi"] = data2["D12"] / data2["D11"].shift(1) * data2["D11"]/ data2["D21"].shift(1)
    data2.drop(columns = ["D11","D12","D21"],inplace =True)
    return data2

In [12]:
import pandas as pd
ex4 = pd.read_stata(r"../../data/Ex4.dta")
data2=mpi( "Y~K L " , ex4, "id", "t" )
data2

t 1
t 2
t 3


Unnamed: 0,id,t,K,L,E,Y,CO2,mpi
0,Anhui,1,46184.375000,4275.90,116.963884,14649.664062,368985280.0,
1,Anhui,2,56383.246094,4311.00,120.110220,15997.461914,374358944.0,0.800086
2,Anhui,3,67765.281250,4342.10,123.319663,17391.113281,373214144.0,0.818160
3,Beijing,1,24637.605469,1141.00,67.238950,9249.392578,84418936.0,
4,Beijing,2,26473.578125,1156.70,68.312260,9924.598633,81342600.0,1.101098
...,...,...,...,...,...,...,...,...
85,Zhejiang,2,69164.578125,3714.14,188.264229,27670.107422,394616160.0,0.871419
86,Zhejiang,3,79787.046875,3733.65,196.104725,29872.316406,399887424.0,0.875828
87,Chongqing,1,29107.794922,1683.51,80.493045,8549.800781,140163104.0,
88,Chongqing,2,34307.117188,1696.94,85.927340,9481.729492,143705328.0,0.885347


In [13]:
## mpi函数的部分内容，不需要运行
# D11 = dea8(formula, data, "crs", "oo","{}=={}".format(t,tlt.iloc[tindex]) ,"{}<={}<={}".format(tlt.iloc[0 if tindex-h<0 else tindex-h],t,tlt.iloc[tlt.index.max() if tindex+h>tlt.index.max() else tindex+h]) )[["te"]]
# D12 = dea8(formula, ex4, "crs", "oo","{}=={}".format(t,tlt.iloc[tindex]) ,"{}<={}<={}".format(tlt.iloc[0 if tindex-1-h<0 else tindex-1-h],t,tlt.iloc[tlt.index.max() if tindex-1+h>tlt.index.max() else tindex-1+h]) )[["te"]] 
# D21 = dea8(formula, ex4, "crs", "oo","{}=={}".format(t,tlt.iloc[tindex-1]) ,"{}<={}<={}".format(tlt.iloc[0 if tindex-h<0 else tindex-h],t,tlt.iloc[tlt.index.max() if tindex+h>tlt.index.max() else tindex+h])  ) [["te"]]

In [14]:
from pyomo.environ import *
import pandas as pd ; import numpy as np ; import re
def dea8(formula, dataframe, rts, orient, evaquery=None, refquery=None):
    """
    formula: 产出变量 ~ 投入变量，如“ Y    ~ K     L ”
    data: 待评价决策单元的投入产出数据框，按照投入和产出排列
    rts: 传入表示可变规模报酬或不变规模报酬的字符串，"crs","vrs" 二选一
    orient: 传入表示产出方向或投入方向的字符串，"oo","io" 二选一
    evaquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"
    refquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"
    """
    thetalt = {}          # 定义thetalt 用于存储计算结果
    if type(evaquery)==type(None):
        indexlt = dataframe.index
    else:
        indexlt = dataframe.query(evaquery).index
    if type(refquery)==type(None):
        indexltref = dataframe. index
    else:
        indexltref = dataframe.query(refquery).index
    inputvars = formula.split('~')[1].strip(' ') 
    xcol = re.compile(' +').sub(' ',inputvars).split(' ')
    outputvars = formula.split('~')[0].strip(' ') 
    ycol = re.compile(' +').sub(' ',outputvars).split(' ')    
    data = dataframe.loc[indexlt,xcol+ycol]
    dataref = dataframe.loc[indexltref,xcol+ycol]
    xref=dataref.loc[:,xcol]
    yref=dataref.loc[:,ycol]
    for j in data.index:              # 在data的索引上循环
        x=data.loc[j,xcol]
        y=data.loc[j,ycol]
        model = ConcreteModel()
        model.I = Set(initialize = dataref.index) # 采用列表初始化技术参照决策单元个数的集合
        model.K = Set(initialize = range(len(xcol))) # 采用列表初始化投入变量个数的集合
        model.L = Set(initialize = range(len(ycol))) # 采用列表初始化产出变量个数的集合
        model.theta = Var(within=Reals,bounds=(None, None), doc='efficiency') # 定义决策变量theta
        model.lamda = Var(model.I, bounds=(0.0, None), doc='intensity variables') # 定义决策变量lamda
        def obj_rule(model):
            return model.theta 
            #return model.theta[0]*1  + sum(model.lamda[i] *0 for i in model.I)
        model.obj = Objective(rule = obj_rule, sense= (maximize if orient == "oo" else minimize),doc='objective function') # 定义目标函数
        def input_rule(model,k):
            """Return the proper input constraint"""
            if orient == "oo":
                return sum(model.lamda[i]*xref.loc[i,xcol[k]] for i in model.I) <= x[k]
            elif orient == "io":
                return sum(model.lamda[i]*xref.loc[i,xcol[k]] for i in model.I) <= model.theta*x[k]
        def output_rule(model,l):
            """Return the proper output constraint"""
            if orient == "oo":
                return sum(model.lamda[i]*yref.loc[i,ycol[l]] for i in model.I) >=model.theta*y[l]
            elif orient == "io":
                return sum(model.lamda[i]*yref.loc[i,ycol[l]] for i in model.I) >=y[l]
        def vrs_rule(model):
            """Return the various return to scale constraint"""
            return sum(model.lamda[i] for i in model.I) == 1
        model.input = Constraint(model.K, rule=input_rule, doc='input constraint') # 定义与投入变量有关的约束条件
        model.output =Constraint(model.L, rule=output_rule, doc='output constraint') # 定义与产出变量有关的约束条件
        if rts == "vrs":
            model.vrs = Constraint(rule=vrs_rule, doc='various return to scale rule') # 定义与可变规模报酬有关的约束条件
        opt = SolverFactory('mosek') # 指定 mosek 作为求解器
        solution = opt.solve(model) # 调用求解器求解
        theta = np.asarray(list(model.theta[:].value)) # 提取决策变量theta
        lamda = np.asarray(list(model.lamda[:].value)) # 提取决策变量lamda
        obj = value(model.obj) # 提取目标函数 
        #print("optimum theta: \n {} ".format(theta))
        #print("optimum lamda: \n {} ".format(lamda))
        #print("optimal objective: {}".format(obj))
        thetalt[j]=theta  
        thetadf =pd.DataFrame(thetalt,index=["theta"]).T
        thetadf["te"] = (1/thetadf["theta"] if orient == "oo" else  thetadf["theta"] )
    return thetadf
def mpi2(formula, data, id, t, tech=None ):
    """
    formula: 投入变量~产出变量，如“ Y~K     L  ”
    data: 待评价决策单元的投入产出数据框，按照投入和产出排列
    id: 个体变量
    t: 时间变量
    tecn: 从当期生产技术（None/"com"）、时序生产技术（"seq"）、视窗生产技术中选择（"window 4"）"window"后面添加视窗的大小。
    """
    tlt = pd.Series(data[t]).drop_duplicates().sort_values() 
    """D^{t}(X_{t},Y_{t})""" 
    D11df = pd.DataFrame()
    for tindex in tlt.index:
        evaquery11="{}=={}".format(t,tlt.iloc[tindex])
        if (type(tech)==type(None)) or (tech=="com"):
            refquery11="{}=={}".format(t,tlt.iloc[tindex])
        elif tech=="seq":
            refquery11="{}<={}".format(t,tlt.iloc[tindex])
        elif "window " in tech:
            h = int(tech.split(" ")[1])
            refquery11="{}<={}<={}".format(
                    tlt.iloc[0 if tindex-h<0 else tindex-h],t,tlt.iloc[tlt.index.max() if tindex+h>tlt.index.max() else tindex+h])
        D11 = dea8(formula,data, "crs", "oo",evaquery11 ,refquery11 )[["te"]]
        D11df = D11df.append(D11 )
    D11df.rename(columns = {"te":"D11"},inplace=True)
    """D^{t}(X_{t+1},Y_{t+1})""" 
    D12df = pd.DataFrame()
    for tindex in tlt.index[1:]:
        evaquery12="{}=={}".format(t,tlt.iloc[tindex])
        if (type(tech)==type(None)) or tech=="com":
            refquery12="{}=={}".format(t,tlt.iloc[tindex-1])
        elif tech=="seq":
            refquery12="{}<={}".format(t,tlt.iloc[tindex-1])
        elif "window " in tech:
            h = int(tech.split(" ")[1])
            refquery12="{}<={}<={}".format(
                tlt.iloc[0 if tindex-1-h<0 else tindex-1-h],t,tlt.iloc[tlt.index.max() if tindex-1+h>tlt.index.max() else tindex-1+h])
        D12 = dea8(formula,ex4, "crs", "oo",evaquery12 ,refquery12 )[["te"]] 
        D12df = D12df.append(D12 )
    D12df.rename(columns = {"te":"D12"},inplace=True)
    """D^{t+1}(X_{t},Y_{t})""" 
    D21df = pd.DataFrame()
    for tindex in tlt.index[1:]:
        evaquery21="{}=={}".format(t,tlt.iloc[tindex-1])
        if (type(tech)==type(None)) or tech=="com":
            refquery21="{}=={}".format(t,tlt.iloc[tindex])
        elif tech=="seq":
            refquery21="{}<={}".format(t,tlt.iloc[tindex])
        elif "window " in tech:
            h = int(tech.split(" ")[1])
            refquery21="{}<={}<={}".format(
                    tlt.iloc[0 if tindex-h<0 else tindex-h],t,tlt.iloc[tlt.index.max() if tindex+h>tlt.index.max() else tindex+h])
        D21 = dea8(formula,ex4, "crs", "oo",evaquery21 ,refquery21  ) [["te"]]
        D21df = D21df.append(D21)
    D21df.rename(columns = {"te":"D21"},inplace=True)
    df = pd.concat([D11df,D12df],axis=1)
    df = pd.concat([df,D21df],axis=1)
    data2 = pd.merge(data,df,left_index=True,right_index=True,how="left")
    data2["mpi"] = data2["D12"] / data2["D11"].shift(1) * data2["D11"]/ data2["D21"].shift(1)
    data2.drop(columns = ["D11","D12","D21"],inplace =True)
    return data2

In [15]:
import pandas as pd
ex4 = pd.read_stata(r"../../data/Ex4.dta")
DG_list = [] 
for t in range(1, 4):
    DG = dea8(
        "Y~K L ", 
        ex4, 
        "crs", 
        "oo",
        evaquery="t=={}".format(t), 
        refquery=None               
    )[["te"]]
    DG_list.append(DG)
DGdf = pd.concat(DG_list)  
ex42 = pd.concat([ex4,DGdf],axis=1)
ex42["mpi"] = ex42["te"] / ex42["te"].shift(1)   
ex42.drop(columns = ["te"],inplace = True)
ex42

Unnamed: 0,id,t,K,L,E,Y,CO2,mpi
0,Anhui,1,46184.375000,4275.90,116.963884,14649.664062,368985280.0,
1,Anhui,2,56383.246094,4311.00,120.110220,15997.461914,374358944.0,0.894475
2,Anhui,3,67765.281250,4342.10,123.319663,17391.113281,373214144.0,0.904522
3,Beijing,1,24637.605469,1141.00,67.238950,9249.392578,84418936.0,1.462832
4,Beijing,2,26473.578125,1156.70,68.312260,9924.598633,81342600.0,0.998586
...,...,...,...,...,...,...,...,...
85,Zhejiang,2,69164.578125,3714.14,188.264229,27670.107422,394616160.0,0.933498
86,Zhejiang,3,79787.046875,3733.65,196.104725,29872.316406,399887424.0,0.935857
87,Chongqing,1,29107.794922,1683.51,80.493045,8549.800781,140163104.0,0.784531
88,Chongqing,2,34307.117188,1696.94,85.927340,9481.729492,143705328.0,0.940929


In [16]:
from pyomo.environ import *
import pandas as pd ; import numpy as np ; import re
def dea8(formula, dataframe, rts, orient, evaquery=None, refquery=None):
    """
    formula: 产出变量 ~ 投入变量，如“ Y    ~ K     L ”
    data: 待评价决策单元的投入产出数据框，按照投入和产出排列
    rts: 传入表示可变规模报酬或不变规模报酬的字符串，"crs","vrs" 二选一
    orient: 传入表示产出方向或投入方向的字符串，"oo","io" 二选一
    evaquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"
    refquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"
    """
    thetalt = {}          # 定义thetalt 用于存储计算结果
    if type(evaquery)==type(None):
        indexlt = dataframe.index
    else:
        indexlt = dataframe.query(evaquery).index
    if type(refquery)==type(None):
        indexltref = dataframe. index
    else:
        indexltref = dataframe.query(refquery).index
    inputvars = formula.split('~')[1].strip(' ') 
    xcol = re.compile(' +').sub(' ',inputvars).split(' ')
    outputvars = formula.split('~')[0].strip(' ') 
    ycol = re.compile(' +').sub(' ',outputvars).split(' ')    
    data = dataframe.loc[indexlt,xcol+ycol]
    dataref = dataframe.loc[indexltref,xcol+ycol]
    xref=dataref.loc[:,xcol]
    yref=dataref.loc[:,ycol]
    for j in data.index:              # 在data的索引上循环
        x=data.loc[j,xcol]
        y=data.loc[j,ycol]
        model = ConcreteModel()
        model.I = Set(initialize = dataref.index) # 采用列表初始化技术参照决策单元个数的集合
        model.K = Set(initialize = range(len(xcol))) # 采用列表初始化投入变量个数的集合
        model.L = Set(initialize = range(len(ycol))) # 采用列表初始化产出变量个数的集合
        model.theta = Var(within=Reals,bounds=(None, None), doc='efficiency') # 定义决策变量theta
        model.lamda = Var(model.I, bounds=(0.0, None), doc='intensity variables') # 定义决策变量lamda
        def obj_rule(model):
            return model.theta 
            #return model.theta[0]*1  + sum(model.lamda[i] *0 for i in model.I)
        model.obj = Objective(rule = obj_rule, sense= (maximize if orient == "oo" else minimize),doc='objective function') # 定义目标函数
        def input_rule(model,k):
            """Return the proper input constraint"""
            if orient == "oo":
                return sum(model.lamda[i]*xref.loc[i,xcol[k]] for i in model.I) <= x[k]
            elif orient == "io":
                return sum(model.lamda[i]*xref.loc[i,xcol[k]] for i in model.I) <= model.theta*x[k]
        def output_rule(model,l):
            """Return the proper output constraint"""
            if orient == "oo":
                return sum(model.lamda[i]*yref.loc[i,ycol[l]] for i in model.I) >=model.theta*y[l]
            elif orient == "io":
                return sum(model.lamda[i]*yref.loc[i,ycol[l]] for i in model.I) >=y[l]
        def vrs_rule(model):
            """Return the various return to scale constraint"""
            return sum(model.lamda[i] for i in model.I) == 1
        model.input = Constraint(model.K, rule=input_rule, doc='input constraint') # 定义与投入变量有关的约束条件
        model.output =Constraint(model.L, rule=output_rule, doc='output constraint') # 定义与产出变量有关的约束条件
        if rts == "vrs":
            model.vrs = Constraint(rule=vrs_rule, doc='various return to scale rule') # 定义与可变规模报酬有关的约束条件
        opt = SolverFactory('mosek') # 指定 mosek 作为求解器
        solution = opt.solve(model) # 调用求解器求解
        theta = np.asarray(list(model.theta[:].value)) # 提取决策变量theta
        lamda = np.asarray(list(model.lamda[:].value)) # 提取决策变量lamda
        obj = value(model.obj) # 提取目标函数 
        #print("optimum theta: \n {} ".format(theta))
        #print("optimum lamda: \n {} ".format(lamda))
        #print("optimal objective: {}".format(obj))
        thetalt[j]=theta  
        thetadf =pd.DataFrame(thetalt,index=["theta"]).T
        thetadf["te"] = (1/thetadf["theta"] if orient == "oo" else  thetadf["theta"] )
    return thetadf
def mpi3( formula, data, id, t, tech=None ):
    """
    formula: 投入变量~产出变量，如“ Y~K     L  ”
    data: 待评价决策单元的投入产出数据框，按照投入和产出排列
    id: 个体变量
    t: 时间变量
    tech: 从当期生产技术（None/"com"）、时序生产技术（"seq"）、视窗生产技术中选择（"window 4"）"window"后面添加视窗的大小和全局生产技术（"global"）。    """
    tlt = pd.Series(data[t]).drop_duplicates().sort_values() 
    """D^{t}(X_{t},Y_{t})"""
    D11_list = []
    for tindex in tlt.index:
        evaquery11 = "{}=={}".format(t, tlt.iloc[tindex])
        if (tech is None) or (tech == "com"):
            refquery11 = "{}=={}".format(t, tlt.iloc[tindex])
        elif tech == "seq":
            refquery11 = "{}<={}".format(t, tlt.iloc[tindex])
        elif "window " in tech:
            h = int(tech.split(" ")[1])
            refquery11 = "{}<={}<={}".format(
                tlt.iloc[0 if tindex-h < 0 else tindex-h], t,
                tlt.iloc[tlt.index.max() if tindex+h > tlt.index.max() else tindex+h]
            )
        elif tech == "global":
            refquery11 = None
        D11 = dea8(formula, data, "crs", "oo", evaquery11, refquery11)[["te"]]
        D11_list.append(D11)
    D11df = pd.concat(D11_list)
    D11df.rename(columns={"te": "D11"}, inplace=True)

    """D^{t}(X_{t+1},Y_{t+1})"""
    D12_list = []
    for tindex in tlt.index[1:]:
        evaquery12 = "{}=={}".format(t, tlt.iloc[tindex])
        if (tech is None) or (tech == "com"):
            refquery12 = "{}=={}".format(t, tlt.iloc[tindex-1])
        elif tech == "seq":
            refquery12 = "{}<={}".format(t, tlt.iloc[tindex-1])
        elif "window " in tech:
            h = int(tech.split(" ")[1])
            refquery12 = "{}<={}<={}".format(
                tlt.iloc[0 if tindex-1-h < 0 else tindex-1-h], t,
                tlt.iloc[tlt.index.max() if tindex-1+h > tlt.index.max() else tindex-1+h]
            )
        elif tech == "global":
            refquery12 = None
        D12 = dea8(formula, data, "crs", "oo", evaquery12, refquery12)[["te"]]  # 关键修复：ex4 -> data
        D12_list.append(D12)
    D12df = pd.concat(D12_list)
    D12df.rename(columns={"te": "D12"}, inplace=True)

    """D^{t+1}(X_{t},Y_{t})"""
    D21_list = []
    for tindex in tlt.index[1:]:
        evaquery21 = "{}=={}".format(t, tlt.iloc[tindex-1])
        if (tech is None) or (tech == "com"):
            refquery21 = "{}=={}".format(t, tlt.iloc[tindex])
        elif tech == "seq":
            refquery21 = "{}<={}".format(t, tlt.iloc[tindex])
        elif "window " in tech:
            h = int(tech.split(" ")[1])
            refquery21 = "{}<={}<={}".format(
                tlt.iloc[0 if tindex-h < 0 else tindex-h], t,
                tlt.iloc[tlt.index.max() if tindex+h > tlt.index.max() else tindex+h]
            )
        elif tech == "global":
            refquery21 = None
        D21 = dea8(formula, data, "crs", "oo", evaquery21, refquery21)[["te"]]  # 关键修复：ex4 -> data
        D21_list.append(D21)
    D21df = pd.concat(D21_list)
    D21df.rename(columns={"te": "D21"}, inplace=True)
    df = pd.concat([D11df,D12df],axis=1)
    df = pd.concat([df,D21df],axis=1)
    data2 = pd.merge(data,df,left_index=True,right_index=True,how="left")
    data2["mpi"] = data2["D12"] / data2["D11"].shift(1) * data2["D11"]/ data2["D21"].shift(1)
    data2.drop(columns = ["D11","D12","D21"],inplace =True)
    return data2

In [17]:
import pandas as pd
ex4 = pd.read_stata(r"../../data/Ex4.dta")
data2=mpi3( "Y~K L ", ex4, "id", "t",  tech="com")
data3=mpi3( "Y~K L ",ex4, "id", "t",  tech="seq")
data4=mpi3( "Y~K L ", ex4, "id", "t", tech="window 2")
data5=mpi3( "Y~K L ",ex4, "id", "t",   tech="global")

In [18]:
from pyomo.environ import *
import pandas as pd ; import numpy as np ; import re
def ddf(formula, dataframe, gx=None , gy=None , gb=None , evaquery=None, refquery=None ):
    """ddf: Directional distance function
    	formula: 产出变量:非期望产出变量~投入变量，如“ Y :CO2  ~ K     L ”
        dataframe: 待评价决策单元的投入产出数据框，按照投入和产出排列
        gx (list, optional): 投入方向向量. 默认为 [-1].
        gy (list, optional): 合意产出方向向量. 默认为 [1].
        gb (list, optional): 非合意产出方向向量. 默认为[-1].
        evaquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"。默认为全部
        refquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]。默认为全部
    """
    obj = {}                # 定义obj 用于存储计算结果，是obj
    if type(evaquery)==type(None):
        indexlt = dataframe.index
    else:
        indexlt = dataframe.query(evaquery).index
    if type(refquery)==type(None):
        indexltref = dataframe. index
    else:
        indexltref = dataframe.query(refquery).index
    inputvars = formula.split('~')[1].strip(' ') 
    xcol = re.compile(' +').sub(' ',inputvars).split(' ')
    outputvars = formula.split('~')[0] .split(':')[0] .strip(' ') 
    ycol = re.compile(' +').sub(' ',outputvars) .split(' ')
    unoutputvars = formula.split('~')[0] .split(':')[1] .strip(' ') 
    bcol=re.compile(' +').sub(' ',unoutputvars) .split(' ')   
    data = dataframe.loc[indexlt,xcol+ycol+bcol]
    dataref = dataframe.loc[indexltref,xcol+ycol+bcol]
    xref=dataref.loc[:,xcol]
    yref=dataref.loc[:,ycol]
    bref=dataref.loc[:,bcol]
    if type(gx)==type(None):
        gx=[-1]*len(xcol)
    if type(gy)==type(None):
        gy=[1]*len(ycol)
    if type(gb)==type(None):
        gb=[-1]*len(bcol)
    for j in data.index:              # 在data的索引上循环
        x=data.loc[j,xcol]
        y=data.loc[j,ycol]
        b=data.loc[j,bcol]
        model = ConcreteModel()
        model.I = Set(initialize = dataref.index)    # 采用列表初始化技术参照决策单元个数的集合
        model.K = Set(initialize = range(len(xcol))) # 采用列表初始化投入变量个数的集合
        model.L = Set(initialize = range(len(ycol))) # 采用列表初始化产出变量个数的集合
        model.M = Set(initialize = range(len(bcol))) # 采用列表初始化产出变量个数的集合
        model.theta = Var(bounds=(None, None), within=Reals,doc='directional distance')
        model.lamda = Var(model.I , bounds=(0.0, None),within=Reals, doc='intensity variables')
        def objective_rule(model):
            """Return the proper objective function"""
            return model.theta *1  
        def input_rule(model, k):
            """Return the proper input constraint"""
            return sum(model.lamda[i] * xref.loc[i,xcol[k]] for i in model.I
                    ) - model.theta*gx[k]*x[k] <= x[k]
        def output_rule(model, l):
            """Return the proper output constraint"""
            return -sum(model.lamda[i] * yref.loc[i,ycol[l]] for i in model.I
                    ) + model.theta*gy[l] *y[l]<= -y[l]
        def undesirable_output_rule(model, m):
            """Return the proper undesirable output constraint"""
            return sum(model.lamda[i] * bref.loc[i,bcol[m]] for i in model.I
                    ) -model.theta*gb[m]*b[m]==  b[m]
        model.obj = Objective(rule=objective_rule, sense=maximize, doc='objective function')
        model.input = Constraint(model.K,  rule=input_rule, doc='input constraint')
        model.output = Constraint(model.L,  rule=output_rule, doc='output constraint')
        model.undesirable_output = Constraint(model.M, rule=undesirable_output_rule, doc='undesirable output constraint')
        opt = SolverFactory('mosek') # 指定 mosek 作为求解器
        solution = opt.solve(model) # 调用求解器求解
        if solution.solver.termination_condition == "optimal":     # 终止条件 一般包括三种 optimal, feasible, infeasible
            obj[j]= value(model.obj) # 提取目标函数 
        objdf= pd.DataFrame(obj,index=["te"]).T
    return objdf

"""D^{t}(X_{t},Y_{t})""" 
D11_list = []
for t in range(1, 4):
    D11 = ddf("Y:CO2~K L", ex4, evaquery="t=={}".format(t), refquery="t=={}".format(t))
    D11_list.append(D11)
D11df = pd.concat(D11_list)
D11df.rename(columns={"te": "D11"}, inplace=True)

"""D^{t+1}(X_{t+1},Y_{t+1})""" 
D22_list = []
for t in range(2, 4):
    D22 = ddf("Y:CO2~K L", ex4, evaquery="t=={}".format(t), refquery="t=={}".format(t))
    D22_list.append(D22)
D22df = pd.concat(D22_list)
D22df.rename(columns={"te": "D22"}, inplace=True)

"""D^{t}(X_{t+1},Y_{t+1})""" 
D12_list = []
for t in range(2, 4):
    D12 = ddf("Y:CO2~K L", ex4, evaquery="t=={}".format(t), refquery="t=={}".format(t-1))
    D12_list.append(D12)
D12df = pd.concat(D12_list)
D12df.rename(columns={"te": "D12"}, inplace=True)

"""D^{t+1}(X_{t},Y_{t})""" 
D21_list = []
for t in range(2, 4):
    D21 = ddf("Y:CO2~K L", ex4, evaquery="t=={}".format(t-1), refquery="t=={}".format(t))
    D21_list.append(D21)
D21df = pd.concat(D21_list)
D21df.rename(columns={"te": "D21"}, inplace=True)
df = pd.concat([D11df,D22df],axis=1)
df = pd.concat([df,D12df],axis=1)
df = pd.concat([df,D21df],axis=1)
ex42 = pd.merge(ex4,df,left_index=True,right_index=True,how="left")
ex42["mpi"] = (1+ex42["D11"].shift(1))/(1+ex42["D12"])   * (1+ ex42["D21"].shift(1))/ (1+ex42["D22"])

model.name="unknown";
    - termination condition: infeasible
    - message from solver:  Problem is primal infeasible. The solution is a
      certificate of primal infeasibility.
model.name="unknown";
    - termination condition: infeasible
    - message from solver:  Problem is primal infeasible. The solution is a
      certificate of primal infeasibility.
model.name="unknown";
    - termination condition: infeasible
    - message from solver:  Problem is primal infeasible. The solution is a
      certificate of primal infeasibility.
model.name="unknown";
    - termination condition: infeasible
    - message from solver:  Problem is primal infeasible. The solution is a
      certificate of primal infeasibility.
model.name="unknown";
    - termination condition: infeasible
    - message from solver:  Problem is primal infeasible. The solution is a
      certificate of primal infeasibility.
model.name="unknown";
    - termination condition: infeasible
    - message from solver:  Probl

In [19]:
ex42

Unnamed: 0,id,t,K,L,E,Y,CO2,D11,D22,D12,D21,mpi
0,Anhui,1,46184.375000,4275.90,116.963884,14649.664062,368985280.0,0.291707,,,0.282604,
1,Anhui,2,56383.246094,4311.00,120.110220,15997.461914,374358944.0,0.358948,0.358948,0.311873,0.360064,0.929313
2,Anhui,3,67765.281250,4342.10,123.319663,17391.113281,373214144.0,0.373520,0.373520,0.349872,,0.996861
3,Beijing,1,24637.605469,1141.00,67.238950,9249.392578,84418936.0,0.000000,,,0.029002,
4,Beijing,2,26473.578125,1156.70,68.312260,9924.598633,81342600.0,0.000000,0.000000,-0.053738,0.042466,1.087439
...,...,...,...,...,...,...,...,...,...,...,...,...
85,Zhejiang,2,69164.578125,3714.14,188.264229,27670.107422,394616160.0,0.153998,0.153998,0.103445,0.179605,1.025294
86,Zhejiang,3,79787.046875,3733.65,196.104725,29872.316406,399887424.0,0.173158,0.173158,0.144337,,1.013984
87,Chongqing,1,29107.794922,1683.51,80.493045,8549.800781,140163104.0,0.206820,,,0.248471,
88,Chongqing,2,34307.117188,1696.94,85.927340,9481.729492,143705328.0,0.236192,0.236192,0.199996,0.268216,1.015676


In [None]:
from pyomo.environ import *
import pandas as pd ; import numpy as np ; import re
def ddf(formula, dataframe, gx=None , gy=None , gb=None , evaquery=None, refquery=None ):
    """ddf: Directional distance function
    	formula: 产出变量:非期望产出变量~投入变量，如“ Y :CO2  ~ K     L ”
        dataframe: 待评价决策单元的投入产出数据框，按照投入和产出排列
        gx (list, optional): 投入方向向量. 默认为 [-1].
        gy (list, optional): 合意产出方向向量. 默认为 [1].
        gb (list, optional): 非合意产出方向向量. 默认为[-1].
        evaquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]"。默认为全部
        refquery:传入数据框.query()方法中的参数，如"dmu==1","dmu==[1,2,3]。默认为全部
    """
    obj = {}                # 定义obj 用于存储计算结果，是obj
    if type(evaquery)==type(None):
        indexlt = dataframe.index
    else:
        indexlt = dataframe.query(evaquery).index
    if type(refquery)==type(None):
        indexltref = dataframe. index
    else:
        indexltref = dataframe.query(refquery).index
    inputvars = formula.split('~')[1].strip(' ') 
    xcol = re.compile(' +').sub(' ',inputvars).split(' ')
    outputvars = formula.split('~')[0] .split(':')[0] .strip(' ') 
    ycol = re.compile(' +').sub(' ',outputvars) .split(' ')
    unoutputvars = formula.split('~')[0] .split(':')[1] .strip(' ') 
    bcol=re.compile(' +').sub(' ',unoutputvars) .split(' ')   
    data = dataframe.loc[indexlt,xcol+ycol+bcol]
    dataref = dataframe.loc[indexltref,xcol+ycol+bcol]
    xref=dataref.loc[:,xcol]
    yref=dataref.loc[:,ycol]
    bref=dataref.loc[:,bcol]
    if type(gx)==type(None):
        gx=[-1]*len(xcol)
    if type(gy)==type(None):
        gy=[1]*len(ycol)
    if type(gb)==type(None):
        gb=[-1]*len(bcol)
    for j in data.index:              # 在data的索引上循环
        x=data.loc[j,xcol]
        y=data.loc[j,ycol]
        b=data.loc[j,bcol]
        model = ConcreteModel()
        model.I = Set(initialize = dataref.index)    # 采用列表初始化技术参照决策单元个数的集合
        model.K = Set(initialize = range(len(xcol))) # 采用列表初始化投入变量个数的集合
        model.L = Set(initialize = range(len(ycol))) # 采用列表初始化产出变量个数的集合
        model.M = Set(initialize = range(len(bcol))) # 采用列表初始化产出变量个数的集合
        model.theta = Var(bounds=(None, None), within=Reals,doc='directional distance')
        model.lamda = Var(model.I , bounds=(0.0, None),within=Reals, doc='intensity variables')
        def objective_rule(model):
            """Return the proper objective function"""
            return model.theta *1  
        def input_rule(model, k):
            """Return the proper input constraint"""
            return sum(model.lamda[i] * xref.loc[i,xcol[k]] for i in model.I
                    ) - model.theta*gx[k]*x[k] <= x[k]
        def output_rule(model, l):
            """Return the proper output constraint"""
            return -sum(model.lamda[i] * yref.loc[i,ycol[l]] for i in model.I
                    ) + model.theta*gy[l] *y[l]<= -y[l]
        def undesirable_output_rule(model, m):
            """Return the proper undesirable output constraint"""
            return sum(model.lamda[i] * bref.loc[i,bcol[m]] for i in model.I
                    ) -model.theta*gb[m]*b[m]==  b[m]
        model.obj = Objective(rule=objective_rule, sense=maximize, doc='objective function')
        model.input = Constraint(model.K,  rule=input_rule, doc='input constraint')
        model.output = Constraint(model.L,  rule=output_rule, doc='output constraint')
        model.undesirable_output = Constraint(model.M, rule=undesirable_output_rule, doc='undesirable output constraint')
        opt = SolverFactory('mosek') # 指定 mosek 作为求解器
        solution = opt.solve(model) # 调用求解器求解
        if solution.solver.termination_condition == "optimal":     # 终止条件 一般包括三种 optimal, feasible, infeasible
            obj[j]= value(model.obj) # 提取目标函数 
    objdf= pd.DataFrame(obj,index=["te"]).T
    return objdf


def mlpi( formula, data, id, t, tech=None ):
    """
    formula: 产出变量:非期望产出变量~投入变量，如“ Y :CO2  ~ K     L ”
    data: 待评价决策单元的投入产出数据框，按照投入和产出排列
    id: 个体变量
    t: 时间变量
    tech: 从当期生产技术（None/"com"）、时序生产技术（"seq"）、视窗生产技术中选择（"window 4"）"window"后面添加视窗的大小和全局生产技术（"global"）。    """
    tlt = pd.Series(data[t]).drop_duplicates().sort_values() 
    """D^{t}(X_{t},Y_{t})""" 
    D11df = pd.DataFrame()
    for tindex in tlt.index:
        evaquery11="{}=={}".format(t,tlt.iloc[tindex])
        if (type(tech)==type(None)) or (tech=="com"):
            refquery11="{}=={}".format(t,tlt.iloc[tindex])
        elif tech=="seq":
            refquery11="{}<={}".format(t,tlt.iloc[tindex])
        elif "window " in tech:
            h = int(tech.split(" ")[1])
            refquery11="{}<={}<={}".format(
                    tlt.iloc[0 if tindex-h<0 else tindex-h],t,tlt.iloc[tlt.index.max() if tindex+h>tlt.index.max() else tindex+h])
        elif tech =="global":
            refquery11=None
        D11 = ddf(formula, data, evaquery=evaquery11 ,refquery=refquery11 )[["te"]]
        D11df = D11df.append(D11 )
    D11df.rename(columns = {"te":"D11"},inplace=True)
    """D^{t}(X_{t+1},Y_{t+1})""" 
    D12df = pd.DataFrame()
    for tindex in tlt.index[1:]:
        evaquery12="{}=={}".format(t,tlt.iloc[tindex])
        if (type(tech)==type(None)) or tech=="com":
            refquery12="{}=={}".format(t,tlt.iloc[tindex-1])
        elif tech=="seq":
            refquery12="{}<={}".format(t,tlt.iloc[tindex-1])
        elif "window " in tech:
            h = int(tech.split(" ")[1])
            refquery12="{}<={}<={}".format(
                tlt.iloc[0 if tindex-1-h<0 else tindex-1-h],t,tlt.iloc[tlt.index.max() if tindex-1+h>tlt.index.max() else tindex-1+h])
        elif tech =="global":
            break
        D12 = ddf(formula, data, evaquery=evaquery12 ,refquery=refquery12 )[["te"]] 
        D12df = D12df.append(D12 )
    try:
        D12df.rename(columns = {"te":"D12"},inplace=True)
    except:
        pass
    """D^{t+1}(X_{t},Y_{t})""" 
    D21df = pd.DataFrame()
    for tindex in tlt.index[1:]:
        evaquery21="{}=={}".format(t,tlt.iloc[tindex-1])
        if (type(tech)==type(None)) or tech=="com":
            refquery21="{}=={}".format(t,tlt.iloc[tindex])
        elif tech=="seq":
            refquery21="{}<={}".format(t,tlt.iloc[tindex])
        elif "window " in tech:
            h = int(tech.split(" ")[1])
            refquery21="{}<={}<={}".format(
                    tlt.iloc[0 if tindex-h<0 else tindex-h],t,tlt.iloc[tlt.index.max() if tindex+h>tlt.index.max() else tindex+h])
        elif tech =="global":
            break
        D21 = ddf(formula, data, evaquery= evaquery21 ,refquery=refquery21  ) [["te"]]
        D21df = D21df.append(D21)
    try:
        D21df.rename(columns = {"te":"D21"},inplace=True)
    except:
        pass
    if tech !="global":
        df = pd.concat([D11df,D12df],axis=1)
        df = pd.concat([df,D21df],axis=1)
        data2 = pd.merge(data,df,left_index=True,right_index=True,how="left")
        data2["mlpi"] = (1+data2["D11"].shift(1))/(1+data2["D12"])   * (1+ data2["D21"].shift(1))/ (1+data2["D11"])
        data2.drop(columns = ["D11","D12","D21"],inplace =True)
    else:
        data2 = pd.merge(data,D11df,left_index=True,right_index=True,how="left")
        data2["mlpi"] = (1+data2["D11"].shift(1) )/(1+ data2["D11"]   ) 
    return data2