In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

# Read tables
Read caselist from file "study.caselist" and the combination of results and parameters from file "collect.csv".

In [2]:
# Directory holding the study's case list and collected results.
# NOTE(review): this is a machine-specific absolute path; adjust it when
# running the notebook elsewhere.
study_dir = 'C:/Users/olawid/PycharmProjects/psm/doc/demo/study_box_fullfact'

# Case list: whitespace-separated table, first column used as the row index.
# sep=r'\s+' is the documented equivalent of the deprecated
# delim_whitespace=True.
caselist = pd.read_csv(study_dir + '/study.caselist',
                       sep=r'\s+', index_col=0)

# Collected data: semicolon-separated table, first column used as the row index.
collect = pd.read_csv(study_dir + '/collect.csv',
                      sep=';', index_col=0)

In [3]:
# Show the first five rows of the case list.
caselist.head(5)

Unnamed: 0_level_0,density,height,width,length
CASENAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1049.95,1.6998,4.999,13.998
2,1049.95,1.6998,4.999,12.0
3,1049.95,1.6998,4.999,10.002
4,1049.95,1.6998,4.0,13.998
5,1049.95,1.6998,4.0,12.0


In [4]:
# Show the first five rows of the collected data.
collect.head(5)

Unnamed: 0_level_0,base_area,mass,volume,density,height,width,length
CASENAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,69.976002,124886.521349,118.945208,1049.95,1.6998,4.999,13.998
2,59.988,107060.88414,101.967602,1049.95,1.6998,4.999,12.0
3,49.999998,89235.246931,84.989997,1049.95,1.6998,4.999,10.002
4,55.992,99929.20292,95.175202,1049.95,1.6998,4.0,13.998
5,48.0,85665.84048,81.5904,1049.95,1.6998,4.0,12.0


A table of results can be created from the collect data, by excluding from it the parameter columns found in the caselist.

In [5]:
# Keep only the columns of `collect` that are not columns of `caselist`.
result_columns = [name for name in collect.columns
                  if name not in caselist.columns]
results = collect[result_columns]

In [6]:
# Show the first five rows of the results table.
results.head(5)

Unnamed: 0_level_0,base_area,mass,volume
CASENAME,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,69.976002,124886.521349,118.945208
2,59.988,107060.88414,101.967602
3,49.999998,89235.246931,84.989997
4,55.992,99929.20292,95.175202
5,48.0,85665.84048,81.5904


# Analysis of variance
We can compute some basic statistics for both parameters and results.

## Parameters

In [7]:
# Summary statistics per column of the case list: pick the four statistics
# of interest from describe(), then transpose so each column becomes a row.
caselist.describe().loc[['mean', 'std', 'max', 'min']].T

Unnamed: 0,mean,std,max,min
density,1000.0,50.658546,1049.95,950.05
height,1.5,0.202634,1.6998,1.3002
width,4.0,0.827251,4.999,3.001
length,12.0,1.654501,13.998,10.002


## Results

In [8]:
# Summary statistics per column of the results table: pick the four statistics
# of interest from describe(), then transpose so each column becomes a row.
results.describe().loc[['mean', 'std', 'max', 'min']].T

Unnamed: 0,mean,std,max,min
base_area,48.0,12.006862,69.976002,30.016002
mass,72000.0,20954.538494,124886.521349,37077.416851
volume,72.0,20.608963,118.945208,39.026806


# Analysis
## Normalization of data

In [9]:
def _zscore(frame):
    """Return `frame` with each column centred on its mean and divided by its standard deviation."""
    return (frame - frame.mean()) / frame.std()


# Normalized parameters and normalized results.
spar = _zscore(caselist)
sres = _zscore(results)

## Ordinary Linear Regression
We select one of the output variables by index.

In [10]:
# Position (0-based) of the result column used as the regression target.
iout = 0
# Pick that column of the normalized results by position.
y = sres.iloc[:, iout]

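Fit an ordinary least squares model of the selected output on the normalized parameters, with a constant (intercept) term added: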

In [11]:
# Design matrix: the normalized parameters plus a constant column.
design = sm.add_constant(spar)
# Ordinary least squares of the selected output on the design matrix.
mod = sm.OLS(endog=y, exog=design)
reg = mod.fit()
reg.summary()

  return ptp(axis=axis, out=out, **kwargs)


0,1,2,3
Dep. Variable:,base_area,R-squared:,0.987
Model:,OLS,Adj. R-squared:,0.986
Method:,Least Squares,F-statistic:,605.7
Date:,"Fri, 05 Jul 2019",Prob (F-statistic):,6.11e-29
Time:,22:26:01,Log-Likelihood:,28.111
No. Observations:,36,AIC:,-46.22
Df Residuals:,31,BIC:,-38.3
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-6.724e-16,0.020,-3.38e-14,1.000,-0.041,0.041
density,0,0.020,0,1.000,-0.041,0.041
height,1.076e-16,0.020,5.33e-15,1.000,-0.041,0.041
width,0.8268,0.020,40.956,0.000,0.786,0.868
length,0.5512,0.020,27.304,0.000,0.510,0.592

0,1,2,3
Omnibus:,1.015,Durbin-Watson:,1.5
Prob(Omnibus):,0.602,Jarque-Bera (JB):,0.844
Skew:,-0.0,Prob(JB):,0.656
Kurtosis:,2.25,Cond. No.,1.01


Re-scale the coefficients and standard errors to the original (un-normalized) quantities:

In [12]:
# Standard deviation of the selected output column. `iout` is a column
# position, so index with .iloc: a plain integer key in [] on a label-indexed
# Series relies on deprecated positional fallback.
res_std = results.std().iloc[iout]
# Standard deviation of each parameter column.
par_std = caselist.std()

# Undo the normalization: multiply by the output's std and divide by each
# parameter's std. 'const' has no entry in `par_std`, so index alignment
# leaves its row as NaN in both series.
coeff = reg.params * res_std / par_std
se = reg.bse * res_std / par_std

Compute main effects, in terms of original model parameters:

In [13]:
# Scale each re-scaled coefficient by its parameter's standard deviation.
main_effect = coeff.mul(caselist.std())

Assemble analysis output table, with parameters sorted by significance (low P-values):

In [14]:
# One row per regression term, one column per quantity; then drop the
# intercept row and order the remaining rows by ascending p-value.
out = (
    pd.DataFrame({
        'params': reg.params,
        'bse': reg.bse,
        'pvalues': reg.pvalues,
        'coeff': coeff,
        'se': se,
        'main_effect': main_effect,
    })
    .drop(index='const')
    .sort_values('pvalues')
)
out

Unnamed: 0,params,bse,pvalues,coeff,se,main_effect
width,0.8267778,0.020187,1.471107e-28,12.0,0.293001,9.927007
length,0.5511852,0.020187,3.020349e-23,4.0,0.1465,6.618005
height,1.07626e-16,0.020187,1.0,6.377256e-15,1.196171,1.29225e-15
density,0.0,0.020187,1.0,0.0,0.004785,0.0
