# Goal

1. Create a script that creates a model
    1. Given a PSO
    2. Map all FLCs for all benchmark functions
    3. To the DRoC measurements for all benchmark function
    4. And return a list of coefficients with p-values
2. Get the coefficients and p-values for term for each PSO
3. Average the coefficients and p-values across all PSOs
4. Create a table with headings for `term`, `avg coefficient`, `avg p-val`

# 1. Model script

Given a PSO name:
1. Fetch all relevant data -- all FLCs for all benchmarks, and DRoCs for all benchmarks
2. Perform multiple linear regression up the 2nd degree
3. Also compute p-avlues
4. Return mapping of [`term-name`: (`coefficient`, `p-value`)]

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model
from sklearn.pipeline import Pipeline
from termcolor import colored
import sys
from scipy import stats

from results import psos, benchmarks
from results.flc.ruggedness import fem_0_1 as fem0_1_results, \
    fem_0_01 as fem0_01_results
from results.flc.neutrality import pn as pn_results, \
    lsn as lsn_results
from results.flc.gradients import g_avg as g_avg_results, \
    g_dev as g_dev_results
from results.flc.funnels import dm as dm_results
from results.flc.deception import fdc as fdc_results
from results.flc.searchability import fci_soc as fci_soc_results, \
    fci_cog as fci_cog_results, \
    fci_sigma as fci_sigma_results
from results import drocs as droc_results
from util.chunks import chunks

_pso_names = psos.all_names
_swarm_size = 25
_num_iterations = 2000
_benchmark_names = benchmarks.all_names
_benchmark_names = sorted(_benchmark_names)
_num_experiments = 30

_flcs = {
    'fem0_1': fem0_1_results,
    'fem0_01': fem0_01_results,
    'pn': pn_results,
    'lsn': lsn_results,
    'g_avg': g_avg_results,
    'g_dev': g_dev_results,
    'dm': dm_results,
    'fdc': fdc_results,
    'fci_cog': fci_cog_results,
    'fci_soc': fci_soc_results,
    'fci_sigma': fci_sigma_results
}
_flc_names = _flcs.keys()

def _get_D(benchmark_name):
    """Return the dimensionality to use for the given benchmark name"""
    D = 5
    benchmark = benchmarks.get(benchmark_name)
    if not benchmark.is_dimensionality_valid(D):
        D = 2
    return D

def _get_droc_df(pso_name, verbose=False):
    """Return a dataframe of DRoC data"""
    if verbose:
        print 'Getting', 'DRoC values for', colored(pso_name, 'magenta')
    droc_dict = {}
    pso_dict = {}
    for benchmark_name in _benchmark_names:
        D = _get_D(benchmark_name)
        droc_vals = []
        for e in range(_num_experiments):
            droc = droc_results.get(
                pso_name, _swarm_size, benchmark_name, D, _num_iterations, e)
            droc_vals.append(droc)
        pso_dict[benchmark_name] = droc_vals
    droc_dict[pso_name] = pso_dict
    droc_df = pd.DataFrame(droc_dict)
    return droc_df

def _get_flcs(flc_name, benchmark_name, verbose=False):
    """Return all FLC values for the given FLC on the given benchmark"""
    if verbose:
        print 'Getting', colored(flc_name, 'green'), 'values for', colored(benchmark_name, 'cyan')
    # Get the measure function for the given name
    flc_measure = _flcs[flc_name]
    D = _get_D(benchmark_name)
    
    # Get the results for each experiment
    results = []
    for e in range(_num_experiments):
        result = flc_measure.get(benchmark_name, D, e)
        results.append(result)
    
    return results

def _get_data(pso_name, verbose=False):
    """Return a dataframe containing FLC and DRoC measurements for benchmarks.
    The DF has a column for each FLC, as well as for DRoC.
    The DF has 30 rows for each benchmark (1 for each experiment).
    """
    if verbose:
        print 'Getting data for', colored(pso_name, 'magenta')
    droc_df = _get_droc_df(pso_name, verbose=verbose)
    X_raw = {}
    
    # Prep the FLCs:
    flc_measures = {}
    for flc_name in _flc_names:
        flcs = {}
        for benchmark_name in _benchmark_names:
            flcs[benchmark_name] = _get_flcs(flc_name, benchmark_name, verbose=verbose)
        flc_measures[flc_name] = flcs
    

    for (x, benchmark_name) in enumerate(_benchmark_names):
        for e in range(30):
            # The results for this benchmark function for this experiment (sample num):
            X = pd.Series()

            # Get the mean DRoC value for this benchmark/experiment
            droc = droc_df[pso_name][benchmark_name][e]
            X = X.append(pd.Series({'DRoC': droc}))

            # Each FLC
            for flc_name in _flc_names:
                flc_measure = flc_measures[flc_name][benchmark_name][e]
                X = X.append(pd.Series({flc_name: flc_measure}))

            # Save the series for this benchmark function to the results.
            row_id = benchmark_name + '_' + '{}'.format(e)
            X_raw[row_id] = X
    
    data = pd.DataFrame(X_raw)
    data = data.transpose()
    return data


_input_names = [
    'dm',
    'lsn',
    'fdc',
    'fci_sigma',
    'fem0_01',
    'fem0_1',
    'g_avg',
    'fci_soc',
    'g_dev',
    'pn',
    'fci_cog'
]
_output_name = 'DRoC'

def _adjusted_r2_score(y, y_pred):
    """Return the adjusted R^2 score of the given predicted y against the given y"""
    SS_Residual = sum((y-y_pred)**2)
    SS_Total = sum((y-np.mean(y))**2)
    r_squared = 1 - (float(SS_Residual))/SS_Total
    adjusted_r_squared = 1 - (1-r_squared)*(len(y)-1)/(len(y)-X.shape[1]-1)
    return adjusted_r_squared

def _get_model(data, degree=2, interaction_only=True, verbose=False):
    """Return a multiple regression model for the given data"""

    X = data[_input_names]
    y = data[_output_name]
    
    pf = PolynomialFeatures(degree=degree, interaction_only=interaction_only)
    lm = linear_model.LinearRegression()
    pipeline = Pipeline([
        ('pf', pf),
        ('lm', lm)
    ])
    
    print 'Fitting'
    pipeline.fit(X, y)
    
    lm = pipeline.named_steps['lm']
    pf = pipeline.named_steps['pf']
    
    feature_names = pf.get_feature_names()
    coeffs = lm.coef_
    return zip(feature_names, coeffs)
    
def model(pso_name, degree=2, interaction_only=True, verbose=False):
    """Return a multiple regression model from FLCs to DRoCs for the given PSO."""
    data = _get_data(pso_name, verbose=verbose)
    return _get_model(data, degree=degree, interaction_only=interaction_only, verbose=verbose)


getting db connection...
getting db cursor...
db ready.


In [2]:
# The easy way; fetches data, which might take a while.
#m = model('gbest_pso')

In [3]:
# The (slightly) more verbose way; first fetch data, but only once :)
data = _get_data('gbest_pso')

In [4]:
# Then compute the model
m = _get_model(data, degree=1)
print m

Fitting
[('1', 0.0), ('x0', -10.386855033436722), ('x1', -70.705092637765063), ('x2', -4.9017856325741995), ('x3', 147.06131578673535), ('x4', 3.6210131741001947), ('x5', 4.4457468807937097), ('x6', -1.5942695273964524), ('x7', -3.1539965476590388), ('x8', 0.85458075746527529), ('x9', 732.81262552748512), ('x10', 11.259349769863018)]


Let's try `statsmodels`. For a laugh.

In [5]:
import statsmodels.api as sm

X = data[_input_names]
y = data[_output_name]

X = sm.add_constant(X)
est = sm.OLS(y, X).fit()
est.summary()

0,1,2,3
Dep. Variable:,DRoC,R-squared:,0.126
Model:,OLS,Adj. R-squared:,0.112
Method:,Least Squares,F-statistic:,9.269
Date:,"Sun, 02 Sep 2018",Prob (F-statistic):,1.18e-15
Time:,20:38:15,Log-Likelihood:,-2293.3
No. Observations:,720,AIC:,4611.0
Df Residuals:,708,BIC:,4666.0
Df Model:,11,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-14.0848,6.458,-2.181,0.030,-26.763,-1.406
dm,-10.3869,7.269,-1.429,0.153,-24.659,3.885
lsn,-70.7051,155.853,-0.454,0.650,-376.695,235.285
fdc,-4.9018,2.425,-2.021,0.044,-9.663,-0.140
fci_sigma,147.0613,29.833,4.930,0.000,88.490,205.633
fem0_01,3.6210,2.317,1.563,0.119,-0.928,8.170
fem0_1,4.4457,3.478,1.278,0.202,-2.383,11.274
g_avg,-1.5943,0.318,-5.019,0.000,-2.218,-0.971
fci_soc,-3.1540,3.236,-0.975,0.330,-9.507,3.199

0,1,2,3
Omnibus:,638.036,Durbin-Watson:,0.239
Prob(Omnibus):,0.0,Jarque-Bera (JB):,12516.503
Skew:,-4.151,Prob(JB):,0.0
Kurtosis:,21.663,Cond. No.,12700.0


1. This is quite nice, actually, except that it doesn't give interaction coefficients. But that might be easy to get.
2. What about them warnings?

In [6]:
import statsmodels.formula.api as smf

def _get_formula(input_names, output_name):
    formula = ''
    formula += output_name
    formula += ' ~ '
    _is_first_input = True
    for i in range(len(input_names)-1):
        for j in range(i, len(input_names)):
            if _is_first_input:
                _is_first_input = False
            else:
                formula += ' + '
            formula += input_names[i]
            formula += '*'
            formula += input_names[j]
    return formula
    
formula = _get_formula(_input_names, _output_name)
    
est = smf.ols(formula=formula, data=data).fit()

def _get_model2(data, verbose=False):
    """Return a model using statsmodels.ols.
    The degree is limited to 1, with interaction coefficients included.
    """
    # Create a "formula" that maps input names, with interaction, to the output name.
    formula = _get_formula(_input_names, _output_name)
    
    if verbose:
        print 'Fitting'
    est = smf.ols(data=data, formula=formula).fit()
    
    # Structure the return value as
    # [coeff-name: (coeff, p)]
    model = {}
    for coeff_name in est.params.keys():
        coeff_results = {}
        coeff_results['coeff'] = est.params[coeff_name]
        coeff_results['p'] = est.pvalues[coeff_name]
        coeff_results = pd.Series(coeff_results)
        model[coeff_name] = coeff_results
    model = pd.DataFrame(model)
    return model.T

def model2(pso_name, verbose=False):
    """Return a multiple regression model from FLCs to DRoCs for the given PSO."""
    data = _get_data(pso_name, verbose=verbose)
    return _get_model2(data, verbose=verbose)

model2('gbest_pso', verbose=True)

Getting data for [35mgbest_pso[0m
Getting DRoC values for [35mgbest_pso[0m
Getting [32mdm[0m values for [36mackley[0m
Getting [32mdm[0m values for [36malpine[0m
Getting [32mdm[0m values for [36mbeale[0m
Getting [32mdm[0m values for [36mbohachevsky1_generalized[0m
Getting [32mdm[0m values for [36meggholder_generalized[0m
Getting [32mdm[0m values for [36mgoldstein_price[0m
Getting [32mdm[0m values for [36mgriewank[0m
Getting [32mdm[0m values for [36mlevy13_generalized[0m
Getting [32mdm[0m values for [36mmichalewicz[0m
Getting [32mdm[0m values for [36mpathological[0m
Getting [32mdm[0m values for [36mquadric[0m
Getting [32mdm[0m values for [36mquartic[0m
Getting [32mdm[0m values for [36mrana[0m
Getting [32mdm[0m values for [36mrastrigin[0m
Getting [32mdm[0m values for [36mrosenbrock[0m
Getting [32mdm[0m values for [36msalomon[0m
Getting [32mdm[0m values for [36mschwefel_2_22[0m
Getting [32mdm[0m values for [36mschwe

Getting [32mfci_soc[0m values for [36mlevy13_generalized[0m
Getting [32mfci_soc[0m values for [36mmichalewicz[0m
Getting [32mfci_soc[0m values for [36mpathological[0m
Getting [32mfci_soc[0m values for [36mquadric[0m
Getting [32mfci_soc[0m values for [36mquartic[0m
Getting [32mfci_soc[0m values for [36mrana[0m
Getting [32mfci_soc[0m values for [36mrastrigin[0m
Getting [32mfci_soc[0m values for [36mrosenbrock[0m
Getting [32mfci_soc[0m values for [36msalomon[0m
Getting [32mfci_soc[0m values for [36mschwefel_2_22[0m
Getting [32mfci_soc[0m values for [36mschwefel_2_26[0m
Getting [32mfci_soc[0m values for [36msix_hump_camel_back[0m
Getting [32mfci_soc[0m values for [36mskew_rastrigin[0m
Getting [32mfci_soc[0m values for [36mspherical[0m
Getting [32mfci_soc[0m values for [36mstep[0m
Getting [32mfci_soc[0m values for [36mweierstrass[0m
Getting [32mfci_soc[0m values for [36mzakharov[0m
Getting [32mg_dev[0m values for [36mack

Unnamed: 0,coeff,p
Intercept,57.763058,4.727219e-01
dm,403.428882,5.742620e-02
dm:fci_cog,-195.547805,3.161455e-01
dm:fci_sigma,-2072.221865,5.193305e-02
dm:fci_soc,-64.234273,5.230730e-01
dm:fdc,-140.507555,6.220586e-05
dm:fem0_01,12.354587,8.604647e-01
dm:fem0_1,-98.187501,3.951796e-01
dm:g_avg,-17.643779,9.780708e-02
dm:g_dev,21.205923,4.475520e-02


In [7]:
est.pvalues['dm']

0.057426198529260987

# 2. Get the coefficients and p-values for term for each PSO

First, prepare all the data. This is a once-off task.

In [8]:
all_pso_data = {}
for pso_name in _pso_names:
    pso_data = _get_data(pso_name, verbose=True)
    all_pso_data[pso_name] = pso_data

Getting data for [35mgbest_pso[0m
Getting DRoC values for [35mgbest_pso[0m
Getting [32mdm[0m values for [36mackley[0m
Getting [32mdm[0m values for [36malpine[0m
Getting [32mdm[0m values for [36mbeale[0m
Getting [32mdm[0m values for [36mbohachevsky1_generalized[0m
Getting [32mdm[0m values for [36meggholder_generalized[0m
Getting [32mdm[0m values for [36mgoldstein_price[0m
Getting [32mdm[0m values for [36mgriewank[0m
Getting [32mdm[0m values for [36mlevy13_generalized[0m
Getting [32mdm[0m values for [36mmichalewicz[0m
Getting [32mdm[0m values for [36mpathological[0m
Getting [32mdm[0m values for [36mquadric[0m
Getting [32mdm[0m values for [36mquartic[0m
Getting [32mdm[0m values for [36mrana[0m
Getting [32mdm[0m values for [36mrastrigin[0m
Getting [32mdm[0m values for [36mrosenbrock[0m
Getting [32mdm[0m values for [36msalomon[0m
Getting [32mdm[0m values for [36mschwefel_2_22[0m
Getting [32mdm[0m values for [36mschwe

Getting [32mg_avg[0m values for [36mgriewank[0m
Getting [32mg_avg[0m values for [36mlevy13_generalized[0m
Getting [32mg_avg[0m values for [36mmichalewicz[0m
Getting [32mg_avg[0m values for [36mpathological[0m
Getting [32mg_avg[0m values for [36mquadric[0m
Getting [32mg_avg[0m values for [36mquartic[0m
Getting [32mg_avg[0m values for [36mrana[0m
Getting [32mg_avg[0m values for [36mrastrigin[0m
Getting [32mg_avg[0m values for [36mrosenbrock[0m
Getting [32mg_avg[0m values for [36msalomon[0m
Getting [32mg_avg[0m values for [36mschwefel_2_22[0m
Getting [32mg_avg[0m values for [36mschwefel_2_26[0m
Getting [32mg_avg[0m values for [36msix_hump_camel_back[0m
Getting [32mg_avg[0m values for [36mskew_rastrigin[0m
Getting [32mg_avg[0m values for [36mspherical[0m
Getting [32mg_avg[0m values for [36mstep[0m
Getting [32mg_avg[0m values for [36mweierstrass[0m
Getting [32mg_avg[0m values for [36mzakharov[0m
Getting [32mfci_soc[0m

Getting [32mlsn[0m values for [36mstep[0m
Getting [32mlsn[0m values for [36mweierstrass[0m
Getting [32mlsn[0m values for [36mzakharov[0m
Getting [32mfdc[0m values for [36mackley[0m
Getting [32mfdc[0m values for [36malpine[0m
Getting [32mfdc[0m values for [36mbeale[0m
Getting [32mfdc[0m values for [36mbohachevsky1_generalized[0m
Getting [32mfdc[0m values for [36meggholder_generalized[0m
Getting [32mfdc[0m values for [36mgoldstein_price[0m
Getting [32mfdc[0m values for [36mgriewank[0m
Getting [32mfdc[0m values for [36mlevy13_generalized[0m
Getting [32mfdc[0m values for [36mmichalewicz[0m
Getting [32mfdc[0m values for [36mpathological[0m
Getting [32mfdc[0m values for [36mquadric[0m
Getting [32mfdc[0m values for [36mquartic[0m
Getting [32mfdc[0m values for [36mrana[0m
Getting [32mfdc[0m values for [36mrastrigin[0m
Getting [32mfdc[0m values for [36mrosenbrock[0m
Getting [32mfdc[0m values for [36msalomon[0m
Getting 

Getting [32mg_dev[0m values for [36mbeale[0m
Getting [32mg_dev[0m values for [36mbohachevsky1_generalized[0m
Getting [32mg_dev[0m values for [36meggholder_generalized[0m
Getting [32mg_dev[0m values for [36mgoldstein_price[0m
Getting [32mg_dev[0m values for [36mgriewank[0m
Getting [32mg_dev[0m values for [36mlevy13_generalized[0m
Getting [32mg_dev[0m values for [36mmichalewicz[0m
Getting [32mg_dev[0m values for [36mpathological[0m
Getting [32mg_dev[0m values for [36mquadric[0m
Getting [32mg_dev[0m values for [36mquartic[0m
Getting [32mg_dev[0m values for [36mrana[0m
Getting [32mg_dev[0m values for [36mrastrigin[0m
Getting [32mg_dev[0m values for [36mrosenbrock[0m
Getting [32mg_dev[0m values for [36msalomon[0m
Getting [32mg_dev[0m values for [36mschwefel_2_22[0m
Getting [32mg_dev[0m values for [36mschwefel_2_26[0m
Getting [32mg_dev[0m values for [36msix_hump_camel_back[0m
Getting [32mg_dev[0m values for [36mskew_rastr

Getting [32mfci_sigma[0m values for [36mquadric[0m
Getting [32mfci_sigma[0m values for [36mquartic[0m
Getting [32mfci_sigma[0m values for [36mrana[0m
Getting [32mfci_sigma[0m values for [36mrastrigin[0m
Getting [32mfci_sigma[0m values for [36mrosenbrock[0m
Getting [32mfci_sigma[0m values for [36msalomon[0m
Getting [32mfci_sigma[0m values for [36mschwefel_2_22[0m
Getting [32mfci_sigma[0m values for [36mschwefel_2_26[0m
Getting [32mfci_sigma[0m values for [36msix_hump_camel_back[0m
Getting [32mfci_sigma[0m values for [36mskew_rastrigin[0m
Getting [32mfci_sigma[0m values for [36mspherical[0m
Getting [32mfci_sigma[0m values for [36mstep[0m
Getting [32mfci_sigma[0m values for [36mweierstrass[0m
Getting [32mfci_sigma[0m values for [36mzakharov[0m
Getting [32mfem0_01[0m values for [36mackley[0m
Getting [32mfem0_01[0m values for [36malpine[0m
Getting [32mfem0_01[0m values for [36mbeale[0m
Getting [32mfem0_01[0m values for [

Getting [32mfci_cog[0m values for [36mstep[0m
Getting [32mfci_cog[0m values for [36mweierstrass[0m
Getting [32mfci_cog[0m values for [36mzakharov[0m
Getting data for [35mgc_lbest_pso[0m
Getting DRoC values for [35mgc_lbest_pso[0m
Getting [32mdm[0m values for [36mackley[0m
Getting [32mdm[0m values for [36malpine[0m
Getting [32mdm[0m values for [36mbeale[0m
Getting [32mdm[0m values for [36mbohachevsky1_generalized[0m
Getting [32mdm[0m values for [36meggholder_generalized[0m
Getting [32mdm[0m values for [36mgoldstein_price[0m
Getting [32mdm[0m values for [36mgriewank[0m
Getting [32mdm[0m values for [36mlevy13_generalized[0m
Getting [32mdm[0m values for [36mmichalewicz[0m
Getting [32mdm[0m values for [36mpathological[0m
Getting [32mdm[0m values for [36mquadric[0m
Getting [32mdm[0m values for [36mquartic[0m
Getting [32mdm[0m values for [36mrana[0m
Getting [32mdm[0m values for [36mrastrigin[0m
Getting [32mdm[0m values 

Getting [32mg_avg[0m values for [36mquadric[0m
Getting [32mg_avg[0m values for [36mquartic[0m
Getting [32mg_avg[0m values for [36mrana[0m
Getting [32mg_avg[0m values for [36mrastrigin[0m
Getting [32mg_avg[0m values for [36mrosenbrock[0m
Getting [32mg_avg[0m values for [36msalomon[0m
Getting [32mg_avg[0m values for [36mschwefel_2_22[0m
Getting [32mg_avg[0m values for [36mschwefel_2_26[0m
Getting [32mg_avg[0m values for [36msix_hump_camel_back[0m
Getting [32mg_avg[0m values for [36mskew_rastrigin[0m
Getting [32mg_avg[0m values for [36mspherical[0m
Getting [32mg_avg[0m values for [36mstep[0m
Getting [32mg_avg[0m values for [36mweierstrass[0m
Getting [32mg_avg[0m values for [36mzakharov[0m
Getting [32mfci_soc[0m values for [36mackley[0m
Getting [32mfci_soc[0m values for [36malpine[0m
Getting [32mfci_soc[0m values for [36mbeale[0m
Getting [32mfci_soc[0m values for [36mbohachevsky1_generalized[0m
Getting [32mfci_soc[0m

Getting [32mlsn[0m values for [36mstep[0m
Getting [32mlsn[0m values for [36mweierstrass[0m
Getting [32mlsn[0m values for [36mzakharov[0m
Getting [32mfdc[0m values for [36mackley[0m
Getting [32mfdc[0m values for [36malpine[0m
Getting [32mfdc[0m values for [36mbeale[0m
Getting [32mfdc[0m values for [36mbohachevsky1_generalized[0m
Getting [32mfdc[0m values for [36meggholder_generalized[0m
Getting [32mfdc[0m values for [36mgoldstein_price[0m
Getting [32mfdc[0m values for [36mgriewank[0m
Getting [32mfdc[0m values for [36mlevy13_generalized[0m
Getting [32mfdc[0m values for [36mmichalewicz[0m
Getting [32mfdc[0m values for [36mpathological[0m
Getting [32mfdc[0m values for [36mquadric[0m
Getting [32mfdc[0m values for [36mquartic[0m
Getting [32mfdc[0m values for [36mrana[0m
Getting [32mfdc[0m values for [36mrastrigin[0m
Getting [32mfdc[0m values for [36mrosenbrock[0m
Getting [32mfdc[0m values for [36msalomon[0m
Getting 

Getting [32mpn[0m values for [36mbeale[0m
Getting [32mpn[0m values for [36mbohachevsky1_generalized[0m
Getting [32mpn[0m values for [36meggholder_generalized[0m
Getting [32mpn[0m values for [36mgoldstein_price[0m
Getting [32mpn[0m values for [36mgriewank[0m
Getting [32mpn[0m values for [36mlevy13_generalized[0m
Getting [32mpn[0m values for [36mmichalewicz[0m
Getting [32mpn[0m values for [36mpathological[0m
Getting [32mpn[0m values for [36mquadric[0m
Getting [32mpn[0m values for [36mquartic[0m
Getting [32mpn[0m values for [36mrana[0m
Getting [32mpn[0m values for [36mrastrigin[0m
Getting [32mpn[0m values for [36mrosenbrock[0m
Getting [32mpn[0m values for [36msalomon[0m
Getting [32mpn[0m values for [36mschwefel_2_22[0m
Getting [32mpn[0m values for [36mschwefel_2_26[0m
Getting [32mpn[0m values for [36msix_hump_camel_back[0m
Getting [32mpn[0m values for [36mskew_rastrigin[0m
Getting [32mpn[0m values for [36mspherical

Getting [32mfem0_1[0m values for [36mackley[0m
Getting [32mfem0_1[0m values for [36malpine[0m
Getting [32mfem0_1[0m values for [36mbeale[0m
Getting [32mfem0_1[0m values for [36mbohachevsky1_generalized[0m
Getting [32mfem0_1[0m values for [36meggholder_generalized[0m
Getting [32mfem0_1[0m values for [36mgoldstein_price[0m
Getting [32mfem0_1[0m values for [36mgriewank[0m
Getting [32mfem0_1[0m values for [36mlevy13_generalized[0m
Getting [32mfem0_1[0m values for [36mmichalewicz[0m
Getting [32mfem0_1[0m values for [36mpathological[0m
Getting [32mfem0_1[0m values for [36mquadric[0m
Getting [32mfem0_1[0m values for [36mquartic[0m
Getting [32mfem0_1[0m values for [36mrana[0m
Getting [32mfem0_1[0m values for [36mrastrigin[0m
Getting [32mfem0_1[0m values for [36mrosenbrock[0m
Getting [32mfem0_1[0m values for [36msalomon[0m
Getting [32mfem0_1[0m values for [36mschwefel_2_22[0m
Getting [32mfem0_1[0m values for [36mschwefel_2_2

Getting [32mdm[0m values for [36mrana[0m
Getting [32mdm[0m values for [36mrastrigin[0m
Getting [32mdm[0m values for [36mrosenbrock[0m
Getting [32mdm[0m values for [36msalomon[0m
Getting [32mdm[0m values for [36mschwefel_2_22[0m
Getting [32mdm[0m values for [36mschwefel_2_26[0m
Getting [32mdm[0m values for [36msix_hump_camel_back[0m
Getting [32mdm[0m values for [36mskew_rastrigin[0m
Getting [32mdm[0m values for [36mspherical[0m
Getting [32mdm[0m values for [36mstep[0m
Getting [32mdm[0m values for [36mweierstrass[0m
Getting [32mdm[0m values for [36mzakharov[0m
Getting [32mlsn[0m values for [36mackley[0m
Getting [32mlsn[0m values for [36malpine[0m
Getting [32mlsn[0m values for [36mbeale[0m
Getting [32mlsn[0m values for [36mbohachevsky1_generalized[0m
Getting [32mlsn[0m values for [36meggholder_generalized[0m
Getting [32mlsn[0m values for [36mgoldstein_price[0m
Getting [32mlsn[0m values for [36mgriewank[0m
Getting 

Getting [32mfci_soc[0m values for [36mzakharov[0m
Getting [32mg_dev[0m values for [36mackley[0m
Getting [32mg_dev[0m values for [36malpine[0m
Getting [32mg_dev[0m values for [36mbeale[0m
Getting [32mg_dev[0m values for [36mbohachevsky1_generalized[0m
Getting [32mg_dev[0m values for [36meggholder_generalized[0m
Getting [32mg_dev[0m values for [36mgoldstein_price[0m
Getting [32mg_dev[0m values for [36mgriewank[0m
Getting [32mg_dev[0m values for [36mlevy13_generalized[0m
Getting [32mg_dev[0m values for [36mmichalewicz[0m
Getting [32mg_dev[0m values for [36mpathological[0m
Getting [32mg_dev[0m values for [36mquadric[0m
Getting [32mg_dev[0m values for [36mquartic[0m
Getting [32mg_dev[0m values for [36mrana[0m
Getting [32mg_dev[0m values for [36mrastrigin[0m
Getting [32mg_dev[0m values for [36mrosenbrock[0m
Getting [32mg_dev[0m values for [36msalomon[0m
Getting [32mg_dev[0m values for [36mschwefel_2_22[0m
Getting [32mg_

Getting [32mfci_sigma[0m values for [36mquadric[0m
Getting [32mfci_sigma[0m values for [36mquartic[0m
Getting [32mfci_sigma[0m values for [36mrana[0m
Getting [32mfci_sigma[0m values for [36mrastrigin[0m
Getting [32mfci_sigma[0m values for [36mrosenbrock[0m
Getting [32mfci_sigma[0m values for [36msalomon[0m
Getting [32mfci_sigma[0m values for [36mschwefel_2_22[0m
Getting [32mfci_sigma[0m values for [36mschwefel_2_26[0m
Getting [32mfci_sigma[0m values for [36msix_hump_camel_back[0m
Getting [32mfci_sigma[0m values for [36mskew_rastrigin[0m
Getting [32mfci_sigma[0m values for [36mspherical[0m
Getting [32mfci_sigma[0m values for [36mstep[0m
Getting [32mfci_sigma[0m values for [36mweierstrass[0m
Getting [32mfci_sigma[0m values for [36mzakharov[0m
Getting [32mfem0_01[0m values for [36mackley[0m
Getting [32mfem0_01[0m values for [36malpine[0m
Getting [32mfem0_01[0m values for [36mbeale[0m
Getting [32mfem0_01[0m values for [

Getting [32mfci_cog[0m values for [36mskew_rastrigin[0m
Getting [32mfci_cog[0m values for [36mspherical[0m
Getting [32mfci_cog[0m values for [36mstep[0m
Getting [32mfci_cog[0m values for [36mweierstrass[0m
Getting [32mfci_cog[0m values for [36mzakharov[0m
Getting data for [35msocial_only_pso[0m
Getting DRoC values for [35msocial_only_pso[0m
Getting [32mdm[0m values for [36mackley[0m
Getting [32mdm[0m values for [36malpine[0m
Getting [32mdm[0m values for [36mbeale[0m
Getting [32mdm[0m values for [36mbohachevsky1_generalized[0m
Getting [32mdm[0m values for [36meggholder_generalized[0m
Getting [32mdm[0m values for [36mgoldstein_price[0m
Getting [32mdm[0m values for [36mgriewank[0m
Getting [32mdm[0m values for [36mlevy13_generalized[0m
Getting [32mdm[0m values for [36mmichalewicz[0m
Getting [32mdm[0m values for [36mpathological[0m
Getting [32mdm[0m values for [36mquadric[0m
Getting [32mdm[0m values for [36mquartic[0m
G

Getting [32mg_avg[0m values for [36mgriewank[0m
Getting [32mg_avg[0m values for [36mlevy13_generalized[0m
Getting [32mg_avg[0m values for [36mmichalewicz[0m
Getting [32mg_avg[0m values for [36mpathological[0m
Getting [32mg_avg[0m values for [36mquadric[0m
Getting [32mg_avg[0m values for [36mquartic[0m
Getting [32mg_avg[0m values for [36mrana[0m
Getting [32mg_avg[0m values for [36mrastrigin[0m
Getting [32mg_avg[0m values for [36mrosenbrock[0m
Getting [32mg_avg[0m values for [36msalomon[0m
Getting [32mg_avg[0m values for [36mschwefel_2_22[0m
Getting [32mg_avg[0m values for [36mschwefel_2_26[0m
Getting [32mg_avg[0m values for [36msix_hump_camel_back[0m
Getting [32mg_avg[0m values for [36mskew_rastrigin[0m
Getting [32mg_avg[0m values for [36mspherical[0m
Getting [32mg_avg[0m values for [36mstep[0m
Getting [32mg_avg[0m values for [36mweierstrass[0m
Getting [32mg_avg[0m values for [36mzakharov[0m
Getting [32mfci_soc[0m

Next, build the models from the data.

In [9]:
all_pso_models = {}
for pso_name in _pso_names:
    data = all_pso_data[pso_name]
    pso_model = _get_model2(data)
    all_pso_models[pso_name] = pso_model
    
print all_models['gbest_pso'].head()
print all_models['lbest_pso'].head()
# These seem ok 

NameError: name 'all_models' is not defined

# 3. Average the coefficients and p-values across all PSOs

In [10]:
avg_model = {}

# Start by getting the names of all features, interaction included.
# Do this by looking at the features for one PSO.
_eg_model = all_pso_models['gbest_pso']
feature_names = _eg_model.index

# Go row-by-row (i.e. feature-by-feature).
# For each feature, get the average for all PSOs.
for feature_name in feature_names:
    print 'Averaging', feature_name
    coeffs = []
    pvals = []
    for pso_name in _pso_names:
        pso_model = all_pso_models[pso_name]
        coeff = pso_model['coeff'][feature_name]
        pval = pso_model['p'][feature_name]
        coeffs.append(coeff)
        pvals.append(pval)

    avg_coeff = np.average(coeffs)
    avg_pval = np.average(pvals)
    
    feature_series = {}
    feature_series['coeff'] = avg_coeff
    feature_series['p'] = avg_pval
    feature_series = pd.Series(feature_series)
    
    avg_model[feature_name] = feature_series

avg_model = pd.DataFrame(avg_model)
avg_model = avg_model.T

Averaging Intercept
Averaging dm
Averaging dm:fci_cog
Averaging dm:fci_sigma
Averaging dm:fci_soc
Averaging dm:fdc
Averaging dm:fem0_01
Averaging dm:fem0_1
Averaging dm:g_avg
Averaging dm:g_dev
Averaging dm:lsn
Averaging dm:pn
Averaging fci_cog
Averaging fci_sigma
Averaging fci_sigma:fci_cog
Averaging fci_sigma:fci_soc
Averaging fci_sigma:fem0_01
Averaging fci_sigma:fem0_1
Averaging fci_sigma:g_avg
Averaging fci_sigma:g_dev
Averaging fci_sigma:pn
Averaging fci_soc
Averaging fci_soc:fci_cog
Averaging fci_soc:g_dev
Averaging fci_soc:pn
Averaging fdc
Averaging fdc:fci_cog
Averaging fdc:fci_sigma
Averaging fdc:fci_soc
Averaging fdc:fem0_01
Averaging fdc:fem0_1
Averaging fdc:g_avg
Averaging fdc:g_dev
Averaging fdc:pn
Averaging fem0_01
Averaging fem0_01:fci_cog
Averaging fem0_01:fci_soc
Averaging fem0_01:fem0_1
Averaging fem0_01:g_avg
Averaging fem0_01:g_dev
Averaging fem0_01:pn
Averaging fem0_1
Averaging fem0_1:fci_cog
Averaging fem0_1:fci_soc
Averaging fem0_1:g_avg
Averaging fem0_1:g_dev
A

In [11]:
avg_model.head()

Unnamed: 0,coeff,p
Intercept,-30.086393,0.599427
dm,203.129967,0.179131
dm:fci_cog,-59.463314,0.332154
dm:fci_sigma,-2824.332924,0.144064
dm:fci_soc,72.88043,0.446499


# 4. Create a table

Format: 

|Term name|Coefficient|p-value|
|---|---|---|
|DM|203.13|0.6|
|etc|...|...|

First, we need a way to convert term names to print format -- `fci_sigma:fem0_01` just won't fly. Here goes

In [12]:
# Print names are formatted for TeX.
_feature_print_names = {
    'Intercept': '\\(\\text{Intercept}\\)',
    'dm': '\\(\\text{DM}\\)', 
    'lsn': '\\(\\text{LSN}\\)', 
    'fdc': '\\(\\text{FDC}\\)', 
    'fci_sigma': '\\(\\text{FCI}_{\\sigma}\\)', 
    'fem0_01': '\\(\\text{FEM}_{0.01}\\)', 
    'fem0_1': '\\(\\text{FEM}_{0.1}\\)', 
    'g_avg': '\\(\\text{G}_{avg}\\)', 
    'fci_soc': '\\(\\text{FCI}_{soc}\\)', 
    'g_dev': '\\(\\text{G}_{dev}\\)', 
    'pn': '\\(\\text{PN}\\)', 
    'fci_cog': '\\(\\text{FCI}_{cog}\\)',
}

def print_format(feature_name):
    feature_names = feature_name.split(':')
    formatted = ''
    for i, name in enumerate(feature_names):
        name = _feature_print_names[name]
        if i != 0:
            formatted += '\\(\\cdot\\)'
        formatted += name
    return formatted

Seems fine. May want to consider tweaking so that, for combined terms, everything is inside a single `\(...\)`, but let's first see if it's needed.

Now, the table.

In [13]:
from tabulate import tabulate

table_df = avg_model.copy()
for feature_name in avg_model.index:
    print_name = print_format(feature_name)
    table_df.rename(index={feature_name:print_name}, inplace=True)
table_df
# Mathematical!

tabulate(table_df, headers=['Feature name', 'Coefficient', 'P-value'], tablefmt='latex')

u'\\begin{tabular}{lrr}\n\\hline\n Feature name                                          &    Coefficient &   P-value \\\\\n\\hline\n \\textbackslash{}(\\textbackslash{}text\\{Intercept\\}\\textbackslash{})                                  &     -30.0864   & 0.599427  \\\\\n \\textbackslash{}(\\textbackslash{}text\\{DM\\}\\textbackslash{})                                         &     203.13     & 0.179131  \\\\\n \\textbackslash{}(\\textbackslash{}text\\{DM\\}\\textbackslash{})\\textbackslash{}(\\textbackslash{}cdot\\textbackslash{})\\textbackslash{}(\\textbackslash{}text\\{FCI\\}\\_\\{cog\\}\\textbackslash{})            &     -59.4633   & 0.332154  \\\\\n \\textbackslash{}(\\textbackslash{}text\\{DM\\}\\textbackslash{})\\textbackslash{}(\\textbackslash{}cdot\\textbackslash{})\\textbackslash{}(\\textbackslash{}text\\{FCI\\}\\_\\{\\textbackslash{}sigma\\}\\textbackslash{})         &   -2824.33     & 0.144064  \\\\\n \\textbackslash{}(\\textbackslash{}text\\{DM\\}\\textbackslash{})\\tex

In [14]:
table_df

Unnamed: 0,coeff,p
\(\text{Intercept}\),-30.086393,0.599427
\(\text{DM}\),203.129967,0.179131
\(\text{DM}\)\(\cdot\)\(\text{FCI}_{cog}\),-59.463314,0.332154
\(\text{DM}\)\(\cdot\)\(\text{FCI}_{\sigma}\),-2824.332924,0.144064
\(\text{DM}\)\(\cdot\)\(\text{FCI}_{soc}\),72.880430,0.446499
\(\text{DM}\)\(\cdot\)\(\text{FDC}\),-138.780403,0.130251
\(\text{DM}\)\(\cdot\)\(\text{FEM}_{0.01}\),-80.878876,0.643547
\(\text{DM}\)\(\cdot\)\(\text{FEM}_{0.1}\),-0.110703,0.535072
\(\text{DM}\)\(\cdot\)\(\text{G}_{avg}\),-17.176765,0.359336
\(\text{DM}\)\(\cdot\)\(\text{G}_{dev}\),20.704172,0.300480


In [15]:
table_df.sort_values('p')

Unnamed: 0,coeff,p
\(\text{FCI}_{\sigma}\)\(\cdot\)\(\text{FEM}_{0.1}\),-3221.933597,0.080313
\(\text{FCI}_{\sigma}\)\(\cdot\)\(\text{G}_{dev}\),-338.145820,0.090188
\(\text{FCI}_{\sigma}\)\(\cdot\)\(\text{G}_{avg}\),328.734248,0.097171
\(\text{FCI}_{\sigma}\)\(\cdot\)\(\text{FEM}_{0.01}\),-1919.812899,0.118568
\(\text{G}_{avg}\)\(\cdot\)\(\text{G}_{dev}\),0.564197,0.120929
\(\text{G}_{avg}\),-42.846406,0.126223
\(\text{DM}\)\(\cdot\)\(\text{FDC}\),-138.780403,0.130251
\(\text{G}_{dev}\),33.975342,0.134669
\(\text{DM}\)\(\cdot\)\(\text{FCI}_{\sigma}\),-2824.332924,0.144064
\(\text{FEM}_{0.1}\),233.730859,0.172172


Let's use this cell to find the coeffs of some stuff

In [22]:
# Neutrality:

print 'lsn'
print avg_model.T['lsn']
print ''

print 'pn'
print avg_model.T['pn']
print ''

print 'lsn:pn'
print avg_model.T['lsn:pn']
print ''


lsn
coeff   -5605.995672
p           0.900671
Name: lsn, dtype: float64

pn
coeff    48088.699240
p            0.780777
Name: pn, dtype: float64

lsn:pn
coeff    36678.830784
p            0.806897
Name: lsn:pn, dtype: float64



This is useless :)

Seems like neutrality decreases rate of diversity decrease; however
- The p-value is so high, this is absolutely not to be trusted
- LSN has the opposite effect somehow