In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model
from termcolor import colored
import sys

from results import psos, benchmarks
from results.flc.ruggedness import fem_0_1 as fem0_1_results, \
    fem_0_01 as fem0_01_results
from results.flc.neutrality import pn as pn_results, \
    lsn as lsn_results
from results.flc.gradients import g_avg as g_avg_results, \
    g_dev as g_dev_results
from results.flc.funnels import dm as dm_results
from results.flc.deception import fdc as fdc_results
from results.flc.searchability import fci_soc as fci_soc_results, \
    fci_cog as fci_cog_results, \
    fci_sigma as fci_sigma_results
from results import drocs as droc_results
from util.chunks import chunks

getting db connection...
getting db cursor...
db ready.


In [9]:
# Params

pso_names = psos.all_names
swarm_size = 25
benchmark_names = benchmarks.all_names
benchmark_names = sorted(benchmark_names)
num_iterations = 2000
num_experiments = 30

flcs = {
    'fem0_1': fem0_1_results,
    'fem0_01': fem0_01_results,
    'pn': pn_results,
    'lsn': lsn_results,
    'g_avg': g_avg_results,
    'g_dev': g_dev_results,
    'dm': dm_results,
    'fdc': fdc_results,
    'fci_cog': fci_cog_results,
    'fci_soc': fci_soc_results,
    'fci_sigma': fci_sigma_results
}
flc_names = flcs.keys()


In [11]:
# Data getter helpers

def get_flcs(flc_name, benchmark_name):
    # Get the measure function for the given name
    flc_measure = flcs[flc_name]
    
    # Determine which dimensionality to use
    D = 5
    benchmark = benchmarks.get(benchmark_name)
    if not benchmark.is_dimensionality_valid(D):
        D = 2
    
    # Get the results for each experiment
    results = []
    for e in range(num_experiments):
        result = flc_measure.get(benchmark_name, D, e)
        results.append(result)
    
    return results



In [13]:
# DRoCs

droc_dict = {}
for pso_name in pso_names:
    pso_dict = {}
    for benchmark_name in benchmark_names:
        D = 5
        benchmark = benchmarks.get(benchmark_name)
        if not benchmark.is_dimensionality_valid(D):
            D = 2

        droc_vals = []
        for e in range(num_experiments):
            droc = droc_results.get(
                pso_name, swarm_size, benchmark_name, D, num_iterations, e)
            droc_vals.append(droc)
        pso_dict[benchmark_name] = droc_vals
    droc_dict[pso_name] = pso_dict
droc_df = pd.DataFrame(droc_dict)


In [21]:
# Only look at Gbest PSO (for now!)
pso_name = 'gbest_pso'

# The results
X_raw = {}

for benchmark_name in benchmark_names:
    progress_msg = '\rProcessing ' + benchmark_name
    sys.stdout.write('\r                                       ')
    sys.stdout.write(progress_msg)
    sys.stdout.flush
    
    # The results for this benchmark function, in list format.
    X = pd.Series()
    
    # Get the mean DRoC value for this benchmark name:
    drocs = droc_df[pso_name][benchmark_name]
    droc_mean = np.average(drocs)
    X = X.append(pd.Series({'DRoC': droc_mean}))
    
    # Each FLC
    for flc_name in flcs.keys():
        flc_measures = get_flcs(flc_name, benchmark_name)
        flc_measures_mean = np.average(flc_measures)
        X = X.append(pd.Series({flc_name: flc_measures_mean}))
    
    # Save the series for this benchmark function to the results.
    X_raw[benchmark_name] = X

print ''
print 'Done.'
data = pd.DataFrame(X_raw)
data = data.transpose()

Processing zakharov                    
Done.


In [162]:
# Define inputs and outputs

input_names = [
    'dm',
    'lsn',
    'fdc',
    'fci_sigma',
    'fem0_01',
    'fem0_1',
    'g_avg',
    'fci_soc',
    'g_dev',
    'pn',
    'fci_cog'
]
output_name = 'DRoC'

X = data[input_names]
y = data[output_name]


In [163]:
# Set up a fitting pipeline.

# Params:
degree = 2

# Components etc
pf = PolynomialFeatures(degree=degree)
lm = linear_model.LinearRegression()
pipeline = Pipeline([
    ('pf', pf),
    ('lm', lm)
])


In [164]:
# Here, we will use all the data as training data
# Do the fit:
pipeline.fit(X, y)

# Get the model -- print the coefficients
# print pipeline.named_steps['lm'].coef_
# print(list(zip(pipeline.named_steps['lm'].coef_, input_names)))
print 'model:'
lm = pipeline.named_steps['lm']
pf = pipeline.named_steps['pf']
print zip(lm.coef_, pf.get_feature_names())
print ''

# Predict
y_pred = pipeline.predict(X)

# Score the prediction
from sklearn.metrics import mean_squared_error
print 'mse:', mean_squared_error(y, y_pred)

from sklearn.metrics import r2_score
print 'r2:', r2_score(y, y_pred)

from sklearn.metrics import explained_variance_score
print 'e. v.:', explained_variance_score(y, y_pred)


model:
[(1.2380370340014935e-10, '1'), (-128.41013263614343, 'x0'), (66.769719060803112, 'x1'), (-204.91277908233718, 'x2'), (284.92563124911629, 'x3'), (-356.21374568977689, 'x4'), (60.314019219165573, 'x5'), (-790.15878287081534, 'x6'), (-201.77033726794974, 'x7'), (379.42371815070697, 'x8'), (24.165876823902796, 'x9'), (17.765522417847137, 'x10'), (163.11076602111018, 'x0^2'), (-15.385081762407051, 'x0 x1'), (-234.71376765664678, 'x0 x2'), (-74.271030631395377, 'x0 x3'), (-169.53357099348207, 'x0 x4'), (-84.310251167169909, 'x0 x5'), (18.868323516156345, 'x0 x6'), (-284.31110318365933, 'x0 x7'), (675.17273679186746, 'x0 x8'), (-5.7844674415118291, 'x0 x9'), (-164.2507320761855, 'x0 x10'), (0.46859737201474982, 'x1^2'), (52.5477518009886, 'x1 x2'), (1.4629506170887028, 'x1 x3'), (55.066377783406715, 'x1 x4'), (47.415608603321679, 'x1 x5'), (183.17547835275249, 'x1 x6'), (59.385715023823806, 'x1 x7'), (133.75472909518837, 'x1 x8'), (0.17307311118644261, 'x1 x9'), (45.862315195521205, 

In [165]:
# Now we'll split training and test data and see what that does.

# Params:
test_size = .01

# Split:

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)

# Fit:
pipeline.fit(X_train, y_train)

# Get the model -- print the coefficients
print 'model:'
lm = pipeline.named_steps['lm']
pf = pipeline.named_steps['pf']
print zip(lm.coef_, pf.get_feature_names())
print ''

# Predict
y_pred = pipeline.predict(X_test)

# Score the prediction
from sklearn.metrics import mean_squared_error
print 'mse:', mean_squared_error(y_test, y_pred)

from sklearn.metrics import r2_score
print 'r2:', r2_score(y_test, y_pred)

from sklearn.metrics import explained_variance_score
print 'e. v.:', explained_variance_score(y_test, y_pred)


model:
[(1.2224316003894842e-10, '1'), (-74.414916033031986, 'x0'), (27.688209758443683, 'x1'), (-457.81758727111156, 'x2'), (255.56352769659225, 'x3'), (-309.53699883871843, 'x4'), (62.397041026907473, 'x5'), (-756.04351457686778, 'x6'), (-163.1257226963711, 'x7'), (334.93023239955659, 'x8'), (9.30427611148823, 'x9'), (125.66413407261506, 'x10'), (147.5232503977459, 'x0^2'), (-5.7103096604015962, 'x0 x1'), (-254.86414492482558, 'x0 x2'), (-65.512928154817686, 'x0 x3'), (-133.40857478678768, 'x0 x4'), (-56.620154831703339, 'x0 x5'), (76.111546284019397, 'x0 x6'), (-249.81341759655822, 'x0 x7'), (467.03298622277487, 'x0 x8'), (-2.0852769655857442, 'x0 x9'), (-101.35182097297023, 'x0 x10'), (0.18355677109909563, 'x1^2'), (19.20227263470197, 'x1 x2'), (0.64291600196864929, 'x1 x3'), (21.53266644983518, 'x1 x4'), (20.194542063960437, 'x1 x5'), (82.74622001170421, 'x1 x6'), (23.807125692056797, 'x1 x7'), (73.586185212342585, 'x1 x8'), (0.064356375576167635, 'x1 x9'), (18.647850723041721, 'x