In [1]:
import glob
import os
import re

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pystorms.scenarios import epsilon

In [2]:
# First signed decimal number in a string, optionally in scientific notation.
_NUMBER_RE = re.compile(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")
# Every report file name ends with this suffix.
_REPORT_SUFFIX = "_GeneralizabilityEpsilon_report.txt"


def _first_number(text):
    """Return the first number in ``text`` as a float, or NaN if there is none."""
    found = _NUMBER_RE.search(text)
    return float(found.group(0)) if found else float("nan")


def extract_performance(path=".",
                        seed=None,
                        controller=None):
    """Parse optimisation report files into a dict of per-report metric lists.

    Report files are expected to be named
    ``<seed>_<controller>_GeneralizabilityEpsilon_report.txt`` and to live
    directly inside ``path``.

    Parameters
    ----------
    path : str
        Directory holding the reports; a trailing separator is optional.
    seed : int or str, optional
        Only read reports whose file name starts with ``<seed>_``.
        ``None`` (default) reads every seed.
    controller : str, optional
        Only read reports for this controller. ``None`` (default) reads
        every controller.

    Returns
    -------
    dict
        Keys ``"time"``, ``"performance"``, ``"iterations"``, ``"completed"``
        and ``"seed"``. Each value is a list with exactly one entry per report
        file, so the dict converts directly to a ``pandas.DataFrame``.

        * ``time``: number on the ``Optimization time:`` line divided by 60
          (presumably seconds converted to minutes; confirm against the
          report writer).
        * ``performance``: number on the ``Value at minimum:`` line.
        * ``iterations``: number on the ``Optimization completed:`` line.
        * ``completed``: 1 if that line contains the word ``YES``, else 0.
        * ``seed``: first integer in the report's *file name*.

        A metric whose line is missing from a report is recorded as NaN
        (``completed`` as 0). If a line occurs more than once, the last
        occurrence wins.

    Raises
    ------
    ValueError
        If a matched report's file name contains no digits to use as seed.
    """
    # Build the file-name pattern. User-supplied parts are escaped so that
    # characters such as "[" or "*" are matched literally.
    if seed is None and controller is None:
        pattern = "*" + _REPORT_SUFFIX
    else:
        seed_part = "*" if seed is None else glob.escape(str(seed))
        controller_part = "*" if controller is None else glob.escape(str(controller))
        pattern = seed_part + "_" + controller_part + _REPORT_SUFFIX

    # os.path.join inserts the separator only when ``path`` lacks one.
    reports = glob.glob(os.path.join(path, pattern))

    metrics = {"time": [],
               "performance": [],
               "iterations": [],
               'completed': [],
               'seed': []}
    nan = float("nan")

    for file in reports:
        # Read the seed from the file name only: digits in the directory part
        # of the path (e.g. "Baes_25_1") must not be mistaken for the seed.
        seed_match = re.search(r"\d+", os.path.basename(file))
        if seed_match is None:
            raise ValueError("cannot determine seed from report name: " + file)

        # One record per file keeps all metric lists the same length.
        record = {"time": nan,
                  "performance": nan,
                  "iterations": nan,
                  "completed": 0,
                  "seed": int(seed_match.group(0))}

        with open(file, "r") as f:
            for line in f:
                # Split "<label>: <value...>" at the first colon; the number
                # is searched only in the value part.
                label, colon, value = line.partition(":")
                if not colon:
                    continue
                if label == "Optimization time":
                    record["time"] = _first_number(value) / 60.0
                elif label == "Value at minimum":
                    record["performance"] = _first_number(value)
                elif label == "Optimization completed":
                    record["iterations"] = _first_number(value)
                    record["completed"] = 1 if re.search(r'\bYES\b', value) else 0

        for key, entry in record.items():
            metrics[key].append(entry)

    return metrics

In [3]:
# Parse every report found under ./Baes_25_1/ (no seed or controller filter).
data = extract_performance(path="./Baes_25_1/")

### Convert the extracted metrics to a DataFrame for analysis

In [4]:
# Build a table from the dict of metric lists: one column per key.
data = pd.DataFrame(data)

In [9]:
# Preview the runs ordered from the lowest to the highest "performance" value.
data.sort_values("performance")

Unnamed: 0,time,performance,iterations,completed,seed
31,17.987235,0.000000,30.0,1,25
79,18.219192,0.000000,30.0,1,25
62,17.628231,72.143744,30.0,1,25
21,20.626147,78.458720,30.0,1,25
17,19.984203,128.986359,30.0,1,25
47,18.068835,179.844088,30.0,1,25
4,20.515187,223.326338,30.0,1,25
39,19.984641,454.433357,30.0,1,25
55,17.778287,504.127989,30.0,1,25
3,19.745665,514.926457,30.0,1,25


In [12]:
# Discard the runs whose reported performance is exactly 0 (in the preview
# above these are rows 31 and 79; presumably degenerate runs, TODO confirm).
# Filtering by value instead of by hard-coded row labels keeps this cell
# independent of the order in which the report files were read, and makes it
# safe to re-run (a second `drop([31, 79])` would raise KeyError).
data = data[data["performance"] != 0]

In [19]:
# Keep the frame ordered by ascending "performance" so that positional
# selections pick the lowest values first.
data = data.sort_values("performance", ascending=True)

In [23]:
# Mean of the first 30 "performance" values (positional selection).
data["performance"].iloc[:30].mean()

1008.356475795

In [24]:
# Sample standard deviation of the first 30 "performance" values.
data["performance"].iloc[:30].std()

573.8325374681764

In [25]:
# Show the first 30 "performance" values that the statistics are based on.
data["performance"].head(30)

62      72.143744
21      78.458720
17     128.986359
47     179.844088
4      223.326338
39     454.433357
55     504.127989
3      514.926457
40     645.584528
35     649.015356
27     697.436851
52     733.063783
74     810.882566
54     873.596590
16    1034.799059
20    1129.664098
63    1217.583345
78    1229.330866
57    1290.526090
41    1430.915032
29    1449.123297
5     1459.544496
42    1544.497585
53    1587.418634
36    1617.297081
2     1650.621036
6     1724.842871
12    1765.989228
14    1774.120111
1     1778.594716
Name: performance, dtype: float64