## Monte Carlo Pi Statistics Generator

This notebook expects two test types, distinguished by the following `testName` conventions:

#### Increased Nodes
```
^(?:[0-9]+)(?:-[0-9]+)*$
```

Examples: "2-4", "1-3-4" (labelled "2n" and "3n" in the output: one node per number in the name)

#### Increased Processes

```
^[0-9]+n-[0-9]+p$
```

Example: "2n-4p" (labelled "4p" in the output: the last number in the name)

In [97]:
import re
import pandas as pd
import numpy as np

In [105]:
def extractNodes(testName):
    """Build the node label for a test name.

    Splits ``testName`` around runs of ASCII digits, counts the resulting
    pieces that are numeric, and returns that count followed by ``"n"``
    (for example ``"1-3-4"`` gives ``"3n"``).
    """
    numberCount = 0
    for piece in re.split('([0-9]+)', testName):
        if piece.isnumeric():
            numberCount += 1
    return f"{numberCount}n"

def extractProcs(testName):
    """Build the process label for a test name.

    Splits ``testName`` around runs of ASCII digits, keeps the numeric
    pieces, and returns the last one followed by ``"p"`` (for example
    ``"2n-4p"`` gives ``"4p"``). Raises ``IndexError`` when the name
    contains no numeric piece.
    """
    numericPieces = [
        piece
        for piece in re.split('([0-9]+)', testName)
        if piece.isnumeric()
    ]
    lastNumber = numericPieces[-1]
    return f"{lastNumber}p"

In [119]:
# Column names expected in the input CSV.
# Rows are grouped by this column; its values follow the naming
# conventions described at the top of the notebook.
testNameColumn = 'testName'
# NOTE(review): originColumn and destinationColumn are not referenced by
# any cell visible in this notebook -- confirm whether they are still needed.
originColumn = 'thisID'
destinationColumn = 'thatID'
# Column holding the timing values that are aggregated per test.
timeColumn = 'timeDelta'

# CSV read by the loading cell and CSV written by the statistics cell.
inputFile = "data-source/mcpi-parsed-1mil-01.csv"
outputFile = "data-result/mcpi-stats-1mil-01.csv"

In [120]:
# Load the parsed timing records from `inputFile` into the `data` frame.
data = pd.read_csv(inputFile,
                   index_col=0,  # first CSV column becomes the row index
                   dtype={timeColumn: np.float64},  # parse the timing column as float64
                   comment='#')  # anything after a '#' on a line is not parsed

In [121]:
# Compute per-test timing statistics (min, max, mean, std) and write them to
# `outputFile`, one row per distinct test name, labelled "<k>n" for
# increased-node tests and "<k>p" for increased-process tests.

# Names made only of dash-separated numbers ("2-4", "1-3-4") are
# increased-node tests; every other name is treated as an
# increased-process test ("2n-4p"). Compiled once instead of per iteration.
nodeTestPattern = re.compile(r'^(?:[0-9]+)(?:-[0-9]+)*$')
statColumns = ["min", "max", "mean", "std"]

# Drop zero-duration records before aggregating. Uses `timeColumn` rather
# than a hard-coded column name so the cell follows the configuration.
data = data[data[timeColumn] != 0.0]
testNames = np.unique(data[testNameColumn].values)

# Rows are collected first and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0, and appending inside the loop
# copied the whole frame on every iteration.
rowTitles = []
rowStats = []
for test in testNames:
    if nodeTestPattern.search(test):
        testTitle = extractNodes(test)
    else:
        testTitle = extractProcs(test)
    testTimes = data.loc[data[testNameColumn] == test, timeColumn]
    rowTitles.append(testTitle)
    rowStats.append({
        "min": testTimes.min(),
        "max": testTimes.max(),
        "mean": testTimes.mean(),
        "std": testTimes.std(),
    })

# Different test names can map to the same title (e.g. "2-4" and "3-5" both
# give "2n"); such rows are kept as separate rows with a repeated index
# label, exactly as the per-row append did.
result = pd.DataFrame(rowStats, index=rowTitles, columns=statColumns)
result.to_csv(outputFile)