Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CSV data export directly from Postgres, and add integration test #187

Merged
merged 9 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ jobs:
--health-interval 10s
--health-timeout 5s
--health-retries 5
-v ${{ github.workspace }}:/postgres-export:rw
ports:
# Maps tcp port 5432 on service container to the host
- 5432:5432
Expand Down Expand Up @@ -79,3 +80,42 @@ jobs:
- name: Lint and Formatting
run: |
npm run verify

- name: Install ReBench
run: |
git clone --depth 1 --branch rebenchdb https://github.com/smarr/rebench.git
pushd rebench
pip install .
popd

- name: Run ReBench Integration Tests
run: |
# make workspace writable for postgres container
chmod a+wx ${{ github.workspace }}

# start ReBenchDB server
NODE_DATA_EXPORT_PATH=${{ github.workspace }} RDB_DATA_EXPORT_PATH=/postgres-export DEV=true npm run start &
sleep 5

# run integration tests
pushd tests/rebench-integration
rebench --experiment IntegrationTest rebench.conf

sleep 1
PROJID=$(curl -s http://localhost:33333/ReBenchDB-integration-test/data | grep project-id | grep -o -E '[0-9]+')
EXPID=$(curl -s http://localhost:33333/rebenchdb/dash/$PROJID/data-overview | jq '.data[0].expid')

# Trigger data generation
curl -s http://localhost:33333/ReBenchDB-integration-test/data/$EXPID.json.gz > /dev/null
curl -s http://localhost:33333/ReBenchDB-integration-test/data/$EXPID.csv.gz > /dev/null

sleep 10 # give the server some time to generate the files
# repossess the files to be able to read them
sudo chown $(whoami):$(id -g -n) ${{ github.workspace }}/*.gz

# fetch the generated files via node and check them
curl -sL http://localhost:33333/ReBenchDB-integration-test/data/$EXPID.json.gz -o actual.json.gz
curl -sL http://localhost:33333/ReBenchDB-integration-test/data/$EXPID.csv.gz -o actual.csv.gz
gzip -k -d actual.json.gz
gzip -k -d actual.csv.gz
node check-data.js
23 changes: 23 additions & 0 deletions src/backend/db/db.ts
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,29 @@ export abstract class Database {
return result.rows;
}

/**
 * Export all measurements of an experiment as a gzip-compressed CSV file.
 *
 * The file is written by the PostgreSQL server itself using
 * `COPY ... TO PROGRAM`, so `gzip` is executed on the database host and
 * `outputFile` has to be a path that is valid from the server's point of view.
 *
 * @param expId id of the experiment whose measurements are exported
 * @param outputFile path of the gzip-compressed CSV file to be written
 * @returns the `rows` of the query result
 * @throws Error if `expId` is not an integer
 */
public async storeExperimentMeasurements(
  expId: number,
  outputFile: string
): Promise<any[]> {
  // Postgres doesn't support parameters for COPY, so the values have to be
  // substituted into the statement text. Make sure they cannot break out of
  // the SQL statement or the shell command.
  if (!Number.isSafeInteger(expId)) {
    throw new Error(`Invalid experiment id: ${expId}`);
  }

  // Quote the path for the shell: wrap it in single quotes and replace each
  // embedded single quote with '\'' (close quote, escaped quote, reopen).
  const shellQuotedFile = "'" + outputFile.replace(/'/g, "'\\''") + "'";
  const command = `gzip -9 > ${shellQuotedFile}`;

  // Embed the command as an E'' string constant, in which backslashes are
  // always escape characters, independent of standard_conforming_strings.
  const sqlCommand =
    "E'" + command.replace(/\\/g, '\\\\').replace(/'/g, "''") + "'";

  const query = `COPY (
    SELECT
      ${measurementDataColumns.replace('$1', '6')}
    FROM
      ${measurementDataTableJoins}
    WHERE
      Experiment.id = ${expId}
    ORDER BY
      runId, trialId, cmdline, invocation, iteration, criterion
  ) TO PROGRAM ${sqlCommand}
  WITH (FORMAT csv, HEADER true)`;
  const result = await this.query({
    text: query
  });
  return result.rows;
}

public async recordExperimentCompletion(
expId: number,
endTime: string
Expand Down
8 changes: 6 additions & 2 deletions src/backend/dev-server/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { ParameterizedContext } from 'koa';
import { readFileSync } from 'node:fs';

import { log } from '../logging.js';
import { robustPath, robustSrcPath } from '../util.js';
import { robustPath, robustSrcPath, siteConfig } from '../util.js';

export async function serveStaticResource(
ctx: ParameterizedContext
Expand Down Expand Up @@ -31,7 +31,11 @@ export async function serveStaticResource(
} else if (filename.endsWith('.json.gz')) {
ctx.type = 'application/json';
ctx.set('Content-Encoding', 'gzip');
path = robustPath(`../resources/${filename}`);
path = `${siteConfig.dataExportPath}/${filename}`;
} else if (filename.endsWith('.csv.gz')) {
ctx.type = 'text/csv';
ctx.set('Content-Encoding', 'gzip');
path = `${siteConfig.dataExportPath}/${filename}`;
} else {
throw new Error(`Unsupported file type. Filename: ${filename}`);
}
Expand Down
36 changes: 24 additions & 12 deletions src/backend/project/data-export.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { existsSync } from 'node:fs';
import { completeRequest, startRequest } from '../perf-tracker.js';
import { robustPath, siteConfig, storeJsonGzip } from '../util.js';
import { dbConfig, siteConfig, storeJsonGzip } from '../util.js';
import { log } from '../logging.js';
import { Database } from '../db/db.js';
import { ParameterizedContext } from 'koa';
Expand All @@ -10,7 +10,8 @@ const expDataPreparation = new Map();
export async function getExpData(
projectSlug: string,
expId: number,
db: Database
db: Database,
format: 'json' | 'csv'
): Promise<any> {
const result = await db.getExperimentDetails(expId, projectSlug);

Expand All @@ -25,40 +26,51 @@ export async function getExpData(
data = result;
}

const expDataId = `${data.project}-${expId}`;
const expFileName = `exp-data/${expDataId}.json.gz`;
const expDataFile = robustPath(`../resources/${expFileName}`);
const expFilePrefix = `${data.project}-${expId}`;
const expFileName = `${expFilePrefix}.${format}.gz`;

if (existsSync(expDataFile)) {
if (existsSync(`${siteConfig.dataExportPath}/${expFileName}`)) {
data.preparingData = false;
data.downloadUrl = `${siteConfig.staticUrl}/${expFileName}`;
} else {
const expRequestId = `${expFilePrefix}-${format}`;
data.currentTime = new Date().toISOString();

const prevPrepDetails = expDataPreparation.get(expDataId);
const prevPrepDetails = expDataPreparation.get(expRequestId);

// no previous attempt to prepare data
if (!prevPrepDetails) {
const start = startRequest();

data.preparingData = true;

const resultP = db.getExperimentMeasurements(expId);
const resultP =
format === 'json'
? db.getExperimentMeasurements(expId)
: db.storeExperimentMeasurements(
expId,
`${dbConfig.dataExportPath}/${expFileName}`
);

expDataPreparation.set(expDataId, {
expDataPreparation.set(expRequestId, {
inProgress: true
});

resultP
.then(async (data: any[]) => {
await storeJsonGzip(data, expDataFile);
expDataPreparation.set(expDataId, {
if (format === 'json') {
await storeJsonGzip(
data,
`${siteConfig.dataExportPath}/${expFileName}`
);
}
expDataPreparation.set(expRequestId, {
inProgress: false
});
})
.catch(async (error) => {
log.error('Data preparation failed', error);
expDataPreparation.set(expDataId, {
expDataPreparation.set(expRequestId, {
error,
inProgress: false
});
Expand Down
9 changes: 7 additions & 2 deletions src/backend/project/project.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,16 @@ export async function renderDataExport(
db: Database
): Promise<void> {
const start = startRequest();
const format = ctx.params.expIdAndExtension.endsWith('.json.gz')
? 'json'
: 'csv';
const expId = ctx.params.expIdAndExtension.replace(`.${format}.gz`, '');

const data = await getExpData(
ctx.params.projectSlug,
Number(ctx.params.expId),
db
Number(expId),
db,
format
);

if (data.preparingData) {
Expand Down
2 changes: 1 addition & 1 deletion src/backend/rebench/results.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ export async function acceptResultData(
.then(([recMs, recPs]) =>
log.info(
// eslint-disable-next-line max-len
`/rebenchdb/results: stored ${recMs} measurements, ${recPs} profiles`
`/rebenchdb/results: stored ${recMs} sets of measurements, ${recPs} profiles`
)
)
.catch((e) => {
Expand Down
41 changes: 33 additions & 8 deletions src/backend/util.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { dirname } from 'path';
import { dirname, resolve } from 'path';
import { fileURLToPath } from 'url';

import { promisify } from 'node:util';
Expand Down Expand Up @@ -28,10 +28,10 @@ const __dirname = getDirname(import.meta.url);
*/
export const robustPath = __dirname.includes('dist/')
? function (path) {
return `${__dirname}/../../../src/${path}`;
return resolve(`${__dirname}/../../../src/${path}`);
}
: function (path) {
return `${__dirname}/../${path}`;
return resolve(`${__dirname}/../${path}`);
};

/**
Expand All @@ -49,11 +49,31 @@ const port: number = process.env.RDB_PORT
? parseInt(process.env.RDB_PORT)
: 5432;

const _rebench_dev = 'https://rebench.dev';
const reportsUrl = process.env.REPORTS_URL || '/static/reports';
const staticUrl = process.env.STATIC_URL || '/static';
const publicUrl = process.env.PUBLIC_URL || _rebench_dev;

// Configuration for data export is a little more involved, because the
// database might run elsewhere (e.g. in a container with a mounted volume),
// but may produce data files, which we need to be able to serve,
// at least in the dev mode.
//
// Export directory as seen by PostgreSQL. Taken from RDB_DATA_EXPORT_PATH,
// falling back to robustPath('../resources/exp-data').
const dbDataExportPath =
process.env.RDB_DATA_EXPORT_PATH || robustPath('../resources/exp-data');

// Export directory as seen by Node.js. Taken from NODE_DATA_EXPORT_PATH.
// Without it, the database path is used, i.e., we assume that Node has
// access to files produced by itself and PostgreSQL at the same location.
const nodeDataExportPath =
process.env.NODE_DATA_EXPORT_PATH || dbDataExportPath;

// Taken from DATA_URL_BASE, falling back to `${staticUrl}/exp-data`.
// NOTE(review): presumably the URL prefix under which the exported files
// are served; confirm against the users of siteConfig.dataExportUrlBase.
const dataExportUrlBase = process.env.DATA_URL_BASE || `${staticUrl}/exp-data`;

/**
 * Settings for the PostgreSQL database. Each value is read from the
 * corresponding `RDB_*` environment variable, with a fallback default.
 */
export const dbConfig = {
// credentials default to empty strings when RDB_USER/RDB_PASS are not set
user: process.env.RDB_USER || '',
password: process.env.RDB_PASS || '',
host: process.env.RDB_HOST || 'localhost',
database: process.env.RDB_DB || 'rdb_smde2',

/**
 * The path where PostgreSQL writes data files to.
 * Set via RDB_DATA_EXPORT_PATH, see `dbDataExportPath`.
 */
dataExportPath: dbDataExportPath,
// parsed from RDB_PORT, 5432 when the variable is not set
port
};

Expand All @@ -63,8 +83,6 @@ export const refreshSecret =
/** How long to still hold on to the cache after it became invalid. In ms. */
export const cacheInvalidationDelay = 1000 * 60 * 5; /* 5 minutes */

const _rebench_dev = 'https://rebench.dev';

export function isReBenchDotDev(): boolean {
return siteConfig.publicUrl === _rebench_dev;
}
Expand All @@ -83,9 +101,16 @@ export const statsConfig = {

export const siteConfig = {
port: process.env.PORT || 33333,
reportsUrl: process.env.REPORTS_URL || '/static/reports',
staticUrl: process.env.STATIC_URL || '/static',
publicUrl: process.env.PUBLIC_URL || _rebench_dev,
reportsUrl,
staticUrl,
publicUrl,
dataExportUrlBase,

/**
* The path where Node.js writes data files to,
* and Postgres generated files are accessible.
*/
dataExportPath: nodeDataExportPath,
appId: parseInt(process.env.GITHUB_APP_ID || '') || 76497,
githubPrivateKey:
process.env.GITHUB_PK || 'rebenchdb.2020-08-11.private-key.pem',
Expand Down
7 changes: 5 additions & 2 deletions src/frontend/render.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ export function renderProjectDataOverview(
projectSlug: string
): void {
const tBody = $('#data-overview');
const pSlug = projectSlug;

let hasDesc = false;

Expand All @@ -73,8 +74,10 @@ export function renderProjectDataOverview(
)}</p></td>
<td>${row.hostnames}</td>
<td class="num-col">${row.runs}</td>
<td class="num-col"><a rel="nofollow"
href="/${projectSlug}/data/${row.expid}">${row.measurements}</a></td>
<td class="num-col">${row.measurements}
<a rel="nofollow" href="/${pSlug}/data/${row.expid}.json.gz">JSON</a>,
<a rel="nofollow" href="/${pSlug}/data/${row.expid}.csv.gz">CSV</a>
</td>
</tr>`);
}

Expand Down
2 changes: 1 addition & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ router.get('/:projectSlug/source/:sourceId', async (ctx) =>
);
router.get('/:projectSlug/timeline', async (ctx) => renderTimeline(ctx, db));
router.get('/:projectSlug/data', async (ctx) => renderProjectDataPage(ctx, db));
router.get('/:projectSlug/data/:expId', async (ctx) => {
router.get('/:projectSlug/data/:expIdAndExtension', async (ctx) => {
if (
ctx.header['X-Purpose'] === 'preview' ||
ctx.header['Purpose'] === 'prefetch' ||
Expand Down
2 changes: 1 addition & 1 deletion tests/backend/main/main.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ describe('getLast100Measurements', () => {

let db: TestDatabase;
beforeAll(async () => {
db = await createAndInitializeDB('main_basic', 25, true, false);
db = await createAndInitializeDB('main_main', 25, true, false);
});

afterAll(async () => {
Expand Down
2 changes: 1 addition & 1 deletion tests/backend/main/with-data.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ describe('Test with basic test data loaded', () => {
// switch suites to use a template database

beforeAll(async () => {
db = await createAndInitializeDB('main_basic', 25, true, false);
db = await createAndInitializeDB('main_with_data', 25, true, false);

const data = readFileSync(
robustPath('../tests/data/small-payload.json')
Expand Down
Loading