Commit

Added support to download data as CSV.gz
The CSV is generated directly by Postgres, which avoids memory issues in Node.js for very large data sets (a minimal sketch of the approach follows below).

Signed-off-by: Stefan Marr <git@stefan-marr.de>
smarr committed Mar 19, 2024
1 parent c383411 commit 99a356e
Showing 8 changed files with 107 additions and 28 deletions.
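
The essence of the change, as a hedged sketch (a node-postgres Client is assumed; the table and column names are placeholders, not the ones used by ReBenchDB):

    import { Client } from 'pg';

    // Minimal sketch of the approach, not the commit's actual code: the table
    // and column names are hypothetical placeholders.
    async function exportExperimentCsvGz(
      client: Client,
      expId: number,
      outputFile: string
    ): Promise<void> {
      // COPY runs entirely inside Postgres and pipes the CSV through gzip on
      // the database host, so Node.js never holds the result set in memory.
      await client.query(`COPY (
          SELECT * FROM Measurement WHERE expId = ${expId}
        ) TO PROGRAM 'gzip -9 > ${outputFile}'
        WITH (FORMAT csv, HEADER true)`);
    }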
10 changes: 7 additions & 3 deletions .github/workflows/ci.yml
@@ -98,8 +98,12 @@ jobs:
EXPID=$(curl -s http://localhost:33333/rebenchdb/dash/$PROJID/data-overview | jq '.data[0].expid')
# Trigger data generation
curl -s http://localhost:33333/ReBenchDB-integration-test/data/$EXPID > /dev/null
sleep 10 # give the server some time to generate the json.gz file
curl -sL http://localhost:33333/ReBenchDB-integration-test/data/$EXPID -o actual.json.gz
curl -s http://localhost:33333/ReBenchDB-integration-test/data/$EXPID.json.gz > /dev/null
curl -s http://localhost:33333/ReBenchDB-integration-test/data/$EXPID.csv.gz > /dev/null
sleep 10 # give the server some time to generate the files
curl -sL http://localhost:33333/ReBenchDB-integration-test/data/$EXPID.json.gz -o actual.json.gz
curl -sL http://localhost:33333/ReBenchDB-integration-test/data/$EXPID.csv.gz -o actual.csv.gz
gzip -k -d actual.json.gz
gzip -k -d actual.csv.gz
node check-data.js
23 changes: 23 additions & 0 deletions src/backend/db/db.ts
@@ -793,6 +793,29 @@ export abstract class Database {
return result.rows;
}

public async storeExperimentMeasurements(
expId: number,
outputFile: string
): Promise<any[]> {
// Postgres doesn't support parameters for COPY,
// so we just do string substitution here
const query = `COPY (
SELECT
${measurementDataColumns.replace('$1', '6')}
FROM
${measurementDataTableJoins}
WHERE
Experiment.id = ${expId}
ORDER BY
runId, trialId, cmdline, invocation, iteration, criterion
) TO PROGRAM 'gzip -9 > ${outputFile}'
WITH (FORMAT csv, HEADER true)`;
const result = await this.query({
text: query
});
return result.rows;
}

public async recordExperimentCompletion(
expId: number,
endTime: string
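For context, a sketch of how this new method might be called; the concrete Database instance and output path below are assumptions for illustration, not part of the commit:

    // Hypothetical usage: export experiment 42 as a gzipped CSV.
    // The path must be writable by the Postgres server process, because
    // COPY ... TO PROGRAM runs gzip on the database host, not in Node.js.
    const expId = 42;
    const outputFile = '/tmp/exp-data/some-project-42.csv.gz';
    await db.storeExperimentMeasurements(expId, outputFile);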
4 changes: 4 additions & 0 deletions src/backend/dev-server/server.ts
@@ -32,6 +32,10 @@ export async function serveStaticResource(
ctx.type = 'application/json';
ctx.set('Content-Encoding', 'gzip');
path = robustPath(`../resources/${filename}`);
} else if (filename.endsWith('.csv.gz')) {
ctx.type = 'text/csv';
ctx.set('Content-Encoding', 'gzip');
path = robustPath(`../resources/${filename}`);
} else {
throw new Error(`Unsupported file type. Filename: ${filename}`);
}
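Because the gzipped file is served with Content-Encoding: gzip, standards-compliant HTTP clients decompress it transparently. A small sketch under that assumption (the URL is hypothetical):

    // fetch decodes gzip-encoded responses automatically, so the body read
    // here is already plain CSV text.
    const resp = await fetch(
      'http://localhost:33333/static/exp-data/my-project-1.csv.gz'
    );
    const csv = await resp.text();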
25 changes: 16 additions & 9 deletions src/backend/project/data-export.ts
@@ -10,7 +10,8 @@ const expDataPreparation = new Map();
export async function getExpData(
projectSlug: string,
expId: number,
db: Database
db: Database,
format: 'json' | 'csv'
): Promise<any> {
const result = await db.getExperimentDetails(expId, projectSlug);

@@ -25,40 +26,46 @@ export async function getExpData(
data = result;
}

const expDataId = `${data.project}-${expId}`;
const expFileName = `exp-data/${expDataId}.json.gz`;
const expFilePrefix = `${data.project}-${expId}`;
const expFileName = `exp-data/${expFilePrefix}.${format}.gz`;
const expDataFile = robustPath(`../resources/${expFileName}`);

if (existsSync(expDataFile)) {
data.preparingData = false;
data.downloadUrl = `${siteConfig.staticUrl}/${expFileName}`;
} else {
const expRequestId = `${expFilePrefix}-${format}`;
data.currentTime = new Date().toISOString();

const prevPrepDetails = expDataPreparation.get(expDataId);
const prevPrepDetails = expDataPreparation.get(expRequestId);

// no previous attempt to prepare data
if (!prevPrepDetails) {
const start = startRequest();

data.preparingData = true;

const resultP = db.getExperimentMeasurements(expId);
const resultP =
format === 'json'
? db.getExperimentMeasurements(expId)
: db.storeExperimentMeasurements(expId, expDataFile);

expDataPreparation.set(expDataId, {
expDataPreparation.set(expRequestId, {
inProgress: true
});

resultP
.then(async (data: any[]) => {
await storeJsonGzip(data, expDataFile);
expDataPreparation.set(expDataId, {
if (format === 'json') {
await storeJsonGzip(data, expDataFile);
}
expDataPreparation.set(expRequestId, {
inProgress: false
});
})
.catch(async (error) => {
log.error('Data preparation failed', error);
expDataPreparation.set(expDataId, {
expDataPreparation.set(expRequestId, {
error,
inProgress: false
});
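The export is prepared asynchronously on the first request, which is why the CI job above triggers the URL, sleeps, and then downloads. A hedged TypeScript equivalent of that retry loop (URL and polling interval are assumptions):

    // Trigger generation, then retry until the gzipped CSV is actually served.
    const url = 'http://localhost:33333/my-project/data/42.csv.gz';
    let resp = await fetch(url);
    while (!(resp.headers.get('content-type') ?? '').includes('text/csv')) {
      await new Promise((resolve) => setTimeout(resolve, 5000));
      resp = await fetch(url);
    }
    const csv = await resp.text();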
9 changes: 7 additions & 2 deletions src/backend/project/project.ts
@@ -106,11 +106,16 @@ export async function renderDataExport(
db: Database
): Promise<void> {
const start = startRequest();
const format = ctx.params.expIdAndExtension.endsWith('.json.gz')
? 'json'
: 'csv';
const expId = ctx.params.expIdAndExtension.replace(`.${format}.gz`, '');

const data = await getExpData(
ctx.params.projectSlug,
Number(ctx.params.expId),
db
Number(expId),
db,
format
);

if (data.preparingData) {
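The requested format is now derived from the URL's extension; a small illustration with hypothetical values:

    // '42.json.gz' maps to format 'json', '42.csv.gz' maps to format 'csv'.
    const expIdAndExtension = '42.csv.gz';
    const format = expIdAndExtension.endsWith('.json.gz') ? 'json' : 'csv';
    const expId = Number(expIdAndExtension.replace(`.${format}.gz`, '')); // 42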
7 changes: 5 additions & 2 deletions src/frontend/render.ts
@@ -50,6 +50,7 @@ export function renderProjectDataOverview(
projectSlug: string
): void {
const tBody = $('#data-overview');
const pSlug = projectSlug;

let hasDesc = false;

@@ -73,8 +74,10 @@
)}</p></td>
<td>${row.hostnames}</td>
<td class="num-col">${row.runs}</td>
<td class="num-col"><a rel="nofollow"
href="/${projectSlug}/data/${row.expid}">${row.measurements}</a></td>
<td class="num-col">${row.measurements}
<a rel="nofollow" href="/${pSlug}/data/${row.expid}.json.gz">JSON</a>,
<a rel="nofollow" href="/${pSlug}/data/${row.expid}.csv.gz">CSV</a>
</td>
</tr>`);
}

2 changes: 1 addition & 1 deletion src/index.ts
@@ -93,7 +93,7 @@ router.get('/:projectSlug/source/:sourceId', async (ctx) =>
);
router.get('/:projectSlug/timeline', async (ctx) => renderTimeline(ctx, db));
router.get('/:projectSlug/data', async (ctx) => renderProjectDataPage(ctx, db));
router.get('/:projectSlug/data/:expId', async (ctx) => {
router.get('/:projectSlug/data/:expIdAndExtension', async (ctx) => {
if (
ctx.header['X-Purpose'] === 'preview' ||
ctx.header['Purpose'] === 'prefetch' ||
55 changes: 44 additions & 11 deletions tests/rebench-integration/check-data.js
@@ -1,7 +1,5 @@
import { readFileSync } from 'node:fs';

const data = JSON.parse(readFileSync('actual.json', 'utf-8'));

let allCorrect = true;

function assert(values, criterion, step) {
@@ -23,20 +21,55 @@ function assert(values, criterion, step) {
}
}

const byCriterion = { mem: [], compile: [], total: [] };
function getJsonData() {
const data = JSON.parse(readFileSync('actual.json', 'utf-8'));

const byCriterion = { mem: [], compile: [], total: [] };

for (const e of data) {
byCriterion[e.criterion][e.iteration] = e.value;
}

return byCriterion;
}

function getCsvData() {
const data = readFileSync('actual.csv', 'utf-8');

for (const e of data) {
byCriterion[e.criterion][e.iteration] = e.value;
const lines = data.split('\n');
const columnArr = lines.shift().split(',');
const criterionIdx = columnArr.indexOf('criterion');
const iterationIdx = columnArr.indexOf('iteration');
const valueIdx = columnArr.indexOf('value');

const byCriterion = { mem: [], compile: [], total: [] };

for (const line of lines) {
if (line === '') {
continue;
}
const columns = line.split(',');
byCriterion[columns[criterionIdx]][columns[iterationIdx]] = parseInt(
columns[valueIdx]
);
}

return byCriterion;
}

for (const [c, step] of [
['mem', 3],
['compile', 7],
['total', 1]
]) {
assert(byCriterion[c], c, step);
function check(byCriterion) {
for (const [c, step] of [
['mem', 3],
['compile', 7],
['total', 1]
]) {
assert(byCriterion[c], c, step);
}
}

check(getJsonData());
check(getCsvData());

if (allCorrect) {
process.exit(0);
} else {
