Skip to content

Commit

Permalink
Cache generated dir and allow forceful overwrites
Browse files Browse the repository at this point in the history
  • Loading branch information
rubensworks committed Aug 5, 2021
1 parent d86203e commit 10ee378
Show file tree
Hide file tree
Showing 19 changed files with 281 additions and 78 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ export class ExperimentHandlerLdbcSnbDecentralized extends ExperimentHandler<Exp
configQueries: 'input/config-queries.json',
configServer: 'input/config-server.json',
directoryQueryTemplates: 'input/templates/queries',
overwriteFilesDuringPrepare: false,
hadoopMemory: '4G',
dockerfileServer: 'input/dockerfiles/Dockerfile-server',

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ export class ExperimentLdbcSnbDecentralized implements Experiment {
public readonly configQueries: string;
public readonly configServer: string;
public readonly directoryQueryTemplates: string;
public readonly overwriteFilesDuringPrepare: boolean;
public readonly hadoopMemory: string;
public readonly dockerfileServer: string;
public readonly hookSparqlEndpoint: Hook;
Expand All @@ -37,7 +36,6 @@ export class ExperimentLdbcSnbDecentralized implements Experiment {
configQueries: string,
configServer: string,
directoryQueryTemplates: string,
overwriteFilesDuringPrepare: boolean,
hadoopMemory: string,
dockerfileServer: string,
hookSparqlEndpoint: Hook,
Expand All @@ -56,7 +54,6 @@ export class ExperimentLdbcSnbDecentralized implements Experiment {
this.configQueries = configQueries;
this.configServer = configServer;
this.directoryQueryTemplates = directoryQueryTemplates;
this.overwriteFilesDuringPrepare = overwriteFilesDuringPrepare;
this.hadoopMemory = hadoopMemory;
this.dockerfileServer = dockerfileServer;
this.hookSparqlEndpoint = hookSparqlEndpoint;
Expand All @@ -73,15 +70,15 @@ export class ExperimentLdbcSnbDecentralized implements Experiment {
return `jrb-experiment-${Path.basename(Path.join(context.experimentPaths.generated, '..'))}-server`;
}

public async prepare(context: ITaskContext): Promise<void> {
public async prepare(context: ITaskContext, forceOverwriteGenerated: boolean): Promise<void> {
// Prepare hook
await this.hookSparqlEndpoint.prepare(context);
await this.hookSparqlEndpoint.prepare(context, forceOverwriteGenerated);

// Prepare dataset
await new Generator({
verbose: context.verbose,
cwd: context.experimentPaths.generated,
overwrite: this.overwriteFilesDuringPrepare,
overwrite: forceOverwriteGenerated,
scale: this.scale,
enhancementConfig: this.configGenerateAux,
fragmentConfig: this.configFragment,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ describe('ExperimentHandlerLdbcSnbDecentralized', () => {
describe('getDefaultParams', () => {
it('returns a hash', () => {
expect(handler.getDefaultParams(experimentPaths)).toBeInstanceOf(Object);
expect(Object.entries(handler.getDefaultParams(experimentPaths)).length).toEqual(17);
expect(Object.entries(handler.getDefaultParams(experimentPaths)).length).toEqual(16);
});
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ describe('ExperimentLdbcSnbDecentralized', () => {
'input/config-queries.json',
'input/config-server.json',
'input/templates/queries',
false,
'4G',
'input/dockerfiles/Dockerfile-server',
hookSparqlEndpoint,
Expand All @@ -126,9 +125,27 @@ describe('ExperimentLdbcSnbDecentralized', () => {

describe('prepare', () => {
it('should prepare the experiment', async() => {
await experiment.prepare(context);
await experiment.prepare(context, false);

expect(hookSparqlEndpoint.prepare).toHaveBeenCalledWith(context);
expect(hookSparqlEndpoint.prepare).toHaveBeenCalledWith(context, false);
expect(generatorGenerate).toHaveBeenCalled();
expect(context.docker.imageBuilder.build).toHaveBeenCalledWith({
cwd: context.cwd,
dockerFile: 'input/dockerfiles/Dockerfile-server',
auxiliaryFiles: [ 'input/config-server.json' ],
imageName: 'jrb-experiment-CWD-server',
buildArgs: {
CONFIG_SERVER: 'input/config-server.json',
LOG_LEVEL: 'info',
},
logger,
});
});

it('should prepare the experiment with force overwrite', async() => {
await experiment.prepare(context, true);

expect(hookSparqlEndpoint.prepare).toHaveBeenCalledWith(context, true);
expect(generatorGenerate).toHaveBeenCalled();
expect(context.docker.imageBuilder.build).toHaveBeenCalledWith({
cwd: context.cwd,
Expand Down
87 changes: 48 additions & 39 deletions packages/experiment-watdiv/lib/ExperimentWatDiv.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,60 +41,69 @@ export class ExperimentWatDiv implements Experiment {
this.queryRunnerRecordTimestamps = queryRunnerRecordTimestamps;
}

public async prepare(context: ITaskContext): Promise<void> {
public async prepare(context: ITaskContext, forceOverwriteGenerated: boolean): Promise<void> {
// Prepare hook
await this.hookSparqlEndpoint.prepare(context);
await this.hookSparqlEndpoint.prepare(context, forceOverwriteGenerated);

// Ensure logs directory exists
await fs.ensureDir(Path.join(context.experimentPaths.output, 'logs'));

// Prepare dataset
context.logger.info(`Generating WatDiv dataset and queries`);
await context.docker.imagePuller.pull({ repoTag: ExperimentWatDiv.DOCKER_IMAGE_WATDIV });
await (await context.docker.containerCreator.start({
imageName: ExperimentWatDiv.DOCKER_IMAGE_WATDIV,
cmdArgs: [ '-s', String(this.datasetScale), '-q', String(this.queryCount), '-r', String(this.queryRecurrence) ],
hostConfig: {
Binds: [
`${context.experimentPaths.generated}:/output`,
],
},
logFilePath: Path.join(context.experimentPaths.output, 'logs', 'watdiv-generation.txt'),
})).join();

if (this.generateHdt) {
// Create HDT file
context.logger.info(`Converting WatDiv dataset to HDT`);

// Pull HDT Docker image
await context.docker.imagePuller.pull({ repoTag: ExperimentWatDiv.DOCKER_IMAGE_HDT });

// Remove any existing index files
await fs.rm(Path.join(context.experimentPaths.generated, 'dataset.hdt.index.v1-1'), { force: true });

// Convert dataset to HDT
if (!forceOverwriteGenerated && await fs.pathExists(Path.join(context.experimentPaths.generated, 'dataset.nt'))) {
context.logger.info(` Skipped`);
} else {
await context.docker.imagePuller.pull({ repoTag: ExperimentWatDiv.DOCKER_IMAGE_WATDIV });
await (await context.docker.containerCreator.start({
imageName: ExperimentWatDiv.DOCKER_IMAGE_HDT,
cmdArgs: [ 'rdf2hdt', '/output/dataset.nt', '/output/dataset.hdt' ],
imageName: ExperimentWatDiv.DOCKER_IMAGE_WATDIV,
cmdArgs: [ '-s', String(this.datasetScale), '-q', String(this.queryCount), '-r', String(this.queryRecurrence) ],
hostConfig: {
Binds: [
`${context.experimentPaths.generated}:/output`,
],
},
logFilePath: Path.join(context.experimentPaths.output, 'logs', 'watdiv-hdt.txt'),
logFilePath: Path.join(context.experimentPaths.output, 'logs', 'watdiv-generation.txt'),
})).join();
}

// Generate HDT index file
await (await context.docker.containerCreator.start({
imageName: ExperimentWatDiv.DOCKER_IMAGE_HDT,
cmdArgs: [ 'hdtSearch', '/output/dataset.hdt', '-q', '0' ],
hostConfig: {
Binds: [
`${context.experimentPaths.generated}:/output`,
],
},
logFilePath: Path.join(context.experimentPaths.output, 'logs', 'watdiv-hdt-index.txt'),
})).join();
if (this.generateHdt) {
// Create HDT file
context.logger.info(`Converting WatDiv dataset to HDT`);

if (!forceOverwriteGenerated &&
await fs.pathExists(Path.join(context.experimentPaths.generated, 'dataset.hdt'))) {
context.logger.info(` Skipped`);
} else {
// Pull HDT Docker image
await context.docker.imagePuller.pull({ repoTag: ExperimentWatDiv.DOCKER_IMAGE_HDT });

// Remove any existing index files
await fs.rm(Path.join(context.experimentPaths.generated, 'dataset.hdt.index.v1-1'), { force: true });

// Convert dataset to HDT
await (await context.docker.containerCreator.start({
imageName: ExperimentWatDiv.DOCKER_IMAGE_HDT,
cmdArgs: [ 'rdf2hdt', '/output/dataset.nt', '/output/dataset.hdt' ],
hostConfig: {
Binds: [
`${context.experimentPaths.generated}:/output`,
],
},
logFilePath: Path.join(context.experimentPaths.output, 'logs', 'watdiv-hdt.txt'),
})).join();

// Generate HDT index file
await (await context.docker.containerCreator.start({
imageName: ExperimentWatDiv.DOCKER_IMAGE_HDT,
cmdArgs: [ 'hdtSearch', '/output/dataset.hdt', '-q', '0' ],
hostConfig: {
Binds: [
`${context.experimentPaths.generated}:/output`,
],
},
logFilePath: Path.join(context.experimentPaths.output, 'logs', 'watdiv-hdt-index.txt'),
})).join();
}
}
}

Expand Down
128 changes: 124 additions & 4 deletions packages/experiment-watdiv/test/ExperimentWatDiv.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,129 @@ describe('ExperimentWatDiv', () => {

describe('prepare', () => {
it('should prepare the experiment', async() => {
await experiment.prepare(context);
await experiment.prepare(context, false);

expect(hookSparqlEndpoint.prepare).toHaveBeenCalledWith(context);
expect(hookSparqlEndpoint.prepare).toHaveBeenCalledWith(context, false);

expect(context.docker.imagePuller.pull).toHaveBeenCalledTimes(2);
expect(context.docker.imagePuller.pull).toHaveBeenCalledWith({
repoTag: ExperimentWatDiv.DOCKER_IMAGE_WATDIV,
});
expect(context.docker.imagePuller.pull).toHaveBeenCalledWith({
repoTag: ExperimentWatDiv.DOCKER_IMAGE_HDT,
});

expect(context.docker.containerCreator.start).toHaveBeenCalledTimes(3);
expect(context.docker.containerCreator.start).toHaveBeenCalledWith({
imageName: ExperimentWatDiv.DOCKER_IMAGE_WATDIV,
cmdArgs: [ '-s', '1', '-q', '5', '-r', '1' ],
hostConfig: {
Binds: [
`${context.experimentPaths.generated}:/output`,
],
},
logFilePath: Path.join(context.experimentPaths.output, 'logs', 'watdiv-generation.txt'),
});
expect(context.docker.containerCreator.start).toHaveBeenCalledWith({
imageName: ExperimentWatDiv.DOCKER_IMAGE_HDT,
cmdArgs: [ 'rdf2hdt', '/output/dataset.nt', '/output/dataset.hdt' ],
hostConfig: {
Binds: [
`${context.experimentPaths.generated}:/output`,
],
},
logFilePath: Path.join(context.experimentPaths.output, 'logs', 'watdiv-hdt.txt'),
});
expect(context.docker.containerCreator.start).toHaveBeenCalledWith({
imageName: ExperimentWatDiv.DOCKER_IMAGE_HDT,
cmdArgs: [ 'hdtSearch', '/output/dataset.hdt', '-q', '0' ],
hostConfig: {
Binds: [
`${context.experimentPaths.generated}:/output`,
],
},
logFilePath: Path.join(context.experimentPaths.output, 'logs', 'watdiv-hdt-index.txt'),
});

expect(dirsOut).toEqual({
'CWD/output/logs': true,
});
});

// Cached-output case: both generated artifacts are flagged as present and
// prepare() is called without the force flag, so no Docker work is expected.
it('should prepare the experiment if files already exist', async() => {
// NOTE(review): presumably `files` backs the mocked fs.pathExists lookup — confirm in the test setup.
files[Path.join(context.experimentPaths.generated, 'dataset.nt')] = true;
files[Path.join(context.experimentPaths.generated, 'dataset.hdt')] = true;

await experiment.prepare(context, false);

// The force flag is forwarded to the hook unchanged.
expect(hookSparqlEndpoint.prepare).toHaveBeenCalledWith(context, false);

// Neither image is pulled when generation and HDT conversion are both skipped.
expect(context.docker.imagePuller.pull).toHaveBeenCalledTimes(0);

// No generation, rdf2hdt, or hdtSearch container is started.
expect(context.docker.containerCreator.start).toHaveBeenCalledTimes(0);

// The logs directory is still ensured even though everything else is skipped.
expect(dirsOut).toEqual({
'CWD/output/logs': true,
});
});

// Forced case with no pre-existing generated files: prepare() is called with
// the force flag set, and the full generation + HDT pipeline is expected to run.
it('should forcefully prepare the experiment', async() => {
await experiment.prepare(context, true);

// The force flag is forwarded to the hook unchanged.
expect(hookSparqlEndpoint.prepare).toHaveBeenCalledWith(context, true);

// Two pulls: one for the WatDiv image, one for the HDT image.
expect(context.docker.imagePuller.pull).toHaveBeenCalledTimes(2);
expect(context.docker.imagePuller.pull).toHaveBeenCalledWith({
repoTag: ExperimentWatDiv.DOCKER_IMAGE_WATDIV,
});
expect(context.docker.imagePuller.pull).toHaveBeenCalledWith({
repoTag: ExperimentWatDiv.DOCKER_IMAGE_HDT,
});

// Three containers: dataset generation, rdf2hdt conversion, and hdtSearch indexing.
expect(context.docker.containerCreator.start).toHaveBeenCalledTimes(3);
// 1) WatDiv generation, writing into the generated dir mounted at /output.
expect(context.docker.containerCreator.start).toHaveBeenCalledWith({
imageName: ExperimentWatDiv.DOCKER_IMAGE_WATDIV,
cmdArgs: [ '-s', '1', '-q', '5', '-r', '1' ],
hostConfig: {
Binds: [
`${context.experimentPaths.generated}:/output`,
],
},
logFilePath: Path.join(context.experimentPaths.output, 'logs', 'watdiv-generation.txt'),
});
// 2) Conversion of dataset.nt to dataset.hdt.
expect(context.docker.containerCreator.start).toHaveBeenCalledWith({
imageName: ExperimentWatDiv.DOCKER_IMAGE_HDT,
cmdArgs: [ 'rdf2hdt', '/output/dataset.nt', '/output/dataset.hdt' ],
hostConfig: {
Binds: [
`${context.experimentPaths.generated}:/output`,
],
},
logFilePath: Path.join(context.experimentPaths.output, 'logs', 'watdiv-hdt.txt'),
});
// 3) hdtSearch run against the freshly written dataset.hdt.
expect(context.docker.containerCreator.start).toHaveBeenCalledWith({
imageName: ExperimentWatDiv.DOCKER_IMAGE_HDT,
cmdArgs: [ 'hdtSearch', '/output/dataset.hdt', '-q', '0' ],
hostConfig: {
Binds: [
`${context.experimentPaths.generated}:/output`,
],
},
logFilePath: Path.join(context.experimentPaths.output, 'logs', 'watdiv-hdt-index.txt'),
});

// Only the logs directory is recorded as created.
expect(dirsOut).toEqual({
'CWD/output/logs': true,
});
});

it('should forcefully prepare the experiment if files already exist', async() => {
files[Path.join(context.experimentPaths.generated, 'dataset.nt')] = true;
files[Path.join(context.experimentPaths.generated, 'dataset.hdt')] = true;

await experiment.prepare(context, true);

expect(hookSparqlEndpoint.prepare).toHaveBeenCalledWith(context, true);

expect(context.docker.imagePuller.pull).toHaveBeenCalledTimes(2);
expect(context.docker.imagePuller.pull).toHaveBeenCalledWith({
Expand Down Expand Up @@ -154,9 +274,9 @@ describe('ExperimentWatDiv', () => {
true,
);

await experiment.prepare(context);
await experiment.prepare(context, false);

expect(hookSparqlEndpoint.prepare).toHaveBeenCalledWith(context);
expect(hookSparqlEndpoint.prepare).toHaveBeenCalledWith(context, false);

expect(context.docker.imagePuller.pull).toHaveBeenCalledTimes(1);
expect(context.docker.imagePuller.pull).toHaveBeenCalledWith({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ export class HookSparqlEndpointComunica implements Hook {
return `jrb-experiment-${Path.basename(context.experimentPaths.root)}-sparql-endpoint-comunica`;
}

public async prepare(context: ITaskContext): Promise<void> {
public async prepare(context: ITaskContext, forceOverwriteGenerated: boolean): Promise<void> {
// Build client Dockerfile
await context.docker.imageBuilder.build({
cwd: context.experimentPaths.root,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ describe('HookSparqlEndpointComunica', () => {

describe('prepare', () => {
it('should prepare the hook', async() => {
await hook.prepare(context);
await hook.prepare(context, false);

expect(context.docker.imageBuilder.build).toHaveBeenCalledWith({
cwd: context.cwd,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ export class HookSparqlEndpointLdf implements Hook {
return `jrb-experiment-${Path.basename(context.experimentPaths.root)}-sparql-endpoint-ldf-${type}`;
}

public async prepare(context: ITaskContext): Promise<void> {
public async prepare(context: ITaskContext, forceOverwriteGenerated: boolean): Promise<void> {
// Build server Dockerfile
context.logger.info(`Building LDF server Docker image`);
await context.docker.imageBuilder.build({
Expand All @@ -73,7 +73,7 @@ export class HookSparqlEndpointLdf implements Hook {

// Prepare LDF engine
context.logger.info(`Preparing LDF engine hook`);
await this.hookSparqlEndpointLdfEngine.prepare(context);
await this.hookSparqlEndpointLdfEngine.prepare(context, forceOverwriteGenerated);
}

public async start(context: ITaskContext, options?: IHookStartOptions): Promise<ProcessHandler> {
Expand Down

0 comments on commit 10ee378

Please sign in to comment.