Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
Merge 56b13be into ffd8463
Browse files Browse the repository at this point in the history
  • Loading branch information
markysoft committed May 22, 2018
2 parents ffd8463 + 56b13be commit 4616a66
Show file tree
Hide file tree
Showing 8 changed files with 167 additions and 22 deletions.
29 changes: 25 additions & 4 deletions AzureDataService.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ const requireEnv = require('require-environment-variables');
requireEnv(['AZURE_STORAGE_CONNECTION_STRING']);

const azureService = require('./lib/azureService');
const createFilter = require('./lib/createFileVersionFilter');
const createFileVersionFilter = require('./lib/createFileVersionFilter');
const createExpiredFileFilter = require('./lib/createExpiredFileFilter');
const fsHelper = require('./lib/fsHelper');
const getDateFromFilename = require('./lib/getDateFromFilename');
const sortDateDesc = require('./lib/sortByFilenameDateDesc');
const validateConfig = require('./lib/validateConfig');

function getSuffix(startMoment) {
Expand Down Expand Up @@ -51,8 +51,8 @@ class AzureDataService {
}

async getLatestData() {
const filter = createFilter(this.outputFile, this.version);
const lastScan = await azureService.getLatestBlob(this.containerName, filter, sortDateDesc);
const filter = createFileVersionFilter(this.outputFile, this.version);
const lastScan = await azureService.getLatestBlob(this.containerName, filter);
if (lastScan) {
return this.downloadLatest(lastScan.name, this.localFile);
}
Expand All @@ -76,6 +76,27 @@ class AzureDataService {
this.log.info('Saving summary file in Azure');
await azureService.uploadToAzure(this.containerName, this.localSummaryFile, `${this.outputFile}-${this.summaryFile}${this.getSuffixWithVersion(startMoment)}`);
}

async pruneDataFiles(oldestMoment, files) {
const expiredFileFilter = createExpiredFileFilter(this.outputFile, this.version, oldestMoment);
const expiredFiles = files.filter(expiredFileFilter);

const fileVersionFilter = createFileVersionFilter(this.outputFile, this.version);
const latestData = await azureService.getLatestBlob(this.containerName, fileVersionFilter);
// eslint-disable-next-line no-restricted-syntax
for (const file of expiredFiles) {
// safeguard to stop deleting latest data
if (file.name !== latestData.name) {
// eslint-disable-next-line no-await-in-loop
await azureService.deleteFromAzure(this.containerName, file.name);
}
}
}

async pruneFilesOlderThan(oldestMoment) {
const files = await azureService.listBlobs(this.containerName);
await this.pruneDataFiles(oldestMoment, files);
}
}

module.exports = AzureDataService;
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
0.4.0 / TBA
==================
- Provide function to purge old files

0.3.0 / 2018-04-25
==================
- Update npm dependencies
Expand Down
4 changes: 3 additions & 1 deletion lib/azureService.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ function deleteFromAzure(containerName, blobName) {
if (!error) {
resolve(result);
} else {
reject(error);
// treat not found as successful delete
// eslint-disable-next-line no-unused-expressions
error.code === 'BlobNotFound' ? resolve(error) : reject(error);
}
});
});
Expand Down
15 changes: 15 additions & 0 deletions lib/createExpiredFileFilter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
const moment = require('moment');

/**
 * Escapes regular-expression metacharacters so the text is matched literally
 * when interpolated into a pattern.
 *
 * @param {*} text - value to escape; coerced to a string first.
 * @returns {string} the escaped text.
 */
function escapeRegExp(text) {
  return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Builds a predicate that selects expired data files.
 *
 * A file is selected when its name starts with `<outputFile>-<8 digits>`,
 * later contains `-<version>.json`, and the 8 digits (parsed as YYYYMMDD)
 * are before `oldestDate`.
 *
 * @param {string} outputFile - file name prefix, matched literally.
 * @param {string} version - version segment expected before `.json`, matched literally.
 * @param {*} oldestDate - cut-off passed to moment's `isBefore`.
 * @returns {function({name: string}): boolean} predicate returning true for
 *   expired files and false for everything else, including names without a date.
 */
function fileVersionFilter(outputFile, version, oldestDate) {
  // Interpolated parts are escaped so that e.g. version "0.1" cannot match
  // "0x1", and so that the dot before "json" is a literal dot.
  const regex = new RegExp(`^${escapeRegExp(outputFile)}-(\\d{8}).*-${escapeRegExp(version)}\\.json`);
  return (file) => {
    const match = regex.exec(file.name);
    if (match && match[1]) {
      const date = moment(match[1], 'YYYYMMDD');
      return date.isBefore(oldestDate);
    }
    return false;
  };
}

module.exports = fileVersionFilter;
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "azure-data-service",
"version": "0.3.0",
"version": "0.4.0",
"description": "service to upload and retrieve latest files from azure storage",
"main": "AzureDataService.js",
"repository": "https://github.com/nhsuk/azure-data-service.git",
Expand Down
74 changes: 58 additions & 16 deletions test/integration/azureDataService.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@ const chai = require('chai');
const moment = require('moment');

const AzureDataService = require('../../AzureDataService');
const azureService = require('../../lib/azureService');
const createFileVersionFilter = require('../../lib/createFileVersionFilter');

const expect = chai.expect;
const timeout = 15000;

const version = '0.1';
const outputDir = './test/output';
const outputFile = 'test-data';
const summaryFile = 'summary';
const seedIdFile = 'test-seed-ids';
const containerName = 'data-test';
const stubbedLog = { info: () => { } };

Expand All @@ -19,40 +19,82 @@ const azureDataService = new AzureDataService({
outputFile,
log: stubbedLog,
outputDir,
seedIdFile,
summaryFile,
version,
});

const timeOfEtl = moment('20180319');
const dateString = '20180319';
const timeOfEtl = moment(dateString);

describe('Azure Data Service', function azureDataServiceTest() {
this.timeout(timeout);
describe('upload functions', () => {
it('should upload data', async () => {
await azureDataService.uploadData(timeOfEtl);
});

describe('summary functions', () => {
it('should upload summary', async () => {
await azureDataService.uploadSummary(timeOfEtl);
});
});

it('should upload ids', async () => {
await azureDataService.uploadIds(timeOfEtl);
describe('upload/download functions', () => {
after(async function deleteGeneratedFile() {
this.timeout(timeout);
await azureService.deleteFromAzure(containerName, `${outputFile}-${dateString}-${version}.json`);
await azureService.deleteFromAzure(containerName, `${outputFile}-seed-ids-${dateString}.json`);
await azureService.deleteFromAzure(containerName, `${outputFile}-summary-${dateString}-${version}.json`);
});
});

describe('download functions', () => {
it('should get latest data', async () => {
it('should upload and download latest data', async () => {
await azureDataService.uploadData(timeOfEtl);
const { data, date } = await azureDataService.getLatestData();
expect(data).to.exist;
expect(date).to.exist;
});

it('should get latest ids', async () => {
it('should upload and download latest ids', async () => {
await azureDataService.uploadIds(timeOfEtl);
const { data, date } = await azureDataService.getLatestIds();
expect(data).to.exist;
expect(date).to.exist;
});
});

/**
 * Uploads the data service's local file to the test container under a
 * date-stamped blob name, where the stamp is `date` moved back by `days` days.
 *
 * @param {string} date - base date handed to moment.
 * @param {number} days - number of days to subtract from the base date.
 * @returns {*} whatever azureService.uploadToAzure returns (awaited by callers).
 */
function uploadOldDateStampedData(date, days) {
  const stampedMoment = moment(date).subtract(days, 'days');
  const suffix = azureDataService.getSuffixWithVersion(stampedMoment);
  const blobName = `${outputFile}${suffix}`;
  return azureService.uploadToAzure(containerName, azureDataService.localFile, blobName);
}

// Integration tests for pruning date-stamped data files from the test container.
describe('prune functions', () => {
  // Base date that every uploaded fixture is stamped relative to.
  const date = '20180321';

  // Counts the blobs in the test container that pass the file/version filter.
  async function countDataFiles() {
    const blobs = await azureService.listBlobs(containerName);
    const dataFiles = blobs.filter(createFileVersionFilter(outputFile, version));
    return dataFiles.length;
  }

  // Removes the fixtures the tests are expected to leave behind.
  after(async function deleteGeneratedFile() {
    this.timeout(timeout);
    await azureService.deleteFromAzure(containerName, `${outputFile}-20180321-${version}.json`);
    await azureService.deleteFromAzure(containerName, `${outputFile}-20180314-${version}.json`);
  });

  it('should remove files older than date', async () => {
    await uploadOldDateStampedData(date, 0);
    await uploadOldDateStampedData(date, 7);
    await uploadOldDateStampedData(date, 14);
    await uploadOldDateStampedData(date, 21);

    // Cut-off is one week before the base date, so the two oldest uploads expire.
    const oldestDate = moment(date).subtract(7, 'days');
    await azureDataService.pruneFilesOlderThan(oldestDate);

    expect(await countDataFiles()).to.equal(2);
  });

  it('should not remove latest file, even if before oldest date', async () => {
    await uploadOldDateStampedData(date, 0);
    await uploadOldDateStampedData(date, 7);

    // Cut-off lies after both uploads, so both count as expired.
    const oldestDate = moment(date).add(7, 'days');
    await azureDataService.pruneFilesOlderThan(oldestDate);

    expect(await countDataFiles()).to.equal(1);
  });
});
});
9 changes: 9 additions & 0 deletions test/integration/azureService.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ describe('Azure Service', () => {
})
.catch(done);
});

// Deleting a blob name that is presumably absent from the test container
// should resolve rather than reject.
it('should fail silently for delete of missing file', function test(done) {
// Regular function (not an arrow) so `this` exposes the test's timeout setter.
this.timeout(timeout);
azureService.deleteFromAzure(containerName, 'notAnExistingFile.xml')
.then(() => {
// Resolution is the expected outcome; signal success with no arguments.
done();
})
// Any rejection is handed to `done`, which fails the test with that error.
.catch(done);
});
});

describe('getLatestBlob', () => {
Expand Down
52 changes: 52 additions & 0 deletions test/unit/createExpiredFileFilter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
const chai = require('chai');
const moment = require('moment');
const createFilter = require('../../lib/createExpiredFileFilter');

const expect = chai.expect;

// Unit tests for the expired-file filter factory.
describe('create createExpiredFileFilter', () => {
  // Wraps plain file names in the `{ name }` objects the filter is applied to.
  const asFiles = (...names) => names.map((name) => ({ name }));

  it('should return filenames earlier than provided oldest date', () => {
    const expired1 = 'test-data-20180319-0.1.json';
    const expired2 = 'test-data-20180501-0.1.json';
    const files = asFiles(
      expired1,
      expired2,
      'test-data-20180508-0.1.json',
      'test-data-20180515-0.1.json'
    );
    const oldestDate = moment('2018-05-08');

    const result = files.filter(createFilter('test-data', '0.1', oldestDate));

    expect(result.length).to.be.equal(2);
    expect(result[0].name).to.be.equal(expired1);
    expect(result[1].name).to.be.equal(expired2);
  });

  it('should return no files if all in date', () => {
    const files = asFiles('test-data-20180508-0.1.json', 'test-data-20180515-0.1.json');
    const oldestDate = moment('2018-05-08');

    const result = files.filter(createFilter('test-data', '0.1', oldestDate));

    expect(result.length).to.be.equal(0);
  });

  it('should ignore non date files', () => {
    const expiredFile = 'test-data-20180508-0.1.json';
    const files = asFiles('test-data.json', expiredFile, 'test-data-20180515-0.1.json');
    const oldestDate = moment('2018-05-15');

    const result = files.filter(createFilter('test-data', '0.1', oldestDate));

    expect(result.length).to.equal(1);
    expect(result[0].name).to.equal(expiredFile);
  });
});

0 comments on commit 4616a66

Please sign in to comment.