Skip to content
This repository has been archived by the owner on Mar 19, 2024. It is now read-only.

Commit

Permalink
Merge 2d951f9 into ffd8463
Browse files Browse the repository at this point in the history
  • Loading branch information
markysoft authored May 29, 2018
2 parents ffd8463 + 2d951f9 commit 570f05e
Show file tree
Hide file tree
Showing 11 changed files with 334 additions and 34 deletions.
64 changes: 55 additions & 9 deletions AzureDataService.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,11 @@ const requireEnv = require('require-environment-variables');
requireEnv(['AZURE_STORAGE_CONNECTION_STRING']);

const azureService = require('./lib/azureService');
const createFilter = require('./lib/createFileVersionFilter');
const filters = require('./lib/filters');
const fsHelper = require('./lib/fsHelper');
const getDateFromFilename = require('./lib/getDateFromFilename');
const sortDateDesc = require('./lib/sortByFilenameDateDesc');
const validateConfig = require('./lib/validateConfig');

/**
 * Build the date-stamped blob suffix, e.g. `-20180529.json`.
 *
 * @param {Object} startMoment - object exposing `format(pattern)`; called
 *   with the pattern 'YYYYMMDD'.
 * @returns {string} `-<formatted date>.json`
 */
function getSuffix(startMoment) {
  const dateStamp = startMoment.format('YYYYMMDD');
  return `-${dateStamp}.json`;
}

class AzureDataService {
constructor(config) {
this.log = config.log;
Expand All @@ -28,6 +23,11 @@ class AzureDataService {
validateConfig(this);
}

// eslint-disable-next-line class-methods-use-this
getSuffix(startMoment) {
return `-${startMoment.format('YYYYMMDD')}.json`;
}

getSuffixWithVersion(startMoment) {
return `-${startMoment.format('YYYYMMDD')}-${this.version}.json`;
}
Expand All @@ -51,8 +51,8 @@ class AzureDataService {
}

async getLatestData() {
const filter = createFilter(this.outputFile, this.version);
const lastScan = await azureService.getLatestBlob(this.containerName, filter, sortDateDesc);
const filter = filters.createFileVersionFilter(this.outputFile, this.version);
const lastScan = await azureService.getLatestBlob(this.containerName, filter);
if (lastScan) {
return this.downloadLatest(lastScan.name, this.localFile);
}
Expand All @@ -69,13 +69,59 @@ class AzureDataService {

async uploadIds(startMoment) {
this.log.info(`Saving date stamped version of '${this.seedIdFile}' in Azure`);
await azureService.uploadToAzure(this.containerName, this.localSeedIdFile, `${this.seedIdFile}${getSuffix(startMoment)}`);
await azureService.uploadToAzure(this.containerName, this.localSeedIdFile, `${this.seedIdFile}${this.getSuffix(startMoment)}`);
}

async uploadSummary(startMoment) {
this.log.info('Saving summary file in Azure');
await azureService.uploadToAzure(this.containerName, this.localSummaryFile, `${this.outputFile}-${this.summaryFile}${this.getSuffixWithVersion(startMoment)}`);
}

async pruneDataFiles(oldestMoment, files) {
const filter = filters.createExpiredDataFilter(this.outputFile, this.version, oldestMoment);
const fileVersionFilter = filters.createFileVersionFilter(this.outputFile, this.version);
await this.pruneExpiredFiles(files, filter, fileVersionFilter);
}

async pruneIdListFiles(oldestMoment, files) {
const filter = filters.createExpiredIdListFilter(this.outputFile, this.version, oldestMoment);
const latestFilter = filters.createIdListFilter(this.seedIdFile);
await this.pruneExpiredFiles(files, filter, latestFilter);
}

async pruneSummaryFiles(oldestMoment, files) {
const filter = filters.createExpiredSummaryFilter(
this.outputFile,
this.summaryFile,
this.version, oldestMoment
);
const latestFilter = filters.createSummaryFileFilter(this.outputFile, this.summaryFile);
await this.pruneExpiredFiles(files, filter, latestFilter);
}

async pruneExpiredFiles(files, filter, latestFilter) {
const expiredFiles = files.filter(filter);

const latest = await azureService.getLatestBlob(this.containerName, latestFilter);
// eslint-disable-next-line no-restricted-syntax
for (const file of expiredFiles) {
// safeguard to stop deleting latest data
if (!latest || file.name !== latest.name) {
// eslint-disable-next-line no-await-in-loop
await azureService.deleteFromAzure(this.containerName, file.name);
}
}
}

async pruneFilesOlderThan(oldestMoment) {
const files = await azureService.listBlobs(this.containerName);
if (files) {
await this.pruneDataFiles(oldestMoment, files);
await this.pruneIdListFiles(oldestMoment, files);
await this.pruneDataFiles(oldestMoment, files);
await this.pruneSummaryFiles(oldestMoment, files);
}
}
}

module.exports = AzureDataService;
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
0.4.0 / TBA
==================
- Provide function to purge old files

0.3.0 / 2018-04-25
==================
- Update npm dependencies
Expand Down
4 changes: 3 additions & 1 deletion lib/azureService.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ function deleteFromAzure(containerName, blobName) {
if (!error) {
resolve(result);
} else {
reject(error);
// treat not found as successful delete
// eslint-disable-next-line no-unused-expressions
error.code === 'BlobNotFound' ? resolve(error) : reject(error);
}
});
});
Expand Down
6 changes: 0 additions & 6 deletions lib/createFileVersionFilter.js

This file was deleted.

44 changes: 44 additions & 0 deletions lib/filters.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
const moment = require('moment');

/**
 * Escape regular-expression metacharacters so that `text` is matched
 * literally when interpolated into a `RegExp` source.
 *
 * Without this a version such as `0.8` would also match `018` or `0x8`,
 * and a file name containing `+` or `(` would change the pattern's meaning.
 *
 * @param {*} text - value to embed; converted with `String()`.
 * @returns {string} the escaped pattern fragment.
 */
function escapeRegExp(text) {
  return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Decide whether the date embedded in a file name is before `oldestDate`.
 *
 * @param {RegExp} regex - pattern whose first capture group is the
 *   `YYYYMMDD` date stamp.
 * @param {{name: string}} file - blob-like object.
 * @param {*} oldestDate - cut-off handed to moment's `isBefore`.
 * @returns {boolean} `false` when the name does not match the pattern,
 *   otherwise the result of `isBefore`.
 */
function dateExpired(regex, file, oldestDate) {
  const match = regex.exec(file.name);
  if (match && match[1]) {
    const date = moment(match[1], 'YYYYMMDD');
    return date.isBefore(oldestDate);
  }
  return false;
}

/**
 * Create a predicate selecting data files
 * (`<outputFile>-<YYYYMMDD>-<version>.json`) dated before `oldestDate`.
 *
 * @param {string} outputFile - base file name, matched literally.
 * @param {string} version - version stamp, matched literally.
 * @param {*} oldestDate - cut-off date.
 * @returns {function({name: string}): boolean}
 */
function createExpiredDataFilter(outputFile, version, oldestDate) {
  const regex = new RegExp(`^${escapeRegExp(outputFile)}-(\\d{8})-${escapeRegExp(version)}\\.json`);
  return file => dateExpired(regex, file, oldestDate);
}

/**
 * Create a predicate selecting summary files
 * (`<outputFile>-<summaryFile>-<YYYYMMDD>-<version>.json`) dated before
 * `oldestDate`.
 *
 * @param {string} outputFile - base file name, matched literally.
 * @param {string} summaryFile - summary name part, matched literally.
 * @param {string} version - version stamp, matched literally.
 * @param {*} oldestDate - cut-off date.
 * @returns {function({name: string}): boolean}
 */
function createExpiredSummaryFilter(outputFile, summaryFile, version, oldestDate) {
  const prefix = `${escapeRegExp(outputFile)}-${escapeRegExp(summaryFile)}`;
  const regex = new RegExp(`^${prefix}-(\\d{8})-${escapeRegExp(version)}\\.json`);
  return file => dateExpired(regex, file, oldestDate);
}

/**
 * Create a predicate selecting seed ID list files
 * (`<outputFile>-seed-ids-<YYYYMMDD>.json`) dated before `oldestDate`.
 *
 * @param {string} outputFile - base file name, matched literally.
 * @param {string} version - accepted for signature parity with the other
 *   expired filters; ID list names carry no version, so it is not used.
 * @param {*} oldestDate - cut-off date.
 * @returns {function({name: string}): boolean}
 */
function createExpiredIdListFilter(outputFile, version, oldestDate) {
  const regex = new RegExp(`^${escapeRegExp(outputFile)}-seed-ids-(\\d{8})\\.json`);
  return file => dateExpired(regex, file, oldestDate);
}

/**
 * Create a predicate selecting files for a given output file and version:
 * `<outputFile>-<YYYYMMDD><anything>-<version>.json`.
 *
 * @param {string} outputFile - base file name, matched literally.
 * @param {string} version - version stamp, matched literally.
 * @returns {function({name: string}): (Array|null)} the predicate returns
 *   the `String.prototype.match` result (truthy on match, `null` otherwise).
 */
function createFileVersionFilter(outputFile, version) {
  const regex = new RegExp(`^${escapeRegExp(outputFile)}-\\d{8}.*-${escapeRegExp(version)}\\.json`);
  return file => file.name.match(regex);
}

/**
 * Create a predicate selecting files whose name starts with `<seedIdFile>-`.
 *
 * @param {string} seedIdFile - seed ID file base name.
 * @returns {function({name: string}): boolean}
 */
function createIdListFilter(seedIdFile) {
  return file => file.name.startsWith(`${seedIdFile}-`);
}

/**
 * Create a predicate selecting files whose name starts with
 * `<outputFile>-<summaryFile>`.
 *
 * @param {string} outputFile - base file name.
 * @param {string} summaryFile - summary name part.
 * @returns {function({name: string}): boolean}
 */
function createSummaryFileFilter(outputFile, summaryFile) {
  return file => file.name.startsWith(`${outputFile}-${summaryFile}`);
}

// Public API of the filters module: factories that each return a predicate
// taking a blob-like object with a `name` property.
module.exports = {
createExpiredDataFilter,
createExpiredIdListFilter,
createExpiredSummaryFilter,
createFileVersionFilter,
createIdListFilter,
createSummaryFileFilter,
};
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "azure-data-service",
"version": "0.3.0",
"version": "0.4.0",
"description": "service to upload and retrieve latest files from azure storage",
"main": "AzureDataService.js",
"repository": "https://github.com/nhsuk/azure-data-service.git",
Expand Down
25 changes: 25 additions & 0 deletions prune-sample.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
const moment = require('moment');
const AzureDataService = require('./AzureDataService');

// Minimal logger stub for the sample: only `info` is provided, as a no-op.
const stubbedLog = { info: () => { } };

/**
 * Sample entry point: build an AzureDataService for the `etl-output`
 * container and prune files dated before the current moment.
 *
 * Any error from construction or pruning rejects the returned promise
 * unchanged; there is no local handling, because a catch that only
 * rethrows adds nothing.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const azureDataService = new AzureDataService({
    containerName: 'etl-output',
    log: stubbedLog,
    outputDir: './output',
    outputFile: 'pharmacy-data',
    version: '0.8',
  });
  // moment() is "now", so every file dated earlier than now is a candidate.
  await azureDataService.pruneFilesOlderThan(moment());
}

/**
 * Await the sample run so the script stays alive until pruning finishes.
 *
 * @returns {Promise<void>} rejects with whatever `run()` rejects with.
 */
async function wait() {
  await run();
}

// Do not leave the promise floating: report the failure and signal it via
// the exit code instead of relying on unhandled-rejection behaviour.
wait().catch((err) => {
  // eslint-disable-next-line no-console
  console.error(err);
  process.exitCode = 1;
});
Loading

0 comments on commit 570f05e

Please sign in to comment.