/
LocalArcRepositoryClient.java
402 lines (371 loc) · 17.3 KB
/
LocalArcRepositoryClient.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
/*
* #%L
* Netarchivesuite - common
* %%
* Copyright (C) 2005 - 2018 The Royal Danish Library,
* the National Library of France and the Austrian National Library.
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 2.1 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Lesser Public License for more details.
*
* You should have received a copy of the GNU General Lesser Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/lgpl-2.1.html>.
* #L%
*/
package dk.netarkivet.common.distribute.arcrepository;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import dk.netarkivet.common.distribute.FileRemoteFile;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.exceptions.IllegalState;
import dk.netarkivet.common.exceptions.PermissionDenied;
import dk.netarkivet.common.utils.ChecksumCalculator;
import dk.netarkivet.common.utils.FileUtils;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.batch.BatchLocalFiles;
import dk.netarkivet.common.utils.batch.ChecksumJob;
import dk.netarkivet.common.utils.batch.FileBatchJob;
/**
 * A simple implementation of ArcRepositoryClient that keeps its files in a number of local directories.
 * <p>
 * This class does not store checksums or administrative data: {@link #updateAdminData} and
 * {@link #updateAdminChecksum} do nothing. Credentials and checksums are only verified when a file is removed
 * through {@link #removeAndGetFile} (and therefore also through {@link #correct}).
 */
public class LocalArcRepositoryClient implements ArcRepositoryClient {
    /** The logger for this class. */
    private static final Logger log = LoggerFactory.getLogger(LocalArcRepositoryClient.class);
    /** The default place in classpath where the settings file can be found. */
    private static final String defaultSettingsClasspath = "dk/netarkivet/common/distribute/arcrepository/"
            + "LocalArcRepositoryClientSettings.xml";
    /*
     * The static initialiser is called when the class is loaded. It will add default values for all settings defined
     * in this class, by loading them from a settings.xml file in classpath.
     */
    static {
        Settings.addDefaultClasspathSettings(defaultSettingsClasspath);
    }
    /** List of the directories that we store files in. Non-absolute dirs are relative to the current directory. */
    private final List<File> storageDirs = new ArrayList<File>(1);
    /** Store the file in the directories designated by this setting. */
    private static final String FILE_DIRS = "settings.common.arcrepositoryClient.fileDir";
    /** The credentials used to correct data in the archive. */
    private static final String CREDENTIALS_SETTING = "settings.archive.bitarchive.thisCredentials";

    /**
     * Create a new LocalArcRepositoryClient based on current settings. Every directory named by the
     * {@code settings.common.arcrepositoryClient.fileDir} setting is created through
     * {@code FileUtils.createDir} and registered as a storage directory.
     */
    public LocalArcRepositoryClient() {
        List<String> fileDirs = Arrays.asList(Settings.getAll(FILE_DIRS));
        for (String fileName : fileDirs) {
            File f = new File(fileName);
            FileUtils.createDir(f);
            log.info("directory '{}' is part of this local archive repository", f.getAbsolutePath());
            storageDirs.add(f);
        }
    }

    /** Nothing to release: this client holds no open resources. */
    @Override
    public void close() {
        // Intentionally empty.
    }

    /**
     * Store the given file in the ArcRepository. The file is moved (not copied) into the first storage directory that
     * is writable and reports more free bytes than the length of the file, so it no longer exists at its original
     * location afterwards.
     *
     * @param file A file to be stored. Must exist.
     * @throws IOFailure thrown if no storage directory can take the file.
     * @throws IllegalState if a file with the same name is already stored.
     * @throws ArgumentNotValid if file parameter is null or file is not an existing file.
     */
    @Override
    public void store(File file) throws IOFailure, ArgumentNotValid {
        ArgumentNotValid.checkNotNull(file, "File file");
        ArgumentNotValid.checkTrue(file.exists(), "File '" + file + "' does not exist");
        if (findFile(file.getName()) != null) {
            // The closing quote after the file name was missing in the original message.
            throw new IllegalState("A file with the name '" + file.getName() + "' is already stored");
        }
        for (File dir : storageDirs) {
            if (dir.canWrite() && FileUtils.getBytesFree(dir) > file.length()) {
                FileUtils.moveFile(file, new File(dir, file.getName()));
                return;
            }
        }
        throw new IOFailure("Not enough room for '" + file + "' in any of the dirs " + storageDirs);
    }

    /**
     * Gets a single ARC record out of the ArcRepository.
     *
     * @param arcfile The name of a file containing the desired record.
     * @param index The offset of the desired record in the file
     * @return a BitarchiveRecord-object, or null if no file with the given name is stored.
     * @throws ArgumentNotValid on null or empty filenames, or if index is negative.
     */
    @Override
    public BitarchiveRecord get(String arcfile, long index) throws ArgumentNotValid {
        ArgumentNotValid.checkNotNullOrEmpty(arcfile, "String arcfile");
        ArgumentNotValid.checkNotNegative(index, "long index");
        File f = findFile(arcfile);
        if (f == null) {
            log.warn("File '{}' does not exist. Null BitarchiveRecord returned", arcfile);
            return null;
        }
        return BitarchiveRecord.getBitarchiveRecord(arcfile, f, index);
    }

    /**
     * Retrieves a file from an ArcRepository and places it in a local file.
     *
     * @param arcfilename Name of the arcfile to retrieve.
     * @param replica The bitarchive to retrieve the data from. (Note argument is ignored)
     * @param toFile Filename of a place where the file fetched can be put.
     * @throws ArgumentNotValid if arcfilename is null or empty, or if toFile is null
     * @throws IOFailure if the file with the given arcfilename could not be found.
     */
    @Override
    public void getFile(String arcfilename, Replica replica, File toFile) {
        ArgumentNotValid.checkNotNullOrEmpty(arcfilename, "String arcfilename");
        ArgumentNotValid.checkNotNull(toFile, "File toFile");
        File f = findFile(arcfilename);
        if (f == null) {
            throw new IOFailure("File '" + arcfilename + "' does not exist");
        }
        FileUtils.copyFile(f, toFile);
    }

    /**
     * Runs a batch job on each file in the ArcRepository. Only regular files whose name matches the job's filename
     * pattern are processed; a null pattern matches every file. Storage directories whose content cannot be listed
     * are skipped. The output of the job is written to a temporary file in {@code FileUtils.getTempDir()}.
     *
     * @param job An object that implements the FileBatchJob interface. The initialize() method will be called before
     * processing and the finish() method will be called afterwards. The process() method will be called with each File
     * entry. An optional function postProcess() allows handling the combined results of the batchjob, e.g. summing the
     * results, sorting, etc.
     * @param replicaId The archive to execute the job on. Here it is only used as the suffix of the result file and
     * as the id reported in the returned status.
     * @param args The arguments for the batchjob. This can be null. Not used by this implementation.
     * @return The status of the batch job after it ended.
     * @throws ArgumentNotValid If the job is null or the replicaId is either null or the empty string.
     * @throws IOFailure If a problem occurs during processing the batchjob.
     */
    @Override
    public BatchStatus batch(final FileBatchJob job, String replicaId, String... args) throws ArgumentNotValid,
            IOFailure {
        ArgumentNotValid.checkNotNull(job, "FileBatchJob job");
        ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");
        OutputStream os = null;
        File resultFile;
        try {
            resultFile = File.createTempFile("batch", replicaId, FileUtils.getTempDir());
            os = new FileOutputStream(resultFile);
            List<File> files = new ArrayList<File>();
            final FilenameFilter filenameFilter = new FilenameFilter() {
                public boolean accept(File dir, String name) {
                    Pattern filenamePattern = job.getFilenamePattern();
                    return new File(dir, name).isFile()
                            && (filenamePattern == null || filenamePattern.matcher(name).matches());
                }
            };
            for (File dir : storageDirs) {
                // listFiles yields null when the directory cannot be listed; such directories are skipped.
                File[] filesInDir = dir.listFiles(filenameFilter);
                if (filesInDir != null) {
                    files.addAll(Arrays.asList(filesInDir));
                }
            }
            BatchLocalFiles batcher = new BatchLocalFiles(files.toArray(new File[files.size()]));
            batcher.run(job, os);
        } catch (IOException e) {
            throw new IOFailure("Cannot perform batch '" + job + "'", e);
        } finally {
            if (os != null) {
                try {
                    os.close();
                } catch (IOException e) {
                    log.warn("Error closing batch output stream '{}'", os, e);
                }
            }
        }
        return new BatchStatus(replicaId, job.getFilesFailed(), job.getNoOfFilesProcessed(), new FileRemoteFile(
                resultFile), job.getExceptions());
    }

    /**
     * Updates the administrative data in the ArcRepository for a given file and replica. This implementation does
     * nothing.
     *
     * @param fileName The name of a file stored in the ArcRepository.
     * @param bitarchiveId The id of the replica that the administrative data for fileName is wrong for.
     * @param newval What the administrative data will be updated to.
     */
    @Override
    public void updateAdminData(String fileName, String bitarchiveId, ReplicaStoreState newval) {
        // Intentionally empty: no administrative data is kept by this implementation.
    }

    /**
     * Updates the checksum kept in the ArcRepository for a given file. It is the responsibility of the ArcRepository
     * implementation to ensure that this checksum matches that of the underlying files. This implementation does
     * nothing.
     *
     * @param filename The name of a file stored in the ArcRepository.
     * @param checksum The new checksum.
     */
    @Override
    public void updateAdminChecksum(String filename, String checksum) {
        // Intentionally empty: no checksums are kept by this implementation.
    }

    /**
     * Remove a file from one part of the ArcRepository, retrieving a copy for security purposes. This is typically used
     * when repairing a file that has been corrupted.
     * <p>
     * The file is only removed when the given checksum equals the MD5 checksum of the stored file and the given
     * credentials equal the value of the {@code settings.archive.bitarchive.thisCredentials} setting. The stored file
     * is then moved to a newly created temporary file.
     *
     * @param fileName The name of the file to remove.
     * @param bitarchiveId The id of the replica from which to remove the file. Not used in this implementation, may be
     * null.
     * @param checksum The checksum of the file to be removed.
     * @param credentials A string that shows that the user is allowed to perform this operation.
     * @return A local copy of the file removed.
     * @throws ArgumentNotValid On null or empty parameters for fileName, checksum or credentials.
     * @throws IOFailure If the file is not found, or on IO trouble.
     * @throws PermissionDenied On wrong MD5 sum or wrong credentials.
     */
    @Override
    public File removeAndGetFile(String fileName, String bitarchiveId, String checksum, String credentials) {
        // bitarchiveId is ignored; checksum and credentials are both verified below.
        ArgumentNotValid.checkNotNullOrEmpty(fileName, "String fileName");
        ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checksum");
        ArgumentNotValid.checkNotNullOrEmpty(credentials, "String credentials");
        File file = findFile(fileName);
        if (file == null) {
            throw new IOFailure("Cannot find file '" + fileName + "'");
        }
        if (!ChecksumCalculator.calculateMd5(file).equals(checksum)) {
            throw new PermissionDenied("Wrong checksum for removing file '" + fileName + "'");
        }
        if (!credentials.equals(Settings.get(CREDENTIALS_SETTING))) {
            throw new PermissionDenied("Wrong credentials for removing file '" + fileName + "'");
        }
        final File copiedTo;
        try {
            copiedTo = File.createTempFile("removeAndGetFile", fileName);
        } catch (IOException e) {
            throw new IOFailure("Cannot make temp file to copy '" + fileName + "' into", e);
        }
        FileUtils.moveFile(file, copiedTo);
        return copiedTo;
    }

    /**
     * Returns a File object for a filename if it exists in the archive. The storage directories are searched in
     * order, and the first regular file with the given name is returned.
     *
     * @param filename Name of file to find.
     * @return A File object for the filename if the file exists, otherwise null.
     */
    private File findFile(String filename) {
        for (File dir : storageDirs) {
            final File file = new File(dir, filename);
            if (file.isFile()) {
                return file;
            }
        }
        return null;
    }

    /**
     * Collects the regular files found directly in the storage directories. Entries that are not regular files (e.g.
     * sub-directories) are left out, matching what {@link #findFile(String)} and the batch filter consider to be
     * archive files. A storage directory whose content cannot be listed is skipped with a warning.
     *
     * @return The regular files of all storage directories, in storage directory order.
     */
    private List<File> listStoredFiles() {
        List<File> stored = new ArrayList<File>();
        for (File dir : storageDirs) {
            // File.listFiles returns null if the path is not a directory or an I/O error occurs.
            File[] entries = dir.listFiles();
            if (entries == null) {
                log.warn("Unable to list the content of storage directory '{}'. Directory skipped",
                        dir.getAbsolutePath());
                continue;
            }
            for (File entry : entries) {
                if (entry.isFile()) {
                    stored.add(entry);
                }
            }
        }
        return stored;
    }

    /**
     * Method for retrieving the checksums of all the files of the replica. Only regular files are included; storage
     * directories that cannot be listed are skipped.
     *
     * @param replicaId Inherited dummy argument.
     * @return A file containing the names and checksum of all the files in the system.
     * @throws ArgumentNotValid If the replicaId is either null or the empty string.
     * @throws IOFailure If an unexpected IOException is caught.
     */
    @Override
    public File getAllChecksums(String replicaId) throws IOFailure, ArgumentNotValid {
        ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");
        try {
            List<String> checksums = new ArrayList<String>();
            // One line per stored file, in the format produced by ChecksumJob.makeLine.
            for (File entry : listStoredFiles()) {
                String checksum = ChecksumCalculator.calculateMd5(entry);
                checksums.add(ChecksumJob.makeLine(entry.getName(), checksum));
            }
            // create a file with the results.
            File res = File.createTempFile("all", "checksums", FileUtils.getTempDir());
            FileUtils.writeCollectionToFile(res, checksums);
            return res;
        } catch (IOException e) {
            throw new IOFailure("Received unexpected IOFailure: ", e);
        }
    }

    /**
     * Method for retrieving all the filenames of the replica. Only the names of regular files are included; storage
     * directories that cannot be listed are skipped.
     *
     * @param replicaId Inherited dummy argument.
     * @return A file containing the names of all the files.
     * @throws ArgumentNotValid If the replicaId is either null or empty.
     * @throws IOFailure If an IOException is caught.
     */
    @Override
    public File getAllFilenames(String replicaId) throws IOFailure, ArgumentNotValid {
        ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");
        List<String> filenames = new ArrayList<String>();
        for (File entry : listStoredFiles()) {
            filenames.add(entry.getName());
        }
        try {
            File res = File.createTempFile("all", "filenames", FileUtils.getTempDir());
            FileUtils.writeCollectionToFile(res, filenames);
            return res;
        } catch (IOException e) {
            throw new IOFailure("Received unexpected IOFailure: ", e);
        }
    }

    /**
     * Method for correcting a bad entry. Calls 'removeAndGetFile' followed by 'store'.
     * <p>
     * The existence of the replacement file is verified before the bad entry is removed, so that a missing
     * replacement cannot leave the archive without the entry.
     *
     * @param replicaId Inherited dummy argument.
     * @param checksum The checksum of the bad entry.
     * @param file The new file to replace the bad entry. Must exist.
     * @param credentials The 'password' to allow changing the archive.
     * @return The bad entry file.
     * @throws ArgumentNotValid If one of the arguments are null, if a string is empty, or if file does not exist.
     * @throws PermissionDenied If the credentials or checksum are invalid.
     */
    @Override
    public File correct(String replicaId, String checksum, File file, String credentials) throws ArgumentNotValid,
            PermissionDenied {
        ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");
        ArgumentNotValid.checkNotNullOrEmpty(checksum, "String checksum");
        ArgumentNotValid.checkNotNull(file, "File file");
        ArgumentNotValid.checkNotNullOrEmpty(credentials, "String credentials");
        // Same precondition as store(); checked here so it fails before anything is removed.
        ArgumentNotValid.checkTrue(file.exists(), "File '" + file + "' does not exist");
        // remove bad file.
        File res = removeAndGetFile(file.getName(), replicaId, checksum, credentials);
        // store good new file.
        store(file);
        // return bad file.
        return res;
    }

    /**
     * Method for finding the checksum of a file.
     *
     * @param replicaId Inherited dummy variable.
     * @param filename The name of the file to calculate the checksum.
     * @return The checksum of the file, or the empty string if the file was not found or an error occurred.
     * @throws ArgumentNotValid If the replicaId or the filename is either null or the empty string.
     */
    @Override
    public String getChecksum(String replicaId, String filename) throws ArgumentNotValid {
        ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");
        ArgumentNotValid.checkNotNullOrEmpty(filename, "String filename");
        File file = findFile(filename);
        if (file == null) {
            // Honour the documented contract instead of handing null to the checksum calculator.
            log.warn("File '{}' does not exist. Empty checksum returned", filename);
            return "";
        }
        try {
            return ChecksumCalculator.calculateMd5(file);
        } catch (IOFailure e) {
            log.warn("Unable to calculate the checksum of file '{}'. Empty checksum returned", filename, e);
            return "";
        }
    }
}