Skip to content

Commit

Permalink
Initial work on BitmagArcRepositoryClient based on earlier work for t…
Browse files Browse the repository at this point in the history
…he Yggdrasil project
  • Loading branch information
Søren Vejrup Carlsen committed Aug 28, 2015
1 parent d4c529b commit 753b556
Show file tree
Hide file tree
Showing 6 changed files with 997 additions and 3 deletions.
18 changes: 15 additions & 3 deletions common/common-core/pom.xml
Expand Up @@ -11,8 +11,21 @@
<artifactId>common-core</artifactId>

<name>NetarchiveSuite - common - core</name>

<dependencies>
<properties>
<bitrepository.version>1.3.0.2</bitrepository.version>
</properties>

<dependencies>
<dependency>
<groupId>org.bitrepository.reference</groupId>
<artifactId>bitrepository-core</artifactId>
<version>${bitrepository.version}</version>
</dependency>
<dependency>
<groupId>org.bitrepository.reference</groupId>
<artifactId>bitrepository-client</artifactId>
<version>${bitrepository.version}</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
Expand Down Expand Up @@ -214,7 +227,6 @@
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
Expand Down
@@ -0,0 +1,363 @@
/*
* #%L
* Netarchivesuite - common
* %%
* Copyright (C) 2005 - 2014 The Royal Danish Library, the Danish State and University Library,
* the National Library of France and the Austrian National Library.
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 2.1 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Lesser Public License for more details.
*
* You should have received a copy of the GNU General Lesser Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/lgpl-2.1.html>.
* #L%
*/

package dk.netarkivet.common.distribute.arcrepository;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;

import org.archive.io.ArchiveReader;
import org.archive.io.ArchiveReaderFactory;
import org.archive.io.ArchiveRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import dk.netarkivet.common.distribute.FileRemoteFile;
import dk.netarkivet.common.exceptions.ArgumentNotValid;
import dk.netarkivet.common.exceptions.IOFailure;
import dk.netarkivet.common.exceptions.NotImplementedException;
import dk.netarkivet.common.exceptions.IllegalState;
import dk.netarkivet.common.exceptions.PermissionDenied;
import dk.netarkivet.common.utils.ChecksumCalculator;
import dk.netarkivet.common.utils.FileUtils;
import dk.netarkivet.common.utils.Settings;
import dk.netarkivet.common.utils.batch.BatchLocalFiles;
import dk.netarkivet.common.utils.batch.ChecksumJob;
import dk.netarkivet.common.utils.batch.FileBatchJob;

/**
* An implementation of ArcRepositoryClient that gets its data from a bitrepository system.
* The batch jobs are executed locally, after fetching the data from the bitrepository.
* This is only meant for internal NAS usage.
*
* Only the store, get(?), getFile, and batch operations will be implemented.
*
*/
public class BitmagArcRepositoryClient implements ArcRepositoryClient {

/** The logger for this class. */
private static final Logger log = LoggerFactory.getLogger(BitmagArcRepositoryClient.class);

/** The default place in classpath where the settings file can be found. Never reassigned, hence a constant. */
private static final String DEFAULT_SETTINGS_CLASSPATH = "dk/netarkivet/common/distribute/arcrepository/"
        + "BitmagArcRepositoryClientSettings.xml";

/*
 * The static initialiser is called when the class is loaded. It will add default values for all settings defined in
 * this class, by loading them from a settings.xml file in classpath.
 */
static {
    Settings.addDefaultClasspathSettings(DEFAULT_SETTINGS_CLASSPATH);
}

/*
 * Settings keys for this client. None of them is read by the code in this class yet; the descriptions below are
 * derived from the key names only and should be confirmed once the keys are put to use.
 */

/** Settings key, presumably naming the temporary directory used by this client. */
private static final String ARCREPOSITORY_TEMPDIR = "settings.common.arcrepositoryClient.tempdir";

/** Settings key, presumably naming the directory holding the bitrepository settings. */
private static final String BITREPOSITORY_SETTINGS_DIR = "settings.common.arcrepositoryClient.bitrepositorySettingsDir";

/** Settings key, presumably naming the key file with the client credentials. */
private static final String BITREPOSITORY_KEYFILE = "settings.common.arcrepositoryClient.keyfile"; //TODO necessary? optional so we don't force the user to use credentials.

/** Settings key, presumably giving the number of pillar failures tolerated during a store. */
private static final String BITREPOSITORY_STORE_MAX_PILLAR_FAILURES = "settings.common.arcrepositoryClient.storeMaxPillarFailures"; //TODO necessary?

/** Settings key, presumably naming the bitrepository collection to use. */
private static final String BITREPOSITORY_COLLECTIONID = "settings.common.arcrepositoryClient.collectionid";


/**
 * Creates a new BitmagArcRepositoryClient.
 * <p>
 * The constructor itself performs no initialisation and reads no settings yet.
 */
public BitmagArcRepositoryClient() {
    // Intentionally empty: there is no per-instance state to set up so far.
}

/**
 * Closes this client. The client currently holds no resources, so this method does nothing.
 */
@Override
public void close() {
    // Nothing to release.
}

/**
 * Store the given file in the ArcRepository.
 * <p>
 * The upload to the bitrepository is not implemented yet. After validating its argument, this method fails with a
 * NotImplementedException instead of returning normally, so that a caller can never mistake the stub for a
 * successful store. The file is left untouched.
 *
 * @param file A file to be stored. Must exist.
 * @throws ArgumentNotValid if file parameter is null or the file does not exist.
 * @throws NotImplementedException always (for valid arguments), until the upload has been implemented.
 */
@Override
public void store(File file) throws IOFailure, ArgumentNotValid {
    ArgumentNotValid.checkNotNull(file, "File file");
    ArgumentNotValid.checkTrue(file.exists(), "File '" + file + "' does not exist");

    // FIXME Remaining work:
    //  1. check if the file already exists in the bitrepository (IllegalState if it does)
    //  2. upload the file (IOFailure if the upload is unsuccessful)
    //  3. delete the local file after a successful upload
    throw new NotImplementedException("store is not implemented here yet");
}

/**
 * Gets a single ARC record out of the ArcRepository.
 * <p>
 * Retrieval is not implemented yet: once the arguments have been validated, this method always returns null.
 *
 * @param arcfile The name of a file containing the desired record.
 * @param index The offset of the desired record in the file
 * @return currently always null; the finished implementation is meant to return a BitarchiveRecord-object.
 * @throws ArgumentNotValid on null or empty filenames, or if index is negative.
 */
@Override
public BitarchiveRecord get(String arcfile, long index) throws ArgumentNotValid {
    ArgumentNotValid.checkNotNullOrEmpty(arcfile, "String arcfile");
    ArgumentNotValid.checkNotNegative(index, "long index");

    // FIXME Planned initial implementation:
    //  - fetch the file from the bitrepository to local storage; log a warning and return null if it does not exist
    //  - open it at the given offset with ArchiveReaderFactory.get(file, index) and read the record
    //  - return new BitarchiveRecord(record, arcfile)
    //  - wrap any IOException in an IOFailure, and always close the record and the reader
    return null;
}

/**
 * Retrieves a file from an ArcRepository and places it in a local file.
 * <p>
 * Fetching from the bitrepository is not implemented yet. After validating its arguments, this method fails with a
 * NotImplementedException instead of returning normally; a normal return would make the caller believe that
 * toFile had been written.
 *
 * @param arcfilename Name of the arcfile to retrieve.
 * @param replica The bitarchive to retrieve the data from. (Note argument is ignored)
 * @param toFile Filename of a place where the file fetched can be put.
 * @throws ArgumentNotValid if arcfilename is null or empty, or if toFile is null
 * @throws NotImplementedException always (for valid arguments), until the retrieval has been implemented.
 */
@Override
public void getFile(String arcfilename, Replica replica, File toFile) {
    ArgumentNotValid.checkNotNullOrEmpty(arcfilename, "String arcfilename");
    ArgumentNotValid.checkNotNull(toFile, "File toFile");

    // FIXME Remaining work: fetch the file named arcfilename from the bitrepository and copy it to toFile;
    // throw an IOFailure if the file does not exist or cannot be read or written.
    throw new NotImplementedException("getFile is not implemented here yet");
}

/**
 * Runs a batch job locally and collects its output in a temporary result file.
 * <p>
 * Fetching files from the bitrepository is not implemented yet, so the job is currently run on an empty set of
 * files. If the job cannot be completed, the temporary result file is deleted again instead of being left behind.
 *
 * @param job An object that implements the FileBatchJob interface. It is handed to BatchLocalFiles, which runs it
 * and writes its output to the result file.
 * @param replicaId The archive to execute the job on. Here it is only used as the suffix of the result file name
 * and passed on to the returned BatchStatus.
 * @param args The arguments for the batchjob. This can be null. (Note argument is ignored)
 * @return The status of the batch job after it ended.
 * @throws ArgumentNotValid If the job is null or the replicaId is either null or the empty string.
 * @throws IOFailure If a problem occurs during processing the batchjob.
 */
@Override
public BatchStatus batch(final FileBatchJob job, String replicaId, String... args) throws ArgumentNotValid,
        IOFailure {
    ArgumentNotValid.checkNotNull(job, "FileBatchJob job");
    ArgumentNotValid.checkNotNullOrEmpty(replicaId, "String replicaId");

    // FIXME Remaining work: request the file ids matching job.getFilenamePattern() (a null pattern means all
    // files), fetch the matching files to local storage, and add them to this list.
    List<File> files = new ArrayList<File>();

    File resultFile = null;
    OutputStream os = null;
    boolean completed = false;
    try {
        resultFile = File.createTempFile("batch", replicaId, FileUtils.getTempDir());
        os = new FileOutputStream(resultFile);
        BatchLocalFiles batcher = new BatchLocalFiles(files.toArray(new File[files.size()]));
        batcher.run(job, os);
        completed = true;
    } catch (IOException e) {
        throw new IOFailure("Cannot perform batch '" + job + "'", e);
    } finally {
        if (os != null) {
            try {
                os.close();
            } catch (IOException e) {
                log.warn("Error closing batch output stream '{}'", os, e);
            }
        }
        // Nobody gets a reference to the result file when the batch fails, so remove it rather than leak it.
        if (!completed && resultFile != null && !resultFile.delete()) {
            log.warn("Could not delete incomplete batch result file '{}'", resultFile);
        }
    }
    return new BatchStatus(replicaId, job.getFilesFailed(), job.getNoOfFilesProcessed(), new FileRemoteFile(
            resultFile), job.getExceptions());
}


/////////////////// The rest of the API is not implemented for the bitrepository system ///////////////////////////

/**
 * Updates the administrative data in the ArcRepository for a given file and replica.
 * <p>
 * Not implemented for the bitrepository system: every call fails with a NotImplementedException.
 *
 * @param fileName The name of a file stored in the ArcRepository.
 * @param bitarchiveId The id of the replica that the administrative data for fileName is wrong for.
 * @param newval What the administrative data will be updated to.
 * @throws NotImplementedException always.
 */
@Override
public void updateAdminData(String fileName, String bitarchiveId, ReplicaStoreState newval) {
    throw new NotImplementedException("UpdateAdminData is not implemented here");
}

/**
 * Updates the checksum kept in the ArcRepository for a given file.
 * <p>
 * Not implemented for the bitrepository system: every call fails with a NotImplementedException.
 *
 * @param filename The name of a file stored in the ArcRepository.
 * @param checksum The new checksum.
 * @throws NotImplementedException always.
 */
@Override
public void updateAdminChecksum(String filename, String checksum) {
    throw new NotImplementedException("UpdateAdminChecksum is not implemented here");
}

/**
 * Remove a file from one part of the ArcRepository, retrieving a copy for security purposes.
 * <p>
 * Not implemented for the bitrepository system: every call fails with a NotImplementedException, and none of
 * the arguments is inspected.
 *
 * @param fileName The name of the file to remove.
 * @param bitarchiveId The id of the replica from which to remove the file.
 * @param checksum The checksum of the file to be removed.
 * @param credentials A string that shows that the user is allowed to perform this operation.
 * @return Nothing; this method never returns normally.
 * @throws NotImplementedException always.
 */
@Override
public File removeAndGetFile(String fileName, String bitarchiveId, String checksum, String credentials) {
    throw new NotImplementedException("removeAndGetFile is not implemented here");
}

/**
 * Method for retrieving the checksums of all the files of the replica.
 * <p>
 * Not implemented for the bitrepository system: every call fails with a NotImplementedException. The exceptions
 * named in the throws clause are declared on the signature but never thrown by this implementation.
 *
 * @param replicaId The id of the replica. It is not inspected.
 * @return Nothing; this method never returns normally.
 * @throws NotImplementedException always.
 */
@Override
public File getAllChecksums(String replicaId) throws IOFailure, ArgumentNotValid {
    throw new NotImplementedException("getAllChecksums is not implemented here");
}

/**
 * Method for retrieving all the filenames of the replica.
 * <p>
 * Not implemented for the bitrepository system: every call fails with a NotImplementedException. The exceptions
 * named in the throws clause are declared on the signature but never thrown by this implementation.
 *
 * @param replicaId The id of the replica. It is not inspected.
 * @return Nothing; this method never returns normally.
 * @throws NotImplementedException always.
 */
@Override
public File getAllFilenames(String replicaId) throws IOFailure, ArgumentNotValid {
    throw new NotImplementedException("getAllFilenames is not implemented here");
}

/**
 * Method for correcting a bad entry.
 * <p>
 * Not implemented for the bitrepository system: every call fails with a NotImplementedException. The exceptions
 * named in the throws clause are declared on the signature but never thrown by this implementation.
 *
 * @param replicaId The id of the replica. It is not inspected.
 * @param checksum The checksum of the bad entry.
 * @param file The new file to replace the bad entry.
 * @param credentials The 'password' to allow changing the archive.
 * @return Nothing; this method never returns normally.
 * @throws NotImplementedException always.
 */
@Override
public File correct(String replicaId, String checksum, File file, String credentials) throws ArgumentNotValid,
        PermissionDenied {
    throw new NotImplementedException("correct is not implemented here");
}

/**
 * Method for finding the checksum of a file.
 * <p>
 * Not implemented for the bitrepository system: every call fails with a NotImplementedException. The exception
 * named in the throws clause is declared on the signature but never thrown by this implementation.
 *
 * @param replicaId The id of the replica. It is not inspected.
 * @param filename The name of the file whose checksum is requested.
 * @return Nothing; this method never returns normally.
 * @throws NotImplementedException always.
 */
@Override
public String getChecksum(String replicaId, String filename) throws ArgumentNotValid {
    throw new NotImplementedException("getChecksum is not implemented here");
}

}

0 comments on commit 753b556

Please sign in to comment.