Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Have DuplicateI able to handle underlying files #100

Merged
merged 15 commits into from
Sep 16, 2020
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 88 additions & 45 deletions src/main/java/omero/cmd/graphs/DuplicateI.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
Expand All @@ -53,6 +54,7 @@
import org.slf4j.LoggerFactory;
import org.springframework.context.ApplicationContext;

import com.google.common.base.Joiner;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -122,15 +124,26 @@ private static enum Inclusion {
IGNORE
};

private static enum Location {
/* Files/ */
FILES,
/* Pixels/ */
PIXELS,
/* Thumbnails/ */
THUMBS,
/* ManagedRepository/ */
MANAGED;
}

private final ACLVoter aclVoter;
private final GraphPathBean graphPathBean;
private final Set<Class<? extends IObject>> targetClasses;
private GraphPolicy graphPolicy; /* not final because of adjustGraphPolicy */
private final SetMultimap<String, String> unnullable;
private final Map<Class<? extends IObject>, Function<Long, String>> pathServices;
private final Map<Class<? extends IObject>, Location> locationsForClasses;
private final EnumMap<Location, Function<IObject, Path>> pathServices = new EnumMap<>(Location.class);
private final EnumMap<Location, Long> diskUsage = new EnumMap<>(Location.class);
private final ManagedRepositoryI managedRepository;
private Function<FsFile, Path> managedRepositoryPathfinder;
private Predicate<Path> isPathManaged;

/* retain a reference here because ManagedRepositoryI.setFileCreationListener uses a weak reference */
private Consumer<CheckedPath> fileCreationListener;
Expand Down Expand Up @@ -184,53 +197,79 @@ public DuplicateI(ACLVoter aclVoter, Roles securityRoles, GraphPathBean graphPat
(prefix, file) -> FsFile.concatenate(prefix, file),
(prefix, file) -> file.getPathFrom(prefix));
this.managedRepository = applicationContext.getBean(ManagedRepositoryI.class);
this.pathServices = ImmutableMap.of(
OriginalFile.class, applicationContext.getBean("/OMERO/Files", OriginalFilesService.class)::getFilesPath,
Pixels.class, applicationContext.getBean("/OMERO/Pixels", PixelsService.class)::getPixelsPath,
Thumbnail.class, applicationContext.getBean("/OMERO/Thumbs", ThumbnailService.class)::getThumbnailPath);
this.locationsForClasses = ImmutableMap.of(
OriginalFile.class, Location.FILES,
Pixels.class, Location.PIXELS,
Thumbnail.class, Location.THUMBS);
final Function<Function<Long, String>, Function<IObject, Path>> pathFactory =
new Function<Function<Long, String>, Function<IObject, Path>>() {
@Override
public Function<IObject, Path> apply(Function<Long, String> pathService) {
return obj -> Paths.get(pathService.apply(obj.getId()));
}
};
final Function<Long, String> pathInFiles, pathInPixels, pathInThumbs;
pathInFiles = applicationContext.getBean("/OMERO/Files", OriginalFilesService.class)::getFilesPath;
pathInPixels = applicationContext.getBean("/OMERO/Pixels", PixelsService.class)::getPixelsPath;
pathInThumbs = applicationContext.getBean("/OMERO/Thumbs", ThumbnailService.class)::getThumbnailPath;
this.pathServices.put(Location.FILES, pathFactory.apply(pathInFiles));
this.pathServices.put(Location.PIXELS, pathFactory.apply(pathInPixels));
this.pathServices.put(Location.THUMBS, pathFactory.apply(pathInThumbs));
}

@Override
public Map<String, String> getCallContext() {
return null;
}

/**
* Run {@link Files#copy(Path, Path, java.nio.file.CopyOption...)} and note the disk space used.
* @param location the location of the file in the binary repository
* @param from the file to copy
* @param to where to create the copy
* @throws IOException from {@link Files#copy(Path, Path, java.nio.file.CopyOption...)}
*/
private void copyFile(Location location, Path from, Path to) throws IOException {
Files.copy(from, to);
final long size = Files.size(to);
if (size > 0) {
diskUsage.merge(location, size, (x, y) -> x + y);
}
}

/**
* Create a duplicate of a file on the filesystem.
* @param location the location of the file in the binary repository
* @param from the file to duplicate
* @param to where to create the duplicate
* @throws IOException if the file duplication failed
*/
private void duplicateFile(final Path from, final Path to) throws IOException {
final boolean isFromManaged = isPathManaged.test(from);
final boolean isToManaged = isPathManaged.test(to);
if (isFromManaged && isToManaged) {
private void duplicateFile(Location location, Path from, Path to) throws IOException {
if (location == Location.MANAGED) {
try {
/* safe to create a hard link in managed repository */
Files.createLink(to, from);
LOGGER.debug("linked {} to {}", from, to);
} catch (FileSystemException fse) {
LOGGER.debug("failed to link {} to {}", from, to, fse);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should perhaps be at a higher level. Could it point to some form of misconfiguration?

Additionally, I still see an RFE for disallowing non-symlinks. The necessary rollback is in place in case this block threw an exception, right?

Copy link
Member Author

@mtbc mtbc Sep 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"Limit increase in disk usage" could definitely be a followup PR, though with maybe a subclass of the request type to non-breakingly specify that limit. Rollback should be good but I definitely encouraging testing that!

Update: Also, configuring, "What is linked how or copied." E.g., for synthetic images, not in the managed repository at all.

Copy link
Member Author

@mtbc mtbc Sep 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I could create an issue elaborating a little on the further work I suggest in the PR description. Update: Now #106.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To your other question, I'd expect this case to be most likely to occur for managed repositories that are split across multiple partitions, that's why I left it at DEBUG.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

File copies in the managed repository are now quantified by mtbc@e2eb9bea at INFO level: "file bytes copied ... MANAGED ..."

/* cannot link so fall back to a copy */
Files.copy(from, to);
copyFile(location, from, to);
LOGGER.debug("copied {} to {}", from, to);
}
} else if (isFromManaged || isToManaged) {
throw new IllegalArgumentException("either both or neither of the files may be in the managed repository");
} else {
/* copy rather than link as may violate integrity if different users can write to same file */
Files.copy(from, to);
copyFile(location, from, to);
LOGGER.debug("copied {} to {}", from, to);
}
}

/**
* Get the underlying filesystem path for the given object.
* @param modelObject a model object
* @return the underlying filesystem path for the object, or {@code null} if none exists
* @return the location of the object in the binary repository, or {@code null} if none exists
* @throws GraphException if the object should not be included in duplication
*/
private Path getPath(IObject modelObject) throws GraphException {
private Location getPath(IObject modelObject) throws GraphException {
if (modelObject instanceof OriginalFile) {
/* Files may require special handling. */
final OriginalFile file = (OriginalFile) modelObject;
Expand All @@ -249,7 +288,7 @@ private Path getPath(IObject modelObject) throws GraphException {
throw new GraphException(
"will not duplicate OriginalFile:" + file.getId() + " because it is in the scripts repository");
} else if (managedRepository.getRepoUuid().equals(fileRepo)) {
return managedRepositoryPathfinder.apply(new FsFile(file.getPath() + file.getName()));
return Location.MANAGED;
} else {
throw new GraphException(
"cannot duplicate OriginalFile:" + file.getId() + " because it is in repository " + fileRepo);
Expand All @@ -263,9 +302,11 @@ private Path getPath(IObject modelObject) throws GraphException {
} else {
modelObjectClass = modelObject.getClass();
}
for (final Map.Entry<Class<? extends IObject>, Function<Long, String>> pathServiceByType : pathServices.entrySet()) {
if (pathServiceByType.getKey().isAssignableFrom(modelObjectClass)) {
return Paths.get(pathServiceByType.getValue().apply(modelObject.getId()));
for (final Map.Entry<Class<? extends IObject>, Location> locationForClass : locationsForClasses.entrySet()) {
final Class<? extends IObject> locationClass = locationForClass.getKey();
final Location location = locationForClass.getValue();
if (locationClass.isAssignableFrom(modelObjectClass)) {
return location;
}
}
return null;
Expand Down Expand Up @@ -333,23 +374,13 @@ public boolean test(Class<? extends IObject> modelObject) {
}
fileCreationListener = checked -> filesCreated.add(checked.fsFile.toPath(managedRepositoryRoot));
managedRepository.setFileCreationListener(fileCreationListener);
managedRepositoryPathfinder = file -> file.toPath(managedRepositoryRoot);
isPathManaged = new Predicate<Path>() {

private final int managedRepositoryRootLength = managedRepositoryRoot.getNameCount();

this.pathServices.put(Location.MANAGED, new Function<IObject, Path>() {
@Override
public boolean test(Path file) {
if (file.getNameCount() < managedRepositoryRootLength) {
return false;
} else {
while (file.getNameCount() > managedRepositoryRootLength) {
file = file.getParent();
}
return file.equals(managedRepositoryRoot);
}
public Path apply(IObject object) {
final OriginalFile file = (OriginalFile) object;
return new FsFile(file.getPath() + file.getName()).toPath(managedRepositoryRoot);
}
};
});
}

/**
Expand Down Expand Up @@ -883,9 +914,7 @@ private void noteWrongPaths() throws GraphException {
if (original instanceof Fileset) {
duplicateFileset((Fileset) original, (Fileset) duplicate);
} else if (original instanceof OriginalFile) {
final OriginalFile file = (OriginalFile) original;
final Path path = getPath(file);
if (path != null && isPathManaged.test(path)) {
if (getPath(original) == Location.MANAGED) {
wrongPathDuplicateFiles.add((OriginalFile) duplicate);
}
} else if (original instanceof Pixels) {
Expand Down Expand Up @@ -944,16 +973,24 @@ private void duplicateUnderlyingFiles() throws GraphException {
for (final Map.Entry<IObject, IObject> originalDuplicate : originalsToDuplicates.entrySet()) {
final IObject original = originalDuplicate.getKey();
final IObject duplicate = originalDuplicate.getValue();
final Path originalPath = getPath(original);
if (originalPath == null || !Files.exists(originalPath)) {
final Location originalLocation = getPath(original);
if (originalLocation == null) {
continue;
}
final Path duplicatePath = getPath(duplicate);
if (duplicatePath == null || originalPath.equals(duplicatePath)) {
throw new GraphException("failed to construct file duplicating " + originalPath);
final Path originalPath = pathServices.get(originalLocation).apply(original);
if (!Files.exists(originalPath)) {
continue;
}
final Location duplicateLocation = getPath(duplicate);
if (duplicateLocation != null && originalLocation != duplicateLocation) {
throw new GraphException("file duplicating " + originalPath + " is in a different repository");
}
final Path duplicatePath = pathServices.get(originalLocation).apply(duplicate);
if (originalPath.equals(duplicatePath)) {
throw new GraphException("failed to generate different path for file duplicating " + originalPath);
}
try {
duplicateFile(originalPath, duplicatePath);
duplicateFile(originalLocation, originalPath, duplicatePath);
filesCreated.add(duplicatePath);
} catch (IOException ioe) {
final String message = "failed to duplicate file from " + originalPath + " to " + duplicatePath;
Expand Down Expand Up @@ -1086,6 +1123,12 @@ private void abort() {
@Override
public void finish() {
managedRepository.clearFileCreationListener();
if (diskUsage.isEmpty()) {
LOGGER.info("no file bytes copied");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice. This would be nice in the response, but I assume that's sufficiently troublesome water that it should be postponed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least for this PR I'm trying to avoid adjusting the API. I also figure we might need a round of UX thinking to be happy we're breaking it down in the desired way before exposing it in that form.

} else {
LOGGER.info("file bytes copied: {}", Joiner.on(", ").join(diskUsage.entrySet().stream()
.map(entry -> String.format("%,d into %s", entry.getValue(), entry.getKey())).iterator()));
}
}

@Override
Expand Down