Skip to content

Commit

Permalink
Adding HardLink Support to VirtualFileSystem.
Browse files Browse the repository at this point in the history
Summary:
Added support for creating a hardlink from one file to another file.
After a hardlink is added between two files, both files will have the same:
  1. UniqueID (inode)
  2. Size
  3. Buffer

This brings the replay of a compilation closer to the actual compilation. There are instances where clang checks the UniqueID of the file/header to be loaded, which leads to different behavior during replay because all files have different UniqueIDs.

Patch by Utkarsh Saxena!

Reviewers: ilya-biryukov

Reviewed By: ilya-biryukov

Subscribers: cfe-commits

Differential Revision: https://reviews.llvm.org/D51359

llvm-svn: 341366
  • Loading branch information
ilya-biryukov committed Sep 4, 2018
1 parent 6cb1244 commit d5554c5
Show file tree
Hide file tree
Showing 3 changed files with 257 additions and 49 deletions.
24 changes: 24 additions & 0 deletions clang/include/clang/Basic/VirtualFileSystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ class OverlayFileSystem : public FileSystem {
namespace detail {

class InMemoryDirectory;
class InMemoryFile;

} // namespace detail

Expand All @@ -332,6 +333,15 @@ class InMemoryFileSystem : public FileSystem {
std::string WorkingDirectory;
bool UseNormalizedPaths = true;

/// Shared implementation behind the public addFile and addHardLink.
/// If \p HardLinkTarget is non-null, \p Path is added as a hard link to that
/// file; in that case \p Buffer must be null, because a hard link does not
/// carry a buffer of its own. If \p HardLinkTarget is null, this behaves like
/// the public addFile.
bool addFile(const Twine &Path, time_t ModificationTime,
std::unique_ptr<llvm::MemoryBuffer> Buffer,
Optional<uint32_t> User, Optional<uint32_t> Group,
Optional<llvm::sys::fs::file_type> Type,
Optional<llvm::sys::fs::perms> Perms,
const detail::InMemoryFile *HardLinkTarget);

public:
explicit InMemoryFileSystem(bool UseNormalizedPaths = true);
~InMemoryFileSystem() override;
Expand All @@ -348,6 +358,20 @@ class InMemoryFileSystem : public FileSystem {
Optional<llvm::sys::fs::file_type> Type = None,
Optional<llvm::sys::fs::perms> Perms = None);

/// Add a hard link to a file.
/// Here hard links are not intended to be fully equivalent to the classical
/// filesystem. Both the hard link and the file share the same buffer and
/// status (and thus have the same UniqueID). Because of this there is no way
/// to distinguish between the link and the file after the link has been
/// added.
///
/// \p To must name an existing file or an existing hard link; it must not be
/// a directory. \p From must not have been added before. \p From is added as
/// a hard link that points to the file \p To resolves to.
/// \return true if the above conditions are satisfied and the hard link was
/// successfully created, false otherwise.
bool addHardLink(const Twine &From, const Twine &To);

/// Add a buffer to the VFS with a path. The VFS does not own the buffer.
/// If present, User, Group, Type and Perms apply to the newly-created file
/// or directory.
Expand Down
168 changes: 119 additions & 49 deletions clang/lib/Basic/VirtualFileSystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
Expand All @@ -25,9 +26,9 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
Expand Down Expand Up @@ -466,70 +467,82 @@ namespace vfs {

namespace detail {

enum InMemoryNodeKind { IME_File, IME_Directory };
enum InMemoryNodeKind { IME_File, IME_Directory, IME_HardLink };

/// The in memory file system is a tree of Nodes. Every node can either be a
/// file or a directory.
/// file, a hardlink or a directory.
class InMemoryNode {
Status Stat;
InMemoryNodeKind Kind;

protected:
/// Return Stat. This should only be used for internal/debugging use. When
/// clients wants the Status of this node, they should use
/// \p getStatus(StringRef).
const Status &getStatus() const { return Stat; }
std::string FileName;

public:
InMemoryNode(Status Stat, InMemoryNodeKind Kind)
: Stat(std::move(Stat)), Kind(Kind) {}
InMemoryNode(llvm::StringRef FileName, InMemoryNodeKind Kind)
: Kind(Kind), FileName(llvm::sys::path::filename(FileName)) {}
virtual ~InMemoryNode() = default;

/// Get the filename of this node (the name without the directory part).
StringRef getFileName() const { return FileName; }
InMemoryNodeKind getKind() const { return Kind; }
virtual std::string toString(unsigned Indent) const = 0;
};

class InMemoryFile : public InMemoryNode {
Status Stat;
std::unique_ptr<llvm::MemoryBuffer> Buffer;

public:
InMemoryFile(Status Stat, std::unique_ptr<llvm::MemoryBuffer> Buffer)
: InMemoryNode(Stat.getName(), IME_File), Stat(std::move(Stat)),
Buffer(std::move(Buffer)) {}

/// Return the \p Status for this node. \p RequestedName should be the name
/// through which the caller referred to this node. It will override
/// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
Status getStatus(StringRef RequestedName) const {
return Status::copyWithNewName(Stat, RequestedName);
}
llvm::MemoryBuffer *getBuffer() const { return Buffer.get(); }

/// Get the filename of this node (the name without the directory part).
StringRef getFileName() const {
return llvm::sys::path::filename(Stat.getName());
std::string toString(unsigned Indent) const override {
return (std::string(Indent, ' ') + Stat.getName() + "\n").str();
}

static bool classof(const InMemoryNode *N) {
return N->getKind() == IME_File;
}
InMemoryNodeKind getKind() const { return Kind; }
virtual std::string toString(unsigned Indent) const = 0;
};

namespace {

class InMemoryFile : public InMemoryNode {
std::unique_ptr<llvm::MemoryBuffer> Buffer;
class InMemoryHardLink : public InMemoryNode {
const InMemoryFile &ResolvedFile;

public:
InMemoryFile(Status Stat, std::unique_ptr<llvm::MemoryBuffer> Buffer)
: InMemoryNode(std::move(Stat), IME_File), Buffer(std::move(Buffer)) {}

llvm::MemoryBuffer *getBuffer() { return Buffer.get(); }
InMemoryHardLink(StringRef Path, const InMemoryFile &ResolvedFile)
: InMemoryNode(Path, IME_HardLink), ResolvedFile(ResolvedFile) {}
const InMemoryFile &getResolvedFile() const { return ResolvedFile; }

std::string toString(unsigned Indent) const override {
return (std::string(Indent, ' ') + getStatus().getName() + "\n").str();
return std::string(Indent, ' ') + "HardLink to -> " +
ResolvedFile.toString(0);
}

static bool classof(const InMemoryNode *N) {
return N->getKind() == IME_File;
return N->getKind() == IME_HardLink;
}
};

/// Adapt a InMemoryFile for VFS' File interface. The goal is to make
/// \p InMemoryFileAdaptor mimic as much as possible the behavior of
/// \p RealFile.
class InMemoryFileAdaptor : public File {
InMemoryFile &Node;
const InMemoryFile &Node;
/// The name to use when returning a Status for this file.
std::string RequestedName;

public:
explicit InMemoryFileAdaptor(InMemoryFile &Node, std::string RequestedName)
explicit InMemoryFileAdaptor(const InMemoryFile &Node,
std::string RequestedName)
: Node(Node), RequestedName(std::move(RequestedName)) {}

llvm::ErrorOr<Status> status() override {
Expand All @@ -546,16 +559,22 @@ class InMemoryFileAdaptor : public File {

std::error_code close() override { return {}; }
};

} // namespace

class InMemoryDirectory : public InMemoryNode {
Status Stat;
std::map<std::string, std::unique_ptr<InMemoryNode>> Entries;

public:
InMemoryDirectory(Status Stat)
: InMemoryNode(std::move(Stat), IME_Directory) {}
: InMemoryNode(Stat.getName(), IME_Directory), Stat(std::move(Stat)) {}

/// Return the \p Status for this node. \p RequestedName should be the name
/// through which the caller referred to this node. It will override
/// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
Status getStatus(StringRef RequestedName) const {
return Status::copyWithNewName(Stat, RequestedName);
}
InMemoryNode *getChild(StringRef Name) {
auto I = Entries.find(Name);
if (I != Entries.end())
Expand All @@ -575,7 +594,7 @@ class InMemoryDirectory : public InMemoryNode {

std::string toString(unsigned Indent) const override {
std::string Result =
(std::string(Indent, ' ') + getStatus().getName() + "\n").str();
(std::string(Indent, ' ') + Stat.getName() + "\n").str();
for (const auto &Entry : Entries)
Result += Entry.second->toString(Indent + 2);
return Result;
Expand All @@ -586,6 +605,17 @@ class InMemoryDirectory : public InMemoryNode {
}
};

namespace {
/// Produce the \p Status of \p Node, reported under \p RequestedName.
/// For a hard link, the status of the file it resolves to is returned.
/// \p Node must be a directory, a file or a hard link.
Status getNodeStatus(const InMemoryNode *Node, StringRef RequestedName) {
  // Node kinds are mutually exclusive, so the order of the checks is
  // irrelevant to the result.
  if (const auto *LinkNode = dyn_cast<detail::InMemoryHardLink>(Node)) {
    const detail::InMemoryFile &Target = LinkNode->getResolvedFile();
    return Target.getStatus(RequestedName);
  }
  if (const auto *FileNode = dyn_cast<detail::InMemoryFile>(Node)) {
    return FileNode->getStatus(RequestedName);
  }
  if (const auto *DirNode = dyn_cast<detail::InMemoryDirectory>(Node)) {
    return DirNode->getStatus(RequestedName);
  }
  llvm_unreachable("Unknown node type");
}
} // namespace
} // namespace detail

InMemoryFileSystem::InMemoryFileSystem(bool UseNormalizedPaths)
Expand All @@ -606,7 +636,8 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
Optional<uint32_t> User,
Optional<uint32_t> Group,
Optional<llvm::sys::fs::file_type> Type,
Optional<llvm::sys::fs::perms> Perms) {
Optional<llvm::sys::fs::perms> Perms,
const detail::InMemoryFile *HardLinkTarget) {
SmallString<128> Path;
P.toVector(Path);

Expand All @@ -627,6 +658,7 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
const auto ResolvedGroup = Group.getValueOr(0);
const auto ResolvedType = Type.getValueOr(sys::fs::file_type::regular_file);
const auto ResolvedPerms = Perms.getValueOr(sys::fs::all_all);
assert(!(HardLinkTarget && Buffer) && "HardLink cannot have a buffer");
// Any intermediate directories we create should be accessible by
// the owner, even if Perms says otherwise for the final path.
const auto NewDirectoryPerms = ResolvedPerms | sys::fs::owner_all;
Expand All @@ -636,17 +668,22 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
++I;
if (!Node) {
if (I == E) {
// End of the path, create a new file or directory.
Status Stat(P.str(), getNextVirtualUniqueID(),
llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
ResolvedPerms);
// End of the path.
std::unique_ptr<detail::InMemoryNode> Child;
if (ResolvedType == sys::fs::file_type::directory_file) {
Child.reset(new detail::InMemoryDirectory(std::move(Stat)));
} else {
Child.reset(new detail::InMemoryFile(std::move(Stat),
std::move(Buffer)));
if (HardLinkTarget)
Child.reset(new detail::InMemoryHardLink(P.str(), *HardLinkTarget));
else {
// Create a new file or directory.
Status Stat(P.str(), getNextVirtualUniqueID(),
llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
ResolvedPerms);
if (ResolvedType == sys::fs::file_type::directory_file) {
Child.reset(new detail::InMemoryDirectory(std::move(Stat)));
} else {
Child.reset(
new detail::InMemoryFile(std::move(Stat), std::move(Buffer)));
}
}
Dir->addChild(Name, std::move(Child));
return true;
Expand All @@ -656,8 +693,8 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
Status Stat(
StringRef(Path.str().begin(), Name.end() - Path.str().begin()),
getNextVirtualUniqueID(), llvm::sys::toTimePoint(ModificationTime),
ResolvedUser, ResolvedGroup, Buffer->getBufferSize(),
sys::fs::file_type::directory_file, NewDirectoryPerms);
ResolvedUser, ResolvedGroup, 0, sys::fs::file_type::directory_file,
NewDirectoryPerms);
Dir = cast<detail::InMemoryDirectory>(Dir->addChild(
Name, llvm::make_unique<detail::InMemoryDirectory>(std::move(Stat))));
continue;
Expand All @@ -666,20 +703,35 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
if (auto *NewDir = dyn_cast<detail::InMemoryDirectory>(Node)) {
Dir = NewDir;
} else {
assert(isa<detail::InMemoryFile>(Node) &&
"Must be either file or directory!");
assert((isa<detail::InMemoryFile>(Node) ||
isa<detail::InMemoryHardLink>(Node)) &&
"Must be either file, hardlink or directory!");

// Trying to insert a directory in place of a file.
if (I != E)
return false;

// Return false only if the new file is different from the existing one.
if (auto Link = dyn_cast<detail::InMemoryHardLink>(Node)) {
return Link->getResolvedFile().getBuffer()->getBuffer() ==
Buffer->getBuffer();
}
return cast<detail::InMemoryFile>(Node)->getBuffer()->getBuffer() ==
Buffer->getBuffer();
}
}
}

bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
std::unique_ptr<llvm::MemoryBuffer> Buffer,
Optional<uint32_t> User,
Optional<uint32_t> Group,
Optional<llvm::sys::fs::file_type> Type,
Optional<llvm::sys::fs::perms> Perms) {
return addFile(P, ModificationTime, std::move(Buffer), User, Group, Type,
Perms, /*HardLinkTarget=*/nullptr);
}

bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime,
llvm::MemoryBuffer *Buffer,
Optional<uint32_t> User,
Expand All @@ -693,7 +745,7 @@ bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime,
std::move(Perms));
}

static ErrorOr<detail::InMemoryNode *>
static ErrorOr<const detail::InMemoryNode *>
lookupInMemoryNode(const InMemoryFileSystem &FS, detail::InMemoryDirectory *Dir,
const Twine &P) {
SmallString<128> Path;
Expand Down Expand Up @@ -724,17 +776,35 @@ lookupInMemoryNode(const InMemoryFileSystem &FS, detail::InMemoryDirectory *Dir,
return errc::no_such_file_or_directory;
}

// If Node is HardLink then return the resolved file.
if (auto File = dyn_cast<detail::InMemoryHardLink>(Node)) {
if (I == E)
return &File->getResolvedFile();
return errc::no_such_file_or_directory;
}
// Traverse directories.
Dir = cast<detail::InMemoryDirectory>(Node);
if (I == E)
return Dir;
}
}

/// Create \p FromPath as a hard link to the file that \p ToPath resolves to.
/// \return false when the lookup of \p ToPath fails, when the lookup of
/// \p FromPath succeeds, or when \p ToPath does not resolve to a file;
/// otherwise the result of adding the link.
bool InMemoryFileSystem::addHardLink(const Twine &FromPath,
                                     const Twine &ToPath) {
  auto ExistingEntry = lookupInMemoryNode(*this, Root.get(), FromPath);
  auto Target = lookupInMemoryNode(*this, Root.get(), ToPath);

  // The link target has to be present already.
  if (!Target)
    return false;
  // The link itself has to be a new entry.
  if (ExistingEntry)
    return false;
  // Only a file can be linked to.
  if (!isa<detail::InMemoryFile>(*Target))
    return false;

  // The link is added without a buffer of its own.
  return this->addFile(FromPath, 0, nullptr, None, None, None, None,
                       cast<detail::InMemoryFile>(*Target));
}

llvm::ErrorOr<Status> InMemoryFileSystem::status(const Twine &Path) {
auto Node = lookupInMemoryNode(*this, Root.get(), Path);
if (Node)
return (*Node)->getStatus(Path.str());
return detail::getNodeStatus(*Node, Path.str());
return Node.getError();
}

Expand Down Expand Up @@ -766,7 +836,7 @@ class InMemoryDirIterator : public clang::vfs::detail::DirIterImpl {
if (I != E) {
SmallString<256> Path(RequestedDirName);
llvm::sys::path::append(Path, I->second->getFileName());
CurrentEntry = I->second->getStatus(Path);
CurrentEntry = detail::getNodeStatus(I->second.get(), Path);
} else {
// When we're at the end, make CurrentEntry invalid and DirIterImpl will
// do the rest.
Expand All @@ -777,7 +847,7 @@ class InMemoryDirIterator : public clang::vfs::detail::DirIterImpl {
public:
InMemoryDirIterator() = default;

explicit InMemoryDirIterator(detail::InMemoryDirectory &Dir,
explicit InMemoryDirIterator(const detail::InMemoryDirectory &Dir,
std::string RequestedDirName)
: I(Dir.begin()), E(Dir.end()),
RequestedDirName(std::move(RequestedDirName)) {
Expand Down
Loading

0 comments on commit d5554c5

Please sign in to comment.