Skip to content

Commit

Permalink
NEW Speed up file migration (#271)
Browse files Browse the repository at this point in the history
* BUG Rename FileIDHelper test so the file matches the test name
* BUG Allow SS3 file with double underscore to be imported.
* BUG Update FileMigrationHelper to rename files with bad name if another file already exists with that name
* Don't let migration run when the public resolution strategy is unexpected
* Add a unit test for multi dash file to the FileMigrationHelper
* API Add FileHashingService
* BUG Update Flysystem to cache file hash when retrieving them from files
* Enable caching when re-writting short code
* Get FileHashingService to implement the flushable interface
* Make the FileHashingService injectable in FileIDHelperResoltuonStrategy
  • Loading branch information
maxime-rainville authored and chillu committed May 27, 2019
1 parent e1aaf5c commit ed0ad18
Show file tree
Hide file tree
Showing 13 changed files with 671 additions and 55 deletions.
6 changes: 6 additions & 0 deletions _config/assetscache.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,9 @@ SilverStripe\Core\Injector\Injector:
factory: SilverStripe\Core\Cache\CacheFactory
constructor:
namespace: "ImageShortcodeProvider"
Psr\SimpleCache\CacheInterface.Sha1FileHashingService:
factory: SilverStripe\Core\Cache\CacheFactory
constructor:
namespace: "Sha1FileHashingService"
SilverStripe\Assets\Storage\FileHashingService:
class: SilverStripe\Assets\Storage\Sha1FileHashingService
3 changes: 3 additions & 0 deletions src/Dev/Tasks/FileMigrationHelper.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
use SilverStripe\Assets\Flysystem\FlysystemAssetStore;
use SilverStripe\Assets\Folder;
use SilverStripe\Assets\Storage\AssetStore;
use SilverStripe\Assets\Storage\FileHashingService;
use SilverStripe\Core\Config\Config;
use SilverStripe\Core\Config\Configurable;
use SilverStripe\Core\Environment;
Expand Down Expand Up @@ -199,6 +200,7 @@ protected function ss3Migration($base)
/**
* Construct a temporary SS3 File Resolution Strategy based off the provided initial strategy.
* If `$initialStrategy` is not suitable for a migration, we return null.
* We're overriding the helpers to avoid doing unnecessary checks.
* @param FileResolutionStrategy $initialStrategy
* @return int|FileIDHelperResolutionStrategy
*/
Expand Down Expand Up @@ -227,6 +229,7 @@ private function buildSS3MigrationStrategy(FileResolutionStrategy $initialStrate
$ss3Strategy = new FileIDHelperResolutionStrategy();
$ss3Strategy->setDefaultFileIDHelper($initialStrategy->getDefaultFileIDHelper());
$ss3Strategy->setResolutionFileIDHelpers([new LegacyFileIDHelper(false)]);
$ss3Strategy->setFileHashingService(Injector::inst()->get(FileHashingService::class));

return $ss3Strategy;
}
Expand Down
4 changes: 4 additions & 0 deletions src/Dev/Tasks/TagsToShortcodeTask.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

namespace SilverStripe\Assets\Dev\Tasks;

use SilverStripe\Assets\Storage\FileHashingService;
use SilverStripe\Core\Injector\Injector;
use SilverStripe\Dev\BuildTask;

/**
Expand Down Expand Up @@ -29,6 +31,8 @@ class TagsToShortcodeTask extends BuildTask
*/
public function run($request)
{
Injector::inst()->get(FileHashingService::class)->enableCache();

$tagsToShortcodeHelper = new TagsToShortcodeHelper(
$request->getVar('baseClass'),
isset($request->getVars()['includeBaseClass'])
Expand Down
32 changes: 26 additions & 6 deletions src/FilenameParsing/FileIDHelperResolutionStrategy.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

use InvalidArgumentException;
use League\Flysystem\Filesystem;
use SilverStripe\Assets\Storage\FileHashingService;
use SilverStripe\Core\Config\Configurable;
use SilverStripe\Core\Injector\Injector;
use SilverStripe\Versioned\Versioned;
use SilverStripe\Assets\File;
use SilverStripe\ORM\DB;
Expand All @@ -22,6 +25,9 @@
*/
class FileIDHelperResolutionStrategy implements FileResolutionStrategy
{
use Configurable;


/**
* The FileID helper that will be use to build FileID for this adapter.
* @var FileIDHelper
Expand All @@ -40,6 +46,19 @@ class FileIDHelperResolutionStrategy implements FileResolutionStrategy
*/
private $versionedStage = Versioned::DRAFT;

/** @var FileHashingService */
private $hasher;

private static $dependencies = [
'FileHashingService' => '%$' . FileHashingService::class
];

public function setFileHashingService($service)
{
$this->hasher = $service;
return $this;
}

public function resolveFileID($fileID, Filesystem $filesystem)
{
$parsedFileID = $this->parseFileID($fileID);
Expand Down Expand Up @@ -255,7 +274,11 @@ private function validateHash(FileIDHelper $helper, ParsedFileID $parsedFileID,

// Check if the physical hash of the file starts with our parsed file ID hash
$actualHash = $this->findHashOf($helper, $parsedFileID, $filesystem);
return strpos($actualHash, $parsedFileID->getHash()) === 0;
if (!$actualHash) {
return false;
}

return $this->hasher->compare($actualHash, $parsedFileID->getHash());
}

/**
Expand All @@ -281,11 +304,8 @@ private function findHashOf(FileIDHelper $helper, ParsedFileID $parsedFileID, Fi
return false;
}

// Get hash from stream
$stream = $filesystem->readStream($fileID);
$hc = hash_init('sha1');
hash_update_stream($hc, $stream);
$fullHash = hash_final($hc);
// Get hash from file
$fullHash = $this->hasher->computeFromFile($fileID, $filesystem);

return $fullHash;
}
Expand Down
120 changes: 71 additions & 49 deletions src/Flysystem/FlysystemAssetStore.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
use SilverStripe\Assets\Storage\AssetNameGenerator;
use SilverStripe\Assets\Storage\AssetStore;
use SilverStripe\Assets\Storage\AssetStoreRouter;
use SilverStripe\Assets\Storage\FileHashingService;
use SilverStripe\Control\Controller;
use SilverStripe\Control\Director;
use SilverStripe\Control\HTTPResponse;
Expand Down Expand Up @@ -300,6 +301,9 @@ private function applyToFileOnFilesystem(callable $callable, ParsedFileID $parse
self::VISIBILITY_PROTECTED
];

/** @var FileHashingService $hasher */
$hasher = Injector::inst()->get(FileHashingService::class);

/** @var Filesystem $fs */
/** @var FileResolutionStrategy $strategy */
/** @var string $visibility */
Expand All @@ -321,8 +325,8 @@ private function applyToFileOnFilesystem(callable $callable, ParsedFileID $parse
continue;
}

$stream = $fs->readStream($mainFileID);
if (!$this->validateStreamHash($stream, $parsedFileID->getHash())) {
$actualHash = $hasher->computeFromFile($mainFileID, $fs);
if (!$hasher->compare($actualHash, $parsedFileID->getHash())) {
continue;
}
}
Expand Down Expand Up @@ -548,26 +552,37 @@ public function setFromStream($stream, $filename, $hash = null, $variant = null,
return $result;
}

/** @var FileHashingService $hasher */
$hasher = Injector::inst()->get(FileHashingService::class);

// When saving original filename, generate hash
if (!$hash && !$variant) {
$hash = $hasher->computeFromStream($stream);
}

// Callback for saving content
$callback = function (Filesystem $filesystem, $fileID) use ($stream) {
$callback = function (Filesystem $filesystem, $fileID) use ($stream, $hasher, $hash, $variant) {

// If there's already a file where we want to write and that file has the same sha1 hash as our source file
// We just let the existing file sit there pretend to have writen it. This avoid a weird edge case where
// We try to move an existing file to its own location which causes us to override the file with zero bytes
if ($filesystem->has($fileID)) {
$newHash = $this->getStreamSHA1($stream);
$oldHash = $this->getStreamSHA1($filesystem->readStream($fileID));
$newHash = $hasher->computeFromStream($stream);
$oldHash = $hasher->computeFromFile($fileID, $filesystem);
if ($newHash === $oldHash) {
return true;
}
}

return $filesystem->putStream($fileID, $stream);
};
$result = $filesystem->putStream($fileID, $stream);

// When saving original filename, generate hash
if (!$hash && !$variant) {
$hash = $this->getStreamSHA1($stream);
}
// If we have an hash for a main file, let's pre-warm our file hashing cache.
if ($hash || !$variant) {
$hasher->set($fileID, $filesystem, $hash);
}

return $result;
};

// Submit to conflict check
return $this->writeWithCallback($callback, $filename, $hash, $variant, $config);
Expand Down Expand Up @@ -609,11 +624,18 @@ function (ParsedFileID $parsedFileID, Filesystem $fs, FileResolutionStrategy $st
$destParsedFileID->setVariant($originParsedFileID->getVariant())
);

/** @var FileHashingService $hasher */
$hasher = Injector::inst()->get(FileHashingService::class);

if ($origin !== $destination) {
if ($fs->has($destination)) {
$fs->delete($origin);
// Invalidate hash of delete file
$hasher->invalidate($origin, $fs);
} else {
$fs->rename($origin, $destination);
// Move cached hash value to new location
$hasher->move($origin, $fs, $destination);
}
$this->truncateDirectory(dirname($origin), $fs);
}
Expand Down Expand Up @@ -649,6 +671,13 @@ function (ParsedFileID $pfid, Filesystem $fs, FileResolutionStrategy $strategy)
if ($fromFileID !== $toFileID) {
if (!$fs->has($toFileID)) {
$fs->copy($fromFileID, $toFileID);

// Set hash value for new file
/** @var FileHashingService $hasher */
$hasher = Injector::inst()->get(FileHashingService::class);
if ($hash = $hasher->get($fromFileID, $fs)) {
$hasher->set($toFileID, $fs, $hash);
}
}
}
}
Expand Down Expand Up @@ -677,8 +706,6 @@ protected function deleteFromFilesystem($fileID, Filesystem $filesystem)
$deleted = true;
}



return $deleted;
}

Expand All @@ -692,11 +719,15 @@ protected function deleteFromFilesystem($fileID, Filesystem $filesystem)
*/
protected function deleteFromFileStore(ParsedFileID $parsedFileID, Filesystem $fs, FileResolutionStrategy $strategy)
{
/** @var FileHashingService $hasher */
$hasher = Injector::inst()->get(FileHashingService::class);

$deleted = false;
/** @var ParsedFileID $parsedFileIDToDel */
foreach ($strategy->findVariants($parsedFileID, $fs) as $parsedFileIDToDel) {
$fs->delete($parsedFileIDToDel->getFileID());
$deleted = true;
$hasher->invalidate($parsedFileIDToDel->getFileID(), $fs);
}

// Truncate empty dirs
Expand Down Expand Up @@ -778,6 +809,9 @@ public function swapPublish($filename, $hash)
// The file is already publish
return;
}

/** @var FileHashingService $hasher */
$hasher = Injector::inst()->get(FileHashingService::class);

$parsedFileID = new ParsedFileID($filename, $hash);
$from = $this->getProtectedFilesystem();
Expand Down Expand Up @@ -807,6 +841,7 @@ public function swapPublish($filename, $hash)

// Blast existing variants from the destination
$to->delete($toFileID);
$hasher->move($toFileID, $to, '.swap/' . $fromFileID, $from);
$this->truncateDirectory(dirname($toFileID), $to);
}
}
Expand All @@ -827,12 +862,14 @@ public function swapPublish($filename, $hash)

// Remove the origin file and keep the file ID
$from->delete($fromFileID);
$hasher->move($fromFileID, $from, $toFileID, $to);
$this->truncateDirectory(dirname($fromFileID), $from);
}

foreach ($swapFiles as $variantParsedFileID) {
$fileID = $variantParsedFileID->getFileID();
$from->rename('.swap/' . $fileID, $fileID);
$hasher->move('.swap/' . $fileID, $from, $fileID);
}
$from->deleteDir('.swap');
}
Expand Down Expand Up @@ -899,6 +936,9 @@ protected function moveBetweenFileStore(
FileResolutionStrategy $toStrategy,
$swap = false
) {
/** @var FileHashingService $hasher */
$hasher = Injector::inst()->get(FileHashingService::class);

// Let's find all the variants on the origin store ... those need to be moved to the destination
/** @var ParsedFileID $variantParsedFileID */
foreach ($fromStrategy->findVariants($parsedFileID, $from) as $variantParsedFileID) {
Expand All @@ -915,6 +955,8 @@ protected function moveBetweenFileStore(
// Remove the origin file and keep the file ID
$idsToDelete[] = $fromFileID;
$from->delete($fromFileID);

$hasher->move($fromFileID, $from, $toFileID, $to);
$this->truncateDirectory(dirname($fromFileID), $from);
}
}
Expand Down Expand Up @@ -990,43 +1032,14 @@ protected function isGranted($fileID)
*
* @param resource $stream
* @return string str1 hash
* @deprecated 4.4.0 Use FileHashingService::computeFromStream() instead
*/
protected function getStreamSHA1($stream)
{
Util::rewindStream($stream);
$context = hash_init('sha1');
hash_update_stream($context, $stream);
return hash_final($context);
}

/**
* Validate that a resource stream start with the provided partial hash
* @param resource $stream
* @param string $partialHash
* @return bool
*/
private function validateStreamHash($stream, $partialHash)
{
$fullHash = $this->getStreamSHA1($stream);
return $this->validateHash($fullHash, $partialHash);
}

/**
* Validate that the provided hashes are equivalent. Partial hashes can be provided.
* @param string $firstHash
* @param string $secondHash
* @return bool
*/
private function validateHash($firstHash, $secondHash)
{
// Empty hash will always return false, because they are no validatable
if (empty($firstHash) || empty($secondHash)) {
throw new InvalidArgumentException('FlysystemAssetStore::validateHash can not validate empty hashes');
}
// Return true if $firstHash start with $secondHash or if $secondHash starts with $firstHash
return
strpos($firstHash, $secondHash) === 0 ||
strpos($secondHash, $firstHash) === 0;
return Injector::inst()
->get(FileHashingService::class)
->computeFromStream($stream);
}

/**
Expand Down Expand Up @@ -1152,7 +1165,9 @@ function (
if (empty($variant)) {
// If deferring to the existing file, return the sha of the existing file,
// unless we are writing a variant (which has the same hash value as its original file)
$hash = $this->getStreamSHA1($fs->readStream($targetFileID));
/** @var FileHashingService $hasher */
$hasher = Injector::inst()->get(FileHashingService::class);
$hash = $hasher->computeFromFile($targetFileID, $fs);
$parsedFileID = $parsedFileID->setHash($hash);
}
return $parsedFileID->getTuple();
Expand Down Expand Up @@ -1224,10 +1239,12 @@ function (ParsedFileID $parsedFileID, Filesystem $fs, FileResolutionStrategy $st

if ($parsedFileID && $originalFileID = $parsedFileID->getFileID()) {
if ($fs->has($originalFileID)) {
$stream = $fs->readStream($originalFileID);
/** @var FileHashingService $hasher */
$hasher = Injector::inst()->get(FileHashingService::class);
$actualHash = $hasher->computeFromFile($originalFileID, $fs);

// If the hash of the file doesn't match we return false, because we want to keep looking.
return $this->validateStreamHash($stream, $hash) ? true : false;
return $hasher->compare($actualHash, $hash);
}
}

Expand Down Expand Up @@ -1589,6 +1606,9 @@ private function normaliseToDefaultPath(ParsedFileID $pfid, Filesystem $fs, File
{
$ops = [];

/** @var FileHashingService $hasher */
$hasher = Injector::inst()->get(FileHashingService::class);

// Let's make sure we are using a valid file name
$cleanFilename = $strategy->cleanFilename($pfid->getFilename());
// Check if our cleaned filename is different from the original filename
Expand All @@ -1612,8 +1632,10 @@ private function normaliseToDefaultPath(ParsedFileID $pfid, Filesystem $fs, File
if ($targetVariantFileID !== $origin) {
if ($fs->has($targetVariantFileID)) {
$fs->delete($origin);
$hasher->invalidate($origin, $fs);
} else {
$fs->rename($origin, $targetVariantFileID);
$hasher->move($origin, $fs, $targetVariantFileID);
$ops[$origin] = $targetVariantFileID;
}
$this->truncateDirectory(dirname($origin), $fs);
Expand Down
Loading

0 comments on commit ed0ad18

Please sign in to comment.