Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions changelog/unreleased/38804
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Enhancement: Introduce new state to prevent scanning of shallow scanned folders

Folders can be partially scanned, this means that a folder could have its closest
contents scanned (the first level), but not deeper contents. Folder "/A" could be
scanned but not "/A/B/C".

Previously, we couldn't detect that a folder had been partially scanned, so we
triggered another scan on that folder even though we already had data in the DB.

Now, we can detect that the folder has been partially scanned to avoid another
scan if it isn't needed. This leads to notable performance improvements in cases
where a FS hasn't been scanned fully. Note that an initial scan is still required,
and the performance will remain the same in this case.

https://github.com/owncloud/core/pull/38804
25 changes: 19 additions & 6 deletions lib/private/Files/Cache/Cache.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
use Doctrine\DBAL\Platforms\OraclePlatform;
use OCP\DB\QueryBuilder\IQueryBuilder;
use OCP\Files\Cache\ICache;
use OCP\Files\Cache\IScanner;
use OCP\Files\Cache\ICacheEntry;
use \OCP\Files\IMimeTypeLoader;
use OCP\IDBConnection;
Expand Down Expand Up @@ -681,10 +682,11 @@ public function clear() {
* - Cache::PARTIAL: File is not stored in the cache but some incomplete data is known
* - Cache::SHALLOW: The folder and it's direct children are in the cache but not all sub folders are fully scanned
* - Cache::COMPLETE: The file or folder, with all it's children) are fully scanned
* - Cache::NOT_SCANNED: Only the folder is in the cache. The contents are unknown, so the folder needs a scan
*
* @param string $file
*
* @return int Cache::NOT_FOUND, Cache::PARTIAL, Cache::SHALLOW or Cache::COMPLETE
* @return int Cache::NOT_FOUND, Cache::PARTIAL, Cache::SHALLOW, Cache::COMPLETE or Cache::NOT_SCANNED
*/
public function getStatus($file) {
// normalize file
Expand All @@ -694,7 +696,10 @@ public function getStatus($file) {
$sql = 'SELECT `size` FROM `*PREFIX*filecache` WHERE `storage` = ? AND `path_hash` = ?';
$result = $this->connection->executeQuery($sql, [$this->getNumericStorageId(), $pathHash]);
if ($row = $result->fetch()) {
if ((int)$row['size'] === -1) {
$size = (int)$row['size'];
if ($size === IScanner::SIZE_NEEDS_SCAN) {
return self::NOT_SCANNED;
} elseif ($size === IScanner::SIZE_SHALLOW_SCANNED) {
return self::SHALLOW;
} else {
return self::COMPLETE;
Expand Down Expand Up @@ -853,10 +858,18 @@ public function calculateFolderSize($path, $entry = null) {
if ($row = $result->fetch()) {
$result->closeCursor();
list($sum, $min) = \array_values($row);
if ($min === null && $entry['size'] < 0) {
// could happen if the folder hasn't been scanned.
// we don't have any data, so return the SIZE_NEEDS_SCAN
// if the size of the entry is positive it means that the entry has been scanned,
// so the folder is empty (no need to be scanned)
return IScanner::SIZE_NEEDS_SCAN;
}
$sum = 0 + $sum;
$min = 0 + $min;
if ($min === -1) {
$totalSize = $min;
if ($min === IScanner::SIZE_NEEDS_SCAN || $min === IScanner::SIZE_SHALLOW_SCANNED) {
// current folder is shallow scanned
$totalSize = IScanner::SIZE_SHALLOW_SCANNED;
} else {
$totalSize = $sum;
}
Expand Down Expand Up @@ -900,8 +913,8 @@ public function getAll() {
*/
public function getIncomplete() {
$query = $this->connection->prepare('SELECT `path` FROM `*PREFIX*filecache`'
. ' WHERE `storage` = ? AND `size` = -1 ORDER BY `fileid` DESC', 1);
$query->execute([$this->getNumericStorageId()]);
. ' WHERE `storage` = ? AND `size` = ? ORDER BY `fileid` DESC', 1);
$query->execute([$this->getNumericStorageId(), IScanner::SIZE_NEEDS_SCAN]);
if ($row = $query->fetch()) {
return $row['path'];
} else {
Expand Down
2 changes: 2 additions & 0 deletions lib/private/Files/Cache/Propagator.php
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ public function propagateChange($internalPath, $time, $sizeDifference = 0) {

if ($sizeDifference !== 0) {
// if we need to update size, only update the records with calculated size (>-1)
// special cases (IScanner::SIZE_*) have negative values, so they won't interfere
$builder->set('size', $builder->createFunction(
'CASE' .
' WHEN ' . $builder->expr()->gt('size', $builder->expr()->literal(-1, IQueryBuilder::PARAM_INT)) .
Expand Down Expand Up @@ -168,6 +169,7 @@ public function commitBatch() {
->set('size', $sizeQuery->createFunction('`size` + ' . $sizeQuery->createParameter('size')))
->where($query->expr()->eq('storage', $query->expr()->literal($storageId, IQueryBuilder::PARAM_INT)))
->andWhere($query->expr()->eq('path_hash', $query->createParameter('hash')))
// special cases (IScanner::SIZE_*) have negative values, so they won't interfere
->andWhere($sizeQuery->expr()->gt('size', $sizeQuery->expr()->literal(-1, IQueryBuilder::PARAM_INT)));

foreach ($this->batch as $item) {
Expand Down
29 changes: 16 additions & 13 deletions lib/private/Files/Cache/Scanner.php
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ public function scanFile($file, $reuseExisting = 0, $parentId = -1, $cacheData =
// only reuse data if the file hasn't explicitly changed
if (isset($data['storage_mtime'], $cacheData['storage_mtime']) && $data['storage_mtime'] === $cacheData['storage_mtime']) {
$data['mtime'] = $cacheData['mtime'];
if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === -1)) {
if (($reuseExisting & self::REUSE_SIZE) && ($data['size'] === IScanner::SIZE_NEEDS_SCAN)) {
$data['size'] = $cacheData['size'];
}
if ($reuseExisting & self::REUSE_ETAG) {
Expand Down Expand Up @@ -399,12 +399,19 @@ protected function scanChildren($path, $recursive = self::SCAN_RECURSIVE, $reuse

foreach ($childQueue as $child => $childId) {
$childSize = $this->scanChildren($child, $recursive, $reuse, $childId, $lock);
if ($childSize === -1) {
$size = -1;
} elseif ($size !== -1) {
if ($childSize === IScanner::SIZE_NEEDS_SCAN) {
$size = IScanner::SIZE_NEEDS_SCAN;
} elseif ($size !== IScanner::SIZE_NEEDS_SCAN) {
$size += $childSize;
}
}

if ($size === IScanner::SIZE_NEEDS_SCAN) {
// Mark this folder as shallow scanned to avoid hitting the storage.
// The shallow contents are known, but not the whole.
$size = IScanner::SIZE_SHALLOW_SCANNED;
}

if ($this->cacheActive) {
$this->cache->update($folderId, ['size' => $size]);
}
Expand All @@ -431,12 +438,12 @@ private function handleChildren($path, $recursive, $reuse, $folderId, $lock, &$s
if ($data) {
if ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE) {
$childQueue[$child] = $data['fileid'];
} elseif ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE_INCOMPLETE and $data['size'] === -1) {
} elseif ($data['mimetype'] === 'httpd/unix-directory' and $recursive === self::SCAN_RECURSIVE_INCOMPLETE and $data['size'] === IScanner::SIZE_NEEDS_SCAN) {
// only recurse into folders which aren't fully scanned
$childQueue[$child] = $data['fileid'];
} elseif (!isset($data['size']) || $data['size'] === -1) {
$size = -1;
} elseif ($size !== -1) {
} elseif (!isset($data['size']) || $data['size'] === IScanner::SIZE_NEEDS_SCAN) {
$size = IScanner::SIZE_NEEDS_SCAN;
} elseif ($size !== IScanner::SIZE_NEEDS_SCAN) {
$size += $data['size'];
}
}
Expand Down Expand Up @@ -499,14 +506,10 @@ public function backgroundScan() {
$this->scan('', self::SCAN_RECURSIVE, self::REUSE_ETAG);
}, '');
} else {
$lastPath = null;
while (($path = $this->cache->getIncomplete()) !== false && $path !== $lastPath) {
while (($path = $this->cache->getIncomplete()) !== false) {
$this->runBackgroundScanJob(function () use ($path) {
$this->scan($path, self::SCAN_RECURSIVE_INCOMPLETE, self::REUSE_ETAG | self::REUSE_SIZE);
}, $path);
// FIXME: this won't proceed with the next item, needs revamping of getIncomplete()
// to make this possible
$lastPath = $path;
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion lib/private/Files/Cache/Wrapper/CacheJail.php
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ public function clear() {
/**
* @param string $file
*
* @return int Cache::NOT_FOUND, Cache::PARTIAL, Cache::SHALLOW or Cache::COMPLETE
* @return int Cache::NOT_FOUND, Cache::PARTIAL, Cache::SHALLOW, Cache::COMPLETE or Cache::NOT_SCANNED
*/
public function getStatus($file) {
return $this->cache->getStatus($this->getSourcePath($file));
Expand Down
2 changes: 1 addition & 1 deletion lib/private/Files/Cache/Wrapper/CacheWrapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ public function clear() {
/**
* @param string $file
*
* @return int Cache::NOT_FOUND, Cache::PARTIAL, Cache::SHALLOW or Cache::COMPLETE
* @return int Cache::NOT_FOUND, Cache::PARTIAL, Cache::SHALLOW, Cache::COMPLETE or Cache::NOT_SCANNED
*/
public function getStatus($file) {
return $this->cache->getStatus($file);
Expand Down
5 changes: 4 additions & 1 deletion lib/private/Files/View.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
use OCP\Constants;
use OCP\Events\EventEmitterTrait;
use OCP\Files\Cache\ICacheEntry;
use OCP\Files\Cache\IScanner;
use OCP\Files\FileNameTooLongException;
use OCP\Files\InvalidCharacterInPathException;
use OCP\Files\InvalidPathException;
Expand Down Expand Up @@ -1399,7 +1400,7 @@ private function getCacheEntry($storage, $internalPath, $relativePath) {

try {
// if the file is not in the cache or needs to be updated, trigger the scanner and reload the data
if (!$data || (isset($data['size']) && ($data['size'] === -1))) {
if (!$data || (isset($data['size']) && ($data['size'] === IScanner::SIZE_NEEDS_SCAN))) {
$this->lockFile($relativePath, ILockingProvider::LOCK_SHARED);
if (!$storage->file_exists($internalPath)) {
$this->unlockFile($relativePath, ILockingProvider::LOCK_SHARED);
Expand All @@ -1415,6 +1416,8 @@ private function getCacheEntry($storage, $internalPath, $relativePath) {
$storage->getPropagator()->propagateChange($internalPath, \time());
$data = $cache->get($internalPath);
$this->unlockFile($relativePath, ILockingProvider::LOCK_SHARED);
} elseif ($data['size'] === IScanner::SIZE_SHALLOW_SCANNED) {
$cache->correctFolderSize($internalPath, $data);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lock shouldn't be needed because we don't hit the storage... should we propagate the change using the propagator?

}
} catch (LockedException $e) {
// if the file is locked we just use the old cache info
Expand Down
8 changes: 8 additions & 0 deletions lib/public/Files/Cache/ICache.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ interface ICache {
public const PARTIAL = 1; //only partial data available, file not cached in the database
public const SHALLOW = 2; //folder in cache, but not all child files are completely scanned
public const COMPLETE = 3;
/**
* folder in cache, but contents not scanned yet. This is different than SHALLOW
* because SHALLOW implies a partial scan.
* If there are "/A/D1" (folder) and "/A/F1" (file), "/A" should be with SHALLOW status
* (we have data for D1 and F1), but "/A/D1" should be with NOT_SCANNED status because
* we don't know what is inside.
*/
public const NOT_SCANNED = 4;

/**
* Get the numeric storage id for this cache's storage
Expand Down
3 changes: 3 additions & 0 deletions lib/public/Files/Cache/IScanner.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ interface IScanner {
public const REUSE_SIZE = 2;
public const REUSE_ONLY_FOR_FILES = 4; // apply the etag reuse only to files, not folders

public const SIZE_NEEDS_SCAN = -1;
public const SIZE_SHALLOW_SCANNED = -2; // current folder might be scanned but deeper folders not

/**
* scan a single file and store it in the cache
*
Expand Down
11 changes: 8 additions & 3 deletions tests/lib/Files/Cache/CacheTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,8 @@ public function testFolder($folder) {
$fileData['unkownSize'] = ['size' => -1, 'mtime' => 25, 'mimetype' => 'foo/file'];
$this->cache->put($file4, $fileData['unkownSize']);

$this->assertEquals(-1, $this->cache->calculateFolderSize($folder));
// -2 = IScanner::SIZE_SHALLOW_SCANNED
$this->assertEquals(-2, $this->cache->calculateFolderSize($folder));

$fileData['unkownSize'] = ['size' => 5, 'mtime' => 25, 'mimetype' => 'foo/file'];
$this->cache->put($file4, $fileData['unkownSize']);
Expand Down Expand Up @@ -208,7 +209,8 @@ public function testEncryptedFolder() {
$fileData['unkownSize'] = ['size' => -1, 'mtime' => 25, 'mimetype' => 'foo/file'];
$this->cache->put($file4, $fileData['unkownSize']);

$this->assertEquals(-1, $this->cache->calculateFolderSize($file1));
// -2 = IScanner::SIZE_SHALLOW_SCANNED
$this->assertEquals(-2, $this->cache->calculateFolderSize($file1));

$fileData['unkownSize'] = ['size' => 5, 'mtime' => 25, 'mimetype' => 'foo/file'];
$this->cache->put($file4, $fileData['unkownSize']);
Expand Down Expand Up @@ -244,7 +246,8 @@ public function testRootFolderSizeForNonHomeStorage() {
$this->assertTrue($this->cache->inCache($dir2));

// check that root size ignored the unknown sizes
$this->assertEquals(-1, $this->cache->calculateFolderSize(''));
// -2 = IScanner::SIZE_SHALLOW_SCANNED
$this->assertEquals(-2, $this->cache->calculateFolderSize(''));

// clean up
$this->cache->remove('');
Expand All @@ -264,6 +267,8 @@ public function testStatus() {
$this->cache->put('foo', ['size' => -1]);
$this->assertEquals(Cache::PARTIAL, $this->cache->getStatus('foo'));
$this->cache->put('foo', ['size' => -1, 'mtime' => 20, 'mimetype' => 'foo/file']);
$this->assertEquals(Cache::NOT_SCANNED, $this->cache->getStatus('foo'));
$this->cache->put('foo', ['size' => -2, 'mtime' => 20, 'mimetype' => 'foo/file']);
$this->assertEquals(Cache::SHALLOW, $this->cache->getStatus('foo'));
$this->cache->put('foo', ['size' => 10]);
$this->assertEquals(Cache::COMPLETE, $this->cache->getStatus('foo'));
Expand Down
19 changes: 14 additions & 5 deletions tests/lib/Files/Cache/ScannerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,9 @@ public function testShallow() {
$cachedDataFolder = $this->cache->get('');
$cachedDataFolder2 = $this->cache->get('folder');

$this->assertEquals(-1, $cachedDataFolder['size']);
// -2 = IScanner::SIZE_SHALLOW_SCANNED
// -1 = IScanner::SIZE_NEEDS_SCAN
$this->assertEquals(-2, $cachedDataFolder['size']);
$this->assertEquals(-1, $cachedDataFolder2['size']);

$this->scanner->scan('folder', \OC\Files\Cache\Scanner::SCAN_SHALLOW);
Expand All @@ -142,15 +144,19 @@ public function testBackgroundScan() {
$this->assertFalse($this->cache->inCache('folder/bar.txt'));
$this->assertFalse($this->cache->inCache('folder/2bar.txt'));
$cachedData = $this->cache->get('');
$this->assertEquals(-1, $cachedData['size']);
// -2 = IScanner::SIZE_SHALLOW_SCANNED
$this->assertEquals(-2, $cachedData['size']);

$this->scanner->backgroundScan();

$this->assertTrue($this->cache->inCache('folder/bar.txt'));
$this->assertTrue($this->cache->inCache('folder/bar.txt'));

$cachedData = $this->cache->get('');
$this->assertnotEquals(-1, $cachedData['size']);
// -2 = IScanner::SIZE_SHALLOW_SCANNED
// -1 = IScanner::SIZE_NEEDS_SCAN
$this->assertNotEquals(-1, $cachedData['size']);
$this->assertNotEquals(-2, $cachedData['size']);

$this->assertFalse($this->cache->getIncomplete());
}
Expand All @@ -167,7 +173,8 @@ public function testBackgroundScanOnlyRecurseIncomplete() {
$this->cache->put('folder2', ['size' => 1]); // mark as complete

$cachedData = $this->cache->get('');
$this->assertEquals(-1, $cachedData['size']);
// -2 = IScanner::SIZE_SHALLOW_SCANNED
$this->assertEquals(-2, $cachedData['size']);

$this->scanner->scan('', \OC\Files\Cache\Scanner::SCAN_RECURSIVE_INCOMPLETE, \OC\Files\Cache\Scanner::REUSE_ETAG | \OC\Files\Cache\Scanner::REUSE_SIZE);

Expand All @@ -177,6 +184,7 @@ public function testBackgroundScanOnlyRecurseIncomplete() {

$cachedData = $this->cache->get('');
$this->assertNotEquals(-1, $cachedData['size']);
$this->assertNotEquals(-2, $cachedData['size']);

$this->assertFalse($this->cache->getIncomplete());
}
Expand All @@ -199,11 +207,12 @@ public function testReuseExisting() {
$this->scanner->scan('', \OC\Files\Cache\Scanner::SCAN_SHALLOW, \OC\Files\Cache\Scanner::REUSE_ETAG);
$newData = $this->cache->get('');
$this->assertSame($oldData['etag'], $newData['etag']);
$this->assertEquals(-1, $newData['size']);
$this->assertEquals(-2, $newData['size']);

$this->scanner->scan('', \OC\Files\Cache\Scanner::SCAN_RECURSIVE);
$oldData = $this->cache->get('');
$this->assertNotEquals(-1, $oldData['size']);
$this->assertNotEquals(-2, $oldData['size']);
$this->scanner->scanFile('', \OC\Files\Cache\Scanner::REUSE_ETAG + \OC\Files\Cache\Scanner::REUSE_SIZE);
$newData = $this->cache->get('');
$this->assertSame($oldData['etag'], $newData['etag']);
Expand Down
3 changes: 2 additions & 1 deletion tests/lib/Files/ViewTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,8 @@ public function testAutoScan() {

$cachedData = $rootView->getFileInfo('/');
$this->assertEquals('httpd/unix-directory', $cachedData['mimetype']);
$this->assertEquals(-1, $cachedData['size']);
// -2 = IScanner::SIZE_SHALLOW_SCANNED
$this->assertEquals(-2, $cachedData['size']);

$folderData = $rootView->getDirectoryContent('/substorage/folder');
$this->assertEquals('text/plain', $folderData[0]['mimetype']);
Expand Down