Skip to content

Commit

Permalink
Feature: support any tag separator
Browse files Browse the repository at this point in the history
This makes it possible to have tags made up of multiple words.

Breaking change: commas ',' are no longer a default separator.

Fixes #594
  • Loading branch information
ArthurHoaro committed Nov 5, 2020
1 parent 48df9f4 commit b3bd8c3
Show file tree
Hide file tree
Showing 38 changed files with 585 additions and 114 deletions.
39 changes: 21 additions & 18 deletions application/bookmark/Bookmark.php
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,13 @@ class Bookmark
/**
* Initialize a link from array data. Especially useful to create a Bookmark from former link storage format.
*
* @param array $data
* @param array $data
* @param string $tagsSeparator Tags separator loaded from the config file.
* This is a context data, and it should *never* be stored in the Bookmark object.
*
* @return $this
*/
public function fromArray(array $data): Bookmark
public function fromArray(array $data, string $tagsSeparator = ' '): Bookmark
{
$this->id = $data['id'] ?? null;
$this->shortUrl = $data['shorturl'] ?? null;
Expand All @@ -77,7 +79,7 @@ public function fromArray(array $data): Bookmark
if (is_array($data['tags'])) {
$this->tags = $data['tags'];
} else {
$this->tags = preg_split('/\s+/', $data['tags'] ?? '', -1, PREG_SPLIT_NO_EMPTY);
$this->tags = tags_str2array($data['tags'] ?? '', $tagsSeparator);
}
if (! empty($data['updated'])) {
$this->updated = $data['updated'];
Expand Down Expand Up @@ -348,7 +350,12 @@ public function getTags(): array
*/
public function setTags(?array $tags): Bookmark
{
$this->setTagsString(implode(' ', $tags ?? []));
$this->tags = array_map(
function (string $tag): string {
return $tag[0] === '-' ? substr($tag, 1) : $tag;
},
tags_filter($tags, ' ')
);

return $this;
}
Expand Down Expand Up @@ -420,11 +427,13 @@ public function setSticky(?bool $sticky): Bookmark
}

/**
* @return string Bookmark's tags as a string, separated by a space
* @param string $separator Tags separator loaded from the config file.
*
* @return string Bookmark's tags as a string, separated by a separator
*/
public function getTagsString(): string
public function getTagsString(string $separator = ' '): string
{
return implode(' ', $this->getTags());
return tags_array2str($this->getTags(), $separator);
}

/**
Expand All @@ -444,19 +453,13 @@ public function isNote(): bool
* - leading dash in tags will be removed
*
* @param string|null $tags
* @param string $separator Tags separator loaded from the config file.
*
* @return $this
*/
public function setTagsString(?string $tags): Bookmark
public function setTagsString(?string $tags, string $separator = ' '): Bookmark
{
// Remove first '-' char in tags.
$tags = preg_replace('/(^| )\-/', '$1', $tags ?? '');
// Explode all tags separted by spaces or commas
$tags = preg_split('/[\s,]+/', $tags);
// Remove eventual empty values
$tags = array_values(array_filter($tags));

$this->tags = $tags;
$this->setTags(tags_str2array($tags, $separator));

return $this;
}
Expand Down Expand Up @@ -507,7 +510,7 @@ public function getAdditionalContentEntry(string $key, $default = null)
*/
public function renameTag(string $fromTag, string $toTag): void
{
if (($pos = array_search($fromTag, $this->tags)) !== false) {
if (($pos = array_search($fromTag, $this->tags ?? [])) !== false) {
$this->tags[$pos] = trim($toTag);
}
}
Expand All @@ -519,7 +522,7 @@ public function renameTag(string $fromTag, string $toTag): void
*/
public function deleteTag(string $tag): void
{
if (($pos = array_search($tag, $this->tags)) !== false) {
if (($pos = array_search($tag, $this->tags ?? [])) !== false) {
unset($this->tags[$pos]);
$this->tags = array_values($this->tags);
}
Expand Down
2 changes: 1 addition & 1 deletion application/bookmark/BookmarkFileService.php
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public function __construct(ConfigManager $conf, History $history, Mutex $mutex,
}
}

$this->bookmarkFilter = new BookmarkFilter($this->bookmarks);
$this->bookmarkFilter = new BookmarkFilter($this->bookmarks, $this->conf);
}

/**
Expand Down
47 changes: 31 additions & 16 deletions application/bookmark/BookmarkFilter.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

use Exception;
use Shaarli\Bookmark\Exception\BookmarkNotFoundException;
use Shaarli\Config\ConfigManager;

/**
* Class LinkFilter.
Expand Down Expand Up @@ -58,12 +59,16 @@ class BookmarkFilter
*/
private $bookmarks;

/** @var ConfigManager */
protected $conf;

/**
* @param Bookmark[] $bookmarks initialization.
*/
public function __construct($bookmarks)
public function __construct($bookmarks, ConfigManager $conf)
{
$this->bookmarks = $bookmarks;
$this->conf = $conf;
}

/**
Expand Down Expand Up @@ -107,10 +112,14 @@ public function filter(
$filtered = $this->bookmarks;
}
if (!empty($request[0])) {
$filtered = (new BookmarkFilter($filtered))->filterTags($request[0], $casesensitive, $visibility);
$filtered = (new BookmarkFilter($filtered, $this->conf))
->filterTags($request[0], $casesensitive, $visibility)
;
}
if (!empty($request[1])) {
$filtered = (new BookmarkFilter($filtered))->filterFulltext($request[1], $visibility);
$filtered = (new BookmarkFilter($filtered, $this->conf))
->filterFulltext($request[1], $visibility)
;
}
return $filtered;
case self::$FILTER_TEXT:
Expand Down Expand Up @@ -280,8 +289,9 @@ private function filterFulltext(string $searchterms, string $visibility = 'all')
*
* @return string generated regex fragment
*/
private static function tag2regex(string $tag): string
protected function tag2regex(string $tag): string
{
$tagsSeparator = $this->conf->get('general.tags_separator', ' ');
$len = strlen($tag);
if (!$len || $tag === "-" || $tag === "*") {
// nothing to search, return empty regex
Expand All @@ -295,12 +305,13 @@ private static function tag2regex(string $tag): string
$i = 0; // start at first character
$regex = '(?='; // use positive lookahead
}
$regex .= '.*(?:^| )'; // before tag may only be a space or the beginning
// before tag may only be the separator or the beginning
$regex .= '.*(?:^|' . $tagsSeparator . ')';
// iterate over string, separating it into placeholder and content
for (; $i < $len; $i++) {
if ($tag[$i] === '*') {
// placeholder found
$regex .= '[^ ]*?';
$regex .= '[^' . $tagsSeparator . ']*?';
} else {
// regular characters
$offset = strpos($tag, '*', $i);
Expand All @@ -316,7 +327,8 @@ private static function tag2regex(string $tag): string
$i = $offset;
}
}
$regex .= '(?:$| ))'; // after the tag may only be a space or the end
// after the tag may only be the separator or the end
$regex .= '(?:$|' . $tagsSeparator . '))';
return $regex;
}

Expand All @@ -334,14 +346,15 @@ private static function tag2regex(string $tag): string
*/
public function filterTags($tags, bool $casesensitive = false, string $visibility = 'all')
{
$tagsSeparator = $this->conf->get('general.tags_separator', ' ');
// get single tags (we may get passed an array, even though the docs say different)
$inputTags = $tags;
if (!is_array($tags)) {
// we got an input string, split tags
$inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY);
$inputTags = tags_str2array($inputTags, $tagsSeparator);
}

if (!count($inputTags)) {
if (count($inputTags) === 0) {
// no input tags
return $this->noFilter($visibility);
}
Expand All @@ -358,7 +371,7 @@ public function filterTags($tags, bool $casesensitive = false, string $visibilit
}

// build regex from all tags
$re = '/^' . implode(array_map("self::tag2regex", $inputTags)) . '.*$/';
$re = '/^' . implode(array_map([$this, 'tag2regex'], $inputTags)) . '.*$/';
if (!$casesensitive) {
// make regex case insensitive
$re .= 'i';
Expand All @@ -378,7 +391,8 @@ public function filterTags($tags, bool $casesensitive = false, string $visibilit
continue;
}
}
$search = $link->getTagsString(); // build search string, start with tags of current link
// build search string, start with tags of current link
$search = $link->getTagsString($tagsSeparator);
if (strlen(trim($link->getDescription())) && strpos($link->getDescription(), '#') !== false) {
// description given and at least one possible tag found
$descTags = array();
Expand All @@ -390,9 +404,9 @@ public function filterTags($tags, bool $casesensitive = false, string $visibilit
);
if (count($descTags[1])) {
// there were some tags in the description, add them to the search string
$search .= ' ' . implode(' ', $descTags[1]);
$search .= $tagsSeparator . tags_array2str($descTags[1], $tagsSeparator);
}
};
}
// match regular expression with search string
if (!preg_match($re, $search)) {
// this entry does _not_ match our regex
Expand Down Expand Up @@ -422,7 +436,7 @@ public function filterUntagged(string $visibility)
}
}

if (empty(trim($link->getTagsString()))) {
if (empty($link->getTags())) {
$filtered[$key] = $link;
}
}
Expand Down Expand Up @@ -537,18 +551,19 @@ protected function postProcessFoundPositions(array $fieldLengths, array $foundPo
*/
protected function buildFullTextSearchableLink(Bookmark $link, array &$lengths): string
{
$tagString = $link->getTagsString($this->conf->get('general.tags_separator', ' '));
$content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') .'\\';
$content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') .'\\';
$content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') .'\\';
$content .= mb_convert_case($link->getTagsString(), MB_CASE_LOWER, 'UTF-8') .'\\';
$content .= mb_convert_case($tagString, MB_CASE_LOWER, 'UTF-8') .'\\';

$lengths['title'] = ['start' => 0, 'end' => mb_strlen($link->getTitle())];
$nextField = $lengths['title']['end'] + 1;
$lengths['description'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getDescription())];
$nextField = $lengths['description']['end'] + 1;
$lengths['url'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getUrl())];
$nextField = $lengths['url']['end'] + 1;
$lengths['tags'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getTagsString())];
$lengths['tags'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($tagString)];

return $content;
}
Expand Down
46 changes: 46 additions & 0 deletions application/bookmark/LinkUtils.php
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,49 @@ function is_note($linkUrl)
{
return isset($linkUrl[0]) && $linkUrl[0] === '?';
}

/**
 * Extract an array of tags from a given tag string, with provided separator.
 *
 * Whitespace around each tag is discarded, consecutive separators are collapsed
 * and empty entries are dropped.
 *
 * @param string|null $tags      String containing a list of tags separated by $separator.
 *                               null is handled like an empty string.
 * @param string      $separator Shaarli's default: ' ' (whitespace)
 *
 * @return array List of tags
 */
function tags_str2array(?string $tags, string $separator): array
{
    // For whitespaces, we use the special \s regex character.
    // Any other separator is escaped so that regex metacharacters (e.g. '|', '.', '/')
    // are matched literally instead of altering or breaking the pattern.
    $pattern = $separator === ' ' ? '\s' : preg_quote($separator, '/');

    // The non-capturing group makes the '+' quantifier apply to the whole separator,
    // not only to its last character.
    $split = preg_split('/\s*(?:' . $pattern . ')+\s*/', trim($tags ?? ''), -1, PREG_SPLIT_NO_EMPTY);

    // preg_split() returns false on failure: honour the declared array return type.
    return $split === false ? [] : $split;
}

/**
 * Build a tag string from a list of tags, joined with the provided separator.
 * Note that the given array is cleaned up by tags_filter() before being joined.
 *
 * @param array|null $tags      List of tags
 * @param string     $separator Separator inserted between tags
 *
 * @return string
 */
function tags_array2str(?array $tags, string $separator): string
{
    $cleanTags = tags_filter($tags, $separator);

    return implode($separator, $cleanTags);
}

/**
 * Clean an array of tags: trim + remove empty entries
 *
 * Each entry is stripped of surrounding whitespace and of the separator's characters.
 * Entries reduced to an empty string are removed and the result is re-indexed from 0.
 *
 * @param array|null $tags      List of tags; null is handled like an empty list.
 * @param string     $separator Tags separator, trimmed from both ends of every tag.
 *
 * @return array
 */
function tags_filter(?array $tags, string $separator): array
{
    // Default trim() character list, extended with the separator's characters.
    $trimChars = " \t\n\r\0\x0B" . $separator;

    $trimmed = array_map(
        function (string $entry) use ($trimChars): string {
            return trim($entry, $trimChars);
        },
        $tags ?? []
    );

    // Compare against '' explicitly: array_filter() without a callback drops every
    // falsy value, which would silently delete a legitimate "0" tag.
    $nonEmpty = array_filter(
        $trimmed,
        function (string $entry): bool {
            return $entry !== '';
        }
    );

    return array_values($nonEmpty);
}
1 change: 1 addition & 0 deletions application/config/ConfigManager.php
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,7 @@ protected function setDefaultValues()
$this->setEmpty('general.default_note_title', 'Note: ');
$this->setEmpty('general.retrieve_description', true);
$this->setEmpty('general.enable_async_metadata', true);
$this->setEmpty('general.tags_separator', ' ');

$this->setEmpty('updates.check_updates', false);
$this->setEmpty('updates.check_updates_branch', 'stable');
Expand Down
7 changes: 4 additions & 3 deletions application/formatter/BookmarkDefaultFormatter.php
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,16 @@ protected function formatTagList($bookmark)
*/
protected function formatTagListHtml($bookmark)
{
$tagsSeparator = $this->conf->get('general.tags_separator', ' ');
if (empty($bookmark->getAdditionalContentEntry('search_highlight')['tags'])) {
return $this->formatTagList($bookmark);
}

$tags = $this->tokenizeSearchHighlightField(
$bookmark->getTagsString(),
$bookmark->getTagsString($tagsSeparator),
$bookmark->getAdditionalContentEntry('search_highlight')['tags']
);
$tags = $this->filterTagList(explode(' ', $tags));
$tags = $this->filterTagList(tags_str2array($tags, $tagsSeparator));
$tags = escape($tags);
$tags = $this->replaceTokensArray($tags);

Expand All @@ -88,7 +89,7 @@ protected function formatTagListHtml($bookmark)
*/
protected function formatTagString($bookmark)
{
return implode(' ', $this->formatTagList($bookmark));
return implode($this->conf->get('general.tags_separator'), $this->formatTagList($bookmark));
}

/**
Expand Down
3 changes: 2 additions & 1 deletion application/formatter/BookmarkFormatter.php
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ protected function formatTagListHtml($bookmark)
*/
protected function formatTagString($bookmark)
{
return implode(' ', $this->formatTagList($bookmark));
return implode($this->conf->get('general.tags_separator', ' '), $this->formatTagList($bookmark));
}

/**
Expand Down Expand Up @@ -351,6 +351,7 @@ protected function formatUpdatedTimestamp(Bookmark $bookmark)

/**
* Format tag list, e.g. remove private tags if the user is not logged in.
* TODO: this method is called multiple times to format tags, the result should be cached.
*
* @param array $tags
*
Expand Down
Loading

0 comments on commit b3bd8c3

Please sign in to comment.