Skip to content
This repository has been archived by the owner on Sep 5, 2023. It is now read-only.

Commit

Permalink
Fix for stopword lists containing blank lines and/or comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
Andrew Scott committed Dec 9, 2018
1 parent 16cc94e commit a7756e5
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion src/Text/StopWords.php
Expand Up @@ -68,7 +68,12 @@ public function getWordList(): array {
if (empty($this->cached)) {
$file = sprintf(__DIR__ . '/../../resources/text/stopwords-%s.txt', $this->getLanguage());

$this->cached = explode("\n", str_replace(["\r\n", "\r"], "\n", file_get_contents($file)));
$lines = explode("\n", str_replace(["\r\n", "\r"], "\n", file_get_contents($file)));

$this->cached = array_filter($lines, function($line) {
// Ignore empty lines and lines starting with '#'.
return !(trim($line) == '' || mb_substr($line, 0, 1) == '#');
});
}

return $this->cached;
Expand Down

0 comments on commit a7756e5

Please sign in to comment.