diff --git a/README.md b/README.md index 3500f1d..6f7eafc 100644 --- a/README.md +++ b/README.md @@ -124,6 +124,17 @@ $parser = new TheIconic\NameParser\Parser(); $parser->setWhitespace("\t _."); ``` +### Limiting the position of salutations +```php +$parser = new TheIconic\NameParser\Parser(); +$parser->setMaxSalutationIndex(2); +``` +This will require salutations to appear within the +first two words of the given input string. +This defaults to half the number of words in the input string, +meaning that effectively the salutation may occur within +the first half of the name parts. + ## License THE ICONIC Name Parser library for PHP is released under the MIT License. diff --git a/src/Mapper/LastnameMapper.php b/src/Mapper/LastnameMapper.php index 7ab8f8a..05d0d11 100644 --- a/src/Mapper/LastnameMapper.php +++ b/src/Mapper/LastnameMapper.php @@ -34,38 +34,120 @@ public function map(array $parts): array return $parts; } - $parts = array_reverse($parts); - - $parts = $this->mapReversedParts($parts); - - return array_reverse($parts); + return $this->mapParts($parts); } /** + * we map the parts in reverse order because it makes more + * sense to parse for the lastname starting from the end + * * @param array $parts * @return array */ - protected function mapReversedParts(array $parts): array + protected function mapParts(array $parts): array { - $length = count($parts); + $k = $this->skipIgnoredParts($parts) + 1; + $remapIgnored = true; - foreach ($parts as $k => $part) { - if ($part instanceof Suffix || $part instanceof Nickname || $part instanceof Salutation) { - continue; - } + while (--$k >= 0) { + $part = $parts[$k]; if ($part instanceof AbstractPart) { break; } - $originalIndex = $length - $k - 1; - $originalParts = array_reverse($parts); - - if ($this->isFollowedByLastnamePart($originalParts, $originalIndex)) { - if ($this->isApplicablePrefix($originalParts, $originalIndex)) { + if ($this->isFollowedByLastnamePart($parts, $k)) { + if 
($this->isApplicablePrefix($parts, $k)) { $parts[$k] = new LastnamePrefix($part, $this->prefixes[$this->getKey($part)]); continue; } + + if ($this->shouldStopMapping($parts, $k)) { + break; + } + } + + $parts[$k] = new Lastname($part); + $remapIgnored = false; + } + + if ($remapIgnored) { + $parts = $this->remapIgnored($parts); + } + + return $parts; + } + + /** + * skip through the parts we want to ignore and return the start index + * + * @param array $parts + * @return int + */ + protected function skipIgnoredParts(array $parts): int + { + $k = count($parts); + + while (--$k >= 0) { + if (!$this->isIgnoredPart($parts[$k])) { + break; + } + } + + return $k; + } + + /** + * indicates if we should stop mapping at the given index $k + * + * the assumption is that lastname parts have already been found + * but we want to see if we should add more parts + * + * @param array $parts + * @param int $k + * @return bool + */ + protected function shouldStopMapping(array $parts, int $k): bool + { + if ($k < 1) { + return true; + } + + if ($parts[$k + 1] instanceof LastnamePrefix) { + return true; + } + + return strlen($parts[$k + 1]->getValue()) >= 3; + } + + /** + * indicates if the given part should be ignored (skipped) during mapping + * + * @param $part + * @return bool + */ + protected function isIgnoredPart($part) { + return $part instanceof Suffix || $part instanceof Nickname || $part instanceof Salutation; + } + + /** + * remap ignored parts as lastname + * + * if the mapping did not derive any lastname this is called to transform + * any previously ignored parts into lastname parts + * the parts array is in its original order here and is walked from the end + * + * @param array $parts + * @return array + */ + protected function remapIgnored(array $parts): array + { + $k = count($parts); + + while (--$k >= 0) { + $part = $parts[$k]; + + if (!$this->isIgnoredPart($part)) { break; } diff --git a/src/Mapper/SalutationMapper.php b/src/Mapper/SalutationMapper.php index 239ba46..5486fe3 100644 --- 
a/src/Mapper/SalutationMapper.php +++ b/src/Mapper/SalutationMapper.php @@ -9,9 +9,12 @@ class SalutationMapper extends AbstractMapper { protected $salutations = []; - public function __construct(array $salutations) + protected $maxIndex = 0; + + public function __construct(array $salutations, $maxIndex = 0) { $this->salutations = $salutations; + $this->maxIndex = $maxIndex; } /** @@ -22,7 +25,11 @@ public function __construct(array $salutations) */ public function map(array $parts): array { - foreach ($parts as $k => $part) { + $max = ($this->maxIndex > 0) ? $this->maxIndex : floor(count($parts) / 2); + + for ($k = 0; $k < $max; $k++) { + $part = $parts[$k]; + if ($part instanceof AbstractPart) { break; } diff --git a/src/Parser.php b/src/Parser.php index c73f3fe..a99f5fb 100644 --- a/src/Parser.php +++ b/src/Parser.php @@ -33,6 +33,11 @@ class Parser */ protected $nicknameDelimiters = []; + /** + * @var int + */ + protected $maxSalutationIndex = 0; + public function __construct(array $languages = []) { if (empty($languages)) { @@ -99,7 +104,7 @@ protected function getFirstSegmentParser(): Parser $parser = new Parser(); $parser->setMappers([ - new SalutationMapper($this->getSalutations()), + new SalutationMapper($this->getSalutations(), $this->getMaxSalutationIndex()), new SuffixMapper($this->getSuffixes()), new LastnameMapper($this->getPrefixes(), true), new FirstnameMapper(), @@ -117,7 +122,7 @@ protected function getSecondSegmentParser(): Parser $parser = new Parser(); $parser->setMappers([ - new SalutationMapper($this->getSalutations()), + new SalutationMapper($this->getSalutations(), $this->getMaxSalutationIndex()), new SuffixMapper($this->getSuffixes(), true), new NicknameMapper($this->getNicknameDelimiters()), new InitialMapper(true), @@ -149,7 +154,7 @@ public function getMappers(): array if (empty($this->mappers)) { $this->setMappers([ new NicknameMapper($this->getNicknameDelimiters()), - new SalutationMapper($this->getSalutations()), + new 
SalutationMapper($this->getSalutations(), $this->getMaxSalutationIndex()), new SuffixMapper($this->getSuffixes()), new InitialMapper(), new LastnameMapper($this->getPrefixes()), @@ -275,4 +280,23 @@ public function setNicknameDelimiters(array $nicknameDelimiters): Parser return $this; } + + /** + * @return int + */ + public function getMaxSalutationIndex(): int + { + return $this->maxSalutationIndex; + } + + /** + * @param int $maxSalutationIndex + * @return Parser + */ + public function setMaxSalutationIndex(int $maxSalutationIndex): Parser + { + $this->maxSalutationIndex = $maxSalutationIndex; + + return $this; + } } diff --git a/tests/Mapper/FirstnameMapperTest.php b/tests/Mapper/FirstnameMapperTest.php index 21ff38b..cf9defb 100644 --- a/tests/Mapper/FirstnameMapperTest.php +++ b/tests/Mapper/FirstnameMapperTest.php @@ -48,6 +48,16 @@ public function provider() new Lastname('Pan'), ], ], + [ + 'input' => [ + 'Alfonso', + new Salutation('Mr'), + ], + 'expectation' => [ + new Firstname('Alfonso'), + new Salutation('Mr'), + ] + ] ]; } diff --git a/tests/ParserTest.php b/tests/ParserTest.php index aca8426..dc6e5be 100644 --- a/tests/ParserTest.php +++ b/tests/ParserTest.php @@ -434,10 +434,24 @@ public function provider() [ 'PAUL M LEWIS MR', [ - 'salutation' => 'Mr.', 'firstname' => 'Paul', 'initials' => 'M', - 'lastname' => 'Lewis', + 'lastname' => 'Lewis Mr', + ] + ], + [ + 'SUJAN MASTER', + [ + 'firstname' => 'Sujan', + 'lastname' => 'Master', + ], + ], + [ + 'JAMES J MA', + [ + 'firstname' => 'James', + 'initials' => 'J', + 'lastname' => 'Ma' ] ] ]; @@ -494,4 +508,19 @@ public function testSetGetNicknameDelimiters() $this->assertSame('Jim', $parser->parse('[Jim]')->getNickname()); $this->assertNotSame('Jim', $parser->parse('(Jim)')->getNickname()); } + + public function testSetMaxSalutationIndex() + { + $parser = new Parser(); + $this->assertSame(0, $parser->getMaxSalutationIndex()); + $parser->setMaxSalutationIndex(1); + $this->assertSame(1, 
$parser->getMaxSalutationIndex()); + $this->assertSame('', $parser->parse('Francis Mr')->getSalutation()); + + $parser = new Parser(); + $this->assertSame(0, $parser->getMaxSalutationIndex()); + $parser->setMaxSalutationIndex(2); + $this->assertSame(2, $parser->getMaxSalutationIndex()); + $this->assertSame('Mr.', $parser->parse('Francis Mr')->getSalutation()); + } }