Skip to content

Commit

Permalink
Enable lastname remapping - fixes #11
Browse files Browse the repository at this point in the history
Names like "SUJAN MASTER", "JAMES J MA", "PETER K MA" had the 'Master'
or 'Ma' parts as special parts (salutations, suffixes) where they
should be lastnames. In "PAUL M LEWIS MR", the lastname should be
'Lewis Mr'.

This change does three things to fix this:
Firstly, it prevents parsing for salutations beyond the first half of words in
the given string. It also introduces a `setMaxSalutationIndex()` method
to allow overriding this with a fixed maximum word index. E.g. setting
it to 2 will require salutations to appear in the first two words.

Secondly, if the lastname mapper does not derive a lastname, but has skipped
ignored parts like suffix, nickname or salutation, it will convert these into
lastname parts.

Thirdly, the lastname mapper will now map more than one lastname part if
the already mapped lastname parts are shorter than 3 characters and there
will be at least one part left after mapping. This effectively maps
'Lewis' in 'Paul M Lewis Mr' as a lastname, whereas it was previously mapped as a middlename.
  • Loading branch information
wyrfel committed Sep 16, 2018
1 parent 725c5b4 commit 015bb01
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 8 deletions.
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,17 @@ $parser = new TheIconic\NameParser\Parser();
$parser->setWhitespace("\t _.");
```

### Limiting the position of salutations
```php
$parser = new TheIconic\NameParser\Parser();
$parser->setMaxSalutationIndex(2);
```
This will require salutations to appear within the
first two words of the given input string.
If no limit is set, it defaults to half the number of words
in the input string (rounded down), meaning that effectively
the salutation may only occur within the first half of the name parts.

## License

THE ICONIC Name Parser library for PHP is released under the MIT License.
66 changes: 65 additions & 1 deletion src/Mapper/LastnameMapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,19 @@ public function map(array $parts): array
}

/**
* we map the parts in reverse order because it makes more
* sense to parse for the lastname starting from the end
*
* @param array $parts
* @return array
*/
protected function mapReversedParts(array $parts): array
{
$length = count($parts);
$remapIgnored = true;

foreach ($parts as $k => $part) {
if ($part instanceof Suffix || $part instanceof Nickname || $part instanceof Salutation) {
if ($this->isIgnoredPart($part)) {
continue;
}

Expand All @@ -66,6 +70,66 @@ protected function mapReversedParts(array $parts): array
$parts[$k] = new LastnamePrefix($part, $this->prefixes[$this->getKey($part)]);
continue;
}

if ($this->shouldStopMapping($parts, $k)) {
break;
}
}

$parts[$k] = new Lastname($part);
$remapIgnored = false;
}

if ($remapIgnored) {
$parts = $this->remapIgnored($parts);
}

return $parts;
}

/**
 * Decides whether lastname mapping should stop at the given index $k.
 *
 * The caller is expected to have found lastname parts already; this only
 * answers whether one more part may be added to them. Mapping stops when
 * taking the part at $k would leave no further part after it, or when the
 * value of the part at $k - 1 is already at least 3 bytes long.
 *
 * @param array $parts the parts being mapped; $parts[$k - 1] must expose getValue()
 * @param int $k index of the candidate part; the $k - 1 lookup is unguarded,
 *               so this presumably is never called with $k = 0 (confirm at the call site)
 * @return bool true to stop mapping, false to map the part at $k as well
 */
protected function shouldStopMapping(array $parts, int $k): bool
{
    // number of parts that would remain once the part at $k is consumed
    $leftAfterThisOne = count($parts) - ($k + 1);

    if ($leftAfterThisOne < 1) {
        return true;
    }

    $previousPart = $parts[$k - 1];
    $previousIsShort = strlen($previousPart->getValue()) < 3;

    return !$previousIsShort;
}

/**
 * Tells whether the given part is of a kind that is skipped during mapping.
 *
 * @param mixed $part the part to inspect
 * @return bool true for Suffix, Nickname and Salutation instances, false for anything else
 */
protected function isIgnoredPart($part)
{
    foreach ([Suffix::class, Nickname::class, Salutation::class] as $ignoredType) {
        if ($part instanceof $ignoredType) {
            return true;
        }
    }

    return false;
}

/**
* remap ignored parts as lastname
*
* if the mapping did not derive any lastname this is called to transform
* any previously ignored parts into lastname parts
* the parts array is still reversed at this point
*
* @param array $parts
* @return array
*/
protected function remapIgnored(array $parts): array
{
foreach ($parts as $k => $part) {
if (!$this->isIgnoredPart($part)) {
break;
}

Expand Down
11 changes: 9 additions & 2 deletions src/Mapper/SalutationMapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,12 @@ class SalutationMapper extends AbstractMapper
{
protected $salutations = [];

public function __construct(array $salutations)
protected $maxIndex = 0;

/**
* @param array $salutations the salutations this mapper works with, stored as given
* @param int $maxIndex exclusive upper bound for the part index map() scans;
*                      when it is not greater than 0, map() uses half the number of parts (rounded down) instead
*/
public function __construct(array $salutations, $maxIndex = 0)
{
$this->salutations = $salutations;
$this->maxIndex = $maxIndex;
}

/**
Expand All @@ -22,7 +25,11 @@ public function __construct(array $salutations)
*/
public function map(array $parts): array
{
foreach ($parts as $k => $part) {
$max = ($this->maxIndex > 0) ? $this->maxIndex : floor(count($parts) / 2);

for ($k = 0; $k < $max; $k++) {
$part = $parts[$k];

if ($part instanceof AbstractPart) {
break;
}
Expand Down
30 changes: 27 additions & 3 deletions src/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ class Parser
*/
protected $nicknameDelimiters = [];

/**
* @var int
*/
protected $maxSalutationIndex = 0;

public function __construct(array $languages = [])
{
if (empty($languages)) {
Expand Down Expand Up @@ -99,7 +104,7 @@ protected function getFirstSegmentParser(): Parser
$parser = new Parser();

$parser->setMappers([
new SalutationMapper($this->getSalutations()),
new SalutationMapper($this->getSalutations(), $this->getMaxSalutationIndex()),
new SuffixMapper($this->getSuffixes()),
new LastnameMapper($this->getPrefixes(), true),
new FirstnameMapper(),
Expand All @@ -117,7 +122,7 @@ protected function getSecondSegmentParser(): Parser
$parser = new Parser();

$parser->setMappers([
new SalutationMapper($this->getSalutations()),
new SalutationMapper($this->getSalutations(), $this->getMaxSalutationIndex()),
new SuffixMapper($this->getSuffixes(), true),
new NicknameMapper($this->getNicknameDelimiters()),
new InitialMapper(true),
Expand Down Expand Up @@ -149,7 +154,7 @@ public function getMappers(): array
if (empty($this->mappers)) {
$this->setMappers([
new NicknameMapper($this->getNicknameDelimiters()),
new SalutationMapper($this->getSalutations()),
new SalutationMapper($this->getSalutations(), $this->getMaxSalutationIndex()),
new SuffixMapper($this->getSuffixes()),
new InitialMapper(),
new LastnameMapper($this->getPrefixes()),
Expand Down Expand Up @@ -275,4 +280,23 @@ public function setNicknameDelimiters(array $nicknameDelimiters): Parser

return $this;
}

/**
* Returns the maximum salutation index configured on this parser.
*
* The value defaults to 0 and is handed to every SalutationMapper
* this parser creates.
*
* @return int
*/
public function getMaxSalutationIndex(): int
{
return $this->maxSalutationIndex;
}

/**
* Sets the maximum salutation index that is handed to the SalutationMapper
* instances this parser creates.
*
* NOTE(review): getMappers() only builds its default mappers while none are set,
* so a value set after the mappers exist presumably does not reach them - confirm.
*
* @param int $maxSalutationIndex
* @return Parser this parser, so calls can be chained
*/
public function setMaxSalutationIndex(int $maxSalutationIndex): Parser
{
$this->maxSalutationIndex = $maxSalutationIndex;

return $this;
}
}
10 changes: 10 additions & 0 deletions tests/Mapper/FirstnameMapperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@ public function provider()
new Lastname('Pan'),
],
],
[
'input' => [
'Alfonso',
new Salutation('Mr'),
],
'expectation' => [
new Firstname('Alfonso'),
new Salutation('Mr'),
]
]
];
}

Expand Down
33 changes: 31 additions & 2 deletions tests/ParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -434,10 +434,24 @@ public function provider()
[
'PAUL M LEWIS MR',
[
'salutation' => 'Mr.',
'firstname' => 'Paul',
'initials' => 'M',
'lastname' => 'Lewis',
'lastname' => 'Lewis Mr',
]
],
[
'SUJAN MASTER',
[
'firstname' => 'Sujan',
'lastname' => 'Master',
],
],
[
'JAMES J MA',
[
'firstname' => 'James',
'initials' => 'J',
'lastname' => 'Ma'
]
]
];
Expand Down Expand Up @@ -494,4 +508,19 @@ public function testSetGetNicknameDelimiters()
$this->assertSame('Jim', $parser->parse('[Jim]')->getNickname());
$this->assertNotSame('Jim', $parser->parse('(Jim)')->getNickname());
}

/**
 * Checks the getter/setter round trip for the maximum salutation index and
 * its effect on parsing 'Francis Mr', using a fresh parser per scenario.
 */
public function testSetMaxSalutationIndex()
{
    // maximum salutation index => salutation expected for 'Francis Mr'
    $scenarios = [
        1 => '',
        2 => 'Mr.',
    ];

    foreach ($scenarios as $maxIndex => $expectedSalutation) {
        $parser = new Parser();
        $this->assertSame(0, $parser->getMaxSalutationIndex());

        $parser->setMaxSalutationIndex($maxIndex);
        $this->assertSame($maxIndex, $parser->getMaxSalutationIndex());

        $parsedName = $parser->parse('Francis Mr');
        $this->assertSame($expectedSalutation, $parsedName->getSalutation());
    }
}
}

0 comments on commit 015bb01

Please sign in to comment.