/
LastnameMapper.php
225 lines (190 loc) · 5.62 KB
/
LastnameMapper.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
<?php
namespace TheIconic\NameParser\Mapper;
use TheIconic\NameParser\LanguageInterface;
use TheIconic\NameParser\Part\AbstractPart;
use TheIconic\NameParser\Part\Lastname;
use TheIconic\NameParser\Part\LastnamePrefix;
use TheIconic\NameParser\Part\Nickname;
use TheIconic\NameParser\Part\Salutation;
use TheIconic\NameParser\Part\Suffix;
/**
 * Maps the trailing, still unmapped entries of a parts array to
 * Lastname and LastnamePrefix parts.
 *
 * The parts array is walked backwards (from its last index towards index 0)
 * without being reordered. Entries that are already AbstractPart instances
 * are never overwritten by the main mapping loop.
 *
 * NOTE(review): getKey() and hasUnmappedPartsBefore() are not defined in this
 * class; they are presumably inherited from AbstractMapper.
 */
class LastnameMapper extends AbstractMapper
{
    /**
     * Known lastname prefixes, indexed by the key that getKey() produces for
     * a part. The stored value is handed to the LastnamePrefix constructor as
     * its second argument.
     *
     * @var array
     */
    protected $prefixes = [];

    /**
     * When false, map() leaves arrays with fewer than two parts untouched.
     *
     * @var bool
     */
    protected $matchSinglePart = false;

    /**
     * @param array $prefixes lastname prefixes indexed by their getKey() key
     * @param bool $matchSinglePart whether arrays with fewer than two parts are mapped as well
     */
    public function __construct(array $prefixes, bool $matchSinglePart = false)
    {
        $this->prefixes = $prefixes;
        $this->matchSinglePart = $matchSinglePart;
    }

    /**
     * map lastnames in the parts array
     *
     * Arrays with fewer than two parts are returned unchanged unless
     * single part matching was enabled in the constructor.
     *
     * @param array $parts the name parts
     * @return array the mapped parts
     */
    public function map(array $parts): array
    {
        if (!$this->matchSinglePart && count($parts) < 2) {
            return $parts;
        }

        return $this->mapParts($parts);
    }

    /**
     * we map the parts in reverse order because it makes more
     * sense to parse for the lastname starting from the end
     *
     * Mapping starts at the last part that is not ignored and ends at the
     * first already mapped part, at index 0, or as soon as
     * shouldStopMapping() reports that the lastname is complete.
     * If the loop did not create a single Lastname, the trailing ignored
     * parts are converted to lastnames instead.
     *
     * @param array $parts
     * @return array
     */
    protected function mapParts(array $parts): array
    {
        // +1 compensates for the pre-decrement in the loop condition
        $k = $this->skipIgnoredParts($parts) + 1;
        $remapIgnored = true;

        while (--$k >= 0) {
            $part = $parts[$k];

            // anything that has been mapped already ends the lastname
            if ($part instanceof AbstractPart) {
                break;
            }

            if ($this->isFollowedByLastnamePart($parts, $k)) {
                if ($this->isApplicablePrefix($parts, $k)) {
                    $parts[$k] = new LastnamePrefix($part, $this->prefixes[$this->getKey($part)]);
                    continue;
                }

                if ($this->shouldStopMapping($parts, $k)) {
                    break;
                }
            }

            $parts[$k] = new Lastname($part);
            $remapIgnored = false;
        }

        if ($remapIgnored) {
            $parts = $this->remapIgnored($parts);
        }

        return $parts;
    }

    /**
     * skip through the parts we want to ignore and return the start index
     *
     * @param array $parts
     * @return int index of the last part that is not ignored, or -1 if every
     *             part is ignored (or the array is empty)
     */
    protected function skipIgnoredParts(array $parts): int
    {
        $k = count($parts);

        while (--$k >= 0) {
            if (!$this->isIgnoredPart($parts[$k])) {
                break;
            }
        }

        return $k;
    }

    /**
     * indicates if we should stop mapping at the given index $k
     *
     * the assumption is that lastname parts have already been found
     * but we want to see if we should add more parts
     *
     * Mapping stops when $k is the first index, when the following part is a
     * lastname prefix, or when the following part is at least three
     * characters long.
     *
     * @param array $parts
     * @param int $k
     * @return bool
     */
    protected function shouldStopMapping(array $parts, int $k): bool
    {
        if ($k < 1) {
            return true;
        }

        $lastPart = $parts[$k + 1];

        if ($lastPart instanceof LastnamePrefix) {
            return true;
        }

        $value = (string) $lastPart->getValue();

        // Count characters rather than bytes where possible, so that a short
        // name written with multi-byte UTF-8 letters is measured the same way
        // as its ASCII spelling. Falls back to the byte length when the
        // mbstring extension is not available.
        $length = function_exists('mb_strlen') ? mb_strlen($value, 'UTF-8') : strlen($value);

        return $length >= 3;
    }

    /**
     * indicates if the given part should be ignored (skipped) during mapping
     *
     * Suffix, Nickname and Salutation parts are ignored.
     *
     * @param $part
     * @return bool
     */
    protected function isIgnoredPart($part)
    {
        return $part instanceof Suffix || $part instanceof Nickname || $part instanceof Salutation;
    }

    /**
     * remap ignored parts as lastname
     *
     * if the mapping did not derive any lastname this is called to transform
     * any previously ignored parts into lastname parts
     *
     * Only the run of ignored parts at the end of the array is converted;
     * the walk stops at the first part (seen from the end) that is not ignored.
     *
     * @param array $parts
     * @return array
     */
    protected function remapIgnored(array $parts): array
    {
        $k = count($parts);

        while (--$k >= 0) {
            $part = $parts[$k];

            if (!$this->isIgnoredPart($part)) {
                break;
            }

            $parts[$k] = new Lastname($part);
        }

        return $parts;
    }

    /**
     * indicates if the part at the given index is followed by a lastname
     * part, not counting any nickname parts in between
     *
     * @param array $parts
     * @param int $index
     * @return bool
     */
    protected function isFollowedByLastnamePart(array $parts, int $index): bool
    {
        $next = $this->skipNicknameParts($parts, $index + 1);

        return (isset($parts[$next]) && $parts[$next] instanceof Lastname);
    }

    /**
     * Assuming that the part at the given index is matched as a prefix,
     * determines if the prefix should be applied to the lastname.
     *
     * The part has to be a known prefix and hasUnmappedPartsBefore() has to
     * confirm it for the given index (presumably: other unmapped parts are
     * left in front of it, which prioritises firstname over prefix matching).
     * That a lastname part already follows is checked by the caller.
     *
     * This expects the parts array and index to be in the original order.
     *
     * @param array $parts
     * @param int $index
     * @return bool
     */
    protected function isApplicablePrefix(array $parts, int $index): bool
    {
        if (!$this->isPrefix($parts[$index])) {
            return false;
        }

        return $this->hasUnmappedPartsBefore($parts, $index);
    }

    /**
     * check if the given word is a lastname prefix
     *
     * @param string $word the word to check
     * @return bool true if the key derived from the word exists in the prefixes
     */
    protected function isPrefix($word): bool
    {
        return (array_key_exists($this->getKey($word), $this->prefixes));
    }

    /**
     * find the next non-nickname index in parts
     *
     * @param $parts
     * @param $startIndex
     * @return int index of the first part at or after $startIndex that is not
     *             a Nickname; the last index of the array (count - 1) if
     *             there is no such part
     */
    protected function skipNicknameParts($parts, $startIndex)
    {
        $total = count($parts);

        for ($i = $startIndex; $i < $total; $i++) {
            if (!($parts[$i] instanceof Nickname)) {
                return $i;
            }
        }

        return $total - 1;
    }
}