-
Notifications
You must be signed in to change notification settings - Fork 822
/
CsvBulkLoader.php
355 lines (310 loc) · 12.2 KB
/
CsvBulkLoader.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
<?php
namespace SilverStripe\Dev;
use League\Csv\MapIterator;
use League\Csv\Reader;
use SilverStripe\Control\Director;
use SilverStripe\ORM\DataObject;
/**
* Utility class to facilitate complex CSV-imports by defining column-mappings
* and custom converters.
*
* Uses the league/csv Reader to process CSV input. Accepts a file path as
* input.
*
* @see http://tools.ietf.org/html/rfc4180
*
* @todo Support for deleting existing records not matched in the import
* (through relation checks)
*/
class CsvBulkLoader extends BulkLoader
{
/**
 * Delimiter character separating the cells of a row (Default: comma).
 *
 * @var string
 */
public $delimiter = ',';
/**
 * Enclosure character (Default: doublequote)
 *
 * @var string
 */
public $enclosure = '"';
/**
 * Identifies whether the CSV has a header row.
 *
 * @var boolean
 */
public $hasHeaderRow = true;
/**
 * Number of lines to split large CSV files into.
 *
 * NOTE(review): nothing in this class as shown reads this setting; confirm
 * whether it is still consumed elsewhere before relying on it.
 *
 * @var int
 *
 * @config
 */
private static $lines = 1000;
/**
 * Run the import for the given file with the preview flag switched on.
 *
 * Simply delegates to {@link processAll()} and hands back its result.
 *
 * @param string $filepath
 *
 * @return BulkLoader_Result
 */
public function preview($filepath)
{
    $previewOnly = true;

    return $this->processAll($filepath, $previewOnly);
}
/**
 * Parse the CSV file at $filepath and pass every row to {@link processRecord()}.
 *
 * Before a row is processed, cells lose the leading tab that guards against
 * spreadsheet formula injection, and columns are renamed according to
 * {@link $columnMap}. Parse failures are caught and printed instead of being
 * rethrown, so the result collected up to that point is always returned.
 *
 * @param string $filepath Path of the CSV file; resolved via Director::getAbsFile()
 * @param boolean $preview Passed through to processRecord(), which skips its writes when true
 *
 * @return BulkLoader_Result
 */
protected function processAll($filepath, $preview = false)
{
    $this->extend('onBeforeProcessAll', $filepath, $preview);

    $result = BulkLoader_Result::create();

    try {
        $filepath = Director::getAbsFile($filepath);
        $csvReader = Reader::createFromPath($filepath, 'r');
        $csvReader->setDelimiter($this->delimiter);
        // Apply the configured enclosure too, so that $enclosure is not silently ignored
        $csvReader->setEnclosure($this->enclosure);
        $csvReader->skipInputBOM();

        $tabExtractor = function ($row, $rowOffset) {
            foreach ($row as &$item) {
                // [SS-2017-007] Ensure all cells with leading tab and then [@=+] have the tab removed on import
                if (preg_match("/^\t[\-@=\+]+.*/", $item ?? '')) {
                    $item = ltrim($item ?? '', "\t");
                }
            }
            // Break the reference so the last cell is not left aliased to $item
            unset($item);

            return $row;
        };

        if ($this->columnMap) {
            $headerMap = $this->getNormalisedColumnMap();

            $remapper = function ($row, $rowOffset) use ($headerMap, $tabExtractor) {
                $row = $tabExtractor($row, $rowOffset);

                foreach ($headerMap as $column => $renamedColumn) {
                    if ($column == $renamedColumn) {
                        continue;
                    }

                    if (array_key_exists($column, $row ?? [])) {
                        // Columns normalised to "_ignore_*" are dropped rather than renamed
                        if (strpos($renamedColumn ?? '', '_ignore_') !== 0) {
                            $row[$renamedColumn] = $row[$column];
                        }

                        unset($row[$column]);
                    }
                }

                return $row;
            };
        } else {
            $remapper = $tabExtractor;
        }

        // Without a header row or a column map there is nothing to name the
        // columns with, so no rows are imported (instead of iterating an
        // undefined variable).
        $rows = [];

        if ($this->hasHeaderRow) {
            // fetchAssoc() only exists on older league/csv releases
            if (method_exists($csvReader, 'fetchAssoc')) {
                $rows = $csvReader->fetchAssoc(0, $remapper);
            } else {
                $csvReader->setHeaderOffset(0);
                $rows = new MapIterator($csvReader->getRecords(), $remapper);
            }
        } elseif ($this->columnMap) {
            if (method_exists($csvReader, 'fetchAssoc')) {
                $rows = $csvReader->fetchAssoc($headerMap, $remapper);
            } else {
                $rows = new MapIterator($csvReader->getRecords($headerMap), $remapper);
            }
        }

        foreach ($rows as $row) {
            $this->processRecord($row, $this->columnMap, $result, $preview);
        }
    } catch (\Exception $e) {
        $failedMessage = sprintf("Failed to parse %s", $filepath);

        if (Director::isDev()) {
            // Append instead of re-formatting: the path may contain "%" and
            // must never be interpreted as a sprintf() format string.
            $failedMessage .= sprintf(" because %s", $e->getMessage());
        }

        print $failedMessage . PHP_EOL;
    }

    $this->extend('onAfterProcessAll', $result, $preview);

    return $result;
}
/**
 * Build a copy of {@link $columnMap} in which every value is a usable column name.
 *
 * - Values starting with "->" (callback mappings) are replaced by the column's own name.
 * - `null` values become "_ignore_" followed by the column name, because the map
 *   must consist of unique scalar values and `null` is neither unique nor scalar;
 *   the prefix lets such columns be recognised and removed later.
 * - Every other value is kept as is.
 *
 * @return array
 */
protected function getNormalisedColumnMap()
{
    $normalised = [];

    foreach ($this->columnMap as $csvColumn => $target) {
        if ($target === null) {
            $normalised[$csvColumn] = '_ignore_' . $csvColumn;
            continue;
        }

        $isCallback = strpos($target, '->') === 0;
        $normalised[$csvColumn] = $isCallback ? $csvColumn : $target;
    }

    return $normalised;
}
/**
 * Import a single CSV row into a new or existing object of the loader's object class.
 *
 * The row is walked twice: the first pass resolves relation columns and stores
 * the relation IDs on the object, the second pass copies the values across
 * (via a "->" callback from $this->columnMap, an import{Field}() method on the
 * object, or update()). While previewing, no write() call is made by this
 * method and the second pass is skipped entirely.
 *
 * @todo Better messages for relation checks and duplicate detection
 *
 * @param array $record Row data keyed by column name
 * @param array $columnMap Only handed on to findExistingObject(); mapping lookups read $this->columnMap
 * @param BulkLoader_Result $results Receives the created or updated object
 * @param boolean $preview
 *
 * @return int ID of the processed object
 */
protected function processRecord($record, $columnMap, &$results, $preview = false)
{
    $objectClass = $this->objectClass;

    // Reuse a matching record when there is one, otherwise start a fresh object
    $existingObj = $this->findExistingObject($record, $columnMap);
    /** @var DataObject $obj */
    $obj = $existingObj ?: new $objectClass();
    $schema = DataObject::getSchema();

    // Pass 1: find/create relations and store them on the object. This cannot be
    // merged with pass 2, as other columns might rely on the relation being present.
    foreach ($record as $fieldName => $val) {
        // No point querying for a value that is not set
        if ($this->isNullValue($val)) {
            continue;
        }

        if (isset($this->relationCallbacks[$fieldName])) {
            // A custom search method locates the related object for this value
            $callbackSpec = $this->relationCallbacks[$fieldName];
            $relationName = $callbackSpec['relationname'];
            $callback = $callbackSpec['callback'];

            /** @var DataObject $relationObj */
            $relationObj = null;
            if ($this->hasMethod($callback)) {
                $relationObj = $this->{$callback}($obj, $val, $record);
            } elseif ($obj->hasMethod($callback)) {
                $relationObj = $obj->{$callback}($val, $record);
            }

            // Nothing usable found: create a new related object instead
            if (!($relationObj && $relationObj->exists())) {
                $relationClass = $schema->hasOneComponent(get_class($obj), $relationName);
                $relationObj = new $relationClass();
                if (!$preview) {
                    $relationObj->write();
                }
            }
        } elseif (strpos($fieldName ?? '', '.') !== false) {
            // Relation column in dot notation: only the part before the first dot is needed here
            $relationName = explode('.', $fieldName ?? '')[0];

            // getComponent() always yields a component (either empty or existing)
            $relationObj = $obj->getComponent($relationName);
            if (!$preview) {
                $relationObj->write();
            }
        } else {
            // Plain column, handled in pass 2
            continue;
        }

        // Shared tail of both relation branches: link the relation to the object
        $obj->{"{$relationName}ID"} = $relationObj->ID;
        if (!$preview) {
            $obj->write();
            $obj->flushCache(); // avoid relation caching confusion
        }
    }

    // Pass 2: save data (never while previewing)
    if (!$preview) {
        foreach ($record as $fieldName => $val) {
            // Look up the mapping to see whether this column maps to a callback
            $mapping = ($this->columnMap && isset($this->columnMap[$fieldName]))
                ? $this->columnMap[$fieldName]
                : '';

            if (strpos($mapping ?? '', '->') === 0) {
                $funcName = substr($mapping ?? '', 2);
                $this->$funcName($obj, $val, $record);
                continue;
            }

            $importer = "import{$fieldName}";
            if ($obj->hasMethod($importer)) {
                $obj->{$importer}($val, $record);
            } else {
                $obj->update([$fieldName => $val]);
            }
        }
    }

    // Captured before the final write so extensions can see whether data changed
    $isChanged = $obj->isChanged();

    if (!$preview) {
        $obj->write();
    }

    // @todo better message support
    $message = '';

    if ($existingObj) {
        // Marked as updated regardless of isChanged, since custom formatters and
        // importers might have affected relationships and other records.
        $results->addUpdated($obj, $message);
    } else {
        $results->addCreated($obj, $message);
    }

    $this->extend('onAfterProcessRecord', $obj, $preview, $isChanged);

    $objID = $obj->ID;

    // Release the objects to keep memory usage down
    $obj->destroy();
    unset($existingObj, $obj);

    return $objID;
}
/**
 * Find an existing object based on one or more uniqueness columns
 * specified via {@link self::$duplicateChecks}.
 *
 * The checks are evaluated in order and the first one that yields a record wins.
 * A check is either a field name (string), which is matched against the value of
 * the same-named column, or an array with a 'callback' key naming a method on the
 * importer or on the object class that receives the column value and the whole row.
 *
 * @todo support $columnMap
 *
 * @param array $record CSV data column
 * @param array $columnMap Currently not used by this method
 * @return DataObject|false The first matching record, or false when no check matches
 * @throws \RuntimeException If a callback exists on neither the importer nor the object class
 * @throws \InvalidArgumentException If a check is neither a string nor an array with a 'callback' key
 */
public function findExistingObject($record, $columnMap = [])
{
    $SNG_objectClass = singleton($this->objectClass);

    foreach ($this->duplicateChecks as $fieldName => $duplicateCheck) {
        if (is_string($duplicateCheck)) {
            // Skip this check if the field value is missing or empty
            // (note that empty() also treats the string "0" as empty)
            if (empty($record[$duplicateCheck])) {
                continue;
            }

            // Look for an existing record with this value
            $existingRecord = DataObject::get($this->objectClass)
                ->filter($duplicateCheck, $record[$duplicateCheck])
                ->first();
        } elseif (is_array($duplicateCheck) && isset($duplicateCheck['callback'])) {
            $callback = $duplicateCheck['callback'];
            // The row may not contain the column this check is keyed on; hand the
            // callback null in that case instead of reading an undefined index.
            $value = $record[$fieldName] ?? null;

            if ($this->hasMethod($callback)) {
                $existingRecord = $this->{$callback}($value, $record);
            } elseif ($SNG_objectClass->hasMethod($callback)) {
                $existingRecord = $SNG_objectClass->{$callback}($value, $record);
            } else {
                throw new \RuntimeException(
                    "CsvBulkLoader::processRecord():"
                    . " {$duplicateCheck['callback']} not found on importer or object class."
                );
            }
        } else {
            throw new \InvalidArgumentException(
                'CsvBulkLoader::processRecord(): Wrong format for $duplicateChecks'
            );
        }

        if ($existingRecord) {
            return $existingRecord;
        }
    }

    return false;
}
/**
 * Tell whether loaded files are parsed with named columns.
 *
 * True when {@link self::$hasHeaderRow} is enabled, and otherwise whenever
 * {@link self::$columnMap} is set (i.e. not null).
 *
 * @return boolean
 */
public function hasHeaderRow()
{
    if ($this->hasHeaderRow) {
        return true;
    }

    return isset($this->columnMap);
}
}