Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mantis 19934 - Parse CSV by CSV standard - rfc4180 #593

Merged
merged 1 commit into from Oct 7, 2019
Merged
Changes from all commits
Commits
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.

Always

Just for now

Use fgetcsv() to parse an import file.

  • Loading branch information
bramley committed Sep 23, 2019
commit 62fa91683166f6a7b06fe621da7011730447df47
@@ -0,0 +1,55 @@
<?php
/**
 * Sequential reader for a delimited text file, built on fgetcsv().
 *
 * Blank lines (which fgetcsv() reports as an array holding a single null)
 * are skipped both when counting rows and when reading them.
 */
class CsvReader
{
    /** Field enclosure character passed to fgetcsv(). */
    const ENCLOSURE = '"';
    /**
     * Escape character passed to fgetcsv(). A NUL byte is used instead of
     * fgetcsv()'s default backslash so that a backslash inside a field is
     * treated as ordinary data.
     */
    const ESCAPE = "\0";

    /** @var resource open handle on the file being read */
    private $fh;
    /** @var string field delimiter */
    private $delimiter;
    /** @var int number of non-blank rows found in the file */
    private $totalRows;

    /**
     * Constructor.
     * Setting auto_detect_line_endings is needed to allow CR as line separator.
     * Reads every row once to count the non-blank rows, then rewinds the
     * handle so that getRow() starts again from the first row.
     *
     * @param string $filename  path of the file to read
     * @param string $delimiter field delimiter
     *
     * @throws RuntimeException when the file cannot be opened for reading
     */
    public function __construct($filename, $delimiter)
    {
        ini_set('auto_detect_line_endings', true);

        $handle = fopen($filename, 'r');

        if ($handle === false) {
            // Fail here with a clear message rather than passing false on
            // to fgetcsv() and rewind().
            throw new RuntimeException(sprintf('Unable to open file "%s" for reading', $filename));
        }
        $this->fh = $handle;
        $this->delimiter = $delimiter;
        $this->totalRows = 0;

        while ($row = $this->readRecord()) {
            if ($row[0] !== null) {
                ++$this->totalRows;
            }
        }
        rewind($this->fh);
    }

    /**
     * Close the file handle when the reader is destroyed.
     */
    public function __destruct()
    {
        if (is_resource($this->fh)) {
            fclose($this->fh);
        }
    }

    /**
     * Return the number of rows in the file, ignoring empty lines.
     * The header row, if the file has one, is included in the count.
     *
     * @return int
     */
    public function totalRows()
    {
        return $this->totalRows;
    }

    /**
     * Return the result of calling fgetcsv() ignoring empty lines.
     *
     * @return array|false|null the fields of the next non-blank row, or
     *                          fgetcsv()'s own falsy result once no row
     *                          can be read
     */
    public function getRow()
    {
        do {
            $row = $this->readRecord();
        } while ($row && $row[0] === null);

        return $row;
    }

    /**
     * Read the next raw record using this reader's delimiter, enclosure
     * and escape settings.
     *
     * @return array|false|null whatever fgetcsv() returns
     */
    private function readRecord()
    {
        return fgetcsv($this->fh, 0, $this->delimiter, self::ENCLOSURE, self::ESCAPE);
    }
}
@@ -4,6 +4,7 @@
require dirname(__FILE__).'/../structure.php';
require dirname(__FILE__).'/../inc/importlib.php';
require dirname(__FILE__).'/../CsvReader.php';
@ob_end_flush();
$status = 'FAIL';
@@ -12,36 +13,24 @@
if (filesize($_SESSION['import_file']) > 50000) {
@ini_set('memory_limit', memory_get_usage() + 50 * filesize($_SESSION['import_file']));
}
$email_list = file_get_contents($_SESSION['import_file']);
flush();
// Clean up email file
$email_list = trim($email_list);
$email_list = str_replace("\r", "\n", $email_list);
$email_list = str_replace("\n\r", "\n", $email_list);
$email_list = str_replace("\n\n", "\n", $email_list);
if ($_SESSION['import_record_delimiter'] != "\n") {
$email_list = str_replace($_SESSION['import_record_delimiter'], "\n", $email_list);
}
// Split file/emails into array
$email_list = explode("\n", $email_list); //WARNING the file contents get replace by an array
output(sprintf('..'.$GLOBALS['I18N']->get('ok, %d lines').'</p>', count($email_list)));
$header = array_shift($email_list);
$total = count($email_list);
$headers = str_getcsv($header, $_SESSION['import_field_delimiter']);
$csvReader = new CsvReader($_SESSION['import_file'], $_SESSION['import_field_delimiter']);
$total = $csvReader->totalRows();
output(sprintf('..'.$GLOBALS['I18N']->get('ok, %d lines').'</p>', $total));
--$total; // now the number of subscribers to be imported
$headers = $csvReader->getRow();
$headers = array_unique($headers);
$_SESSION['columnnames'] = $headers;
//## show progress and adjust working space
if (count($email_list)) {
if ($total > 0) {
$import_field_delimiter = $_SESSION['import_field_delimiter'];
if (count($email_list) > 300 && !$_SESSION['test_import']) {
if ($total > 300 && !$_SESSION['test_import']) {
// this is possibly a time-consuming process, so show a progress bar
echo '<script language="Javascript" type="text/javascript"> document.write(progressmeter); start();</script>';
flush();
// increase the memory to make sure we are not running out
// $mem = sizeof($email_list);
ini_set('memory_limit', '32M');
}
@@ -65,18 +54,17 @@
$c = 1;
$count['invalid_email'] = 0;
$num_lists = count($_SESSION['lists']);
$total = count($email_list);
$cnt = 0;
$count['emailmatch'] = 0;
$count['fkeymatch'] = 0;
$count['dataupdate'] = 0;
$count['duplicate'] = 0;
$additional_emails = 0;
foreach ($email_list as $line) {
while ($values = $csvReader->getRow()) {
set_time_limit(60);
// will contain attributes to store / change
$user = array();
$values = str_getcsv($line, $_SESSION['import_field_delimiter']);
$system_values = array();
foreach ($system_attribute_mapping as $column => $index) {
// print '<br/>'.$column . ' = '. $values[$index];
@@ -114,8 +102,8 @@
$replace = array();
foreach ($_SESSION['import_attribute'] as $key => $val) {
if (!empty($values[$val['index']])) {
$user[$val['index']] = addslashes($values[$val['index']]);
$replace[$key] = addslashes($values[$val['index']]);
$user[$val['index']] = htmlspecialchars($values[$val['index']]);
$replace[$key] = htmlspecialchars($values[$val['index']]);
}
}
} else {
@@ -20,7 +20,6 @@
'#',
"\t",
);
$email_list = array();
$attributes = array();
if (!isset($everyone_groupid)) {
@@ -39,8 +38,9 @@
'http://php.net/post_max_size', 'http://php.net/post_max_size')));
}
require dirname(__FILE__).'//structure.php';
require dirname(__FILE__).'/structure.php';
require dirname(__FILE__).'/inc/importlib.php';
require dirname(__FILE__).'/CsvReader.php';
register_shutdown_function('my_shutdown');
if (!defined('WEBBLER')) {
@@ -184,43 +184,16 @@
if (filesize($_SESSION['import_file']) > 50000) {
@ini_set('memory_limit', memory_get_usage() + 50 * filesize($_SESSION['import_file']));
}
$email_list = file_get_contents($_SESSION['import_file']);
flush();
if (!isset($_SESSION['import_attribute'])) {
$_SESSION['import_attribute'] = array();
}
// Clean up email file
$email_list = trim($email_list);
$email_list = str_replace("\r", "\n", $email_list);
$email_list = str_replace("\n\r", "\n", $email_list);
$email_list = str_replace("\n\n", "\n", $email_list);
if ($_SESSION['import_record_delimiter'] != "\n") {
$email_list = str_replace($_SESSION['import_record_delimiter'], "\n", $email_list);
}
// not sure if we need to check on errors
/*
for($i=0; $i<count($illegal_cha); $i++) {
if( ($illegal_cha[$i] != $import_field_delimiter) && ($illegal_cha[$i] != $import_record_delimiter) && (strpos($header, $illegal_cha[$i]) != false) ) {
$errpos = strpos($email_list, $illegal_cha[$i]);
$startpos = ( $errpos > 20 ) ? $errpos - 20 : 0;
print '<h3>';
printf($GLOBALS['I18N']->get('Error was around here &quot;%s&quot;'),substr( $email_list, $startpos, 40 ));
print '</h3>';
printf('<h3>',$GLOBALS['I18N']->get('Illegal character was %s').'</h3>',$illegal_cha[$i]);
Fatal_Error($GLOBALS['I18N']->get('A character has been found in the import which is not the delimiter indicated, but is likely to be confused for one. Please clean up your import file and try again')." $import_field_delimiter, $import_record_delimiter");
return;
}
};
*/
// Split file/emails into array
$email_list = explode("\n", $email_list); //WARNING the file contents get replace by an array
output(sprintf('..'.$GLOBALS['I18N']->get('ok, %d lines').'</p>', count($email_list)));
$header = array_shift($email_list);
$total = count($email_list);
$headers = str_getcsv($header, $_SESSION['import_field_delimiter']);
$csvReader = new CsvReader($_SESSION['import_file'], $_SESSION['import_field_delimiter']);
$total = $csvReader->totalRows();
output(sprintf('..'.$GLOBALS['I18N']->get('ok, %d lines').'</p>', $total));
--$total; // now the number of subscribers to be imported
$headers = $csvReader->getRow();
$headers = array_unique($headers);
$_SESSION['columnnames'] = $headers;
@@ -436,8 +409,8 @@
echo '<p>'.PageLinkButton($_GET['page'].'&amp;confirm=yes', $GLOBALS['I18N']->get('Confirm Import')).'</p>';
echo '<h3>'.$GLOBALS['I18N']->get('Test Output').'</h3>';
// dbg($_SESSION["import_attribute"]);
} elseif (count($email_list)) {
echo '<h3>'.s('Importing %d subscribers to %d lists, please wait', count($email_list),
} elseif (isset($_GET['confirm']) || isset($_POST['import'])) {
echo '<h3>'.s('Importing %d subscribers to %d lists, please wait', $total,
count($_SESSION['lists'])).'</h3>';
echo $GLOBALS['img_busy'];
echo '<div id="progresscount" style="width: 200; height: 50;">Progress</div>';
@@ -448,14 +421,13 @@
//var_dump($system_attributes);
//## show progress and adjust working space
if (count($email_list)) {
if (!empty($_SESSION['test_import'])) {
$import_field_delimiter = $_SESSION['import_field_delimiter'];
if (count($email_list) > 300 && !$_SESSION['test_import']) {
if ($total > 300 && !$_SESSION['test_import']) {
// this is possibly a time-consuming process, so show a progress bar
echo '<script language="Javascript" type="text/javascript"> document.write(progressmeter); start();</script>';
flush();
// increase the memory to make sure we are not running out
// $mem = sizeof($email_list);
ini_set('memory_limit', '32M');
}
@@ -494,18 +466,17 @@
$c = 1;
$count['invalid_email'] = 0;
$num_lists = count($_SESSION['lists']);
$total = count($email_list);
$cnt = 0;
$count['emailmatch'] = 0;
$count['fkeymatch'] = 0;
$count['dataupdate'] = 0;
$count['duplicate'] = 0;
$additional_emails = 0;
foreach ($email_list as $line) {
while ($values = $csvReader->getRow()) {
set_time_limit(60);
// will contain attributes to store / change
$user = array();
$values = str_getcsv($line, $_SESSION['import_field_delimiter']);
$system_values = array();
foreach ($system_attribute_mapping as $column => $index) {
// print '<br/>'.$column . ' = '. $values[$index];
@@ -540,8 +511,8 @@
$replace = array();
foreach ($_SESSION['import_attribute'] as $key => $val) {
if (!empty($values[$val['index']])) {
$user[$val['index']] = addslashes($values[$val['index']]);
$replace[$key] = addslashes($values[$val['index']]);
$user[$val['index']] = htmlspecialchars($values[$val['index']]);
$replace[$key] = htmlspecialchars($values[$val['index']]);
}
}
} else {
@@ -714,7 +685,7 @@
<td><?php echo $GLOBALS['I18N']->get('Test output') ?>:</td>
<td><input type="checkbox" name="import_test" value="yes" checked="checked"/><?php echo Help('testoutput'); ?></td>
</tr>

<tr>
<td><?php echo $GLOBALS['I18N']->get('Show Warnings') ?>:</td>
<td><input type="checkbox" name="show_warnings" value="yes"/><?php echo Help('showwarnings'); ?></td>
@@ -728,12 +699,12 @@
<td><input type="text" name="assign_invalid" value="<?php echo $GLOBALS['assign_invalid_default'] ?>"/><?php echo Help('assigninvalid'); ?>
</td>
</tr>

<tr>
<td><?php echo $GLOBALS['I18N']->get('Overwrite Existing') ?>:</td>
<td><input type="checkbox" name="overwrite" value="yes" checked="checked"/><?php echo Help('overwriteexisting'); ?></td>
</tr>

<tr>
<td><?php echo $GLOBALS['I18N']->get('Retain Old User Email') ?>:</td>
<td><input type="checkbox" name="retainold" value="yes"/><?php echo Help('retainoldemail'); ?></td>
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.