You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
I have been testing this parser library, intending to use it in a project, and have noted some opportunities for changes. They take the following forms:
Improvements to support for v2.1 format (while the library targets 3.0 some support exists for 2.1 so it makes sense to improve it)
An addition to support non-standard parameters which may be found "in the wild"
Comments on possible weaknesses in the current code
My changes are as follows, in patch form:
@@ -50,11 +50,13 @@
private static $Spec_ElementTypes = array(
'email' => array('internet', 'x400', 'pref'),
'adr' => array('dom', 'intl', 'postal', 'parcel', 'home', 'work', 'pref'),
'label' => array('dom', 'intl', 'postal', 'parcel', 'home', 'work', 'pref'),
- 'tel' => array('home', 'msg', 'work', 'pref', 'voice', 'fax', 'cell', 'video', 'pager', 'bbs', 'modem', 'car', 'isdn', 'pcs')
+ 'tel' => array('home', 'msg', 'work', 'pref', 'voice', 'fax', 'cell', 'video', 'pager', 'bbs', 'modem', 'car', 'isdn', 'pcs'),
+ 'url' => array('work', 'home'), // not per RFC but may be present in practice
+ 'impp' => array('personal','business','home','work','mobile','pref') // http://tools.ietf.org/html/rfc4770
);
private static $Spec_FileElements = array('photo', 'logo', 'sound');
/**
@@ -138,16 +140,23 @@
$this -> Data[] = new vCard(false, $SinglevCardRawData);
}
}
else
{
+ // Protect the BASE64 final = sign (detected by the line beginning with whitespace), otherwise the next replace will get rid of it
+ $this -> RawData = preg_replace('/(\n\s.+)=(\n)/', '$1-base64=-$2', $this -> RawData);
+
// Joining multiple lines that are split with a hard wrap and indicated by an equals sign at the end of line
+ // i.e. quoted-printable-encoded values in v2.1 vCards
$this -> RawData = str_replace("=\n", '', $this -> RawData);
// Joining multiple lines that are split with a soft wrap (space or tab on the beginning of the next line
$this -> RawData = str_replace(array("\n ", "\n\t"), '-wrap-', $this -> RawData);
+ // Restore the BASE64 =
+ $this -> RawData = str_replace("-base64=-\n", "=\n", $this -> RawData);
+
$Lines = explode("\n", $this -> RawData);
foreach ($Lines as $Line)
{
// Lines without colons are skipped because, most likely, they contain no data.
@@ -160,10 +169,11 @@
// value is just the value
list($Key, $Value) = explode(':', $Line, 2);
// Key is transformed to lowercase because, even though the element and parameter names are written in uppercase,
// it is quite possible that they will be in lower- or mixed case.
+ // The key is trimmed to allow for non-significant WSP characters as allowed by v2.1
$Key = strtolower(trim(self::Unescape($Key)));
// These two lines can be skipped as they aren't necessary at all.
if ($Key == 'begin' || $Key == 'end')
{
@@ -183,10 +193,13 @@
else
{
$Value = str_replace('-wrap-', '', $Value);
}
+ // The value is trimmed to allow for non-significant WSP characters as allowed by v2.1
+ // (This might remove white space which a v3.0 parser is meant to retain)
+ // WARNING! unescaping ; characters at this point causes the ParseStructuredValue later to parse the values wrongly!
$Value = trim(self::Unescape($Value));
$Type = array();
// Here additional parameters are parsed
$KeyParts = explode(';', $Key);
@@ -201,20 +214,22 @@
{
switch ($ParamKey)
{
case 'encoding':
$Encoding = $ParamValue;
- if ($ParamValue == 'b')
+ // Allow for both v3.0 B and v2.1 BASE64 encoding parameter values
+ //if ($ParamValue == 'b')
+ if (in_array($ParamValue, array('b', 'base64')))
{
//$Value = base64_decode($Value);
}
- elseif ($ParamValue == 'quoted-printable')
+ elseif ($ParamValue == 'quoted-printable') // v2.1
{
$Value = quoted_printable_decode($Value);
}
break;
- case 'charset':
+ case 'charset': // v2.1
if ($ParamValue != 'utf-8' && $ParamValue != 'utf8')
{
$Value = mb_convert_encoding($Value, 'UTF-8', $ParamValue);
}
break;
@@ -336,11 +351,13 @@
}
if (is_writable($TargetPath) || (!file_exists($TargetPath) && is_writable(dirname($TargetPath))))
{
$RawContent = $this -> Data[$Key][$Index]['Value'];
- if (isset($this -> Data[$Key][$Index]['Encoding']) && $this -> Data[$Key][$Index]['Encoding'] == 'b')
+ // Allow for both v3.0 B and v2.1 BASE64 encoding parameter values
+ //if (isset($this -> Data[$Key][$Index]['Encoding']) && $this -> Data[$Key][$Index]['Encoding'] == 'b')
+ if (isset($this -> Data[$Key][$Index]['Encoding']) && (in_array($this -> Data[$Key][$Index]['Encoding'], array('b', 'base64'))) )
{
$RawContent = base64_decode($RawContent);
}
$Status = file_put_contents($TargetPath, $RawContent);
return (bool)$Status;
@@ -493,10 +510,12 @@
* @return string Resulting text.
*/
private static function Unescape($Text)
{
return str_replace(array('\:', '\;', '\,', "\n"), array(':', ';', ',', ''), $Text);
+ // The line above removes encoded newlines ("\n") rather than converting them back to CRLF character sequences!
+ // "\N" should be treated in the same way as "\n"
}
/**
* Separates the various parts of a structured value according to the spec.
*
@@ -582,11 +601,13 @@
else
{
switch ($Parameter[0])
{
case 'encoding':
- if (in_array($Parameter[1], array('quoted-printable', 'b')))
+ // Allow for both v3.0 B and v2.1 BASE64 encoding parameter values
+ //if (in_array($Parameter[1], array('quoted-printable', 'b')))
+ if (in_array($Parameter[1], array('quoted-printable', 'b', 'base64')))
{
$Result['encoding'] = $Parameter[1];
}
break;
case 'charset':
The text was updated successfully, but these errors were encountered:
I have been testing this parser library, intending to use it in a project, and have noted some opportunities for changes. They take the following forms:
My changes are as follows, in patch form:
The text was updated successfully, but these errors were encountered: