Skip to content

Commit

Permalink
Merge pull request #33 from mennodekker/master
Browse files Browse the repository at this point in the history
Fix for #30
  • Loading branch information
tiamo committed Jun 16, 2019
2 parents d2feb9d + e760ef3 commit 1b6ec69
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 70 deletions.
116 changes: 66 additions & 50 deletions src/Sav/Record/Data.php
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,10 @@ public function read(Buffer $buffer)
for ($case = 0; $case < $casesCount; $case++) {
$parent = -1;
$octs = 0;
foreach ($variables as $index => $var) {

$varCount = count($variables);
$varNum = 0;
for($index = 0; $index < $varCount; $index++) {
$var = $variables[$index];
$isNumeric = $var->width == 0;
$width = isset($var->write[2]) ? $var->write[2] : $var->width;

Expand All @@ -96,7 +98,7 @@ public function read(Buffer $buffer)

if ($isNumeric) {
if (! $compressed) {
$this->matrix[$case][$index] = $buffer->readDouble();
$this->matrix[$case][$varNum] = $buffer->readDouble();
} else {
$opcode = $this->readOpcode($buffer);
switch ($opcode) {
Expand All @@ -108,61 +110,64 @@ public function read(Buffer $buffer)
);
break;
case self::OPCODE_RAW_DATA;
$this->matrix[$case][$index] = $buffer->readDouble();
$this->matrix[$case][$varNum] = $buffer->readDouble();
break;
case self::OPCODE_SYSMISS;
$this->matrix[$case][$index] = $sysmis;
$this->matrix[$case][$varNum] = $sysmis;
break;
default:
$this->matrix[$case][$index] = $opcode - $bias;
$this->matrix[$case][$varNum] = $opcode - $bias;
break;
}
}
} else {
do {
$val = '';
if (! $compressed) {
$val = $buffer->readString(8);
} else {
$opcode = $this->readOpcode($buffer);
switch ($opcode) {
case self::OPCODE_NOP;
break;
case self::OPCODE_EOF;
throw new Exception(
'Error reading data: unexpected end of compressed data file (cluster code 252)'
);
break;
case self::OPCODE_RAW_DATA;
$val = $buffer->readString(8);
break;
case self::OPCODE_WHITESPACES;
$val = ' ';
break;
}
}

if ($parent >= 0) {
$this->matrix[$case][$parent] .= $val;
$octs--;
if ($octs <= 0) {
$this->matrix[$case][$parent] = rtrim($this->matrix[$case][$parent]);
$parent = -1;
$width = isset($veryLongStrings[$var->name]) ? $veryLongStrings[$var->name] : $width;
$this->matrix[$case][$varNum] = '';
$segmentsCount = Utils::widthToSegments($width);
$opcode = self::OPCODE_RAW_DATA;
$index = $index - 1;
for ($s = 0; $s < $segmentsCount; $s++) {
$segWidth = Utils::segmentAllocWidth($width, $s);
$octs = Utils::widthToOcts($segWidth);
$index = $index + $octs; // Skip a few variables for this segment
if ($opcode === self::OPCODE_NOP || $opcode === self::OPCODE_EOF) {
// If next segments are empty too, skip
$continue;
}
for ($i = $segWidth; $i > 0; $i -= 8) {
if ($segWidth = 255) {
$chunkSize = min($i, 8);
} else {
$chunkSize = 8;
}
} else {
$width = isset($veryLongStrings[$var->name]) ? $veryLongStrings[$var->name] : $width;
if ($width > 0) {
$octs = Utils::widthToOcts($width) - 1; // Buffer::roundUp($width, 8) / 8) -1;
if ($octs > 0) {
$parent = $index;
} else {
$val = rtrim($val);

$val = '';
if (! $compressed) {
$val = $buffer->readString(8);
} else {
$opcode = $this->readOpcode($buffer);
switch ($opcode) {
case self::OPCODE_NOP;
break 2;
case self::OPCODE_EOF;
throw new Exception(
'Error reading data: unexpected end of compressed data file (cluster code 252)'
);
break 2;
case self::OPCODE_RAW_DATA;
$val = $buffer->readString(8);
break;
case self::OPCODE_WHITESPACES;
$val = ' ';
break;
}
$this->matrix[$case][$index] = $val;
}
$this->matrix[$case][$varNum] .= $val;
}
} while ($octs > 0);
$this->matrix[$case][$varNum] = rtrim($this->matrix[$case][$varNum]);
}
}
$varNum++;
}
}
}
Expand Down Expand Up @@ -208,13 +213,18 @@ public function write(Buffer $buffer)

/** @var Record\Info[] $info */
$info = $buffer->context->info;

$veryLongStrings = [];
if (isset($info[Record\Info\VeryLongString::SUBTYPE])) {
$veryLongStrings = $info[Record\Info\VeryLongString::SUBTYPE]->toArray();
}

if (isset($info[Record\Info\MachineFloatingPoint::SUBTYPE])) {
$sysmis = $info[Record\Info\MachineFloatingPoint::SUBTYPE]->sysmis;
} else {
$sysmis = NAN;
}

$dataBuffer = Buffer::factory('', ['memory' => true]);

for ($case = 0; $case < $casesCount; $case++) {
Expand Down Expand Up @@ -243,20 +253,26 @@ public function write(Buffer $buffer)
$buffer->writeString($value, Utils::roundUp($width, 8));
} else {
$offset = 0;
$width = isset($veryLongStrings[$var->name]) ? $veryLongStrings[$var->name] : $width;
$segmentsCount = Utils::widthToSegments($width);
for ($s = 0; $s < $segmentsCount; $s++) {
$segWidth = Utils::segmentAllocWidth($width, $s);
for ($i = $segWidth; $i > 0; $i -= 8, $offset += 8) {
// $chunkSize = min($i, 8);
$val = substr($value, $offset, 8); // Read 8 byte segements, don't use mbsubstr here
for ($i = $segWidth; $i > 0; $i -= 8) {
if ($segWidth = 255) {
$chunkSize = min($i, 8);
} else {
$chunkSize = 8;
}
$val = substr($value, $offset, $chunkSize); // Read 8 byte segements, don't use mbsubstr here
if ($val == "") {
$this->writeOpcode($buffer, $dataBuffer, self::OPCODE_WHITESPACES);
} else {
$this->writeOpcode($buffer, $dataBuffer, self::OPCODE_RAW_DATA);
$dataBuffer->writeString($val, 8);
}
$offset += $chunkSize;
}
}
}
}
}
}
Expand Down
6 changes: 5 additions & 1 deletion src/Sav/Record/Info.php
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,11 @@ public function offsetGet($offset)
*/
public function offsetSet($offset, $value)
{
$this->data[$offset] = $value;
if (is_null($offset)) {
$this->data[] = $value;
} else {
$this->data[$offset] = $value;
}
}

/**
Expand Down
10 changes: 10 additions & 0 deletions src/Sav/Record/Info/CharacterEncoding.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@ class CharacterEncoding extends Info
* @var string
*/
public $value;

/**
* CharacterEncoding constructor.
*
* Stores the given value in the public $value property; nothing else is
* initialised here.
* NOTE(review): no parent constructor is called — confirm the parent Info
* class does not need one.
*
* @param string $value Value to keep in $this->value (presumably an encoding name such as 'UTF-8').
*/
public function __construct($value)
{
$this->value = $value;
}

/**
* @param Buffer $buffer
Expand Down
5 changes: 3 additions & 2 deletions src/Sav/Record/Info/VeryLongString.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@ public function read(Buffer $buffer)
public function write(Buffer $buffer)
{
if ($this->data) {
$data = '';
$data = [];
foreach ($this->data as $key => $value) {
$data .= sprintf('%s=%05d%c', $key, $value, 0) . self::DELIMITER;
$data[] = sprintf('%s=%05d%c', $key, $value, 0);
}
$data = join(self::DELIMITER, $data);
$this->dataCount = strlen($data);
parent::write($buffer);
$buffer->writeString($data);
Expand Down
16 changes: 10 additions & 6 deletions src/Sav/Record/Variable.php
Original file line number Diff line number Diff line change
Expand Up @@ -169,18 +169,22 @@ public function write(Buffer $buffer)

// Write additional segments for very long string variables.
if (self::isVeryLong($this->width)) {
$this->writeBlank($buffer, $seg0width);
$segmentCount = Utils::widthToSegments($this->width);
for ($i = 1; $i < $segmentCount; $i++) {
$segmentWidth = Utils::segmentAllocWidth($this->width, $i);
$format = Utils::bytesToInt([0, max($segmentWidth, 1), 1, 0]);

$format = Utils::bytesToInt([0, 1, max($segmentWidth, 1), 0]);
$buffer->writeInt(self::TYPE);
$buffer->writeInt($segmentWidth);
$buffer->writeInt(0); // No variable label
$buffer->writeInt($hasLabel); // No variable label
$buffer->writeInt(0); // No missing values
$buffer->writeInt($format); // Print format
$buffer->writeInt($format); // Write format
$buffer->writeString($this->getSegmentName($i), 8);
$buffer->writeString($this->getSegmentName($i - 1), 8);
if ($hasLabel) {
$buffer->writeInt($labelLengthBytes);
$buffer->writeString($label, Utils::roundUp($labelLengthBytes, 4));
}

$this->writeBlank($buffer, $segmentWidth);
}
Expand Down Expand Up @@ -214,8 +218,8 @@ public function getSegmentName($seg = 0)
{
// TODO: refactory
$name = $this->name;
$name = mb_substr($name, 0, 8);
$name = mb_substr($name, 0, -mb_strlen($seg)) . $seg;
$name = mb_substr($name, 0, 6);
$name .= $seg;

return mb_strtoupper($name);
}
Expand Down
7 changes: 4 additions & 3 deletions src/Sav/Writer.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ public function write($data)
$this->info[Record\Info\VariableAttributes::SUBTYPE] = new Record\Info\VariableAttributes();
$this->info[Record\Info\LongStringValueLabels::SUBTYPE] = new Record\Info\LongStringValueLabels();
$this->info[Record\Info\LongStringMissingValues::SUBTYPE] = new Record\Info\LongStringMissingValues();
$this->info[Record\Info\CharacterEncoding::SUBTYPE] = new Record\Info\CharacterEncoding('UTF-8');

$this->data = new Record\Data();

Expand Down Expand Up @@ -118,8 +119,8 @@ public function write($data)

$variable = new Record\Variable();

// TODO: refactory
$variable->name = 'V' . str_pad($idx + 1, 7, 0, STR_PAD_LEFT);
// TODO: refactory - keep 7 positions so we can add after that for 100 very long string segments
$variable->name = 'V' . str_pad($idx + 1, 5, 0, STR_PAD_LEFT);
// $variable->name = strtoupper($var->name);

// TODO: test
Expand Down Expand Up @@ -197,7 +198,7 @@ public function write($data)

$segmentCount = Utils::widthToSegments($var->width);
for ($i = 0; $i < $segmentCount; $i++) {
$this->info[Record\Info\VariableDisplayParam::SUBTYPE][$idx] = [
$this->info[Record\Info\VariableDisplayParam::SUBTYPE][] = [
$var->getMeasure(),
$var->getColumns(),
$var->getAlignment(),
Expand Down
2 changes: 1 addition & 1 deletion src/Utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ public static function widthToBytes($width)
} else {
$chunks = $width / Variable::EFFECTIVE_VLS_CHUNK;
$remainder = $width % Variable::EFFECTIVE_VLS_CHUNK;
$bytes = $remainder + ($chunks * self::roundUp(Variable::REAL_VLS_CHUNK, 8));
$bytes = floor($chunks) * Variable::REAL_VLS_CHUNK + $remainder;
}

return self::roundUp($bytes, 8);
Expand Down
68 changes: 68 additions & 0 deletions tests/LongStringTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
<?php

namespace SPSS\Tests;

use SPSS\Sav\Reader;
use SPSS\Sav\Variable;
use SPSS\Sav\Writer;

class LongStringTest extends TestCase
{
    /**
     * Writes two cases containing a 300-wide string variable followed by an
     * 8-wide string variable, reads the result back, and checks that the data
     * matrix returned by the reader holds exactly the values that were written.
     */
    public function testLongString()
    {
        // Per-variable case values: index 0 is the first case, index 1 the second.
        $longValues = [
            str_repeat('1234567890', 30),
            str_repeat('abcdefghij', 30),
        ];
        $shortValues = [
            '12345678',
            'abcdefgh',
        ];

        $longVariable = [
            'name' => 'long',
            'label' => 'long label',
            'width' => 300,
            'format' => Variable::FORMAT_TYPE_A,
            'attributes' => [
                '$@Role' => Variable::ROLE_INPUT,
            ],
            'data' => $longValues,
        ];

        $shortVariable = [
            'name' => 'short',
            'label' => 'short label',
            'format' => Variable::FORMAT_TYPE_A,
            'width' => 8,
            'attributes' => [
                '$@Role' => Variable::ROLE_INPUT,
            ],
            'data' => $shortValues,
        ];

        $data = [
            'header' => [
                'prodName' => '@(#) IBM SPSS STATISTICS',
                'layoutCode' => 2,
                'creationDate' => '08 May 19',
                'creationTime' => '12:22:16',
            ],
            'variables' => [
                $longVariable,
                $shortVariable,
            ],
        ];

        $writer = new Writer($data);

        // To inspect the generated file in SPSS itself, enable the line below.
        //$writer->save('longString.sav');
        $buffer = $writer->getBuffer();
        $buffer->rewind();

        $reader = Reader::fromString($buffer->getStream())->read();

        // The input is organised per variable while the reader's matrix is
        // indexed [case][variable], so transpose the input to build the
        // expectation.
        $expected = [];
        foreach ($data['variables'] as $column => $variable) {
            foreach ($variable['data'] as $row => $value) {
                $expected[$row][$column] = $value;
            }
        }

        $this->assertEquals($expected, $reader->data);
    }
}
}

0 comments on commit 1b6ec69

Please sign in to comment.