Merge a44a7fd into 6204ac6

slicebox · Mar 23, 2018 · ca360ed · ca360ed
2 parents 6204ac6 + a44a7fd
commit ca360ed
Show file tree

Hide file tree

Showing 18 changed files with 1,113 additions and 252 deletions.
diff --git a/build.sbt b/build.sbt
@@ -4,7 +4,7 @@ import DicomSourceGenerators._
 name := "dicom-streams"
 version := "0.1-SNAPSHOT"
 organization := "se.nimsa"
-scalaVersion := "2.12.4"
+scalaVersion := "2.12.5"
 scalacOptions := Seq("-encoding", "UTF-8", "-Xlint", "-deprecation", "-unchecked", "-feature", "-target:jvm-1.8")
 scalacOptions in (Compile, doc) ++= Seq(
   "-no-link-warnings" // Suppresses problems with Scaladoc @throws links

diff --git a/src/main/scala/se/nimsa/dicom/CharacterSets.scala b/src/main/scala/se/nimsa/dicom/CharacterSets.scala
@@ -0,0 +1,153 @@
+package se.nimsa.dicom
+
+import java.nio.charset.{Charset, StandardCharsets}
+
+import akka.util.ByteString
+import se.nimsa.dicom.VR.VR
+
+/**
+  * Decodes DICOM string values according to a list of specific character set defined terms.
+  *
+  * When more than one term is given, values are scanned for escape sequences (ESC, 0x1b) which switch
+  * the active character set while decoding.
+  *
+  * @param charsetArray the character set defined terms, e.g. `Seq("ISO 2022 IR 6", "ISO 2022 IR 87")`. An
+  *                     empty first item selects the default character set. Unknown terms are ignored.
+  */
+class CharacterSets(val charsetArray: Seq[String]) {
+
+  import CharacterSets._
+
+  // more than one term -> escape sequences may switch character set within a value
+  private val charsetExtensionsEnabled = charsetArray.length > 1
+
+  private val specifiedCharsets =
+    (if (charsetArray.nonEmpty && charsetArray.head.isEmpty) // first item may be empty -> default charset
+      Seq(defaultCharsetObj)
+    else
+      Seq.empty) ++ charsetArray.flatMap(s => charsetsMap.get(s))
+
+  // character set in effect at the start of every value
+  private val initialCharset = specifiedCharsets.headOption.getOrElse(defaultCharsetObj)
+
+  /**
+    * Decode the bytes of a value to a string.
+    *
+    * @param vr value representation of the value; only VRs for which
+    *           `isVrAffectedBySpecificCharacterSet` is true are decoded with the specified character
+    *           sets, all others are decoded with the default character set
+    * @param b  the bytes to decode
+    * @return the decoded string
+    */
+  def decode(vr: VR, b: ByteString): String =
+    if (isVrAffectedBySpecificCharacterSet(vr))
+      decode(b)
+    else
+      defaultOnly.decode(b)
+
+  private def decode(b: ByteString): String =
+    if (charsetExtensionsEnabled)
+      decodeWithExtensions(b)
+    else
+      new String(b.toArray, initialCharset.charset)
+
+  /**
+    * Decode bytes which may contain escape sequences. Text between escape sequences is decoded with the
+    * character set selected by the most recent valid escape sequence. Unknown or truncated escape
+    * sequences do not switch character set; their bytes are decoded as ordinary characters.
+    */
+  private def decodeWithExtensions(b: ByteString): String = {
+    val bytes = b.toArray // copy once instead of once per decoded segment
+    val length = bytes.length
+    var charset = initialCharset
+    var off = 0 // start of the pending, not yet decoded, segment
+    var cur = 0 // current read position
+    val sb = new StringBuilder(length)
+
+    while (cur < length) {
+      if (bytes(cur) == 0x1b) {
+        // ESC: flush text decoded with the charset active so far
+        if (off < cur) sb.append(new String(bytes, off, cur - off, charset.charset))
+        val escStart = cur
+        if (cur + 2 >= length) {
+          // truncated escape sequence at the end of the value, decode what is left as chars
+          sb.append(new String(bytes, escStart, length - escStart, charset.charset))
+          cur = length
+        } else {
+          cur += 3
+          var key = ((bytes(cur - 2) & 0xff) << 8) + (bytes(cur - 1) & 0xff)
+          // these prefixes introduce escape sequences with a third byte after ESC
+          if ((key == 0x2428 || key == 0x2429) && cur < length) {
+            key = (key << 8) + (bytes(cur) & 0xff)
+            cur += 1
+          }
+          // Map.apply throws on unknown keys, so look the key up with get to reach the fallback
+          charset = escToCharset.get(key).getOrElse {
+            // decode invalid ESC sequence (including the ESC byte) as chars
+            sb.append(new String(bytes, escStart, cur - escStart, charset.charset))
+            charset
+          }
+        }
+        off = cur
+      } else // Step -1 -> chars in G0 one byte, chars in G1 two bytes.
+        cur += (if (charset.charlength > 0) charset.charlength else if (bytes(cur) < 0) 2 else 1)
+    }
+
+    // a multi-byte step may have moved past the end of a truncated value
+    val end = math.min(cur, length)
+    if (off < end)
+      sb.append(new String(bytes, off, end - off, charset.charset))
+    sb.toString
+  }
+}
+
+/**
+  * Character set definitions and factory methods for [[CharacterSets]].
+  */
+object CharacterSets {
+
+  /**
+    * Supported character sets keyed by specific character set defined term. Escape sequences are given
+    * as the bytes following the ESC (0x1b) byte.
+    */
+  val charsetsMap: Map[String, CharsetObj] = Map(
+    // Single-Byte Character Sets Without Code Extensions
+    "ISO_IR 100" -> CharsetObj("ISO-8859-1"),
+    "ISO_IR 101" -> CharsetObj("ISO-8859-2"),
+    "ISO_IR 109" -> CharsetObj("ISO-8859-3"),
+    "ISO_IR 110" -> CharsetObj("ISO-8859-4"),
+    "ISO_IR 144" -> CharsetObj("ISO-8859-5"),
+    "ISO_IR 127" -> CharsetObj("ISO-8859-6"),
+    "ISO_IR 126" -> CharsetObj("ISO-8859-7"),
+    "ISO_IR 138" -> CharsetObj("ISO-8859-8"),
+    "ISO_IR 148" -> CharsetObj("ISO-8859-9"),
+    "ISO_IR 13" -> CharsetObj("JIS_X0201"),
+    "ISO_IR 166" -> CharsetObj("TIS-620"),
+    // Single-Byte Character Sets with Code Extensions
+    "ISO 2022 IR 6" -> CharsetObj("ISO-8859-1", 1, Some(ByteString(0x28, 0x42))),
+    "ISO 2022 IR 100" -> CharsetObj("ISO-8859-1", 1, Some(ByteString(0x2d, 0x41))),
+    "ISO 2022 IR 101" -> CharsetObj("ISO-8859-2", 1, Some(ByteString(0x2d, 0x42))),
+    "ISO 2022 IR 109" -> CharsetObj("ISO-8859-3", 1, Some(ByteString(0x2d, 0x43))),
+    "ISO 2022 IR 110" -> CharsetObj("ISO-8859-4", 1, Some(ByteString(0x2d, 0x44))),
+    "ISO 2022 IR 144" -> CharsetObj("ISO-8859-5", 1, Some(ByteString(0x2d, 0x4c))),
+    "ISO 2022 IR 127" -> CharsetObj("ISO-8859-6", 1, Some(ByteString(0x2d, 0x47))),
+    "ISO 2022 IR 126" -> CharsetObj("ISO-8859-7", 1, Some(ByteString(0x2d, 0x46))),
+    // G1 designations for IR 138 and IR 148 are ESC 02/13 04/08 and ESC 02/13 04/13 (first byte 0x2d)
+    "ISO 2022 IR 138" -> CharsetObj("ISO-8859-8", 1, Some(ByteString(0x2d, 0x48))),
+    "ISO 2022 IR 148" -> CharsetObj("ISO-8859-9", 1, Some(ByteString(0x2d, 0x4d))),
+    "ISO 2022 IR 13" -> CharsetObj("JIS_X0201", 1, Some(ByteString(0x29, 0x49))),
+    "ISO 2022 IR 166" -> CharsetObj("TIS-620", 1, Some(ByteString(0x2d, 0x54))),
+    // Multi-Byte Character Sets with Code Extensions
+    "ISO 2022 IR 87" -> CharsetObj("X-JIS0208", 2, Some(ByteString(0x24, 0x42))),
+    "ISO 2022 IR 159" -> CharsetObj("JIS_X0212-1990", 2, Some(ByteString(0x24, 0x28, 0x44))),
+    "ISO 2022 IR 149" -> CharsetObj("EUC-KR", -1, Some(ByteString(0x24, 0x29, 0x43))),
+    "ISO 2022 IR 58" -> CharsetObj("GB2312", -1, Some(ByteString(0x24, 0x29, 0x41))),
+    // Multi-Byte Character Sets Without Code Extensions
+    "ISO_IR 192" -> CharsetObj("UTF-8", -1, None),
+    "GB18030" -> CharsetObj("GB18030", -1, None),
+    "GBK" -> CharsetObj("GBK", -1, None)
+  )
+
+  /**
+    * Character sets keyed by escape sequence, where the key is the bytes following ESC packed into an
+    * integer with the first byte as the most significant.
+    */
+  val escToCharset: Map[Int, CharsetObj] = {
+    val map = charsetsMap.values
+      .collect { case co @ CharsetObj(_, _, Some(esc)) => esc.foldLeft(0)((i, b) => (i << 8) + (b & 0xff)) -> co }
+      .toMap
+
+    // ISO 2022 IR 13 has two escape sequences
+    map + (0x284a -> map(0x2949))
+  }
+
+  val utf8Charset: Charset = StandardCharsets.UTF_8
+  val defaultCharset: Charset = StandardCharsets.ISO_8859_1
+  val defaultCharsetObj: CharsetObj = new CharsetObj(defaultCharset, 1, None)
+  // must be defined after charsetsMap and defaultCharsetObj, which the constructor reads
+  val defaultOnly: CharacterSets = new CharacterSets(Array[String](""))
+
+  /**
+    * Create a [[CharacterSets]] from the bytes of a specific character set value.
+    *
+    * @param specificCharacterSetValue the (possibly multi-valued) value bytes
+    * @return `defaultOnly` if the value is empty or holds a single empty item, otherwise a new instance
+    *         for the parsed terms
+    */
+  def apply(specificCharacterSetValue: ByteString): CharacterSets = {
+    val s = Value.toStrings(VR.CS, CharacterSets.defaultOnly, specificCharacterSetValue)
+    if (s.isEmpty || s.length == 1 && s.head.isEmpty) defaultOnly else new CharacterSets(s)
+  }
+
+  /**
+    * @return `true` for the VRs LO, LT, PN, SH, ST and UT, `false` for all other VRs
+    */
+  def isVrAffectedBySpecificCharacterSet(vr: VR): Boolean =
+    vr match {
+      case VR.LO | VR.LT | VR.PN | VR.SH | VR.ST | VR.UT => true
+      case _ => false
+    }
+
+  /**
+    * Encode a string to bytes using UTF-8.
+    */
+  def encode(s: String): ByteString = ByteString(s.getBytes(utf8Charset))
+}
+
+/**
+  * A character set together with the information needed to decode it in the presence of escape sequences.
+  *
+  * @param charset        the Java charset used for decoding
+  * @param charlength     number of bytes per character; non-positive values are treated by the decoder
+  *                       as variable length
+  * @param escapeSequence bytes of the escape sequence selecting this character set, if any
+  */
+case class CharsetObj(charset: Charset, charlength: Int, escapeSequence: Option[ByteString]) {
+  /** Whether an escape sequence is defined for this character set. */
+  def hasEscapeSeq: Boolean = escapeSequence.nonEmpty
+}
+
+object CharsetObj {
+
+  /** Create a character set description, looking up the charset by name. */
+  def apply(charsetName: String, charlength: Int, escapeSequence: Option[ByteString]): CharsetObj = {
+    val charset = Charset.forName(charsetName)
+    new CharsetObj(charset, charlength, escapeSequence)
+  }
+
+  /** Create a single-byte character set description without escape sequence. */
+  def apply(charsetName: String): CharsetObj =
+    apply(charsetName, 1, None)
+
+}
diff --git a/src/main/scala/se/nimsa/dicom/Value.scala b/src/main/scala/se/nimsa/dicom/Value.scala
@@ -0,0 +1,49 @@
+package se.nimsa.dicom
+
+import java.text.{ParsePosition, SimpleDateFormat}
+import java.util.Date
+
+import akka.util.ByteString
+import se.nimsa.dicom.VR.VR
+
+/**
+  * Conversions from the raw bytes of a value to strings, numbers and dates. Multi-valued input is split
+  * on the backslash delimiter and each item is trimmed. Items which cannot be parsed to the requested
+  * type are dropped from the result rather than reported as errors.
+  */
+object Value {
+
+  import scala.util.Try
+
+  final lazy val multiValueDelimiter = "\\"
+  final lazy val multiValueDelimiterRegexp = "\\\\"
+
+  // SimpleDateFormat is not safe for concurrent use, so a fresh instance is created for each parse
+  // instead of sharing one instance across all callers of this object
+  private def dateFormat1: SimpleDateFormat = new SimpleDateFormat("yyyyMMdd")
+  private def dateFormat2: SimpleDateFormat = new SimpleDateFormat("yyyy.MM.dd")
+
+  def toStrings(vr: VR, characterSets: CharacterSets, bytes: ByteString): Seq[String] = if (bytes.isEmpty) Seq.empty else split(characterSets.decode(vr, bytes))
+  def toSingleString(vr: VR, characterSets: CharacterSets, bytes: ByteString): String = characterSets.decode(vr, bytes)
+  def toLongs(bytes: ByteString): Seq[Long] = if (bytes.isEmpty) Seq.empty[Long] else split(toString(bytes)).flatMap(stringToLong)
+  def toLong(bytes: ByteString): Option[Long] = toLongs(bytes).headOption
+  def toInts(bytes: ByteString): Seq[Int] = if (bytes.isEmpty) Seq.empty else split(toString(bytes)).flatMap(stringToInt)
+  def toInt(bytes: ByteString): Option[Int] = toInts(bytes).headOption
+  def toShorts(bytes: ByteString): Seq[Short] = if (bytes.isEmpty) Seq.empty else split(toString(bytes)).flatMap(stringToShort)
+  def toShort(bytes: ByteString): Option[Short] = toShorts(bytes).headOption
+  def toFloats(bytes: ByteString): Seq[Float] = if (bytes.isEmpty) Seq.empty else split(toString(bytes)).flatMap(stringToFloat)
+  def toFloat(bytes: ByteString): Option[Float] = toFloats(bytes).headOption
+  def toDoubles(bytes: ByteString): Seq[Double] = if (bytes.isEmpty) Seq.empty else split(toString(bytes)).flatMap(stringToDouble)
+  def toDouble(bytes: ByteString): Option[Double] = toDoubles(bytes).headOption
+  def toDates(bytes: ByteString): Seq[Date] = if (bytes.isEmpty) Seq.empty else split(toString(bytes)).flatMap(stringToDate)
+  def toDate(bytes: ByteString): Option[Date] = toDates(bytes).headOption
+
+  private def toString(bytes: ByteString) = bytes.utf8String
+  private def split(s: String) = s.split(multiValueDelimiterRegexp).map(_.trim)
+
+  // Try only catches non-fatal exceptions, so errors such as OutOfMemoryError still propagate
+  private def stringToLong(s: String) = Try(java.lang.Long.parseLong(s)).toOption
+  private def stringToInt(s: String) = Try(java.lang.Integer.parseInt(s)).toOption
+  private def stringToShort(s: String) = Try(java.lang.Short.parseShort(s)).toOption
+  // a decimal comma is accepted in place of a decimal point
+  private def stringToFloat(s: String) = Try(java.lang.Float.parseFloat(s.replace(',', '.'))).toOption
+  private def stringToDouble(s: String) = Try(java.lang.Double.parseDouble(s.replace(',', '.'))).toOption
+
+  /**
+    * Parse a date on the form `yyyyMMdd` or `yyyy.MM.dd`. The whole (trimmed) string must be consumed
+    * by one of the formats, otherwise `None` is returned.
+    */
+  private def stringToDate(s: String): Option[Date] = Try {
+    val x = s.trim
+
+    // parse with a fresh position each time so that no state leaks between the two attempts
+    def parseFully(format: SimpleDateFormat): Option[Date] = {
+      val pos = new ParsePosition(0)
+      Option(format.parse(x, pos)).filter(_ => pos.getIndex == x.length)
+    }
+
+    parseFully(dateFormat1).orElse(parseFully(dateFormat2))
+  }.toOption.flatten
+}
diff --git a/src/main/scala/se/nimsa/dicom/package.scala b/src/main/scala/se/nimsa/dicom/package.scala
@@ -16,6 +16,8 @@
 
 package se.nimsa
 
+import java.nio.{ByteBuffer, ByteOrder}
+
 import akka.util.ByteString
 import se.nimsa.dicom.VR.VR
 
@@ -58,6 +60,8 @@ package object dicom {
   def intToBytes(i: Int, bigEndian: Boolean): ByteString = if (bigEndian) intToBytesBE(i) else intToBytesLE(i)
   def intToBytesBE(i: Int): ByteString = ByteString((i >> 24).toByte, (i >> 16).toByte, (i >> 8).toByte, i.toByte)
   def intToBytesLE(i: Int): ByteString = ByteString(i.toByte, (i >> 8).toByte, (i >> 16).toByte, (i >> 24).toByte)
+  /** Encode a double as 8 bytes in the requested byte order. */
+  def doubleToBytes(d: Double, bigEndian: Boolean): ByteString = {
+    val byteOrder = if (bigEndian) ByteOrder.BIG_ENDIAN else ByteOrder.LITTLE_ENDIAN
+    val buffer = ByteBuffer.allocate(8).order(byteOrder)
+    ByteString(buffer.putDouble(d).array)
+  }
+  //  def longTo4Bytes(i: Long, bigEndian: Boolean): ByteString = if (bigEndian) longTo4BytesBE(i) else longTo4BytesLE(i)
   //  def longTo4Bytes(i: Long, bigEndian: Boolean): ByteString = if (bigEndian) longTo4BytesBE(i) else longTo4BytesLE(i)
   //  def longTo4BytesBE(i: Long): ByteString = ByteString((i >> 24).toByte, (i >> 16).toByte, (i >> 8).toByte, i.toByte)
   //  def longTo4BytesLE(i: Long): ByteString = ByteString(i.toByte, (i >> 8).toByte, (i >> 16).toByte, (i >> 24).toByte)