Skip to content

Commit

Permalink
Merge a44a7fd into 6204ac6
Browse files Browse the repository at this point in the history
  • Loading branch information
KarlSjostrand committed Mar 23, 2018
2 parents 6204ac6 + a44a7fd commit ca360ed
Show file tree
Hide file tree
Showing 18 changed files with 1,113 additions and 252 deletions.
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import DicomSourceGenerators._
name := "dicom-streams"
version := "0.1-SNAPSHOT"
organization := "se.nimsa"
scalaVersion := "2.12.4"
scalaVersion := "2.12.5"
scalacOptions := Seq("-encoding", "UTF-8", "-Xlint", "-deprecation", "-unchecked", "-feature", "-target:jvm-1.8")
scalacOptions in (Compile, doc) ++= Seq(
"-no-link-warnings" // Suppresses problems with Scaladoc @throws links
Expand Down
153 changes: 153 additions & 0 deletions src/main/scala/se/nimsa/dicom/CharacterSets.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
package se.nimsa.dicom

import java.nio.charset.{Charset, StandardCharsets}

import akka.util.ByteString
import se.nimsa.dicom.VR.VR

/**
  * Decodes the bytes of DICOM string values using the character sets named by a list of
  * character set terms.
  *
  * @param charsetArray character set terms to use. An empty first item selects the default
  *                     character set. Terms that are not keys of `CharacterSets.charsetsMap`
  *                     are ignored. When more than one term is given, escape sequences in the
  *                     data are honoured to switch between character sets.
  */
class CharacterSets(val charsetArray: Seq[String]) {

  import CharacterSets._

  // More than one term means the data may switch character set by means of escape sequences.
  private val charsetExtensionsEnabled = charsetArray.length > 1

  private val specifiedCharsets =
    (if (charsetArray.nonEmpty && charsetArray.head.isEmpty) // first item may be empty -> default charset
      Seq(defaultCharsetObj)
    else
      Seq.empty) ++ charsetArray.flatMap(s => charsetsMap.get(s))

  // Character set in effect at the start of every value; falls back to the default when
  // none of the supplied terms is recognised.
  private val getInitialCharset = specifiedCharsets.headOption.getOrElse(defaultCharsetObj)

  /**
    * Decodes `b` to a string.
    *
    * @param vr value representation of the data; only the VRs accepted by
    *           `isVrAffectedBySpecificCharacterSet` are decoded with the configured character
    *           sets, all others are decoded with the default character set
    * @param b  the raw bytes
    * @return the decoded string
    */
  def decode(vr: VR, b: ByteString): String =
    if (!isVrAffectedBySpecificCharacterSet(vr))
      defaultOnly.decode(b)
    else
      decode(b)

  private def decode(b: ByteString): String =
    if (charsetExtensionsEnabled)
      decodeWithExtensions(b)
    else
      new String(b.toArray, getInitialCharset.charset)

  /**
    * Decodes data that may contain escape sequences. Each ESC (0x1b) is followed by a two or
    * three byte designator which is looked up in `escToCharset`. Unknown or truncated escape
    * sequences are decoded as ordinary characters of the character set currently in effect
    * instead of failing.
    */
  private def decodeWithExtensions(b: ByteString): String = {
    val bytes = b.toArray // copy once instead of once per decoded segment
    val length = bytes.length
    var charset = getInitialCharset
    var off = 0 // start of the segment not yet appended to sb
    var cur = 0 // current read position
    val sb = new StringBuilder(length)

    while (cur < length)
      if (bytes(cur) == 0x1b) {
        // ESC: flush what has been read so far using the character set in effect
        if (off < cur) sb.append(new String(bytes, off, cur - off, charset.charset))
        val escStart = cur
        if (cur + 2 < length) {
          cur += 3
          var key = ((bytes(cur - 2) & 0xff) << 8) + (bytes(cur - 1) & 0xff)
          // these two prefixes introduce a three byte designator, if a third byte is available
          if ((key == 0x2428 || key == 0x2429) && cur < length) {
            key = (key << 8) + (bytes(cur) & 0xff)
            cur += 1
          }
          escToCharset.get(key) match {
            case Some(next) => charset = next
            case None =>
              // decode invalid ESC sequence as chars, keeping the current character set
              sb.append(new String(bytes, escStart, cur - escStart, charset.charset))
          }
        } else {
          // truncated ESC sequence at the end of the data: decode the remainder as chars
          sb.append(new String(bytes, escStart, length - escStart, charset.charset))
          cur = length
        }
        off = cur
      } else // Step -1 -> chars in G0 one byte, chars in G1 two bytes.
        cur += (if (charset.charlength > 0) charset.charlength else if (bytes(cur) < 0) 2 else 1)

    // cur may have stepped past the end when the last multi-byte character is truncated
    val end = math.min(cur, length)
    if (off < end)
      sb.append(new String(bytes, off, end - off, charset.charset))
    sb.toString
  }
}

/**
  * Lookup tables and factory for [[CharacterSets]].
  */
object CharacterSets {

  /** Supported character set terms mapped to their Java charset, character length and escape sequence. */
  val charsetsMap: Map[String, CharsetObj] = Map(
    // Single-Byte Character Sets Without Code Extensions
    "ISO_IR 100" -> CharsetObj("ISO-8859-1"),
    "ISO_IR 101" -> CharsetObj("ISO-8859-2"),
    "ISO_IR 109" -> CharsetObj("ISO-8859-3"),
    "ISO_IR 110" -> CharsetObj("ISO-8859-4"),
    "ISO_IR 144" -> CharsetObj("ISO-8859-5"),
    "ISO_IR 127" -> CharsetObj("ISO-8859-6"),
    "ISO_IR 126" -> CharsetObj("ISO-8859-7"),
    "ISO_IR 138" -> CharsetObj("ISO-8859-8"),
    "ISO_IR 148" -> CharsetObj("ISO-8859-9"),
    "ISO_IR 13" -> CharsetObj("JIS_X0201"),
    "ISO_IR 166" -> CharsetObj("TIS-620"),
    // Single-Byte Character Sets with Code Extensions
    "ISO 2022 IR 6" -> CharsetObj("ISO-8859-1", 1, Some(ByteString(0x28, 0x42))),
    "ISO 2022 IR 100" -> CharsetObj("ISO-8859-1", 1, Some(ByteString(0x2d, 0x41))),
    "ISO 2022 IR 101" -> CharsetObj("ISO-8859-2", 1, Some(ByteString(0x2d, 0x42))),
    "ISO 2022 IR 109" -> CharsetObj("ISO-8859-3", 1, Some(ByteString(0x2d, 0x43))),
    "ISO 2022 IR 110" -> CharsetObj("ISO-8859-4", 1, Some(ByteString(0x2d, 0x44))),
    "ISO 2022 IR 144" -> CharsetObj("ISO-8859-5", 1, Some(ByteString(0x2d, 0x4c))),
    "ISO 2022 IR 127" -> CharsetObj("ISO-8859-6", 1, Some(ByteString(0x2d, 0x47))),
    "ISO 2022 IR 126" -> CharsetObj("ISO-8859-7", 1, Some(ByteString(0x2d, 0x46))),
    // IR 138 and IR 148 are designated with ESC 0x2d like the other ISO-8859 parts above
    "ISO 2022 IR 138" -> CharsetObj("ISO-8859-8", 1, Some(ByteString(0x2d, 0x48))),
    "ISO 2022 IR 148" -> CharsetObj("ISO-8859-9", 1, Some(ByteString(0x2d, 0x4d))),
    "ISO 2022 IR 13" -> CharsetObj("JIS_X0201", 1, Some(ByteString(0x29, 0x49))),
    "ISO 2022 IR 166" -> CharsetObj("TIS-620", 1, Some(ByteString(0x2d, 0x54))),
    // Multi-Byte Character Sets with Code Extensions
    "ISO 2022 IR 87" -> CharsetObj("X-JIS0208", 2, Some(ByteString(0x24, 0x42))),
    "ISO 2022 IR 159" -> CharsetObj("JIS_X0212-1990", 2, Some(ByteString(0x24, 0x28, 0x44))),
    "ISO 2022 IR 149" -> CharsetObj("EUC-KR", -1, Some(ByteString(0x24, 0x29, 0x43))),
    "ISO 2022 IR 58" -> CharsetObj("GB2312", -1, Some(ByteString(0x24, 0x29, 0x41))),
    // Multi-Byte Character Sets Without Code Extensions
    "ISO_IR 192" -> CharsetObj("UTF-8", -1, None),
    "GB18030" -> CharsetObj("GB18030", -1, None),
    "GBK" -> CharsetObj("GBK", -1, None)
  )

  /**
    * Character sets keyed by their escape sequence, where the bytes following ESC are packed
    * big-endian into an Int (e.g. 0x24, 0x28, 0x44 becomes 0x242844).
    */
  val escToCharset: Map[Int, CharsetObj] = {
    val byEscape = charsetsMap.values.flatMap { co =>
      co.escapeSequence.map(seq => seq.foldLeft(0)((key, b) => (key << 8) + (b & 0xff)) -> co)
    }.toMap

    // ISO 2022 IR 13 has two escape sequences; 0x2949 is the one declared in charsetsMap
    byEscape + (0x284a -> byEscape(0x2949))
  }

  val utf8Charset: Charset = StandardCharsets.UTF_8
  val defaultCharset: Charset = StandardCharsets.ISO_8859_1
  val defaultCharsetObj: CharsetObj = new CharsetObj(defaultCharset, 1, None)

  /** Character sets instance that always decodes with the default character set. */
  val defaultOnly: CharacterSets = new CharacterSets(Array[String](""))

  /**
    * Creates a [[CharacterSets]] from the raw bytes of a multi-valued character set value.
    *
    * @param specificCharacterSetValue the value bytes, decoded as VR CS with the default character set
    * @return `defaultOnly` when the value is empty or holds a single empty item, otherwise a new
    *         instance for the listed terms
    */
  def apply(specificCharacterSetValue: ByteString): CharacterSets = {
    val s = Value.toStrings(VR.CS, CharacterSets.defaultOnly, specificCharacterSetValue)
    if (s.isEmpty || s.length == 1 && s.head.isEmpty) defaultOnly else new CharacterSets(s)
  }

  /** True for the value representations whose text is decoded using the configured character sets. */
  def isVrAffectedBySpecificCharacterSet(vr: VR): Boolean =
    vr match {
      case VR.LO | VR.LT | VR.PN | VR.SH | VR.ST | VR.UT => true
      case _ => false
    }

  /** Encodes `s` as UTF-8 bytes. */
  def encode(s: String): ByteString = ByteString(s.getBytes(utf8Charset))
}

/**
  * A character set together with the information needed to step through and switch to it.
  *
  * @param charset        the Java charset used to decode bytes
  * @param charlength     number of bytes per character when positive; for non-positive values the
  *                       decoder in `CharacterSets` decides per byte between one and two bytes
  * @param escapeSequence bytes following ESC that select this character set, if it has any
  */
case class CharsetObj(charset: Charset, charlength: Int, escapeSequence: Option[ByteString]) {

  /** Whether an escape sequence is associated with this character set. */
  def hasEscapeSeq: Boolean = escapeSequence.nonEmpty
}

/**
  * Convenience constructors that look the charset up by name.
  * `Charset.forName` throws if the name is illegal or the charset is not supported by the JVM.
  */
object CharsetObj {

  /** Builds a [[CharsetObj]] for the charset called `charsetName`. */
  def apply(charsetName: String, charlength: Int, escapeSequence: Option[ByteString]): CharsetObj = {
    val charset = Charset.forName(charsetName)
    new CharsetObj(charset, charlength, escapeSequence)
  }

  /** Builds a single-byte [[CharsetObj]] without escape sequence for the charset called `charsetName`. */
  def apply(charsetName: String): CharsetObj = {
    val charset = Charset.forName(charsetName)
    new CharsetObj(charset, 1, None)
  }

}
49 changes: 49 additions & 0 deletions src/main/scala/se/nimsa/dicom/Value.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package se.nimsa.dicom

import java.text.{ParsePosition, SimpleDateFormat}
import java.util.Date

import akka.util.ByteString
import se.nimsa.dicom.VR.VR

/**
  * Conversions from the raw bytes of a value to strings, numbers and dates. Multi-valued data is
  * split on the backslash delimiter; items that cannot be parsed are silently dropped.
  */
object Value {

  import scala.util.Try

  final lazy val multiValueDelimiter = "\\"
  final lazy val multiValueDelimiterRegexp = "\\\\"

  // SimpleDateFormat is not thread-safe, so every thread gets its own instances.
  private def threadLocalFormat(pattern: String): ThreadLocal[SimpleDateFormat] =
    new ThreadLocal[SimpleDateFormat] {
      override def initialValue(): SimpleDateFormat = new SimpleDateFormat(pattern)
    }

  private final val dateFormat1: ThreadLocal[SimpleDateFormat] = threadLocalFormat("yyyyMMdd")
  private final val dateFormat2: ThreadLocal[SimpleDateFormat] = threadLocalFormat("yyyy.MM.dd")

  /** Decodes `bytes` with `characterSets` and splits the result into trimmed items; empty input gives no items. */
  def toStrings(vr: VR, characterSets: CharacterSets, bytes: ByteString): Seq[String] =
    if (bytes.isEmpty) Seq.empty else split(characterSets.decode(vr, bytes))

  /** Decodes `bytes` with `characterSets` without splitting or trimming. */
  def toSingleString(vr: VR, characterSets: CharacterSets, bytes: ByteString): String =
    characterSets.decode(vr, bytes)

  def toLongs(bytes: ByteString): Seq[Long] = if (bytes.isEmpty) Seq.empty[Long] else split(toString(bytes)).flatMap(stringToLong)
  def toLong(bytes: ByteString): Option[Long] = toLongs(bytes).headOption
  def toInts(bytes: ByteString): Seq[Int] = if (bytes.isEmpty) Seq.empty else split(toString(bytes)).flatMap(stringToInt)
  def toInt(bytes: ByteString): Option[Int] = toInts(bytes).headOption
  def toShorts(bytes: ByteString): Seq[Short] = if (bytes.isEmpty) Seq.empty else split(toString(bytes)).flatMap(stringToShort)
  def toShort(bytes: ByteString): Option[Short] = toShorts(bytes).headOption
  def toFloats(bytes: ByteString): Seq[Float] = if (bytes.isEmpty) Seq.empty else split(toString(bytes)).flatMap(stringToFloat)
  def toFloat(bytes: ByteString): Option[Float] = toFloats(bytes).headOption
  def toDoubles(bytes: ByteString): Seq[Double] = if (bytes.isEmpty) Seq.empty else split(toString(bytes)).flatMap(stringToDouble)
  def toDouble(bytes: ByteString): Option[Double] = toDoubles(bytes).headOption

  /** Parses each item as a date of the form `yyyyMMdd` or `yyyy.MM.dd`. */
  def toDates(bytes: ByteString): Seq[Date] = if (bytes.isEmpty) Seq.empty else split(toString(bytes)).flatMap(stringToDate)
  def toDate(bytes: ByteString): Option[Date] = toDates(bytes).headOption

  private def toString(bytes: ByteString) = bytes.utf8String
  private def split(s: String) = s.split(multiValueDelimiterRegexp).map(_.trim)

  // Try only catches non-fatal exceptions, so errors such as OutOfMemoryError still propagate.
  private def stringToLong(s: String): Option[Long] = Try(java.lang.Long.parseLong(s)).toOption
  private def stringToInt(s: String): Option[Int] = Try(java.lang.Integer.parseInt(s)).toOption
  private def stringToShort(s: String): Option[Short] = Try(java.lang.Short.parseShort(s)).toOption
  // a decimal comma is accepted in addition to a decimal point
  private def stringToFloat(s: String): Option[Float] = Try(java.lang.Float.parseFloat(s.replace(',', '.'))).toOption
  private def stringToDouble(s: String): Option[Double] = Try(java.lang.Double.parseDouble(s.replace(',', '.'))).toOption

  // Succeeds only when the format consumes the whole text.
  private def parseFully(format: SimpleDateFormat, text: String): Option[Date] = {
    val pos = new ParsePosition(0)
    val date = format.parse(text, pos)
    if (pos.getIndex == text.length) Option(date) else None
  }

  private def stringToDate(s: String): Option[Date] = Try {
    val x = s.trim
    parseFully(dateFormat1.get, x).orElse(parseFully(dateFormat2.get, x))
  }.toOption.flatten
}
4 changes: 4 additions & 0 deletions src/main/scala/se/nimsa/dicom/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

package se.nimsa

import java.nio.{ByteBuffer, ByteOrder}

import akka.util.ByteString
import se.nimsa.dicom.VR.VR

Expand Down Expand Up @@ -58,6 +60,8 @@ package object dicom {
/** Encodes `i` as four bytes in the requested byte order. */
def intToBytes(i: Int, bigEndian: Boolean): ByteString =
  if (bigEndian) intToBytesBE(i)
  else intToBytesLE(i)

/** Encodes `i` as four bytes, most significant byte first. */
def intToBytesBE(i: Int): ByteString = {
  val b3 = (i >> 24).toByte
  val b2 = (i >> 16).toByte
  val b1 = (i >> 8).toByte
  val b0 = i.toByte
  ByteString(b3, b2, b1, b0)
}

/** Encodes `i` as four bytes, least significant byte first. */
def intToBytesLE(i: Int): ByteString = {
  val b0 = i.toByte
  val b1 = (i >> 8).toByte
  val b2 = (i >> 16).toByte
  val b3 = (i >> 24).toByte
  ByteString(b0, b1, b2, b3)
}

/** Encodes `d` as eight bytes in the requested byte order. */
def doubleToBytes(d: Double, bigEndian: Boolean): ByteString = {
  val order = if (bigEndian) ByteOrder.BIG_ENDIAN else ByteOrder.LITTLE_ENDIAN
  val buffer = ByteBuffer.wrap(new Array[Byte](8)).order(order)
  buffer.putDouble(d)
  ByteString(buffer.array)
}
// def longTo4Bytes(i: Long, bigEndian: Boolean): ByteString = if (bigEndian) longTo4BytesBE(i) else longTo4BytesLE(i)
// def longTo4Bytes(i: Long, bigEndian: Boolean): ByteString = if (bigEndian) longTo4BytesBE(i) else longTo4BytesLE(i)
// def longTo4BytesBE(i: Long): ByteString = ByteString((i >> 24).toByte, (i >> 16).toByte, (i >> 8).toByte, i.toByte)
// def longTo4BytesLE(i: Long): ByteString = ByteString(i.toByte, (i >> 8).toByte, (i >> 16).toByte, (i >> 24).toByte)
Expand Down
Loading

0 comments on commit ca360ed

Please sign in to comment.