Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Fixed issue #13 encoding problems

Merge branch 'feature/import-encoding' into develop
  • Loading branch information...
commit 8eeda7a5e404ea3cc85dcdbc633f2c58b6066ec6 2 parents fb2ef94 + 8678b1c
Martin Westergaard Lassen mwl authored
2  sc-dbgenerate/src/main/java/dk/sst/snomedcave/dbgenerate/Initiator.java
View
@@ -14,6 +14,8 @@ public static void main(String[] args) throws IOException {
System.out.println("args[" + i + "] = " + args[i]);
}
+ logger.info("Using file.encoding=" + System.getProperty("file.encoding"));
+
File db = new File(SnomedParser.STORE_DIR);
logger.info("Using neo4j store path=" + db.getAbsolutePath());
if (db.exists()) {
12 sc-dbgenerate/src/main/java/dk/sst/snomedcave/dbgenerate/SnomedParser.java
View
@@ -67,7 +67,7 @@ private BeanReader getBeanReader(String classpathPath, String config) {
}
private void readTerms() {
- BeanReader in = getBeanReader("/data/20120828_SNOMEDCT_subset_med_allergier.csv", "terms");
+ BeanReader in = getBeanReader("/data/1000005/sct_descriptions.txt", "terms");
Object record;
while ((record = in.read()) != null) {
@@ -78,11 +78,7 @@ private void readTerms() {
else if ("term".equals(in.getRecordName())) {
final Map<String, Object> term = (Map<String, Object>) record;
- try {
- conceptTerms.put((String) term.get("conceptId"), new String(((String) term.get("term")).getBytes(), "ISO-8859-1"));
- } catch (UnsupportedEncodingException e) {
- throw new RuntimeException("encoding", e);
- }
+ conceptTerms.put((String) term.get("conceptId"), (String) term.get("term"));
}
else {
logger.warn("unable to parse \"" + record + "\"");
@@ -94,7 +90,7 @@ else if ("term".equals(in.getRecordName())) {
public void importConcept() {
readTerms();
long startTime = currentTimeMillis();
- BeanReader in = getBeanReader("/data/0/sct_concepts_20120813T134009.txt", "concepts");
+ BeanReader in = getBeanReader("/data/0/sct_concepts.txt", "concepts");
Object record;
int count = 0;
@@ -142,7 +138,7 @@ private long saveConcept(final Map<String, Object> concept) {
}
public void importRelationships() {
- BeanReader in = getBeanReader("/data/0/sct_relationships_20120813T134009.txt", "relationships");
+ BeanReader in = getBeanReader("/data/0/sct_relationships.txt", "relationships");
Object record;
long startTime = currentTimeMillis();
30 sc-dbgenerate/src/main/resources/beanio-terms.xml
View
@@ -1,30 +1,34 @@
<?xml version="1.0" encoding="UTF-8"?>
<beanio xmlns="http://www.beanio.org/2012/03">
<!--
-Begrebs ID;Fuldt specificerende term;Foretrukken term ID;Foretrukken term;;
-416098002;Drug allergy (disorder);4743381000005110;allergi over for lægemiddel;;
-293959006;Alcohol metabolism modifier allergy (disorder);3844511000005110;allergi over for alkoholstofskiftemodifikator;;
-294420000;Alcohol products allergy (disorder);3951121000005110;allergi over for alkoholprodukter;;
-
-
+DESCRIPTIONID DESCRIPTIONSTATUS CONCEPTID TERM INITIALCAPITALSTATUS DESCRIPTIONTYPE LANGUAGECODE
+411000005111 5 202450006 Villonodular synovitis of other tarsal joint 0 1 da-DK
+421000005117 0 9284003 Corpus cavernosum penis 0 1 da-DK
+431000005119 5 181422007 Hele prostata 0 1 da-DK
-->
<stream name="terms" format="delimited">
<parser>
- <property name="delimiter" value=";"/>
+ <property name="delimiter" value="\t"/>
</parser>
<!-- 'class' binds the header record to a java.util.HashMap -->
<record name="header" class="map">
- <field name="ConceptId" literal="Begrebs ID" rid="true"/>
- <field name="FullySpecifiedName" literal="Fuldt specificerende term" rid="true"/>
- <field name="TermId" literal="Foretrukken term ID" rid="true"/>
- <field name="Term" literal="Foretrukken term" rid="true"/>
+ <field name="DescriptionId" literal="DESCRIPTIONID" rid="true"/>
+ <field name="DescriptionStatus" literal="DESCRIPTIONSTATUS" rid="true"/>
+ <field name="ConceptId" literal="CONCEPTID" rid="true"/>
+ <field name="Term" literal="TERM" rid="true"/>
+ <field name="InitialCapitalStatus" literal="INITIALCAPITALSTATUS" rid="true"/>
+ <field name="DescriptionType" literal="DESCRIPTIONTYPE" rid="true"/>
+ <field name="LanguageCode" literal="LANGUAGECODE" rid="true"/>
</record>
<record name="term" class="map">
+ <field name="descriptionId"/>
+ <field name="descriptionStatus"/>
<field name="conceptId"/>
- <field name="fullyspecifiedName"/>
- <field name="termId"/>
<field name="term"/>
+ <field name="initialCapitalStatus" type="int"/>
+ <field name="descriptionType" type="int"/>
+ <field name="languageCode"/>
</record>
</stream>
</beanio>
0  ...resources/data/0/sct_concepts_20120813T134009.txt → ...nerate/src/main/resources/data/0/sct_concepts.txt
View
File renamed without changes
0  ...urces/data/0/sct_descriptions_20120813T134009.txt → ...te/src/main/resources/data/0/sct_descriptions.txt
View
File renamed without changes
0  ...rces/data/0/sct_relationships_20120813T134009.txt → ...e/src/main/resources/data/0/sct_relationships.txt
View
File renamed without changes
0  ...ces/data/1000005/sct_concepts_20120430T153107.txt → .../src/main/resources/data/1000005/sct_concepts.txt
View
File renamed without changes
15,081 ...data/1000005/sct_descriptions_20120430T153107.txt → .../main/resources/data/1000005/sct_descriptions.txt
View
10,378 additions, 4,703 deletions not shown
0  ...ata/1000005/sct_relationships_20120430T153107.txt → ...main/resources/data/1000005/sct_relationships.txt
View
File renamed without changes
1,454 sc-dbgenerate/src/main/resources/data/20120828_SNOMEDCT_subset_med_allergier.csv
View
0 additions, 1,454 deletions not shown
Please sign in to comment.
Something went wrong with that request. Please try again.