@@ -14,13 +14,32 @@ genia: genia-nlpba-2004.crf.gz
1414genia-nlpba-2004.crf.gz : # Runs CRFClassifier with genia-nlpba-2004.prop; stdout and stderr are captured in genia-nlpba-2004.out. NOTE(review): the recipe never names $@ -- presumably the .prop file sets the serialized model path; confirm.
1515 java -mx10g edu.stanford.nlp.ie.crf.CRFClassifier -prop genia-nlpba-2004.prop > genia-nlpba-2004.out 2>&1
1616
17- german : german.hgc_175m_600.crf.ser.gz german.dewac_175m_600.crf.ser.gz
18-
19- german.hgc_175m_600.crf.ser.gz :
20- java -mx10g edu.stanford.nlp.ie.crf.CRFClassifier -prop german.hgc_175m_600.prop > german.hgc_175m_600.out 2>&1
21-
22- german.dewac_175m_600.crf.ser.gz :
23- java -mx10g edu.stanford.nlp.ie.crf.CRFClassifier -prop german.dewac_175m_600.prop > german.dewac_175m_600.out 2>&1
17+ # Aggregate target for the German NER models; every prerequisite must match the target of a rule below. We are no longer building/distributing the deWAC model: the data for its distributional similarity classes wasn't clean and can't be recovered.
18+ german : german.conll.crf.ser.gz german.conll.hgc_175m_600.crf.ser.gz german.conll.germeval2014.hgc_175m_600.crf.ser.gz german.conll.germeval2014.europeana.hgc_175m_600.crf.ser.gz
19+
20+ german.conll.crf.ser.gz : # Runs CRFClassifier on the shared german-2018 props with -useDistSim false; the model is serialized to $@ and stdout/stderr go to german.conll.out.
21+ java -mx5g edu.stanford.nlp.ie.crf.CRFClassifier -prop german-2018.hgc_175m_600.prop -useDistSim false -serializeTo $@ \
22+ > $(patsubst %.crf.ser.gz,%.out,$@) 2>&1
23+
24+ german.conll.hgc_175m_600.crf.ser.gz : # Runs CRFClassifier on the shared german-2018 props unchanged; the model is serialized to $@ and stdout/stderr go to german.conll.hgc_175m_600.out.
25+ java -mx10g edu.stanford.nlp.ie.crf.CRFClassifier -prop german-2018.hgc_175m_600.prop -serializeTo $@ \
26+ > $(patsubst %.crf.ser.gz,%.out,$@) 2>&1
27+
28+ german.conll.germeval2014.hgc_175m_600.crf.ser.gz : # Same props, with -trainFileList naming the deu.io.f15 train file plus the GermEval2014 train file; stdout/stderr go to german.conll.germeval2014.hgc_175m_600.out.
29+ java -mx15g edu.stanford.nlp.ie.crf.CRFClassifier -prop german-2018.hgc_175m_600.prop -serializeTo $@ \
30+ -trainFileList /u/nlp/data/german/ner/2016/deu.io.f15.utf8.train,/u/nlp/data/german/ner/2016/GermEval2014_complete_data/NER-de-train-io.tsv \
31+ > $(patsubst %.crf.ser.gz,%.out,$@) 2>&1
32+
33+ # Currently enp_DE.sbb.io is excluded because the data has too many issues; to include it, append the following to -trainFileList:
34+ # ,/u/nlp/data/german/ner/2016/Europeana-Newspapers-data/ner-corpora/enp_DE.sbb.bio/enp_DE.sbb.io
35+ german.conll.germeval2014.europeana.hgc_175m_600.crf.ser.gz : # Same props, with -trainFileList adding the Europeana lft and onb files; stdout/stderr go to german.conll.germeval2014.europeana.hgc_175m_600.out.
36+ java -mx20g edu.stanford.nlp.ie.crf.CRFClassifier -prop german-2018.hgc_175m_600.prop -serializeTo $@ \
37+ -trainFileList /u/nlp/data/german/ner/2016/deu.io.f15.utf8.train,/u/nlp/data/german/ner/2016/GermEval2014_complete_data/NER-de-train-io.tsv,/u/nlp/data/german/ner/2016/Europeana-Newspapers-data/ner-corpora/enp_DE.lft.bio/enp_DE.lft.io,/u/nlp/data/german/ner/2016/Europeana-Newspapers-data/ner-corpora/enp_DE.onb.bio/enp_DE.onb.io \
38+ > $(patsubst %.crf.ser.gz,%.out,$@) 2>&1
39+
40+ # We are no longer building/distributing the deWAC model. The data for its distributional similarity classes wasn't clean and can't be recovered.
41+ # german.dewac_175m_600.crf.ser.gz:
42+ # java -mx10g edu.stanford.nlp.ie.crf.CRFClassifier -prop german.dewac_175m_600.prop > german.dewac_175m_600.out 2>&1
2443
2544
2645all.3class : english.all.3class.nodistsim.crf.ser.gz english.all.3class.caseless.distsim.crf.ser.gz english.all.3class.distsim.crf.ser.gz
0 commit comments