Merge pull request #129 from kermitt2/features/add-deep-learning-models

Add deep learning models
lfoppiano · Nov 29, 2021 · cd52f07 · cd52f07
2 parents 7b11d72 + bae4922
commit cd52f07
Show file tree

Hide file tree

Showing 32 changed files with 225,967 additions and 10 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -88,7 +88,7 @@ EXPOSE 8060 8061
 
 #CMD ["java", "-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005", "-jar", "grobid-superconductors/grobid-superconductors-0.2.1-SNAPSHOT-onejar.jar", "server", "grobid-superconductors/config.yml"]
 #CMD ["java", "-agentpath:/usr/local/jprofiler12.0.2/bin/linux-x64/libjprofilerti.so=port=8849", "-jar", "grobid-superconductors/grobid-superconductors-0.2.1-SNAPSHOT-onejar.jar", "server", "grobid-superconductors/config.yml"]
-CMD ["java", "-jar", "grobid-quantities-0.7.0-onejar.jar", "server", "config.yml"]
+CMD ["java", "-jar", "grobid-quantities-0.7.1-SNAPSHOT-onejar.jar", "server", "config.yml"]
 
 ARG GROBID_VERSION
 

diff --git a/README.md b/README.md
@@ -40,9 +40,48 @@ You can find the latest documentation [here](http://grobid-quantities.readthedoc
 ## Evaluation
 The results (Precision, Recall, F-score) for all the models have been obtained using 10-fold cross-validation (average metrics over the 10 folds). 
 
+### BidLSTM + CRF 
+Evaluated on 28/11/2021 using layout features (architecture: BidLSTM_CRF_FEATURES).
+
+#### Quantities
+
+| Labels          | Precision  | Recall      |  F1-Score     |
+|-----------------|------------|-------------|---------------|
+| `<unitLeft>`    | 95.17      |  96.67      |  95.91      |    
+| `<unitRight>`   | 92.52      |  83.64      |  87.69      |    
+| `<valueAtomic>` | 81.74      |  89.21      |  85.30      |    
+| `<valueBase>`   | 100.00     |  75.00      |  85.71      |     
+| `<valueLeast>`  | 89.24      |  82.25      |  85.55      |    
+| `<valueList>`   | 75.27      |  75.33      |  75.12      |  
+| `<valueMost>`   | 89.02      |  81.56      |  85.10      |  
+| `<valueRange>`  | 100.00     |  96.25      |  97.90      |  
+| all (micro avg.)| 87.23      |  89.00      | 88.10       |      
+
+#### Units
+
+| Labels          | Precision  | Recall      |  F1-Score     |
+|---------------- |------------|-------------|---------------|
+| `<base>`        | 98.26      | 98.52       |  98.39        |    
+| `<pow>`         | 100.00     | 98.57       |  99.28        |    
+| `<prefix>`      | 98.89      | 97.75       |  98.30        |    
+| all (micro avg.)| 98.51      |  98.39      |  98.45        |
+
+
+#### Values
+
+| Labels          | Precision  | Recall      |  F1-Score     |
+|-----------------|------------|-------------|---------------|
+| `<alpha>`       | 99.41     |   99.55     |   99.48      |    
+| `<base>`        | 96.67     |   100.00     |  98.00       |    
+| `<number>`      | 99.55     |   98.68     |   99.11      |    
+| `<pow>`         | 72.50     |   75.00     |   73.50      |     
+| `<time>`        | 80.84     |   100.00     |  89.28       |    
+| all (micro avg.)| 98.49      |   98.66     |   98.57        |
+
+### CRF
 Evaluated on 30/04/2020.
 
-### Quantities     
+#### Quantities     
 
 | Labels          | Precision  | Recall      |  F1-Score     |
 |-----------------|------------|-------------|---------------|
@@ -55,7 +94,7 @@ Evaluated on the 30/04/2020.
 | `<valueRange>`  | 90.25      |   88.58     |   88.86       |  
 | all (micro avg.)| 88.96      |   85.4      |   87.14       |      
 
-### Units
+#### Units
 
 Updated on 10/02/2021.
 
@@ -66,7 +105,7 @@ Updated the 10/02/2021
 | `<prefix>`      | 99.5       | 98.76       |  99.13        |    
 | all (micro avg.)| 98.85      |  99.01      |  98.93        |
 
-### Values 
+#### Values 
 
 | Labels          | Precision  | Recall      |  F1-Score     |
 |-----------------|------------|-------------|---------------|

diff --git a/resources/config/config.yml b/resources/config/config.yml
@@ -10,45 +10,45 @@ maxParallelRequests: 0
 models:
   - name: "quantities"
     engine: "wapiti"
-    #engine: "delft"
+#    engine: "delft"
     wapiti:
       # wapiti training parameters, they will be used at training time only
       epsilon: 0.00001
       window: 30
       nbMaxIterations: 2000
     delft:
       # deep learning parameters
-      architecture: "BidLSTM_CRF"
+      architecture: "BidLSTM_CRF_FEATURES"
       #architecture: "scibert"
       useELMo: false
       embeddings_name: "glove-840B"
 
   - name: "units"
     engine: "wapiti"
-    #engine: "delft"
+#    engine: "delft"
     wapiti:
       # wapiti training parameters, they will be used at training time only
       epsilon: 0.00001
       window: 30
       nbMaxIterations: 2000
     delft:
       # deep learning parameters
-      architecture: "BidLSTM_CRF"
+      architecture: "BidLSTM_CRF_FEATURES"
       #architecture: "scibert"
       useELMo: false
       embeddings_name: "glove-840B"
 
   - name: "values"
     engine: "wapiti"
-    #engine: "delft"
+#    engine: "delft"
     wapiti:
       # wapiti training parameters, they will be used at training time only
       epsilon: 0.00001
       window: 30
       nbMaxIterations: 2000
     delft:
       # deep learning parameters
-      architecture: "BidLSTM_CRF"
+      architecture: "BidLSTM_CRF_FEATURES"
       #architecture: "scibert"
       useELMo: false
       embeddings_name: "glove-840B"