Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
prepare language-specific model features
  • Loading branch information
kermitt2 committed Jul 17, 2022
1 parent 5122863 commit 856f23a
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 1 deletion.
2 changes: 1 addition & 1 deletion build.gradle
Expand Up @@ -257,7 +257,7 @@ task(quantize_word_embeddings, dependsOn: 'classes', type: JavaExec, group: 'emb
// Task in the 'embeddings' group that runs com.scienceminer.nerd.embeddings.EntityEmbeddings
// as a JavaExec on the main runtime classpath, after 'classes' has been built.
// Command-line options -in, -v, -out and -n are each obtained through getArg(name, fallback);
// presumably getArg returns a user-supplied override or the given fallback (defined outside this view).
task(generate_entity_embeddings, dependsOn: 'classes', type: JavaExec, group: 'embeddings') {
main = 'com.scienceminer.nerd.embeddings.EntityEmbeddings'
classpath = sourceSets.main.runtimeClasspath
// NOTE(review): the two args lines below differ only in the fallback for -n ('10' vs '8').
// This looks like the before/after pair of a diff rendered together; confirm which one the
// actual build.gradle keeps, since declaring both would pass the options twice.
args '-in', getArg('in', 'entity.description'), '-v', getArg('v', 'word.embeddings.quantized'), '-out', getArg('out', 'entity.embeddings.vec'), '-n', getArg('n', '10')
args '-in', getArg('in', 'entity.description'), '-v', getArg('v', 'word.embeddings.quantized'), '-out', getArg('out', 'entity.embeddings.vec'), '-n', getArg('n', '8')
// JVM heap: 2g initial, 8g maximum
jvmArgs '-Xms2g', '-Xmx8g'
}

Expand Down
18 changes: 18 additions & 0 deletions src/main/java/com/scienceminer/nerd/disambiguation/NerdModel.java
Expand Up @@ -45,15 +45,23 @@
import smile.regression.*;
import com.thoughtworks.xstream.*;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Class for sharing data structures and methods to be used by the machine learning models
*/
public class NerdModel {
/**
* The class Logger
*/
private static final Logger LOGGER = LoggerFactory.getLogger(NerdModel.class);

public enum FeatureType {
BASELINE, // only use conditional prob.
MILNE_WITTEN, // Milne and Witten features
MILNE_WITTEN_RELATEDNESS, // only Milne and Witten relatedness measure
MINIMAL, // minimal reasonable set of features (for selector only)
SIMPLE, // basic features in addition to Milne&Witten relatedness
EMBEDDINGS, // only entity embeddings similarity
NERD, // basic features with Milne&Witten and entity embeddings
Expand Down Expand Up @@ -134,4 +142,14 @@ public Double call() throws InvalidParameterException {
return new Double(score);
}
}

/**
 * Convert the name of a feature set into the corresponding {@link FeatureType} constant.
 *
 * Leading and trailing whitespace is ignored; the remaining text must match a constant
 * name exactly (case-sensitive), as required by {@code FeatureType.valueOf}.
 *
 * @param localFeaturesTypeString name of the feature type, may be null
 * @return the matching FeatureType, or null when the argument is null or does not name
 *         a known feature type (the latter case is logged as a warning)
 */
public FeatureType getFeatureTypeFromString(String localFeaturesTypeString) {
    // An absent value is not a malformed one: return null directly instead of relying on
    // a NullPointerException from valueOf and logging its stack trace.
    if (localFeaturesTypeString == null) {
        return null;
    }
    try {
        return FeatureType.valueOf(localFeaturesTypeString.trim());
    } catch (IllegalArgumentException e) {
        // valueOf throws IllegalArgumentException for an unknown constant name;
        // the trailing throwable is logged by SLF4J together with the formatted message
        LOGGER.warn("invalid feature type string: {}", localFeaturesTypeString, e);
        return null;
    }
}
}
22 changes: 22 additions & 0 deletions src/main/java/com/scienceminer/nerd/utilities/NerdConfig.java
Expand Up @@ -67,6 +67,12 @@ public class NerdConfig {
// Wikipedia page in a supported language
private boolean restrictConceptStatementsToWikipediaPages = true;

// name of the feature set to be used for the ranker model;
// null until a value is assigned through setRankerFeatures
private String rankerFeatures = null;

// name of the feature set to be used for the selector model;
// null until a value is assigned through setSelectorFeatures
private String selectorFeatures = null;

/**
 * @return the language code held by this configuration
 */
public String getLangCode() {
return langCode;
}
Expand Down Expand Up @@ -178,4 +184,20 @@ public boolean getRestrictConceptStatementsToWikipediaPages() {
/**
 * Set the restrictConceptStatementsToWikipediaPages flag of this configuration.
 *
 * @param restrict the new value of the flag
 */
public void setRestrictConceptStatementsToWikipediaPages(boolean restrict) {
this.restrictConceptStatementsToWikipediaPages = restrict;
}

/**
 * @return the name of the feature set configured for the ranker model,
 *         or null if none has been set
 */
public String getRankerFeatures() {
    return rankerFeatures;
}

/**
 * Set the name of the feature set to be used for the ranker model.
 * The value is stored as given; no validation is performed here.
 *
 * @param rankerFeatures name of the feature set
 */
public void setRankerFeatures(String rankerFeatures) {
this.rankerFeatures = rankerFeatures;
}

/**
 * @return the name of the feature set configured for the selector model,
 *         or null if none has been set
 */
public String getSelectorFeatures() {
    return selectorFeatures;
}

/**
 * Set the name of the feature set to be used for the selector model.
 * The value is stored as given; no validation is performed here.
 *
 * @param selectorFeatures name of the feature set
 */
public void setSelectorFeatures(String selectorFeatures) {
this.selectorFeatures = selectorFeatures;
}
}

0 comments on commit 856f23a

Please sign in to comment.