diff --git a/cellbase-build/pom.xml b/cellbase-build/pom.xml
index fbdeb3e02d..7fe108aa0d 100644
--- a/cellbase-build/pom.xml
+++ b/cellbase-build/pom.xml
@@ -27,6 +27,10 @@
org.opencb.commons
bioformats
+        <dependency>
+            <groupId>org.opencb.biodata</groupId>
+            <artifactId>models</artifactId>
+        </dependency>
psidev.psi.mi
diff --git a/cellbase-build/src/main/java/org/opencb/cellbase/build/transform/VariantEffectParser.java b/cellbase-build/src/main/java/org/opencb/cellbase/build/transform/VariantEffectParser.java
new file mode 100644
index 0000000000..668c52e549
--- /dev/null
+++ b/cellbase-build/src/main/java/org/opencb/cellbase/build/transform/VariantEffectParser.java
@@ -0,0 +1,260 @@
+package org.opencb.cellbase.build.transform;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.util.Locale;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import org.apache.commons.lang.StringUtils;
+import org.opencb.biodata.models.variant.effect.ConsequenceType;
+import org.opencb.biodata.models.variant.effect.ConsequenceTypeMappings;
+import org.opencb.biodata.models.variant.effect.ProteinSubstitutionScores;
+import org.opencb.biodata.models.variant.effect.VariantEffect;
+import org.opencb.cellbase.build.transform.serializers.json.JsonSerializer;
+
+
+/**
+ * Parses tab-separated variant effect annotations (the column layout used by the
+ * {@code vep-example-output.txt} test resource) and groups them into {@link VariantEffect} objects.
+ * <p>
+ * Every data line describes the consequence of one alternate allele on one feature. Consecutive
+ * lines that share chromosome, position and reference allele are accumulated into a single
+ * {@link VariantEffect}; it is handed to the serializer as soon as a line for a different variant
+ * is read, and once more when the input is exhausted.
+ *
+ * @author Cristina Yenyxe Gonzalez Garcia
+ */
+public class VariantEffectParser {
+
+    private static final Logger LOGGER = Logger.getLogger(VariantEffectParser.class.getName());
+
+    /** Number of columns {@link #parseLine} reads: indexes 0 to 12 plus the "Extra" column (13). */
+    private static final int MIN_COLUMNS = 14;
+
+    private final JsonSerializer<VariantEffect> serializer;
+
+    /**
+     * @param serializer destination of the parsed effects; when {@code null} the input is still
+     *                   parsed but nothing is written
+     */
+    public VariantEffectParser(JsonSerializer<VariantEffect> serializer) {
+        this.serializer = serializer;
+    }
+
+    /**
+     * Reads the whole file and writes one {@link VariantEffect} per distinct
+     * chromosome/position/reference combination found in consecutive lines.
+     * <p>
+     * Lines starting with {@code #}, lines whose first column is not of the form
+     * {@code chr_pos_ref/alt} and lines with fewer than {@value #MIN_COLUMNS} columns are skipped.
+     *
+     * @param file path to the tab-separated annotation file
+     * @return number of effects the serializer reported as successfully written
+     * @throws IOException if the file cannot be opened or read
+     * @throws NumberFormatException if the position in the first column or a numeric value of
+     *                               the "Extra" column is not a valid number
+     */
+    public int parse(Path file) throws IOException {
+        VariantEffect currentEffect = null;
+        String currentAllele = null;
+        int numEffectsWritten = 0;
+
+        // try-with-resources: the reader is released even when a line fails to parse
+        try (BufferedReader reader = new BufferedReader(
+                new InputStreamReader(new FileInputStream(file.toFile()), StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                if (line.startsWith("#")) {
+                    continue; // Header will just be ignored
+                }
+
+                String[] fields = line.split("\t");
+                String[] positionFields = fields[0].split("[\\_\\/]");
+
+                if (positionFields.length < 4) {
+                    // Only entries chr_pos_ref/alt will be parsed, ie, 1_909238_G/C or 3_361464_A/-
+                    // Entries like 5_121187650_duplication will be ignored
+                    continue;
+                }
+
+                if (fields.length < MIN_COLUMNS) {
+                    // A truncated line would otherwise abort the run with an ArrayIndexOutOfBoundsException
+                    LOGGER.log(Level.WARNING, "Skipping line with fewer than {0} columns: {1}",
+                            new Object[]{MIN_COLUMNS, line});
+                    continue;
+                }
+
+                String chromosome = positionFields[0];
+                int start = Integer.parseInt(positionFields[1]);
+                String referenceAllele = positionFields[2];
+                String alternateAllele = positionFields[3];
+
+                if (isNewVariant(chromosome, start, referenceAllele, alternateAllele, currentEffect, currentAllele)) {
+                    // The previous variant is complete: flush it before starting the next one
+                    if (writeEffect(currentEffect)) {
+                        numEffectsWritten++;
+                    }
+
+                    currentEffect = new VariantEffect(chromosome, start, start, referenceAllele);
+                    currentAllele = alternateAllele;
+                } else if (isNewAllele(chromosome, start, referenceAllele, alternateAllele, currentEffect, currentAllele)) {
+                    currentAllele = alternateAllele;
+                }
+
+                parseLine(fields, currentEffect, currentAllele);
+            }
+        }
+
+        // Don't forget to serialize the last effect read!
+        if (writeEffect(currentEffect)) {
+            numEffectsWritten++;
+        }
+
+        return numEffectsWritten;
+    }
+
+    /**
+     * Writes the effect if there is both an effect and a serializer.
+     *
+     * @return {@code true} only when the serializer reported a successful write
+     */
+    private boolean writeEffect(VariantEffect effect) {
+        return effect != null && serializer != null && serializer.write(effect);
+    }
+
+    /**
+     * Tells whether a line belongs to a different variant than the one being accumulated.
+     * The alternate allele is deliberately not compared, so that all the alternate alleles of
+     * the same chromosome/position/reference end up in the same {@link VariantEffect}.
+     */
+    private boolean isNewVariant(String chromosome, int start, String referenceAllele, String alternateAllele,
+            VariantEffect current, String currentAllele) {
+        if (current == null) {
+            return true;
+        }
+
+        return !chromosome.equals(current.getChromosome())
+                || start != current.getStart()
+                || !referenceAllele.equals(current.getReferenceAllele());
+    }
+
+    /**
+     * Tells whether a line refers to the variant being accumulated but to an alternate allele
+     * different from the one of the previous line.
+     */
+    private boolean isNewAllele(String chromosome, int start, String referenceAllele, String alternateAllele,
+            VariantEffect current, String currentAllele) {
+        if (current == null) {
+            return true;
+        }
+
+        return chromosome.equals(current.getChromosome())
+                && start == current.getStart()
+                && referenceAllele.equals(current.getReferenceAllele())
+                && !alternateAllele.equals(currentAllele);
+    }
+
+    /**
+     * Creates a {@link ConsequenceType} from the columns of one line and adds it to the effect
+     * under the given alternate allele.
+     *
+     * @param fields          columns of the line; at least {@value #MIN_COLUMNS} are required
+     * @param effect          effect the consequence type is added to
+     * @param alternateAllele alternate allele the line refers to
+     */
+    private void parseLine(String[] fields, VariantEffect effect, String alternateAllele) {
+        ConsequenceType ct = new ConsequenceType(alternateAllele);
+        effect.addConsequenceType(alternateAllele, ct);
+
+        // Gene and feature information
+        ct.setGeneId(fields[3]);
+        ct.setFeatureId(fields[4]);
+        ct.setFeatureType(fields[5]);
+
+        // List of consequence types as SO codes; unknown terms are reported and left as 0
+        String[] consequencesName = fields[6].split(",");
+        int[] consequencesSo = new int[consequencesName.length];
+        for (int i = 0; i < consequencesName.length; i++) {
+            Integer so = ConsequenceTypeMappings.termToAccession.get(consequencesName[i]);
+            if (so != null) {
+                consequencesSo[i] = so;
+            } else {
+                LOGGER.log(Level.WARNING, "{0} is not a valid consequence type", consequencesName[i]);
+            }
+        }
+        ct.setConsequenceTypes(consequencesSo);
+
+        // Fields related to position can be empty (marked with "-") or ranges such as "1-870"
+        if (isPosition(fields[7])) {
+            ct.setcDnaPosition(Integer.parseInt(fields[7]));
+        }
+        if (isPosition(fields[8])) {
+            ct.setCdsPosition(Integer.parseInt(fields[8]));
+        }
+        if (isPosition(fields[9])) {
+            ct.setProteinPosition(Integer.parseInt(fields[9]));
+        }
+
+        // Fields related to AA and codon changes can also be empty (marked with "-")
+        if (!"-".equals(fields[10])) {
+            ct.setAminoacidChange(fields[10]);
+        }
+        if (!"-".equals(fields[11])) {
+            ct.setCodonChange(fields[11]);
+        }
+
+        // Variant ID
+        if (!"-".equals(fields[12])) {
+            ct.setVariationId(fields[12]);
+        }
+
+        parseExtraFields(fields[13], effect, ct);
+    }
+
+    /**
+     * Tells whether a column holds a single position made only of digits.
+     * The emptiness check is needed because {@code StringUtils.isNumeric("")} is {@code true}
+     * in commons-lang 2.x, which would let {@code Integer.parseInt("")} fail.
+     */
+    private static boolean isPosition(String field) {
+        return !field.isEmpty() && StringUtils.isNumeric(field);
+    }
+
+    /** Tells whether a flag of the "Extra" column is set, i.e. its value is "YES" or "Y" in any case. */
+    private static boolean isYes(String value) {
+        return value.equalsIgnoreCase("YES") || value.equalsIgnoreCase("Y");
+    }
+
+    /**
+     * Parses the "Extra" column, a list of {@code KEY=value} pairs separated by semicolons.
+     * Keys are matched case-insensitively; unknown keys and entries without a value are ignored.
+     *
+     * @param extra  contents of the "Extra" column
+     * @param effect receives the values that apply to the whole variant (frequencies, regulatory
+     *               effect, protein substitution scores)
+     * @param ct     receives the values that apply to this consequence type
+     */
+    private void parseExtraFields(String extra, VariantEffect effect, ConsequenceType ct) {
+        for (String field : extra.split(";")) {
+            // Limit 2 keeps any '=' that belongs to the value, e.g. an HGVS string ending in "p.="
+            String[] keyValue = field.split("=", 2);
+            if (keyValue.length < 2 || keyValue[1].isEmpty()) {
+                continue; // Empty column ("-") or key without a value
+            }
+            String value = keyValue[1];
+
+            // Locale.ROOT: key matching must not depend on the default locale of the JVM
+            switch (keyValue[0].toLowerCase(Locale.ROOT)) {
+                case "aa_maf":
+                    effect.getFrequencies().setMafNhlbiEspAfricanAmerican(Float.parseFloat(value));
+                    break;
+                case "afr_maf":
+                    effect.getFrequencies().setMaf1000GAfrican(Float.parseFloat(value));
+                    break;
+                case "amr_maf":
+                    effect.getFrequencies().setMaf1000GAmerican(Float.parseFloat(value));
+                    break;
+                case "asn_maf":
+                    effect.getFrequencies().setMaf1000GAsian(Float.parseFloat(value));
+                    break;
+                case "biotype":
+                    ct.setFeatureBiotype(value);
+                    break;
+                case "canonical":
+                    ct.setCanonical(isYes(value));
+                    break;
+                case "ccds":
+                    ct.setCcdsId(value);
+                    break;
+                case "cell_type":
+                    effect.getRegulatoryEffect().setCellType(value);
+                    break;
+                case "clin_sig":
+                    ct.setClinicalSignificance(value);
+                    break;
+                case "distance":
+                    ct.setVariantToTranscriptDistance(Integer.parseInt(value));
+                    break;
+                case "domains":
+                    ct.setProteinDomains(value);
+                    break;
+                case "ea_maf":
+                    effect.getFrequencies().setMafNhlbiEspEuropeanAmerican(Float.parseFloat(value));
+                    break;
+                case "ensp":
+                    ct.setProteinId(value);
+                    break;
+                case "eur_maf":
+                    effect.getFrequencies().setMaf1000GEuropean(Float.parseFloat(value));
+                    break;
+                case "exon":
+                    ct.setExonNumber(value);
+                    break;
+                case "gmaf": // Format is GMAF=G:0.2640
+                    String[] gmafFields = value.split(":");
+                    effect.getFrequencies().setAllele1000g(gmafFields[0]);
+                    effect.getFrequencies().setMaf1000G(Float.parseFloat(gmafFields[1]));
+                    break;
+                case "hgvsc":
+                    ct.setHgvsc(value);
+                    break;
+                case "hgvsp":
+                    ct.setHgvsp(value);
+                    break;
+                case "high_inf_pos":
+                    effect.getRegulatoryEffect().setHighInformationPosition(isYes(value));
+                    break;
+                case "intron":
+                    ct.setIntronNumber(value);
+                    break;
+                case "motif_name":
+                    effect.getRegulatoryEffect().setMotifName(value);
+                    break;
+                case "motif_pos":
+                    effect.getRegulatoryEffect().setMotifPosition(Integer.parseInt(value));
+                    break;
+                case "motif_score_change":
+                    effect.getRegulatoryEffect().setMotifScoreChange(Float.parseFloat(value));
+                    break;
+                case "polyphen": // Format is PolyPhen=possibly_damaging(0.859)
+                    String[] polyphenFields = value.split("[\\(\\)]");
+                    effect.getProteinSubstitutionScores().setPolyphenEffect(
+                            ProteinSubstitutionScores.PolyphenEffect.valueOf(polyphenFields[0].toUpperCase(Locale.ROOT)));
+                    effect.getProteinSubstitutionScores().setPolyphenScore(Float.parseFloat(polyphenFields[1]));
+                    break;
+                case "pubmed":
+                    ct.setPubmed(value.split(","));
+                    break;
+                case "sift": // Format is SIFT=tolerated(0.07)
+                    String[] siftFields = value.split("[\\(\\)]");
+                    effect.getProteinSubstitutionScores().setSiftEffect(
+                            ProteinSubstitutionScores.SiftEffect.valueOf(siftFields[0].toUpperCase(Locale.ROOT)));
+                    effect.getProteinSubstitutionScores().setSiftScore(Float.parseFloat(siftFields[1]));
+                    break;
+                case "strand":
+                    ct.setFeatureStrand(value);
+                    break;
+                case "sv":
+                    ct.setStructuralVariantsId(value.split(","));
+                    break;
+                case "symbol":
+                    ct.setGeneName(value);
+                    break;
+                case "symbol_source":
+                    ct.setGeneNameSource(value);
+                    break;
+                default:
+                    // ALLELE_NUM, FREQS, IND, ZYG
+                    break;
+            }
+        }
+    }
+}
diff --git a/cellbase-build/src/main/java/org/opencb/cellbase/build/transform/serializers/json/JsonSerializer.java b/cellbase-build/src/main/java/org/opencb/cellbase/build/transform/serializers/json/JsonSerializer.java
new file mode 100644
index 0000000000..4f171d9664
--- /dev/null
+++ b/cellbase-build/src/main/java/org/opencb/cellbase/build/transform/serializers/json/JsonSerializer.java
@@ -0,0 +1,113 @@
+package org.opencb.cellbase.build.transform.serializers.json;
+
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.zip.GZIPOutputStream;
+import org.opencb.commons.io.DataWriter;
+
+/**
+ *
+ * @author Cristina Yenyxe Gonzalez Garcia
+ */
+public class JsonSerializer implements DataWriter {
+
+ private Path outdir;
+ private Path file;
+
+ protected JsonFactory factory;
+ protected ObjectMapper jsonObjectMapper;
+ protected JsonGenerator generator;
+ private OutputStream stream;
+
+ public JsonSerializer(Path outdir, Path file) {
+ this.outdir = outdir;
+ this.file = file;
+ this.factory = new JsonFactory();
+ this.jsonObjectMapper = new ObjectMapper(this.factory);
+ }
+
+ @Override
+ public boolean open() {
+ try {
+ stream = new GZIPOutputStream(new FileOutputStream(
+ Paths.get(outdir.toString(), file.getFileName().toString()).toAbsolutePath().toString() + ".json.gz"));
+ } catch (IOException ex) {
+ Logger.getLogger(JsonSerializer.class.getName()).log(Level.SEVERE, null, ex);
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public boolean pre() {
+ try {
+ generator = factory.createGenerator(stream);
+ } catch (IOException ex) {
+ Logger.getLogger(JsonSerializer.class.getName()).log(Level.SEVERE, null, ex);
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public boolean write(T elem) {
+ try {
+ generator.writeObject(elem);
+ generator.writeRaw('\n');
+ } catch (IOException ex) {
+ Logger.getLogger(JsonSerializer.class.getName()).log(Level.SEVERE, elem.toString(), ex);
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public boolean write(List batch) {
+ for (T elem : batch) {
+ try {
+ generator.writeObject(elem);
+ generator.writeRaw('\n');
+ } catch (IOException ex) {
+ Logger.getLogger(JsonSerializer.class.getName()).log(Level.SEVERE, elem.toString(), ex);
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ @Override
+ public boolean post() {
+ try {
+ stream.flush();
+ generator.flush();
+ } catch (IOException ex) {
+ Logger.getLogger(JsonSerializer.class.getName()).log(Level.SEVERE, null, ex);
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public boolean close() {
+ try {
+ generator.close();
+ } catch (IOException ex) {
+ Logger.getLogger(JsonSerializer.class.getName()).log(Level.SEVERE, null, ex);
+ return false;
+ }
+ return true;
+ }
+
+}
diff --git a/cellbase-build/src/test/java/org/opencb/cellbase/build/transform/VariantEffectParserTest.java b/cellbase-build/src/test/java/org/opencb/cellbase/build/transform/VariantEffectParserTest.java
new file mode 100644
index 0000000000..1b29ddd671
--- /dev/null
+++ b/cellbase-build/src/test/java/org/opencb/cellbase/build/transform/VariantEffectParserTest.java
@@ -0,0 +1,47 @@
+package org.opencb.cellbase.build.transform;
+
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.opencb.biodata.models.variant.effect.VariantEffect;
+import org.opencb.cellbase.build.transform.serializers.json.JsonSerializer;
+
+/**
+ * Runs {@link VariantEffectParser} over the {@code vep-example-output.txt} resource and checks
+ * how many effects are written.
+ *
+ * @author Cristina Yenyxe Gonzalez Garcia
+ */
+public class VariantEffectParserTest {
+
+    private static Path file;
+    private static JsonSerializer<VariantEffect> serializer;
+
+    @BeforeClass
+    public static void setUpClass() throws URISyntaxException {
+        URL resource = VariantEffectParserTest.class.getResource("/vep-example-output.txt");
+        Assert.assertNotNull("Resource vep-example-output.txt not found in the classpath", resource);
+        file = Paths.get(resource.toURI());
+
+        // java.io.tmpdir instead of a hard-coded "/tmp", which does not exist on every platform
+        Path outdir = Paths.get(System.getProperty("java.io.tmpdir"));
+        JsonSerializer<VariantEffect> candidate = new JsonSerializer<>(outdir, Paths.get("vep-example-output"));
+        Assert.assertTrue("The output file could not be opened", candidate.open());
+        Assert.assertTrue("The JSON generator could not be created", candidate.pre());
+
+        // Only a fully initialized serializer is published to the test and the tear-down
+        serializer = candidate;
+    }
+
+    @AfterClass
+    public static void tearDownClass() {
+        // Null when the set-up failed; its own failure must not be hidden by a NullPointerException
+        if (serializer != null) {
+            serializer.post();
+            serializer.close();
+        }
+    }
+
+    /**
+     * The resource contains three variants of the form chr_pos_ref/alt: 1_909238_G (alternate
+     * alleles C and T are grouped), 3_361464_A and 13_32889669_C. The 5_121187650_duplication
+     * entry is ignored by the parser.
+     */
+    @Test
+    public void testParse() throws Exception {
+        VariantEffectParser instance = new VariantEffectParser(serializer);
+        int numEffectsWritten = instance.parse(file);
+        Assert.assertEquals(3, numEffectsWritten);
+    }
+
+}
diff --git a/cellbase-build/src/test/resources/vep-example-output.txt b/cellbase-build/src/test/resources/vep-example-output.txt
new file mode 100644
index 0000000000..90b8acd1d3
--- /dev/null
+++ b/cellbase-build/src/test/resources/vep-example-output.txt
@@ -0,0 +1,24 @@
+#Uploaded_variation Location Allele Gene Feature Feature_type Consequence cDNA_position CDS_position Protein_position Amino_acids Codons Existing_variation Extra
+1_909238_G/C 1:909238 C ENSG00000187642 ENST00000341290 Transcript downstream_gene_variant - - - - - rs3829740 AA_MAF=0.219162;EA_MAF=0.416744;DISTANCE=1346;STRAND=-1;SYMBOL=C1orf170;SYMBOL_SOURCE=HGNC;GMAF=G:0.2640;BIOTYPE=protein_coding;ENSP=ENSP00000343864;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39
+1_909238_G/C 1:909238 C ENSG00000187642 ENST00000433179 Transcript downstream_gene_variant - - - - - rs3829740 AA_MAF=0.219162;EA_MAF=0.416744;DISTANCE=1341;STRAND=-1;CANONICAL=YES;SYMBOL=C1orf170;SYMBOL_SOURCE=HGNC;GMAF=G:0.2640;BIOTYPE=protein_coding;ENSP=ENSP00000414022;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39
+1_909238_G/C 1:909238 C ENSG00000187583 ENST00000491024 Transcript missense_variant 155 155 52 R/P cGt/cCt rs3829740 AA_MAF=0.219162;EA_MAF=0.416744;EXON=2/5;STRAND=1;SYMBOL=PLEKHN1;SYMBOL_SOURCE=HGNC;SIFT=tolerated(0.17);PolyPhen=benign(0);GMAF=G:0.2640;BIOTYPE=protein_coding;ENSP=ENSP00000462558;HGVSc=ENST00000491024.1:c.155G>C;HGVSp=ENSP00000462558.1:p.Arg52Pro;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39
+1_909238_G/C 1:909238 C ENSG00000187583 ENST00000379409 Transcript missense_variant 1646 1616 539 R/P cGt/cCt rs3829740 AA_MAF=0.219162;EA_MAF=0.416744;EXON=13/15;STRAND=1;SYMBOL=PLEKHN1;SYMBOL_SOURCE=HGNC;SIFT=tolerated(0.25);PolyPhen=benign(0);GMAF=G:0.2640;BIOTYPE=protein_coding;ENSP=ENSP00000368719;HGVSc=ENST00000379409.2:c.1616G>C;HGVSp=ENSP00000368719.2:p.Arg539Pro;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39
+1_909238_G/T 1:909238 T ENSG00000187583 ENST00000480267 Transcript downstream_gene_variant - - - - - rs3829740 AA_MAF=0.219162;EA_MAF=0.416744;DISTANCE=2335;STRAND=1;SYMBOL=PLEKHN1;SYMBOL_SOURCE=HGNC;GMAF=G:0.2640;BIOTYPE=retained_intron;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39
+1_909238_G/T 1:909238 T ENSG00000187583 ENST00000379407 Transcript missense_variant 1385 1355 452 R/P cGt/cTt rs3829740 AA_MAF=0.219162;EA_MAF=0.416744;EXON=13/15;STRAND=1;SYMBOL=PLEKHN1;SYMBOL_SOURCE=HGNC;SIFT=tolerated(0.23);PolyPhen=benign(0);GMAF=G:0.2640;BIOTYPE=protein_coding;ENSP=ENSP00000368717;CCDS=CCDS53256.1;HGVSc=ENST00000379407.3:c.1355G>T;HGVSp=ENSP00000368717.2:p.Arg452Pro;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39
+1_909238_G/T 1:909238 T ENSG00000187642 ENST00000479361 Transcript downstream_gene_variant - - - - - rs3829740 AA_MAF=0.219162;EA_MAF=0.416744;DISTANCE=1347;STRAND=-1;SYMBOL=C1orf170;SYMBOL_SOURCE=HGNC;GMAF=G:0.2640;BIOTYPE=retained_intron;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39
+1_909238_G/T 1:909238 T ENSG00000187583 ENST00000379410 Transcript missense_variant 1495 1460 487 R/P cGt/cTt rs3829740 AA_MAF=0.219162;EA_MAF=0.416744;EXON=14/16;STRAND=1;CANONICAL=YES;SYMBOL=PLEKHN1;SYMBOL_SOURCE=HGNC;SIFT=tolerated(0.23);PolyPhen=benign(0);GMAF=G:0.2640;BIOTYPE=protein_coding;ENSP=ENSP00000368720;CCDS=CCDS4.1;HGVSc=ENST00000379410.3:c.1460G>T;HGVSp=ENSP00000368720.3:p.Arg487Pro;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39
+3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000449294 Transcript frameshift_variant,feature_truncation 345 5 2 - - - EXON=3/5;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000390440;DOMAINS=Cleavage_site_(Signalp):Sigp;HGVSc=ENST00000449294.2:c.5delA;HGVSp=ENSP00000390440.2:p.Glu2GlyfsTer9
+3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000397491 Transcript frameshift_variant,feature_truncation 472 5 2 - - - EXON=3/27;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000380628;DOMAINS=Cleavage_site_(Signalp):Sigp;CCDS=CCDS58812.1;HGVSc=ENST00000397491.2:c.5delA;HGVSp=ENSP00000380628.2:p.Glu2GlyfsTer9
+3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000421198 Transcript frameshift_variant,feature_truncation 258 5 2 - - - EXON=3/5;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000413628;DOMAINS=Cleavage_site_(Signalp):Sigp;HGVSc=ENST00000421198.1:c.5delA;HGVSp=ENSP00000413628.1:p.Glu2GlyfsTer9
+3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000427688 Transcript frameshift_variant,feature_truncation 380 5 2 - - - EXON=2/3;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000403311;DOMAINS=Cleavage_site_(Signalp):Sigp;HGVSc=ENST00000427688.1:c.5delA;HGVSp=ENSP00000403311.1:p.Glu2GlyfsTer9
+3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000435603 Transcript frameshift_variant,feature_truncation 185 5 2 - - - EXON=2/6;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000397445;DOMAINS=Cleavage_site_(Signalp):Sigp;HGVSc=ENST00000435603.1:c.5delA;HGVSp=ENSP00000397445.1:p.Glu2GlyfsTer9
+3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000453040 Transcript 3_prime_UTR_variant,NMD_transcript_variant,feature_truncation 628 - - - - - EXON=3/25;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=nonsense_mediated_decay;ENSP=ENSP00000413109;HGVSc=ENST00000453040.1:c.*343delA
+3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000256509 Transcript frameshift_variant,feature_truncation 647 5 2 - - - EXON=3/28;STRAND=1;CANONICAL=YES;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000256509;DOMAINS=Cleavage_site_(Signalp):Sigp;CCDS=CCDS2556.1;HGVSc=ENST00000256509.2:c.5delA;HGVSp=ENSP00000256509.2:p.Glu2GlyfsTer9
+3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000461289 Transcript upstream_gene_variant - - - - - - DISTANCE=23;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=processed_transcript
+5_121187650_duplication 5:121187650 duplication ENSG00000181867 ENST00000321339 Transcript transcript_amplification 1-870 - - - - - EXON=1/1;STRAND=1;CANONICAL=YES;SYMBOL=FTMT;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000313691;CCDS=CCDS4128.1
+13_32889669_C/T 13:32889669 T ENSG00000139618 ENST00000544455 Transcript 5_prime_UTR_variant 53 - - - - rs55880202 EXON=1/28;STRAND=1;CANONICAL=YES;SYMBOL=BRCA2;SYMBOL_SOURCE=HGNC;GMAF=T:0.0087;BIOTYPE=protein_coding;ENSP=ENSP00000439902;CCDS=CCDS9344.1;HGVSc=ENST00000544455.1:c.-175C>T;AFR_MAF=0.04;AMR_MAF=0.0028;ASN_MAF=0;EUR_MAF=0
+13_32889669_C/T 13:32889669 T ENSG00000139618 ENST00000530893 Transcript 5_prime_UTR_variant 28 - - - - rs55880202 EXON=1/10;STRAND=1;SYMBOL=BRCA2;SYMBOL_SOURCE=HGNC;GMAF=T:0.0087;BIOTYPE=protein_coding;ENSP=ENSP00000435699;HGVSc=ENST00000530893.2:c.-540C>T;AFR_MAF=0.04;AMR_MAF=0.0028;ASN_MAF=0;EUR_MAF=0
+13_32889669_C/T 13:32889669 T ENSG00000189167 ENST00000345108 Transcript upstream_gene_variant - - - - - rs55880202 DISTANCE=3578;STRAND=-1;SYMBOL=ZAR1L;SYMBOL_SOURCE=HGNC;GMAF=T:0.0087;BIOTYPE=protein_coding;ENSP=ENSP00000344616;CCDS=CCDS45023.1;AFR_MAF=0.04;AMR_MAF=0.0028;ASN_MAF=0;EUR_MAF=0
+13_32889669_C/T 13:32889669 T ENSG00000139618 ENST00000380152 Transcript 5_prime_UTR_variant 59 - - - - rs55880202 EXON=1/27;STRAND=1;SYMBOL=BRCA2;SYMBOL_SOURCE=HGNC;GMAF=T:0.0087;BIOTYPE=protein_coding;ENSP=ENSP00000369497;CCDS=CCDS9344.1;HGVSc=ENST00000380152.3:c.-175C>T;AFR_MAF=0.04;AMR_MAF=0.0028;ASN_MAF=0;EUR_MAF=0
+13_32889669_C/T 13:32889669 T ENSG00000189167 ENST00000533490 Transcript upstream_gene_variant - - - - - rs55880202 DISTANCE=188;STRAND=-1;CANONICAL=YES;SYMBOL=ZAR1L;SYMBOL_SOURCE=HGNC;GMAF=T:0.0087;BIOTYPE=protein_coding;ENSP=ENSP00000437289;CCDS=CCDS45023.1;AFR_MAF=0.04;AMR_MAF=0.0028;ASN_MAF=0;EUR_MAF=0
+13_32889669_C/T 13:32889669 T - ENSR00000054736 RegulatoryFeature regulatory_region_variant - - - - - rs55880202 GMAF=T:0.0087;AFR_MAF=0.04;AMR_MAF=0.0028;ASN_MAF=0;EUR_MAF=0
diff --git a/pom.xml b/pom.xml
index 05ece0d1cc..adc1d44b69 100644
--- a/pom.xml
+++ b/pom.xml
@@ -62,6 +62,11 @@
cellbase-mongodb
${cellbase.version}
+            <dependency>
+                <groupId>org.opencb.biodata</groupId>
+                <artifactId>models</artifactId>
+                <version>0.1</version>
+            </dependency>
org.opencb.commons
bioformats