package org.opencb.cellbase.build.transform;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.lang.StringUtils;
import org.opencb.biodata.models.variant.effect.ConsequenceType;
import org.opencb.biodata.models.variant.effect.ConsequenceTypeMappings;
import org.opencb.biodata.models.variant.effect.ProteinSubstitutionScores;
import org.opencb.biodata.models.variant.effect.VariantEffect;
import org.opencb.cellbase.build.transform.serializers.json.JsonSerializer;

/**
 * Parses a tab-separated VEP output file (the format of the bundled
 * {@code vep-example-output.txt}) and serializes one {@link VariantEffect} per
 * distinct chromosome/position/reference-allele combination.
 *
 * <p>Expected columns, as listed in the file header: Uploaded_variation, Location,
 * Allele, Gene, Feature, Feature_type, Consequence, cDNA_position, CDS_position,
 * Protein_position, Amino_acids, Codons, Existing_variation, Extra.
 *
 * <p>The model classes come from the {@code org.opencb.biodata:models} artifact.
 *
 * @author Cristina Yenyxe Gonzalez Garcia &lt;cgonzalez@cipf.es&gt;
 */
public class VariantEffectParser {

    private static final Logger LOGGER = Logger.getLogger(VariantEffectParser.class.getName());

    /** Number of tab-separated columns every data line must provide. */
    private static final int NUM_COLUMNS = 14;

    private final JsonSerializer<VariantEffect> serializer;

    /**
     * @param serializer destination of the parsed effects; may be {@code null}, in
     *                   which case the file is parsed but nothing is written
     */
    public VariantEffectParser(JsonSerializer<VariantEffect> serializer) {
        this.serializer = serializer;
    }

    /**
     * Reads the whole file and writes every variant effect found in it.
     *
     * <p>Consecutive lines sharing chromosome, position and reference allele are
     * accumulated into the same {@link VariantEffect}, even when their alternate
     * alleles differ. Header lines (starting with {@code #}), blank lines, lines with
     * too few columns and entries whose identifier is not of the form
     * {@code chr_pos_ref/alt} are skipped.
     *
     * @param file VEP output file to read
     * @return number of effects the serializer reported as successfully written
     * @throws IOException if the file cannot be opened or read
     */
    public int parse(Path file) throws IOException {
        VariantEffect currentEffect = null;
        int numEffectsWritten = 0;
        int lineNumber = 0;

        // try-with-resources guarantees the reader is released even if a line fails to parse
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file.toFile()), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.isEmpty() || line.startsWith("#")) {
                    continue; // Header and blank lines will just be ignored
                }

                // Limit -1 keeps trailing empty columns, so the column count is reliable
                String[] fields = line.split("\t", -1);
                if (fields.length < NUM_COLUMNS) {
                    LOGGER.log(Level.WARNING, "Line {0} skipped: {1} columns found, {2} expected",
                            new Object[]{lineNumber, fields.length, NUM_COLUMNS});
                    continue;
                }

                String[] positionFields = fields[0].split("[_/]");
                if (positionFields.length < 4 || !hasNumericValue(positionFields[1])) {
                    // Only entries chr_pos_ref/alt will be parsed, ie, 1_909238_G/C or 3_361464_A/-
                    // Entries like 5_121187650_duplication will be ignored
                    continue;
                }

                String chromosome = positionFields[0];
                int start = Integer.parseInt(positionFields[1]);
                String referenceAllele = positionFields[2];
                String alternateAllele = positionFields[3];

                if (isNewVariant(chromosome, start, referenceAllele, currentEffect)) {
                    if (writeEffect(currentEffect)) {
                        numEffectsWritten++;
                    }
                    currentEffect = new VariantEffect(chromosome, start, start, referenceAllele);
                }

                parseLine(fields, currentEffect, alternateAllele);
            }
        }

        // Don't forget to serialize the last effect read!
        if (writeEffect(currentEffect)) {
            numEffectsWritten++;
        }

        return numEffectsWritten;
    }

    /** Writes the effect if both it and the serializer exist; returns whether it was written. */
    private boolean writeEffect(VariantEffect effect) {
        return effect != null && serializer != null && serializer.write(effect);
    }

    /**
     * Tells whether a line belongs to a different variant than the one being accumulated.
     * The alternate allele is deliberately not compared, so all the alternates of a
     * position end up in the same effect.
     */
    private static boolean isNewVariant(String chromosome, int start, String referenceAllele, VariantEffect current) {
        return current == null
                || !chromosome.equals(current.getChromosome())
                || start != current.getStart()
                || !referenceAllele.equals(current.getReferenceAllele());
    }

    /**
     * Tells whether a column holds a non-negative integer. The explicit emptiness check
     * is needed because commons-lang 2.x reports the empty string as numeric.
     */
    private static boolean hasNumericValue(String field) {
        return !field.isEmpty() && StringUtils.isNumeric(field);
    }

    /** Tells whether a VEP flag value means "true" (YES or Y, case-insensitive). */
    private static boolean isYes(String value) {
        return value.equalsIgnoreCase("YES") || value.equalsIgnoreCase("Y");
    }

    /**
     * Builds a {@link ConsequenceType} from one data line and attaches it to the effect
     * under the given alternate allele.
     *
     * @param fields          the columns of the line (at least {@link #NUM_COLUMNS})
     * @param effect          effect the consequence type is added to
     * @param alternateAllele alternate allele the line refers to
     */
    private void parseLine(String[] fields, VariantEffect effect, String alternateAllele) {
        ConsequenceType ct = new ConsequenceType(alternateAllele);
        effect.addConsequenceType(alternateAllele, ct);

        // Gene and feature information
        ct.setGeneId(fields[3]);
        ct.setFeatureId(fields[4]);
        ct.setFeatureType(fields[5]);

        // List of consequence types as SO codes; unknown terms are reported and leave a 0 in their slot
        String[] consequencesName = fields[6].split(",");
        int[] consequencesSo = new int[consequencesName.length];
        for (int i = 0; i < consequencesName.length; i++) {
            Integer so = ConsequenceTypeMappings.termToAccession.get(consequencesName[i]);
            if (so != null) {
                consequencesSo[i] = so;
            } else {
                LOGGER.log(Level.WARNING, "{0} is not a valid consequence type", consequencesName[i]);
            }
        }
        ct.setConsequenceTypes(consequencesSo);

        // Fields related to position can be empty (marked with "-") or ranges such as 1-870;
        // only plain integers are stored
        if (hasNumericValue(fields[7])) {
            ct.setcDnaPosition(Integer.parseInt(fields[7]));
        }
        if (hasNumericValue(fields[8])) {
            ct.setCdsPosition(Integer.parseInt(fields[8]));
        }
        if (hasNumericValue(fields[9])) {
            ct.setProteinPosition(Integer.parseInt(fields[9]));
        }

        // Fields related to AA and codon changes can also be empty (marked with "-")
        if (!"-".equals(fields[10])) {
            ct.setAminoacidChange(fields[10]);
        }
        if (!"-".equals(fields[11])) {
            ct.setCodonChange(fields[11]);
        }

        // Variant ID
        if (!"-".equals(fields[12])) {
            ct.setVariationId(fields[12]);
        }

        parseExtraFields(fields[13], effect, ct);
    }

    /**
     * Parses the "Extra" column, a semicolon-separated list of {@code KEY=value} pairs.
     * Keys are matched case-insensitively; entries without a value and unknown keys
     * (ALLELE_NUM, FREQS, IND, ZYG...) are ignored.
     *
     * @param extra  raw contents of the Extra column
     * @param effect effect receiving frequencies, regulatory data and protein substitution scores
     * @param ct     consequence type receiving the transcript-level annotations
     */
    private void parseExtraFields(String extra, VariantEffect effect, ConsequenceType ct) {
        for (String field : extra.split(";")) {
            // Limit 2 splits on the first '=' only, so values containing '=' (HGVS "p.=") stay intact
            String[] keyValue = field.split("=", 2);
            if (keyValue.length < 2 || keyValue[1].isEmpty()) {
                continue; // "-" placeholder or key without a value
            }
            String value = keyValue[1];

            // Locale.ROOT keeps the key matching independent of the JVM default locale
            switch (keyValue[0].toLowerCase(Locale.ROOT)) {
                case "aa_maf":
                    effect.getFrequencies().setMafNhlbiEspAfricanAmerican(Float.parseFloat(value));
                    break;
                case "afr_maf":
                    effect.getFrequencies().setMaf1000GAfrican(Float.parseFloat(value));
                    break;
                case "amr_maf":
                    effect.getFrequencies().setMaf1000GAmerican(Float.parseFloat(value));
                    break;
                case "asn_maf":
                    effect.getFrequencies().setMaf1000GAsian(Float.parseFloat(value));
                    break;
                case "biotype":
                    ct.setFeatureBiotype(value);
                    break;
                case "canonical":
                    ct.setCanonical(isYes(value));
                    break;
                case "ccds":
                    ct.setCcdsId(value);
                    break;
                case "cell_type":
                    effect.getRegulatoryEffect().setCellType(value);
                    break;
                case "clin_sig":
                    ct.setClinicalSignificance(value);
                    break;
                case "distance":
                    ct.setVariantToTranscriptDistance(Integer.parseInt(value));
                    break;
                case "domains":
                    ct.setProteinDomains(value);
                    break;
                case "ea_maf":
                    effect.getFrequencies().setMafNhlbiEspEuropeanAmerican(Float.parseFloat(value));
                    break;
                case "ensp":
                    ct.setProteinId(value);
                    break;
                case "eur_maf":
                    effect.getFrequencies().setMaf1000GEuropean(Float.parseFloat(value));
                    break;
                case "exon":
                    ct.setExonNumber(value);
                    break;
                case "gmaf": // Format is GMAF=G:0.2640
                    String[] gmafFields = value.split(":");
                    effect.getFrequencies().setAllele1000g(gmafFields[0]);
                    if (gmafFields.length > 1) {
                        effect.getFrequencies().setMaf1000G(Float.parseFloat(gmafFields[1]));
                    }
                    break;
                case "hgvsc":
                    ct.setHgvsc(value);
                    break;
                case "hgvsp":
                    ct.setHgvsp(value);
                    break;
                case "high_inf_pos":
                    effect.getRegulatoryEffect().setHighInformationPosition(isYes(value));
                    break;
                case "intron":
                    ct.setIntronNumber(value);
                    break;
                case "motif_name":
                    effect.getRegulatoryEffect().setMotifName(value);
                    break;
                case "motif_pos":
                    effect.getRegulatoryEffect().setMotifPosition(Integer.parseInt(value));
                    break;
                case "motif_score_change":
                    effect.getRegulatoryEffect().setMotifScoreChange(Float.parseFloat(value));
                    break;
                case "polyphen": // Format is PolyPhen=possibly_damaging(0.859)
                    String[] polyphenFields = value.split("[()]");
                    effect.getProteinSubstitutionScores().setPolyphenEffect(
                            ProteinSubstitutionScores.PolyphenEffect.valueOf(polyphenFields[0].toUpperCase(Locale.ROOT)));
                    if (polyphenFields.length > 1) {
                        effect.getProteinSubstitutionScores().setPolyphenScore(Float.parseFloat(polyphenFields[1]));
                    }
                    break;
                case "pubmed":
                    ct.setPubmed(value.split(","));
                    break;
                case "sift": // Format is SIFT=tolerated(0.07)
                    String[] siftFields = value.split("[()]");
                    effect.getProteinSubstitutionScores().setSiftEffect(
                            ProteinSubstitutionScores.SiftEffect.valueOf(siftFields[0].toUpperCase(Locale.ROOT)));
                    if (siftFields.length > 1) {
                        effect.getProteinSubstitutionScores().setSiftScore(Float.parseFloat(siftFields[1]));
                    }
                    break;
                case "strand":
                    ct.setFeatureStrand(value);
                    break;
                case "sv":
                    ct.setStructuralVariantsId(value.split(","));
                    break;
                case "symbol":
                    ct.setGeneName(value);
                    break;
                case "symbol_source":
                    ct.setGeneNameSource(value);
                    break;
                default:
                    // ALLELE_NUM, FREQS, IND, ZYG
                    break;
            }
        }
    }
}
package org.opencb.cellbase.build.transform.serializers.json;

import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.GZIPOutputStream;
import org.opencb.commons.io.DataWriter;

/**
 * {@link DataWriter} that serializes each element as JSON followed by a newline
 * into a gzip-compressed file named {@code <outdir>/<file name>.json.gz}.
 *
 * <p>Expected call order: {@link #open()}, {@link #pre()}, any number of
 * {@code write} calls, {@link #post()}, {@link #close()}. Every method reports
 * failure by logging and returning {@code false} instead of throwing.
 *
 * @param <T> type of the elements to serialize
 * @author Cristina Yenyxe Gonzalez Garcia &lt;cgonzalez@cipf.es&gt;
 */
public class JsonSerializer<T> implements DataWriter<T> {

    private static final Logger LOGGER = Logger.getLogger(JsonSerializer.class.getName());

    private final Path outdir;
    private final Path file;

    protected JsonFactory factory;
    protected ObjectMapper jsonObjectMapper;
    protected JsonGenerator generator;
    private OutputStream stream;

    /**
     * @param outdir directory the output file is created in
     * @param file   path whose file name is used as the base name of the output file
     */
    public JsonSerializer(Path outdir, Path file) {
        this.outdir = outdir;
        this.file = file;
        this.factory = new JsonFactory();
        // Building the mapper on top of the factory lets generators created from it write objects
        this.jsonObjectMapper = new ObjectMapper(this.factory);
    }

    /**
     * Opens the gzip-compressed output file.
     *
     * @return {@code true} if the file could be opened
     */
    @Override
    public boolean open() {
        String target = Paths.get(outdir.toString(), file.getFileName().toString())
                .toAbsolutePath().toString() + ".json.gz";
        try {
            FileOutputStream fileStream = new FileOutputStream(target);
            try {
                stream = new GZIPOutputStream(fileStream);
            } catch (IOException ex) {
                // The gzip header could not be written: do not leak the file handle
                fileStream.close();
                throw ex;
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, "Could not open " + target, ex);
            return false;
        }
        return true;
    }

    /**
     * Creates the JSON generator on top of the stream opened by {@link #open()}.
     *
     * @return {@code true} if the generator was created
     */
    @Override
    public boolean pre() {
        if (stream == null) {
            LOGGER.log(Level.SEVERE, "pre() called before a successful open()");
            return false;
        }
        try {
            generator = factory.createGenerator(stream);
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            return false;
        }

        return true;
    }

    /**
     * Writes one element as JSON followed by a newline.
     *
     * @param elem element to serialize
     * @return {@code true} if the element was written
     */
    @Override
    public boolean write(T elem) {
        if (generator == null) {
            LOGGER.log(Level.SEVERE, "write() called before a successful pre()");
            return false;
        }
        try {
            generator.writeObject(elem);
            generator.writeRaw('\n');
        } catch (IOException ex) {
            // String.valueOf tolerates a null element, unlike elem.toString()
            LOGGER.log(Level.SEVERE, String.valueOf(elem), ex);
            return false;
        }

        return true;
    }

    /**
     * Writes the elements in order, stopping at the first failure.
     *
     * @param batch elements to serialize
     * @return {@code true} if every element was written
     */
    @Override
    public boolean write(List<T> batch) {
        for (T elem : batch) {
            if (!write(elem)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Flushes buffered output, generator first so its contents reach the stream
     * before the stream itself is flushed.
     *
     * @return {@code true} if flushing succeeded
     */
    @Override
    public boolean post() {
        try {
            if (generator != null) {
                generator.flush();
            }
            if (stream != null) {
                stream.flush();
            }
        } catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            return false;
        }
        return true;
    }

    /**
     * Closes the generator and the underlying stream. Safe to call when
     * {@link #open()} or {@link #pre()} failed or were never invoked.
     *
     * @return {@code true} if everything that was open could be closed
     */
    @Override
    public boolean close() {
        boolean closed = true;
        if (generator != null) {
            try {
                generator.close();
            } catch (IOException ex) {
                LOGGER.log(Level.SEVERE, null, ex);
                closed = false;
            }
        }
        if (stream != null) {
            try {
                // Also covers the case where no generator was ever created
                stream.close();
            } catch (IOException ex) {
                LOGGER.log(Level.SEVERE, null, ex);
                closed = false;
            }
        }
        return closed;
    }

}
package org.opencb.cellbase.build.transform;

import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.opencb.biodata.models.variant.effect.VariantEffect;
import org.opencb.cellbase.build.transform.serializers.json.JsonSerializer;

/**
 * Checks {@link VariantEffectParser} against the {@code vep-example-output.txt} fixture.
 *
 * @author Cristina Yenyxe Gonzalez Garcia &lt;cgonzalez@cipf.es&gt;
 */
public class VariantEffectParserTest {

    private static Path file;
    private static JsonSerializer<VariantEffect> serializer;

    @BeforeClass
    public static void setUpClass() throws URISyntaxException {
        URL resource = VariantEffectParserTest.class.getResource("/vep-example-output.txt");
        Assert.assertNotNull("Test resource /vep-example-output.txt not found", resource);
        file = Paths.get(resource.toURI());

        // Use the platform temporary directory instead of a hard-coded /tmp
        Path outdir = Paths.get(System.getProperty("java.io.tmpdir"));
        serializer = new JsonSerializer<>(outdir, Paths.get("vep-example-output"));
        Assert.assertTrue("Serializer could not open its output file", serializer.open());
        Assert.assertTrue("Serializer could not create its JSON generator", serializer.pre());
    }

    @AfterClass
    public static void tearDownClass() {
        // The serializer is null when set-up failed before creating it
        if (serializer != null) {
            serializer.post();
            serializer.close();
        }
    }

    /**
     * The fixture holds entries for 1:909238 (alternates C and T share one effect),
     * 3:361464 and 13:32889669, plus a "duplication" entry that the parser skips.
     */
    @Test
    public void testParse() throws Exception {
        VariantEffectParser instance = new VariantEffectParser(serializer);
        int numEffectsWritten = instance.parse(file);
        Assert.assertEquals(3, numEffectsWritten);
    }

}
AA_MAF=0.219162;EA_MAF=0.416744;DISTANCE=1341;STRAND=-1;CANONICAL=YES;SYMBOL=C1orf170;SYMBOL_SOURCE=HGNC;GMAF=G:0.2640;BIOTYPE=protein_coding;ENSP=ENSP00000414022;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39 +1_909238_G/C 1:909238 C ENSG00000187583 ENST00000491024 Transcript missense_variant 155 155 52 R/P cGt/cCt rs3829740 AA_MAF=0.219162;EA_MAF=0.416744;EXON=2/5;STRAND=1;SYMBOL=PLEKHN1;SYMBOL_SOURCE=HGNC;SIFT=tolerated(0.17);PolyPhen=benign(0);GMAF=G:0.2640;BIOTYPE=protein_coding;ENSP=ENSP00000462558;HGVSc=ENST00000491024.1:c.155G>C;HGVSp=ENSP00000462558.1:p.Arg52Pro;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39 +1_909238_G/C 1:909238 C ENSG00000187583 ENST00000379409 Transcript missense_variant 1646 1616 539 R/P cGt/cCt rs3829740 AA_MAF=0.219162;EA_MAF=0.416744;EXON=13/15;STRAND=1;SYMBOL=PLEKHN1;SYMBOL_SOURCE=HGNC;SIFT=tolerated(0.25);PolyPhen=benign(0);GMAF=G:0.2640;BIOTYPE=protein_coding;ENSP=ENSP00000368719;HGVSc=ENST00000379409.2:c.1616G>C;HGVSp=ENSP00000368719.2:p.Arg539Pro;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39 +1_909238_G/T 1:909238 T ENSG00000187583 ENST00000480267 Transcript downstream_gene_variant - - - - - rs3829740 AA_MAF=0.219162;EA_MAF=0.416744;DISTANCE=2335;STRAND=1;SYMBOL=PLEKHN1;SYMBOL_SOURCE=HGNC;GMAF=G:0.2640;BIOTYPE=retained_intron;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39 +1_909238_G/T 1:909238 T ENSG00000187583 ENST00000379407 Transcript missense_variant 1385 1355 452 R/P cGt/cTt rs3829740 AA_MAF=0.219162;EA_MAF=0.416744;EXON=13/15;STRAND=1;SYMBOL=PLEKHN1;SYMBOL_SOURCE=HGNC;SIFT=tolerated(0.23);PolyPhen=benign(0);GMAF=G:0.2640;BIOTYPE=protein_coding;ENSP=ENSP00000368717;CCDS=CCDS53256.1;HGVSc=ENST00000379407.3:c.1355G>T;HGVSp=ENSP00000368717.2:p.Arg452Pro;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39 +1_909238_G/T 1:909238 T ENSG00000187642 ENST00000479361 Transcript downstream_gene_variant - - - - - rs3829740 
AA_MAF=0.219162;EA_MAF=0.416744;DISTANCE=1347;STRAND=-1;SYMBOL=C1orf170;SYMBOL_SOURCE=HGNC;GMAF=G:0.2640;BIOTYPE=retained_intron;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39 +1_909238_G/T 1:909238 T ENSG00000187583 ENST00000379410 Transcript missense_variant 1495 1460 487 R/P cGt/cTt rs3829740 AA_MAF=0.219162;EA_MAF=0.416744;EXON=14/16;STRAND=1;CANONICAL=YES;SYMBOL=PLEKHN1;SYMBOL_SOURCE=HGNC;SIFT=tolerated(0.23);PolyPhen=benign(0);GMAF=G:0.2640;BIOTYPE=protein_coding;ENSP=ENSP00000368720;CCDS=CCDS4.1;HGVSc=ENST00000379410.3:c.1460G>T;HGVSp=ENSP00000368720.3:p.Arg487Pro;AFR_MAF=0.19;AMR_MAF=0.33;ASN_MAF=0.11;EUR_MAF=0.39 +3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000449294 Transcript frameshift_variant,feature_truncation 345 5 2 - - - EXON=3/5;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000390440;DOMAINS=Cleavage_site_(Signalp):Sigp;HGVSc=ENST00000449294.2:c.5delA;HGVSp=ENSP00000390440.2:p.Glu2GlyfsTer9 +3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000397491 Transcript frameshift_variant,feature_truncation 472 5 2 - - - EXON=3/27;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000380628;DOMAINS=Cleavage_site_(Signalp):Sigp;CCDS=CCDS58812.1;HGVSc=ENST00000397491.2:c.5delA;HGVSp=ENSP00000380628.2:p.Glu2GlyfsTer9 +3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000421198 Transcript frameshift_variant,feature_truncation 258 5 2 - - - EXON=3/5;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000413628;DOMAINS=Cleavage_site_(Signalp):Sigp;HGVSc=ENST00000421198.1:c.5delA;HGVSp=ENSP00000413628.1:p.Glu2GlyfsTer9 +3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000427688 Transcript frameshift_variant,feature_truncation 380 5 2 - - - EXON=2/3;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000403311;DOMAINS=Cleavage_site_(Signalp):Sigp;HGVSc=ENST00000427688.1:c.5delA;HGVSp=ENSP00000403311.1:p.Glu2GlyfsTer9 +3_361464_A/- 3:361463-361464 
- ENSG00000134121 ENST00000435603 Transcript frameshift_variant,feature_truncation 185 5 2 - - - EXON=2/6;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000397445;DOMAINS=Cleavage_site_(Signalp):Sigp;HGVSc=ENST00000435603.1:c.5delA;HGVSp=ENSP00000397445.1:p.Glu2GlyfsTer9 +3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000453040 Transcript 3_prime_UTR_variant,NMD_transcript_variant,feature_truncation 628 - - - - - EXON=3/25;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=nonsense_mediated_decay;ENSP=ENSP00000413109;HGVSc=ENST00000453040.1:c.*343delA +3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000256509 Transcript frameshift_variant,feature_truncation 647 5 2 - - - EXON=3/28;STRAND=1;CANONICAL=YES;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000256509;DOMAINS=Cleavage_site_(Signalp):Sigp;CCDS=CCDS2556.1;HGVSc=ENST00000256509.2:c.5delA;HGVSp=ENSP00000256509.2:p.Glu2GlyfsTer9 +3_361464_A/- 3:361463-361464 - ENSG00000134121 ENST00000461289 Transcript upstream_gene_variant - - - - - - DISTANCE=23;STRAND=1;SYMBOL=CHL1;SYMBOL_SOURCE=HGNC;BIOTYPE=processed_transcript +5_121187650_duplication 5:121187650 duplication ENSG00000181867 ENST00000321339 Transcript transcript_amplification 1-870 - - - - - EXON=1/1;STRAND=1;CANONICAL=YES;SYMBOL=FTMT;SYMBOL_SOURCE=HGNC;BIOTYPE=protein_coding;ENSP=ENSP00000313691;CCDS=CCDS4128.1 +13_32889669_C/T 13:32889669 T ENSG00000139618 ENST00000544455 Transcript 5_prime_UTR_variant 53 - - - - rs55880202 EXON=1/28;STRAND=1;CANONICAL=YES;SYMBOL=BRCA2;SYMBOL_SOURCE=HGNC;GMAF=T:0.0087;BIOTYPE=protein_coding;ENSP=ENSP00000439902;CCDS=CCDS9344.1;HGVSc=ENST00000544455.1:c.-175C>T;AFR_MAF=0.04;AMR_MAF=0.0028;ASN_MAF=0;EUR_MAF=0 +13_32889669_C/T 13:32889669 T ENSG00000139618 ENST00000530893 Transcript 5_prime_UTR_variant 28 - - - - rs55880202 
EXON=1/10;STRAND=1;SYMBOL=BRCA2;SYMBOL_SOURCE=HGNC;GMAF=T:0.0087;BIOTYPE=protein_coding;ENSP=ENSP00000435699;HGVSc=ENST00000530893.2:c.-540C>T;AFR_MAF=0.04;AMR_MAF=0.0028;ASN_MAF=0;EUR_MAF=0 +13_32889669_C/T 13:32889669 T ENSG00000189167 ENST00000345108 Transcript upstream_gene_variant - - - - - rs55880202 DISTANCE=3578;STRAND=-1;SYMBOL=ZAR1L;SYMBOL_SOURCE=HGNC;GMAF=T:0.0087;BIOTYPE=protein_coding;ENSP=ENSP00000344616;CCDS=CCDS45023.1;AFR_MAF=0.04;AMR_MAF=0.0028;ASN_MAF=0;EUR_MAF=0 +13_32889669_C/T 13:32889669 T ENSG00000139618 ENST00000380152 Transcript 5_prime_UTR_variant 59 - - - - rs55880202 EXON=1/27;STRAND=1;SYMBOL=BRCA2;SYMBOL_SOURCE=HGNC;GMAF=T:0.0087;BIOTYPE=protein_coding;ENSP=ENSP00000369497;CCDS=CCDS9344.1;HGVSc=ENST00000380152.3:c.-175C>T;AFR_MAF=0.04;AMR_MAF=0.0028;ASN_MAF=0;EUR_MAF=0 +13_32889669_C/T 13:32889669 T ENSG00000189167 ENST00000533490 Transcript upstream_gene_variant - - - - - rs55880202 DISTANCE=188;STRAND=-1;CANONICAL=YES;SYMBOL=ZAR1L;SYMBOL_SOURCE=HGNC;GMAF=T:0.0087;BIOTYPE=protein_coding;ENSP=ENSP00000437289;CCDS=CCDS45023.1;AFR_MAF=0.04;AMR_MAF=0.0028;ASN_MAF=0;EUR_MAF=0 +13_32889669_C/T 13:32889669 T - ENSR00000054736 RegulatoryFeature regulatory_region_variant - - - - - rs55880202 GMAF=T:0.0087;AFR_MAF=0.04;AMR_MAF=0.0028;ASN_MAF=0;EUR_MAF=0 diff --git a/pom.xml b/pom.xml index 05ece0d1cc..adc1d44b69 100644 --- a/pom.xml +++ b/pom.xml @@ -62,6 +62,11 @@ cellbase-mongodb ${cellbase.version} + + org.opencb.biodata + models + 0.1 + org.opencb.commons bioformats