diff --git a/pom.xml b/pom.xml index a0761b08..5ac99d43 100644 --- a/pom.xml +++ b/pom.xml @@ -37,11 +37,13 @@ 2.0.2 0.2.5 3.0.5 + 33.0.0-jre 1.7 - 2.8.0 - 3.8.0 - 1.24.0 - 1.4.200 + 2.15.1 + 3.9.0 + 1.26.0 + 2.2.220 + 10.8.1 @@ -127,7 +129,7 @@ com.google.guava guava - 31.1-jre + ${guava.version} com.google.errorprone diff --git a/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/ClingenDosageElementDao.java b/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/ClingenDosageElementDao.java index 3c465d1e..603468b4 100644 --- a/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/ClingenDosageElementDao.java +++ b/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/ClingenDosageElementDao.java @@ -51,7 +51,7 @@ public int insertItem(DosageRegion item) { try (Connection connection = dataSource.getConnection()) { connection.setAutoCommit(false); String sql = "insert into SVANNA.CLINGEN_DOSAGE_ELEMENT(" + - " CONTIG, START, END, " + + " CONTIG, START_POS, END_POS, " + " ID, DOSAGE_SENSITIVITY, DOSAGE_EVIDENCE) " + " VALUES ( ?, ?, ?, ?, ?, ? )"; try (PreparedStatement preparedStatement = connection.prepareStatement(sql)) { @@ -83,11 +83,11 @@ public int insertItem(DosageRegion item) { @Override public List getOverlapping(GenomicRegion query) { - String sql = "select CONTIG, START, END, ID, DOSAGE_SENSITIVITY, DOSAGE_EVIDENCE " + + String sql = "select CONTIG, START_POS, END_POS, ID, DOSAGE_SENSITIVITY, DOSAGE_EVIDENCE " + " from SVANNA.CLINGEN_DOSAGE_ELEMENT " + " where CONTIG = ? " + - " and ? < END " + - " and START < ?"; + " and ? < END_POS " + + " and START_POS < ?"; try (Connection connection = dataSource.getConnection(); PreparedStatement preparedStatement = connection.prepareStatement(sql)) { preparedStatement.setInt(1, query.contigId()); @@ -119,8 +119,8 @@ public List geneDosageDataForHgncIdAndRegion(String hgncId, GenomicRegio String sql = "select distinct ID, DOSAGE_SENSITIVITY, DOSAGE_EVIDENCE " + " from SVANNA.CLINGEN_DOSAGE_ELEMENT " + " where (CONTIG = ? " + - " and ? < END " + - " and START < ?) " + + " and ? < END_POS " + + " and START_POS < ?) " + " or ID = ?"; try (Connection connection = dataSource.getConnection(); PreparedStatement preparedStatement = connection.prepareStatement(sql)) { @@ -146,7 +146,7 @@ private List processDosageRegionStatement(PreparedStatement prepar continue; } Coordinates coordinates = Coordinates.of(CoordinateSystem.zeroBased(), // database invariant - rs.getInt("START"), rs.getInt("END")); + rs.getInt("START_POS"), rs.getInt("END_POS")); GenomicRegion location = GenomicRegion.of(contig, Strand.POSITIVE, coordinates); Dosage dosage = Dosage.of(rs.getString("ID"), DosageSensitivity.valueOf(rs.getString("DOSAGE_SENSITIVITY")), diff --git a/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/DbPopulationVariantDao.java b/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/DbPopulationVariantDao.java index ad3efd7c..fcaf3ec7 100644 --- a/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/DbPopulationVariantDao.java +++ b/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/DbPopulationVariantDao.java @@ -60,7 +60,7 @@ private List processStatement(PreparedStatement preparedState } regions.add( BasePopulationVariant.of( - GenomicRegion.of(contig, Strand.POSITIVE, CoordinateSystem.zeroBased(), rs.getInt("START"), rs.getInt("END")), + GenomicRegion.of(contig, Strand.POSITIVE, CoordinateSystem.zeroBased(), rs.getInt("START_POS"), rs.getInt("END_POS")), rs.getString("ID"), VariantType.valueOf(rs.getString("VARIANT_TYPE")), rs.getFloat("ALLELE_FREQUENCY"), origin)); } @@ -74,7 +74,7 @@ public int insertItem(PopulationVariant item) { try (Connection connection = dataSource.getConnection()) { connection.setAutoCommit(false); - String sql = "insert into SVANNA.POPULATION_VARIANTS(CONTIG, START, END, " + + String sql = "insert into SVANNA.POPULATION_VARIANTS(CONTIG, START_POS, END_POS, " + "ID, VARIANT_TYPE, ORIGIN, ALLELE_FREQUENCY) " + "VALUES ( ?, ?, ?, ?, ?, ?, ? )"; try (PreparedStatement preparedStatement = connection.prepareStatement(sql)) { @@ -111,11 +111,11 @@ public Set availableOrigins() { @Override public List getOverlapping(GenomicRegion query, Set origins) { - String sql = "select CONTIG, START, END, ID, VARIANT_TYPE, ORIGIN, ALLELE_FREQUENCY " + + String sql = "select CONTIG, START_POS, END_POS, ID, VARIANT_TYPE, ORIGIN, ALLELE_FREQUENCY " + " from SVANNA.POPULATION_VARIANTS " + " where CONTIG = ? " + - " and ? < END " + - " and START < ?"; + " and ? < END_POS " + + " and START_POS < ?"; try (Connection connection = dataSource.getConnection(); PreparedStatement preparedStatement = connection.prepareStatement(sql)) { preparedStatement.setInt(1, query.contigId()); diff --git a/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/EnhancerAnnotationDao.java b/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/EnhancerAnnotationDao.java index 2cd86ed7..e6dc76bf 100644 --- a/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/EnhancerAnnotationDao.java +++ b/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/EnhancerAnnotationDao.java @@ -61,7 +61,7 @@ public int insertItem(Enhancer enhancer) { try (Connection connection = dataSource.getConnection()) { connection.setAutoCommit(false); - String enhancerSql = "insert into SVANNA.ENHANCERS(CONTIG, START, END, " + + String enhancerSql = "insert into SVANNA.ENHANCERS(CONTIG, START_POS, END_POS, " + " ENHANCER_SOURCE, NAME, IS_DEVELOPMENTAL, TAU) " + " VALUES ( ?, ?, ?, ?, ?, ?, ? )"; String tissueSpecSql = "insert into SVANNA.ENHANCER_TISSUE_SPECIFICITY(ENHANCER_ID, " + @@ -111,7 +111,7 @@ public int insertItem(Enhancer enhancer) { } public List getAllItems() { - String sql = "select E.ENHANCER_ID, CONTIG, START, END, ENHANCER_SOURCE, NAME, IS_DEVELOPMENTAL, TAU, " + + String sql = "select E.ENHANCER_ID, CONTIG, START_POS, END_POS, ENHANCER_SOURCE, NAME, IS_DEVELOPMENTAL, TAU, " + " TERM_ID, TERM_LABEL, HPO_ID, HPO_LABEL, SPECIFICITY " + " from SVANNA.ENHANCERS E join SVANNA.ENHANCER_TISSUE_SPECIFICITY ETS on E.ENHANCER_ID = ETS.ENHANCER_ID"; try (Connection connection = dataSource.getConnection(); @@ -157,12 +157,12 @@ public List getOverlapping(GenomicRegion query) { try (Connection connection = dataSource.getConnection()) { if (!enhancerParameters.useFantom5()) { // just VISTA - String enhancerSql = "select E.ENHANCER_ID, CONTIG, START, END, ENHANCER_SOURCE, NAME, IS_DEVELOPMENTAL, TAU, " + + String enhancerSql = "select E.ENHANCER_ID, CONTIG, START_POS, END_POS, ENHANCER_SOURCE, NAME, IS_DEVELOPMENTAL, TAU, " + " TERM_ID, TERM_LABEL, HPO_ID, HPO_LABEL, SPECIFICITY " + " from SVANNA.ENHANCERS E join SVANNA.ENHANCER_TISSUE_SPECIFICITY ETS on E.ENHANCER_ID = ETS.ENHANCER_ID " + " where E.CONTIG = ? " + - " and ? < E.END " + - " and E.START < ? " + + " and ? < E.END_POS " + + " and E.START_POS < ? " + " and E.IS_DEVELOPMENTAL = true"; try (PreparedStatement ps = connection.prepareStatement(enhancerSql)) { ps.setInt(1, query.contigId()); @@ -172,12 +172,12 @@ public List getOverlapping(GenomicRegion query) { } } else { // FANTOM5 and maybe VISTA - String enhancerSql = "select E.ENHANCER_ID, CONTIG, START, END, ENHANCER_SOURCE, NAME, IS_DEVELOPMENTAL, TAU, " + + String enhancerSql = "select E.ENHANCER_ID, CONTIG, START_POS, END_POS, ENHANCER_SOURCE, NAME, IS_DEVELOPMENTAL, TAU, " + " TERM_ID, TERM_LABEL, HPO_ID, HPO_LABEL, SPECIFICITY " + " from SVANNA.ENHANCERS E join SVANNA.ENHANCER_TISSUE_SPECIFICITY ETS on E.ENHANCER_ID = ETS.ENHANCER_ID" + " where E.CONTIG = ? " + - " and ? < E.END " + - " and E.START < ? " + + " and ? < E.END_POS " + + " and E.START_POS < ? " + " and (E.IS_DEVELOPMENTAL = ? or (E.IS_DEVELOPMENTAL = false and ETS.SPECIFICITY > ?))"; try (PreparedStatement ps = connection.prepareStatement(enhancerSql)) { ps.setInt(1, query.contigId()); @@ -210,7 +210,7 @@ private List processEnhancers(PreparedStatement statement) throws SQLE if (!builders.containsKey(enhancerId)) { // database invariant - Coordinates coordinates = Coordinates.of(CoordinateSystem.zeroBased(), rs.getInt("START"), rs.getInt("END")); + Coordinates coordinates = Coordinates.of(CoordinateSystem.zeroBased(), rs.getInt("START_POS"), rs.getInt("END_POS")); GenomicRegion location = GenomicRegion.of(contig, Strand.POSITIVE, coordinates); BaseEnhancer.Builder builder = BaseEnhancer.builder() .location(location) diff --git a/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/RepetitiveRegionDao.java b/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/RepetitiveRegionDao.java index 6deca1b0..4b566747 100644 --- a/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/RepetitiveRegionDao.java +++ b/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/RepetitiveRegionDao.java @@ -30,7 +30,7 @@ public RepetitiveRegionDao(DataSource dataSource, GenomicAssembly genomicAssembl } public List getAllItems() { - String sql = "select CONTIG, START, END, REPEAT_FAMILY from SVANNA.REPETITIVE_REGIONS"; + String sql = "select CONTIG, START_POS, END_POS, REPEAT_FAMILY from SVANNA.REPETITIVE_REGIONS"; try (Connection connection = dataSource.getConnection(); PreparedStatement preparedStatement = connection.prepareStatement(sql)) { return processStatement(preparedStatement); @@ -42,11 +42,11 @@ public List getAllItems() { @Override public List getOverlapping(GenomicRegion query) { - String sql = "select CONTIG, START, END, REPEAT_FAMILY " + + String sql = "select CONTIG, START_POS, END_POS, REPEAT_FAMILY " + " from SVANNA.REPETITIVE_REGIONS " + " where CONTIG = ? " + - " and ? < END " + - " and START < ?"; + " and ? < END_POS " + + " and START_POS < ?"; try (Connection connection = dataSource.getConnection(); PreparedStatement preparedStatement = connection.prepareStatement(sql)) { preparedStatement.setInt(1, query.contigId()); @@ -70,7 +70,7 @@ private List processStatement(PreparedStatement preparedStatem } regions.add(RepetitiveRegion.of(contig, Strand.POSITIVE, CoordinateSystem.zeroBased(), // database invariant - rs.getInt("START"), rs.getInt("END"), + rs.getInt("START_POS"), rs.getInt("END_POS"), RepeatFamily.valueOf(rs.getString("REPEAT_FAMILY")))); } } @@ -83,7 +83,7 @@ public int insertItem(RepetitiveRegion item) { try (Connection connection = dataSource.getConnection()) { connection.setAutoCommit(false); - String sql = "insert into SVANNA.REPETITIVE_REGIONS(CONTIG, START, END, REPEAT_FAMILY) " + + String sql = "insert into SVANNA.REPETITIVE_REGIONS(CONTIG, START_POS, END_POS, REPEAT_FAMILY) " + "VALUES ( ?, ?, ?, ? )"; try (PreparedStatement preparedStatement = connection.prepareStatement(sql)) { preparedStatement.setInt(1, item.contigId()); diff --git a/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/TadBoundaryDao.java b/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/TadBoundaryDao.java index 8a3d11c5..cc4bce0b 100644 --- a/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/TadBoundaryDao.java +++ b/svanna-db/src/main/java/org/monarchinitiative/svanna/db/landscape/TadBoundaryDao.java @@ -53,7 +53,7 @@ public int insertItem(TadBoundary item) { try (Connection connection = dataSource.getConnection()) { connection.setAutoCommit(false); - String sql = "insert into SVANNA.TAD_BOUNDARY(CONTIG, START, END, MIDPOINT, ID, STABILITY) " + + String sql = "insert into SVANNA.TAD_BOUNDARY(CONTIG, START_POS, END_POS, MIDPOINT, ID, STABILITY) " + "VALUES ( ?, ?, ?, ?, ?, ?)"; try (PreparedStatement preparedStatement = connection.prepareStatement(sql)) { preparedStatement.setInt(1, item.contigId()); diff --git a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/enhancer_create_table.sql b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/enhancer_create_table.sql index b07b405a..0b82ccb1 100644 --- a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/enhancer_create_table.sql +++ b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/enhancer_create_table.sql @@ -5,8 +5,8 @@ create table SVANNA.ENHANCERS ( ENHANCER_ID INT auto_increment, CONTIG INT not null, - START INT not null, - END INT not null, + START_POS INT not null, + END_POS INT not null, ENHANCER_SOURCE VARCHAR(50) not null, NAME VARCHAR(255) not null, IS_DEVELOPMENTAL BOOL not null, @@ -14,7 +14,7 @@ create table SVANNA.ENHANCERS ); create index SVANNA.ENHANCERS__CONTIG_START_END_IDX - on SVANNA.ENHANCERS (CONTIG, START, END); + on SVANNA.ENHANCERS (CONTIG, START_POS, END_POS); drop table if exists SVANNA.ENHANCER_TISSUE_SPECIFICITY; diff --git a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/enhancer_insert_data.sql b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/enhancer_insert_data.sql index d60e0803..90b0cccb 100644 --- a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/enhancer_insert_data.sql +++ b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/enhancer_insert_data.sql @@ -1,6 +1,6 @@ truncate table SVANNA.ENHANCERS; -insert into SVANNA.ENHANCERS(ENHANCER_ID, CONTIG, START, END, ENHANCER_SOURCE, NAME, IS_DEVELOPMENTAL, TAU) +insert into SVANNA.ENHANCERS(ENHANCER_ID, CONTIG, START_POS, END_POS, ENHANCER_SOURCE, NAME, IS_DEVELOPMENTAL, TAU) values (1, 1, 10, 20, 'UNKNOWN', 'first', TRUE, .123), (2, 1, 30, 40, 'UNKNOWN', 'second', TRUE, .456); diff --git a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/population_variants_create_table.sql b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/population_variants_create_table.sql index 86c3940f..d4cc2237 100644 --- a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/population_variants_create_table.sql +++ b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/population_variants_create_table.sql @@ -4,8 +4,8 @@ drop table if exists SVANNA.POPULATION_VARIANTS; create table SVANNA.POPULATION_VARIANTS ( CONTIG INT not null, - START INT not null, - END INT not null, + START_POS INT not null, + END_POS INT not null, ID VARCHAR(200) not null, VARIANT_TYPE VARCHAR(20) not null, @@ -13,4 +13,4 @@ create table SVANNA.POPULATION_VARIANTS ALLELE_FREQUENCY FLOAT not null ); create index SVANNA.POPULATION_VARIANTS__CONTIG_START_ON_POS_END_ON_POS_IDX - on SVANNA.POPULATION_VARIANTS (CONTIG, START, END); + on SVANNA.POPULATION_VARIANTS (CONTIG, START_POS, END_POS); diff --git a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/population_variants_insert_data.sql b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/population_variants_insert_data.sql index 941d0b47..9a46849c 100644 --- a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/population_variants_insert_data.sql +++ b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/population_variants_insert_data.sql @@ -1,4 +1,4 @@ -insert into SVANNA.POPULATION_VARIANTS(CONTIG, START, END, ID, VARIANT_TYPE, ORIGIN, ALLELE_FREQUENCY) +insert into SVANNA.POPULATION_VARIANTS(CONTIG, START_POS, END_POS, ID, VARIANT_TYPE, ORIGIN, ALLELE_FREQUENCY) VALUES ( 1, 10, 10, 'abc', 'INS_ME_LINE1', 'DGV', 22.1), ( 1, 30, 30, 'def', 'INS', 'GNOMAD_SV', 23.4), diff --git a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/repetitive_regions_create_table.sql b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/repetitive_regions_create_table.sql index 8690103d..1bd74521 100644 --- a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/repetitive_regions_create_table.sql +++ b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/repetitive_regions_create_table.sql @@ -5,9 +5,9 @@ create table SVANNA.REPETITIVE_REGIONS ( ENHANCER_ID INT auto_increment, CONTIG INT not null, - START INT not null, - END INT not null, + START_POS INT not null, + END_POS INT not null, REPEAT_FAMILY VARCHAR(50) not null ); create index SVANNA.REPETITIVE_REGIONS__CONTIG_START_ON_POS_END_ON_POS_IDX - on SVANNA.REPETITIVE_REGIONS (CONTIG, START, END); + on SVANNA.REPETITIVE_REGIONS (CONTIG, START_POS, END_POS); diff --git a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/repetitive_regions_insert_data.sql b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/repetitive_regions_insert_data.sql index 8fd740dd..87c5716d 100644 --- a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/repetitive_regions_insert_data.sql +++ b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/repetitive_regions_insert_data.sql @@ -1,4 +1,4 @@ -insert into SVANNA.REPETITIVE_REGIONS(CONTIG, START, END, REPEAT_FAMILY) +insert into SVANNA.REPETITIVE_REGIONS(CONTIG, START_POS, END_POS, REPEAT_FAMILY) VALUES (1, 20, 30, 'DNA_hAT_Blackjack'), (1, 30, 40, 'SINE_tRNA_Deu'), (2, 30, 40, 'RNA_srpRNA'); \ No newline at end of file diff --git a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/tad_boundary_create_table.sql b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/tad_boundary_create_table.sql index 5689d2f7..7376bb4c 100644 --- a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/tad_boundary_create_table.sql +++ b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/tad_boundary_create_table.sql @@ -3,13 +3,13 @@ drop table if exists SVANNA.TAD_BOUNDARY; create table SVANNA.TAD_BOUNDARY ( CONTIG INT not null, - START INT not null, -- zero-based start on POSITIVE strand - END INT not null, -- zero-based end on POSITIVE strand + START_POS INT not null, -- zero-based start on POSITIVE strand + END_POS INT not null, -- zero-based end on POSITIVE strand MIDPOINT INT not null, ID VARCHAR(200) not null, STABILITY FLOAT not null ); create index SVANNA.TAD_BOUNDARY__CONTIG_START_END_IDX - on SVANNA.TAD_BOUNDARY (CONTIG, START, END); + on SVANNA.TAD_BOUNDARY (CONTIG, START_POS, END_POS); create index SVANNA.TAD_BOUNDARY__CONTIG_MIDPOINT_IDX on SVANNA.TAD_BOUNDARY (CONTIG, MIDPOINT); diff --git a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/tad_boundary_insert_data.sql b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/tad_boundary_insert_data.sql index 7492faba..cd632dce 100644 --- a/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/tad_boundary_insert_data.sql +++ b/svanna-db/src/test/resources/org/monarchinitiative/svanna/db/landscape/tad_boundary_insert_data.sql @@ -1,4 +1,4 @@ -insert into SVANNA.TAD_BOUNDARY(CONTIG, START, END, MIDPOINT, ID, STABILITY) +insert into SVANNA.TAD_BOUNDARY(CONTIG, START_POS, END_POS, MIDPOINT, ID, STABILITY) values ( 1, 20, 40, 30, 'one', .8), ( 1, 30, 50, 40, 'two', .9), diff --git a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java index eeb86f7f..fa09576c 100644 --- a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java +++ b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/cmd/BuildDb.java @@ -449,7 +449,9 @@ private static void ingestTads(TadProperties properties, GenomicAssembly assembl URL mcArthurSupplement = new URL(properties.mcArthur2021Supplement()); Path localPath = downloadUrl(mcArthurSupplement, tmpDir); - try (ZipFile zipFile = new ZipFile(localPath.toFile())) { + try (ZipFile zipFile = ZipFile.builder() + .setFile(localPath.toFile()) + .get()) { // this is the single file from the entire ZIP that we're interested in String entryName = "emcarthur-TAD-stability-heritability-184f51a/data/boundariesByStability/100kbBookendBoundaries_mainText/100kbBookendBoundaries_byStability.bed"; ZipArchiveEntry entry = zipFile.getEntry(entryName); @@ -683,7 +685,7 @@ public Integer call() throws Exception { DigestUtils digest = new DigestUtils(MessageDigestAlgorithms.SHA_256); for (File resource : resources) { - if (LOGGER.isDebugEnabled()) LOGGER.debug("Calculating SHA256 digest for `{}`", resource); + LOGGER.debug("Calculating SHA256 digest for `{}`", resource); String hexDigest = digest.digestAsHex(resource); fileToDigest.put(resource, hexDigest); } diff --git a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/io/ZipCompressionWrapper.java b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/io/ZipCompressionWrapper.java index 1baf07c0..d2488e8f 100644 --- a/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/io/ZipCompressionWrapper.java +++ b/svanna-ingest/src/main/java/org/monarchinitiative/svanna/ingest/io/ZipCompressionWrapper.java @@ -1,8 +1,8 @@ package org.monarchinitiative.svanna.ingest.io; -import org.apache.commons.compress.archivers.ArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; -import org.apache.commons.compress.utils.IOUtils; +import org.apache.commons.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,7 +36,7 @@ public void addResource(File file, String name) throws IOException { return; } - ArchiveEntry entry = archive.createArchiveEntry(file, name); + ZipArchiveEntry entry = archive.createArchiveEntry(file, name); archive.putArchiveEntry(entry); if (attributes.isRegularFile()) { try (InputStream is = Files.newInputStream(file.toPath())) { diff --git a/svanna-ingest/src/main/resources/db/migration/V1.0.0__create_schema.sql b/svanna-ingest/src/main/resources/db/migration/V1.0.0__create_schema.sql index 48f3b994..2584f16e 100644 --- a/svanna-ingest/src/main/resources/db/migration/V1.0.0__create_schema.sql +++ b/svanna-ingest/src/main/resources/db/migration/V1.0.0__create_schema.sql @@ -3,10 +3,10 @@ create schema if not exists SVANNA; drop table if exists SVANNA.ENHANCERS; create table SVANNA.ENHANCERS ( - ENHANCER_ID INT auto_increment, + ENHANCER_ID INT GENERATED ALWAYS AS IDENTITY, CONTIG INT not null, - START INT not null, -- zero-based start on POSITIVE strand - END INT not null, -- zero-based end on POSITIVE strand + START_POS INT not null, -- zero-based start on POSITIVE strand + END_POS INT not null, -- zero-based end on POSITIVE strand ENHANCER_SOURCE VARCHAR(50) not null, NAME VARCHAR(255) not null, IS_DEVELOPMENTAL BOOL not null, @@ -14,7 +14,7 @@ create table SVANNA.ENHANCERS ); create index SVANNA.ENHANCERS__CONTIG_START_END_IDX - on SVANNA.ENHANCERS (CONTIG, START, END); + on SVANNA.ENHANCERS (CONTIG, START_POS, END_POS); drop table if exists SVANNA.ENHANCER_TISSUE_SPECIFICITY; @@ -36,21 +36,21 @@ drop table if exists SVANNA.REPETITIVE_REGIONS; create table SVANNA.REPETITIVE_REGIONS ( CONTIG INT not null, - START INT not null, -- zero-based start on POSITIVE strand - END INT not null, -- zero-based end on POSITIVE strand + START_POS INT not null, -- zero-based start on POSITIVE strand + END_POS INT not null, -- zero-based end on POSITIVE strand REPEAT_FAMILY VARCHAR(50) not null ); create index SVANNA.REPETITIVE_REGIONS__CONTIG_START_END_IDX - on SVANNA.REPETITIVE_REGIONS (CONTIG, START, END); + on SVANNA.REPETITIVE_REGIONS (CONTIG, START_POS, END_POS); ---------------------------------- POPULATION VARIANTS ----------------------------------------------------------------- drop table if exists SVANNA.POPULATION_VARIANTS; create table SVANNA.POPULATION_VARIANTS ( CONTIG INT not null, - START INT not null, -- zero-based start on POSITIVE strand - END INT not null, -- zero-based end on POSITIVE strand + START_POS INT not null, -- zero-based start on POSITIVE strand + END_POS INT not null, -- zero-based end on POSITIVE strand ID VARCHAR(200) not null, VARIANT_TYPE VARCHAR(20) not null, @@ -59,22 +59,22 @@ create table SVANNA.POPULATION_VARIANTS ); create index SVANNA.POPULATION_VARIANTS__CONTIG_START_END_IDX - on SVANNA.POPULATION_VARIANTS (CONTIG, START, END); + on SVANNA.POPULATION_VARIANTS (CONTIG, START_POS, END_POS); ---------------------------------- TAD BOUNDARY ------------------------------------------------------------------------ drop table if exists SVANNA.TAD_BOUNDARY; create table SVANNA.TAD_BOUNDARY ( CONTIG INT not null, - START INT not null, -- zero-based start on POSITIVE strand - END INT not null, -- zero-based end on POSITIVE strand + START_POS INT not null, -- zero-based start on POSITIVE strand + END_POS INT not null, -- zero-based end on POSITIVE strand MIDPOINT INT not null, ID VARCHAR(200) not null, STABILITY FLOAT not null ); create index SVANNA.TAD_BOUNDARY__CONTIG_START_END_IDX - on SVANNA.TAD_BOUNDARY (CONTIG, START, END); + on SVANNA.TAD_BOUNDARY (CONTIG, START_POS, END_POS); create index SVANNA.TAD_BOUNDARY__CONTIG_MIDPOINT_IDX on SVANNA.TAD_BOUNDARY (CONTIG, MIDPOINT); @@ -88,22 +88,24 @@ create table SVANNA.HP_TERM_MICA IC_MICA FLOAT not null -- information content of the most common informative ancestor ); drop index if exists SVANNA.HP_TERM_MICA__LEFT_VALUE_RIGHT_VALUE_IDX; -create unique index SVANNA.HP_TERM_MICA__LEFT_VALUE_RIGHT_VALUE_IDX on SVANNA.HP_TERM_MICA (LEFT_VALUE, RIGHT_VALUE); +create unique index SVANNA.HP_TERM_MICA__LEFT_VALUE_RIGHT_VALUE_IDX + on SVANNA.HP_TERM_MICA (LEFT_VALUE, RIGHT_VALUE); ---------------------------------- CLINGEN DOSAGE ELEMENT -------------------------------------------------------------- drop table if exists SVANNA.CLINGEN_DOSAGE_ELEMENT; create table SVANNA.CLINGEN_DOSAGE_ELEMENT ( CONTIG INT not null, - START INT not null, -- zero-based start on POSITIVE strand - END INT not null, -- zero-based end on POSITIVE strand + START_POS INT not null, -- zero-based start on POSITIVE strand + END_POS INT not null, -- zero-based end on POSITIVE strand ID VARCHAR(200) not null, -- HGNC ID or other ID if available DOSAGE_SENSITIVITY VARCHAR(20) not null, DOSAGE_EVIDENCE VARCHAR(20) not null ); drop index if exists SVANNA.CLINGEN_DOSAGE_ELEMENT__CONTIG_START_END_IDX; -create index SVANNA.CLINGEN_DOSAGE_ELEMENT__CONTIG_START_END_IDX on SVANNA.CLINGEN_DOSAGE_ELEMENT (CONTIG, START, END); +create index SVANNA.CLINGEN_DOSAGE_ELEMENT__CONTIG_START_END_IDX + on SVANNA.CLINGEN_DOSAGE_ELEMENT (CONTIG, START_POS, END_POS); -- TODO - we should update DA layer to use numeric IDs, where available drop index if exists SVANNA.CLINGEN_DOSAGE_ELEMENT__ID;