Skip to content

Commit

Permalink
Added FMI LUT on GPU side + Query requests packed & aligned to 8 base…
Browse files Browse the repository at this point in the history
…s + I/O bugfixes + Added GCC native flags
  • Loading branch information
achacond committed Jul 7, 2016
1 parent c0c6fc2 commit e0748db
Show file tree
Hide file tree
Showing 74 changed files with 1,025 additions and 159 deletions.
2 changes: 1 addition & 1 deletion Makefile.mk.in
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ ifeq ($(CC),gcc)
AR=gcc-ar
OPT_AR=-fuse-linker-plugin
endif
FLAGS_OPT=-Ofast -msse4.2 $(OPT_LTO)
FLAGS_OPT=-Ofast -march=native $(OPT_LTO)
FLAGS_LINK=$(OPT_AR)
endif

Expand Down
Empty file modified resources/gpu_modules/AUTHORS
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/LICENSE
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion resources/gpu_modules/Makefile
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ CUDA_SRCS=$(addprefix $(FOLDER_SOURCE)/, $(addsuffix .cu, $(CUDA_MODULES)))
CUDA_OBJS=$(addprefix $(FOLDER_BUILD)/, $(addsuffix .o, $(CUDA_MODULES)))

BASICS=gpu_commons gpu_buffer gpu_errors gpu_io gpu_sample gpu_module gpu_devices gpu_index gpu_reference
FMI_MODULES=gpu_fmi_index gpu_fmi_primitives gpu_fmi_primitives_decode gpu_fmi_primitives_ssearch gpu_fmi_primitives_asearch
FMI_MODULES=gpu_fmi_index gpu_fmi_table gpu_fmi_primitives gpu_fmi_primitives_decode gpu_fmi_primitives_ssearch gpu_fmi_primitives_asearch
SA_MODULES=gpu_sa_index gpu_sa_primitives
BPM_MODULES=gpu_bpm_primitives
MODULES= $(FMI_MODULES) $(SA_MODULES) $(BPM_MODULES) $(BASICS)
Expand Down
Empty file modified resources/gpu_modules/gpu_filter_interface.h
100644 → 100755
Empty file.
17 changes: 12 additions & 5 deletions resources/gpu_modules/gpu_index_interface.h
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -100,18 +100,25 @@ typedef struct{
} gpu_fmi_decode_end_pos_t;

/* Descriptor of an FM-index: per-symbol counts, counters, BWT memory and its
 * length, plus the FMI table parameters and the index coding. */
typedef struct {
uint64_t* c; // Occurrences of each character
uint64_t* C; // The cumulative occurrences ("ranks") of the symbols of the string
uint64_t* mayor_counters; // Pointer to the Mayor Counters (Rank)
uint64_t* bwt_mem; // Pointer to the BWT structure in memory
uint64_t bwt_length; // Length of the BWT
uint64_t* c; // Occurrences of each character
uint64_t* C; // The cumulative occurrences ("ranks") of the symbols of the string
uint64_t* mayor_counters; // Pointer to the Mayor Counters (Rank)
uint64_t* bwt_mem; // Pointer to the BWT structure in memory
uint64_t bwt_length; // Length of the BWT
uint64_t num_levels_fmi_table; // NOTE(review): presumably the number of levels of the FMI lookup table - confirm
uint64_t skip_levels_fmi_table; // NOTE(review): presumably the number of initial table levels to skip - confirm
gpu_index_coding_t index_coding; // Coding of the index (see gpu_index_coding_t)
} gpu_gem_fmi_dto_t;

/* Descriptor of an FM-index together with its lookup-table data.
 * NOTE(review): the "h_" prefix presumably marks host-side pointers - confirm. */
typedef struct {
char *h_plain; // NOTE(review): presumably the plain (unencoded) text - confirm
gpu_fmi_entry_t *h_fmi; // FM-index entries
uint64_t bwtSize; // Size of the BWT
uint32_t *h_offsetsTable; // Offsets of the table (NOTE(review): layout not visible here - confirm)
gpu_sa_entry_t *h_table; // Table entries, stored as gpu_sa_entry_t
uint32_t numLevelsTable; // Number of levels of the table
uint32_t skipLevelsTable; // NOTE(review): presumably the number of levels skipped - confirm
uint32_t numElementsTable; // Number of elements of the table
gpu_index_coding_t indexCoding; // Coding of the index (see gpu_index_coding_t)
} gpu_fmi_dto_t;

Expand Down
8 changes: 5 additions & 3 deletions resources/gpu_modules/gpu_interface.h
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,16 @@ typedef enum
/* GPU stages */
GPU_SEEDING = GPU_INDEX,
GPU_FILTERING = GPU_REFERENCE,

/* General setups */
GPU_NONE_MODULES = 0,
GPU_ALL_MODULES = GPU_SEEDING | GPU_FILTERING
} gpu_module_t;

typedef enum
{
/* Non-supported GPU architectures */
GPU_ARCH_TESLA = GPU_UINT32_ONE_MASK << 0,
/* Supported GPU architectures */
GPU_ARCH_FERMI_1G = GPU_UINT32_ONE_MASK << 1,
GPU_ARCH_FERMI_2G = GPU_UINT32_ONE_MASK << 2,
GPU_ARCH_KEPLER_1G = GPU_UINT32_ONE_MASK << 3,
Expand All @@ -64,12 +66,12 @@ typedef enum
GPU_ARCH_MAXWELL_2G = GPU_UINT32_ONE_MASK << 6,
GPU_ARCH_PASCAL_1G = GPU_UINT32_ONE_MASK << 7,
GPU_ARCH_PASCAL_2G = GPU_UINT32_ONE_MASK << 8,

/* Main GPU Architectures */
GPU_ARCH_FERMI = GPU_ARCH_FERMI_1G | GPU_ARCH_FERMI_2G,
GPU_ARCH_KEPLER = GPU_ARCH_KEPLER_1G | GPU_ARCH_KEPLER_2G,
GPU_ARCH_MAXWELL = GPU_ARCH_MAXWELL_1G | GPU_ARCH_MAXWELL_2G,
GPU_ARCH_PASCAL = GPU_ARCH_PASCAL_1G | GPU_ARCH_PASCAL_2G,

/* General setups */
GPU_ARCH_NEWGEN = GPU_UINT32_ONE_MASK << 31,
GPU_ARCH_SUPPORTED = GPU_ARCH_FERMI | GPU_ARCH_KEPLER | GPU_ARCH_MAXWELL | GPU_ARCH_NEWGEN
} gpu_dev_arch_t;
Expand Down
Empty file modified resources/gpu_modules/include/gpu_bpm_core.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/include/gpu_bpm_primitives.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/include/gpu_buffer.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/include/gpu_buffer_modules.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/include/gpu_commons.h
100644 → 100755
Empty file.
18 changes: 12 additions & 6 deletions resources/gpu_modules/include/gpu_devices.h
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,19 @@ typedef enum
/* Allocation state of a memory region. The explicitly valued constants are
 * built from GPU_UINT32_ONE_MASK shifts and OR-combinations of each other. */
typedef enum
{
/* Types of page-lock allocations */
GPU_PAGE_LOCKED_PORTABLE,
GPU_PAGE_LOCKED_MAPPED,
GPU_PAGE_LOCKED_WRITECOMBINED,
GPU_PAGE_LOCKED_PORTABLE = GPU_UINT32_ONE_MASK << 0,
GPU_PAGE_LOCKED_MAPPED = GPU_UINT32_ONE_MASK << 1,
GPU_PAGE_LOCKED_WRITECOMBINED = GPU_UINT32_ONE_MASK << 2,
/* Types of host allocations */
GPU_PAGE_LOCKED = GPU_PAGE_LOCKED_PORTABLE | GPU_PAGE_LOCKED_MAPPED | GPU_PAGE_LOCKED_WRITECOMBINED,
GPU_PAGE_UNLOCKED,
GPU_NONE_ALLOCATED
GPU_PAGE_UNLOCKED = GPU_UINT32_ONE_MASK << 3,
GPU_PAGE_LOCKED = GPU_PAGE_LOCKED_PORTABLE | GPU_PAGE_LOCKED_MAPPED | GPU_PAGE_LOCKED_WRITECOMBINED,
/* Types for non-allocated pages */
GPU_PAGE_UNALLOCATED = 0,
/* Types for assigned pages (contains data) */
GPU_PAGE_ASSIGNED = GPU_UINT32_ONE_MASK << 31,
/* Combined states: assigned pages that are unlocked or page-locked */
GPU_PAGE_ASSIGNED_AND_UNLOCKED = GPU_PAGE_ASSIGNED | GPU_PAGE_UNLOCKED,
GPU_PAGE_ASSIGNED_AND_LOCKED = GPU_PAGE_ASSIGNED | GPU_PAGE_LOCKED
} memory_stats_t;

typedef struct {
Expand Down
1 change: 1 addition & 0 deletions resources/gpu_modules/include/gpu_errors.h
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ typedef enum
E_DATA_NOT_ALLOCATED,
E_NOT_SUPPORTED_ALLOC_POLICY,
E_OVERFLOWING_BUFFER,
E_FMI_TABLE_INCOMPATIBLE_SIZE,
E_NOT_IMPLEMENTED
} gpu_error_t;

Expand Down
Empty file modified resources/gpu_modules/include/gpu_fmi_core.h
100644 → 100755
Empty file.
3 changes: 3 additions & 0 deletions resources/gpu_modules/include/gpu_fmi_index.h
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,14 @@
#include "gpu_commons.h"
#include "gpu_devices.h"
#include "gpu_fmi_structure.h"
#include "gpu_fmi_table.h"

/*****************************
Internal Objects (General)
*****************************/

typedef struct {
gpu_fmi_table_t table;
uint64_t bwtSize;
uint64_t numEntries;
gpu_fmi_entry_t *h_fmi;
Expand All @@ -42,6 +44,7 @@ gpu_error_t gpu_fmi_index_transfer_CPU_to_GPUs(gpu_fmi_buffer_t* const fmi, gpu_
/* Stream index functions */
gpu_error_t gpu_fmi_index_read_specs(FILE* fp, gpu_fmi_buffer_t* const fmi);
gpu_error_t gpu_fmi_index_read(FILE* fp, gpu_fmi_buffer_t* const fmi);
gpu_error_t gpu_fmi_index_write_specs(FILE* fp, const gpu_fmi_buffer_t* const fmi);
gpu_error_t gpu_fmi_index_write(FILE* fp, const gpu_fmi_buffer_t* const fmi);

/* Data transform functions */
Expand Down
Empty file modified resources/gpu_modules/include/gpu_fmi_primitives.h
100644 → 100755
Empty file.
4 changes: 3 additions & 1 deletion resources/gpu_modules/include/gpu_fmi_primitives_asearch.h
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ Common constants for Device & Host
*********************************/

/* Defines related to FMI adaptive BACKWARD-SEARCH primitives */
#define GPU_FMI_ENTRIES_PER_QUERY 2
#define GPU_FMI_ENTRIES_PER_QUERY 2
#define GPU_FMI_BASE_QUERY_LENGTH 8 //Bits per base query
#define GPU_FMI_BASES_PER_QUERY_ENTRY (GPU_UINT64_LENGTH / GPU_FMI_BASE_QUERY_LENGTH) //Bases per internal entry

#define GPU_FMI_THREADS_PER_QUERY (GPU_FMI_THREADS_PER_ENTRY * GPU_FMI_ENTRIES_PER_SEED)
#define GPU_FMI_D 2
Expand Down
Empty file modified resources/gpu_modules/include/gpu_fmi_primitives_decode.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/include/gpu_fmi_primitives_ssearch.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/include/gpu_fmi_structure.h
100644 → 100755
Empty file.
64 changes: 64 additions & 0 deletions resources/gpu_modules/include/gpu_fmi_table.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* GEM-Cutter "Highly optimized genomic resources for GPUs"
* Copyright (c) 2013-2016 by Alejandro Chacon <alejandro.chacond@gmail.com>
*
* Licensed under GNU General Public License 3.0 or later.
* Some rights reserved. See LICENSE, AUTHORS.
* @license GPL-3.0+ <http://www.gnu.org/licenses/gpl-3.0.en.html>
*/

#ifndef GPU_FMI_TABLE_H_
#define GPU_FMI_TABLE_H_

#include "gpu_commons.h"
#include "gpu_devices.h"

/* Sizing constants for the FMI table */
/* Number of symbols in the table alphabet */
#define GPU_FMI_TABLE_ALPHABET_SIZE 4
/* NOTE(review): presumably the lower bound on the number of table elements - confirm */
#define GPU_FMI_TABLE_MIN_ELEMENTS 2
/* NOTE(review): presumably the lower bound on the number of table levels - confirm */
#define GPU_FMI_TABLE_MIN_LEVELS 1
/* Default number of table levels */
#define GPU_FMI_TABLE_DEFAULT_LEVELS 11
/* Default number of skipped table levels */
#define GPU_FMI_TABLE_DEFAULT_SKIP_LEVELS 0

/* One entry of the offsets table: a pair of 32-bit offsets.
 * NOTE(review): presumably the start ("init") and end ("top") position of one
 * level inside the table - confirm against gpu_fmi_table_init_offsets. */
typedef struct{
uint32_t init;
uint32_t top;
} offset_table_t;

/* FMI lookup table (LUT): its dimensions, its offsets and entries, and the
 * allocation bookkeeping.
 * NOTE(review): "h_" / "d_" presumably mark host / device pointers, and the
 * double pointers presumably hold one pointer per device - confirm. */
typedef struct{
uint32_t maxLevelsTableLUT; // Maximum number of levels of the LUT
uint32_t skipLevelsTableLUT; // NOTE(review): presumably the number of levels skipped - confirm
uint32_t totalElemTableLUT; // Total number of elements of the LUT
offset_table_t* h_offsetsTableLUT; // Offsets table
offset_table_t** d_offsetsTableLUT; // Offsets table, one level of indirection more than h_offsetsTableLUT
gpu_sa_entry_t* h_fmiTableLUT; // LUT entries, stored as gpu_sa_entry_t
gpu_sa_entry_t** d_fmiTableLUT; // LUT entries, one level of indirection more than h_fmiTableLUT
memory_stats_t hostAllocStats; // Allocation state (see memory_stats_t)
memory_alloc_t* memorySpace; // NOTE(review): meaning not visible in this header - confirm
} gpu_fmi_table_t;

/* High level functions */
gpu_error_t gpu_fmi_table_get_num_elements(const uint32_t numLevels, uint32_t* const totalElemTableLUT);
gpu_error_t gpu_fmi_table_init_dto(gpu_fmi_table_t* const fmiTable);
gpu_error_t gpu_fmi_table_init(gpu_fmi_table_t* const fmiTable, const uint32_t numLevels, const uint32_t numSupportedDevices);
gpu_error_t gpu_fmi_table_allocate(gpu_fmi_table_t* const fmiTable);
gpu_error_t gpu_fmi_table_read_specs(FILE* fp, gpu_fmi_table_t* const fmiTable);
gpu_error_t gpu_fmi_table_load_default_specs(gpu_fmi_table_t* const fmiTable);
gpu_error_t gpu_fmi_table_read(FILE* fp, gpu_fmi_table_t* const fmiTable);
gpu_error_t gpu_fmi_table_write_specs(FILE* fp, const gpu_fmi_table_t* const fmiTable);
gpu_error_t gpu_fmi_table_write(FILE* fp, const gpu_fmi_table_t* const fmiTable);
gpu_error_t gpu_fmi_table_transfer_CPU_to_GPUs(gpu_fmi_table_t* const fmiTable, gpu_device_info_t** const devices);
gpu_error_t gpu_fmi_table_build(gpu_fmi_table_t* const fmiTable, const gpu_fmi_entry_t* const h_fmi, const uint64_t bwtSize);
gpu_error_t gpu_fmi_table_get_size(const gpu_fmi_table_t* const fmiTable, size_t* const bytesPerFmiTable);
gpu_error_t gpu_fmi_table_free_host(gpu_fmi_table_t* const fmiTable);
gpu_error_t gpu_fmi_table_free_unused_host(gpu_fmi_table_t* const fmiTable, gpu_device_info_t** const devices);
gpu_error_t gpu_fmi_table_free_device(gpu_fmi_table_t* const fmiTable, gpu_device_info_t** const devices);
gpu_error_t gpu_fmi_table_free_metainfo(gpu_fmi_table_t* const fmiTable);

/* Low level functions */
gpu_error_t gpu_fmi_table_init_offsets(offset_table_t* const offsetsTableLUT, const uint32_t numLevels);
void gpu_fmi_table_process_entry(const gpu_fmi_entry_t* const h_fmi, gpu_sa_entry_t* const currentIntervals, const gpu_sa_entry_t L, const gpu_sa_entry_t R);
gpu_error_t gpu_fmi_table_process_forward_level(const gpu_fmi_entry_t* const h_fmi, const uint32_t idLevel, offset_table_t* const offsetsTableLUT, gpu_sa_entry_t* const fmiTableLUT);
gpu_error_t gpu_fmi_table_process_backward_level(const gpu_fmi_entry_t* const h_fmi, const uint32_t idLevel, offset_table_t* const offsetsTableLUT, gpu_sa_entry_t* const fmiTableLUT);


#endif /* GPU_FMI_TABLE_H_ */
Empty file modified resources/gpu_modules/include/gpu_index.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/include/gpu_index_modules.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/include/gpu_io.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/include/gpu_module.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/include/gpu_reference.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/include/gpu_resources.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/include/gpu_sa_core.h
100644 → 100755
Empty file.
1 change: 1 addition & 0 deletions resources/gpu_modules/include/gpu_sa_index.h
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ gpu_error_t gpu_sa_index_transfer_CPU_to_GPUs(gpu_sa_buffer_t* const sa, gpu_dev
/* Stream index functions */
gpu_error_t gpu_sa_index_read_specs(FILE* fp, gpu_sa_buffer_t* const sa);
gpu_error_t gpu_sa_index_read(FILE* fp, gpu_sa_buffer_t* const sa);
gpu_error_t gpu_sa_index_write_specs(FILE* fp, const gpu_sa_buffer_t* const sa);
gpu_error_t gpu_sa_index_write(FILE* fp, const gpu_sa_buffer_t* const sa);

/* Data load functions */
Expand Down
Empty file modified resources/gpu_modules/include/gpu_sa_primitives.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/include/gpu_sample.h
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/src/gpu_bpm_filter.cu
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/src/gpu_bpm_primitives.c
100644 → 100755
Empty file.
11 changes: 7 additions & 4 deletions resources/gpu_modules/src/gpu_buffer.c
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,15 @@ gpu_error_t gpu_buffer_get_min_memory_size(size_t *bytesPerBuffer)
{
/* Writes to (*bytesPerBuffer) the minimum buffer size and returns SUCCESS.
 * The size is the largest footprint among the BPM, decode, ssearch and
 * asearch minimum workloads, estimated from the average figures below. */
const uint32_t averarageNumPEQEntries = 1;
const uint32_t candidatesPerQuery = 1;
const uint32_t averageQuerySize = 100;
const uint32_t averageRegionsPerQuery = 10;

const size_t bytesPerBPMBuffer = GPU_BPM_MIN_ELEMENTS * gpu_bpm_size_per_candidate(averarageNumPEQEntries,candidatesPerQuery);
const size_t bytesPerSearchBuffer = GPU_FMI_SEARCH_MIN_ELEMENTS * gpu_fmi_ssearch_input_size();
const size_t bytesPerDecodeBuffer = GPU_FMI_DECODE_MIN_ELEMENTS * gpu_fmi_decode_input_size();
const size_t bytesPerBPMBuffer = GPU_BPM_MIN_ELEMENTS * gpu_bpm_size_per_candidate(averarageNumPEQEntries,candidatesPerQuery);
/* Each local is named after the primitive whose size function it holds:
 * asearch <- gpu_fmi_asearch_size_per_query, ssearch <- gpu_fmi_ssearch_input_size. */
const size_t bytesPerASearchBuffer = GPU_FMI_SEARCH_MIN_ELEMENTS * gpu_fmi_asearch_size_per_query(averageQuerySize, averageRegionsPerQuery);
const size_t bytesPerSSearchBuffer = GPU_FMI_SEARCH_MIN_ELEMENTS * gpu_fmi_ssearch_input_size();
const size_t bytesPerDecodeBuffer = GPU_FMI_DECODE_MIN_ELEMENTS * gpu_fmi_decode_input_size();

(* bytesPerBuffer) = GPU_MAX(bytesPerBPMBuffer,GPU_MAX(bytesPerSearchBuffer,bytesPerDecodeBuffer));
(* bytesPerBuffer) = GPU_MAX(bytesPerBPMBuffer,GPU_MAX(bytesPerDecodeBuffer,GPU_MAX(bytesPerSSearchBuffer,bytesPerASearchBuffer)));
return (SUCCESS);
}

Expand Down
Empty file modified resources/gpu_modules/src/gpu_commons.c
100644 → 100755
Empty file.
Empty file modified resources/gpu_modules/src/gpu_devices.c
100644 → 100755
Empty file.
1 change: 1 addition & 0 deletions resources/gpu_modules/src/gpu_errors.c
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const char* gpuGetErrorString(gpu_error_t error)
case E_NOT_IMPLEMENTED: return "GEM GPU - Error: functionality not implemented";
case E_DATA_NOT_ALLOCATED: return "GEM GPU - Error: structure not allocated to any memory";
case E_OVERFLOWING_BUFFER: return "GEM GPU - Error: overflowing elements per buffer";
case E_FMI_TABLE_INCOMPATIBLE_SIZE: return "GEM GPU - Error: fmi table num levels incompatible";
default: return "GEM GPU - Unknown error";
}
}
Expand Down
Loading

0 comments on commit e0748db

Please sign in to comment.