Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
226 lines (193 sloc) 9.26 KB
// The Computer Language Benchmarks Game
// http://benchmarksgame.alioth.debian.org/
//
// Contributed by Jeremy Zerfas
// MODIFIED BY D. Lemire (AVX2 vectorization)
// This controls the width of lines that are output by this program.
#define MAXIMUM_LINE_WIDTH 60
#include <x86intrin.h>
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
// intptr_t should be the native integer type on most sane systems.
typedef intptr_t intnative_t;
typedef struct{
char letter;
float probability;
} nucleotide_info;
// Repeatedly print string_To_Repeat until it has printed
// number_Of_Characters_To_Create. The output is also wrapped to
// MAXIMUM_LINE_WIDTH columns.
static void repeat_And_Wrap_String(const char string_To_Repeat[],
const intnative_t number_Of_Characters_To_Create){
const intnative_t string_To_Repeat_Length=strlen(string_To_Repeat);
// Create an extended_String_To_Repeat which is a copy of string_To_Repeat
// but extended with another copy of the first MAXIMUM_LINE_WIDTH characters
// of string_To_Repeat appended to the end. Later on this allows us to
// generate a line of output just by doing simple memory copies using an
// appropriate offset into extended_String_To_Repeat.
char extended_String_To_Repeat[string_To_Repeat_Length+MAXIMUM_LINE_WIDTH];
for(intnative_t column=0; column<string_To_Repeat_Length+MAXIMUM_LINE_WIDTH;
column++)
extended_String_To_Repeat[column]=
string_To_Repeat[column%string_To_Repeat_Length];
intnative_t offset=0;
char line[MAXIMUM_LINE_WIDTH+1];
line[MAXIMUM_LINE_WIDTH]='\n';
for(intnative_t current_Number_Of_Characters_To_Create=
number_Of_Characters_To_Create;
current_Number_Of_Characters_To_Create>0;){
// Figure out the length of the line we need to write. If it's less than
// MAXIMUM_LINE_WIDTH then we also need to add a line feed in the right
// spot too.
intnative_t line_Length=MAXIMUM_LINE_WIDTH;
if(current_Number_Of_Characters_To_Create<MAXIMUM_LINE_WIDTH){
line_Length=current_Number_Of_Characters_To_Create;
line[line_Length]='\n';
}
memcpy(line, extended_String_To_Repeat+offset, line_Length);
// Update the offset, reducing it by string_To_Repeat_Length if
// necessary.
offset+=line_Length;
if(offset>string_To_Repeat_Length)
offset-=string_To_Repeat_Length;
// Output the line to stdout and update the
// current_Number_Of_Characters_To_Create.
fwrite(line, line_Length+1, 1, stdout);
current_Number_Of_Characters_To_Create-=line_Length;
}
}
// Generate a floating point pseudorandom number from 0.0 to max using a linear
// congruential generator.
#define IM 139968
#define IA 3877
#define IC 29573
#define SEED 42
static inline float get_LCG_Pseudorandom_Number(const float max){
static uint32_t seed=SEED;
seed=(seed*IA + IC)%IM;
return max/IM*seed;
}
// computes the 32-bit integer division with IM
static __m256i divIM(__m256i a) {
__m256i multiplier = _mm256_set1_epi64x(UINT64_C(502748801));
__m256i fullmulhi32 = _mm256_mul_epu32(_mm256_srli_epi64(a, 32),multiplier);
__m256i fullmullo32 = _mm256_mul_epu32(a, multiplier);
fullmulhi32 = _mm256_and_si256(_mm256_srli_epi64(fullmulhi32,14), _mm256_set1_epi64x(UINT64_C(0xffffffff00000000)));
fullmullo32 = _mm256_srli_epi64(fullmullo32,14+32);
return _mm256_or_si256(fullmullo32, fullmulhi32);
}
// computes the 32-bit integer remainder with IM
static __m256i modIM(__m256i a) {
return _mm256_sub_epi32(a,_mm256_mullo_epi32(divIM(a),_mm256_set1_epi32(IM)));
}
static __m256i vseed;
void init_vector_LCG() {
// this is naive
uint32_t buffer [8];
for(int k = 0; k < 8 ; k++) buffer[k] = rand();
vseed = _mm256_loadu_si256((__m256i*)buffer);
}
// vectorized version
static inline __m256i vector_get_LCG_Pseudorandom_Number(){
vseed = modIM(_mm256_add_epi32(_mm256_mullo_epi32(vseed,_mm256_set1_epi32(IA)),_mm256_set1_epi32(IC)));
return vseed;
}
// Print a pseudorandom DNA sequence that is number_Of_Characters_To_Create
// characters long and made up of the nucleotides specified in
// nucleotides_Information and occurring at the frequencies specified in
// nucleotides_Information. The output is also wrapped to MAXIMUM_LINE_WIDTH
// columns.
static void generate_And_Wrap_Pseudorandom_DNA_Sequence(
const nucleotide_info nucleotides_Information[],
const intnative_t number_Of_Nucleotides,
const intnative_t number_Of_Characters_To_Create){
uint8_t charbuffer [sizeof(__m256i)];
assert(number_Of_Nucleotides<16);
for(intnative_t i=0; i<number_Of_Nucleotides; i++){
charbuffer[i] = nucleotides_Information[i].letter;
charbuffer[i+sizeof(__m128i)] = nucleotides_Information[i].letter;
}
for(intnative_t i=number_Of_Nucleotides; i<16; i++){
charbuffer[i] = '*';
charbuffer[i+sizeof(__m128i)] = '*';
}
__m256i vletters = _mm256_loadu_si256((__m256i*)charbuffer);
// Cumulate the probabilities. Note that the probability is being multiplied
// by IM because later on we'll also be calling the random number generator
// with a value that is multiplied by IM. Since the random number generator
// does a division by IM this allows the compiler to cancel out the
// multiplication and division by IM with each other without requiring any
// changes to the random number generator code whose code was explicitly
// defined in the rules.
__m256i cumulative_Probabilities[number_Of_Nucleotides - 1];
double cumulative_Probability=0.0;
for(intnative_t i=0; i<number_Of_Nucleotides - 1; i++){
cumulative_Probability+=nucleotides_Information[i].probability;
cumulative_Probabilities[i]=_mm256_set1_epi32((uint32_t)(cumulative_Probability*IM));
}
char line[(MAXIMUM_LINE_WIDTH+1+sizeof(__m256i)-1)/sizeof(__m256i) * sizeof(__m256i)];
line[MAXIMUM_LINE_WIDTH]='\n';
for(intnative_t current_Number_Of_Characters_To_Create=
number_Of_Characters_To_Create;
current_Number_Of_Characters_To_Create>0;){
intnative_t line_Length=MAXIMUM_LINE_WIDTH;
if(current_Number_Of_Characters_To_Create<MAXIMUM_LINE_WIDTH){
line_Length=current_Number_Of_Characters_To_Create;
}
for(intnative_t column=0; column<line_Length; column+=sizeof(__m256i)){
const __m256i r1=vector_get_LCG_Pseudorandom_Number();
const __m256i r2=vector_get_LCG_Pseudorandom_Number();
const __m256i r3=vector_get_LCG_Pseudorandom_Number();
const __m256i r4=vector_get_LCG_Pseudorandom_Number();
__m256i count1 = _mm256_set1_epi32(number_Of_Nucleotides -1);
__m256i count2 = _mm256_set1_epi32(number_Of_Nucleotides -1);
__m256i count3 = _mm256_set1_epi32(number_Of_Nucleotides -1);
__m256i count4 = _mm256_set1_epi32(number_Of_Nucleotides -1);
for(intnative_t i=0; i<number_Of_Nucleotides -1 ; i++) {
count1 = _mm256_add_epi32(count1,_mm256_cmpgt_epi32(cumulative_Probabilities[i],r1));
count2 = _mm256_add_epi32(count2,_mm256_cmpgt_epi32(cumulative_Probabilities[i],r2));
count3 = _mm256_add_epi32(count3,_mm256_cmpgt_epi32(cumulative_Probabilities[i],r3));
count4 = _mm256_add_epi32(count4,_mm256_cmpgt_epi32(cumulative_Probabilities[i],r4));
}
__m256i index1 = _mm256_or_si256(count1,_mm256_slli_epi32(count2,8));
__m256i index2 = _mm256_or_si256(_mm256_slli_epi32(count3,16), _mm256_slli_epi32(count3,24));
__m256i index = _mm256_or_si256(index1,index2);
__m256i vline = _mm256_shuffle_epi8(vletters,index);
_mm256_storeu_si256((__m256i*)(line+column),vline);
}
line[line_Length]='\n';
// Output the line to stdout and update the
// current_Number_Of_Characters_To_Create.
fwrite(line, line_Length+1, 1, stdout);
current_Number_Of_Characters_To_Create-=line_Length;
}
}
int main(int argc, char ** argv){
init_vector_LCG();
const intnative_t n=atoi(argv[1]);
fputs(">ONE Homo sapiens alu\n", stdout);
const char homo_Sapiens_Alu[]=
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTC"
"AGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCG"
"TGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGG"
"AGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
repeat_And_Wrap_String(homo_Sapiens_Alu, 2*n);
fputs(">TWO IUB ambiguity codes\n", stdout);
nucleotide_info iub_Nucleotides_Information[]={
{'a', 0.27}, {'c', 0.12}, {'g', 0.12}, {'t', 0.27}, {'B', 0.02},
{'D', 0.02}, {'H', 0.02}, {'K', 0.02}, {'M', 0.02}, {'N', 0.02},
{'R', 0.02}, {'S', 0.02}, {'V', 0.02}, {'W', 0.02}, {'Y', 0.02}};
generate_And_Wrap_Pseudorandom_DNA_Sequence(iub_Nucleotides_Information,
sizeof(iub_Nucleotides_Information)/sizeof(nucleotide_info), 3*n);
fputs(">THREE Homo sapiens frequency\n", stdout);
nucleotide_info homo_Sapien_Nucleotides_Information[]={
{'a', 0.3029549426680}, {'c', 0.1979883004921},
{'g', 0.1975473066391}, {'t', 0.3015094502008}};
generate_And_Wrap_Pseudorandom_DNA_Sequence(
homo_Sapien_Nucleotides_Information,
sizeof(homo_Sapien_Nucleotides_Information)/sizeof(nucleotide_info), 5*n);
return 0;
}