Permalink
Browse files

--

  • Loading branch information...
lvv committed Dec 15, 2008
1 parent 3f93d73 commit 096a8500b37bcda77765d2142f5f73bb7326c0ab
Showing with 27 additions and 2 deletions.
  1. +2 −1 README.txt
  2. +1 −1 b-array.cc
  3. +24 −0 bug.cc
@@ -42,7 +42,8 @@ Second set of curly braces needed because this is an array inside a class.
There are no mallocs, no extra pointers, no extraneous class members.
.lvv::array has the following added capabilities:
- Vector operation: `A1 += A2; cout << A1;`
- Vector operation: `A1 += A2; cout << A1;`
- Optimized template specialization for specific combination of CPU capabilities, array size and type.
- Explicit SSE vectorization (gcc is not yet very good at auto-vectorization).
- parallelization with OpenMP
- Index of first element defaults to 0, but can be any number.
@@ -1,5 +1,5 @@
/////////////////// CONFIG
#define F32
#define I16
#define INCLUDE "b-cmp.h"
//#define INCLUDE "b-sum.h"
#define REPEAT 9
24 bug.cc
@@ -0,0 +1,24 @@
#include <immintrin.h>
#include <stdio.h>
#include <stdint.h>
/*
 * Computes the per-lane maximum of a 2000-element int16_t array with SSE2
 * and prints the eight resulting lanes on one line.
 *
 * The array is filled with the repeating pattern 1, 2 and a single outlier
 * A[333] = 3. Elements are folded into 8 lanes (lane = index % 8), so the
 * outlier lands in lane 5 (333 % 8 == 5).
 *
 * argc and argv are unused. Always returns 0.
 */
int main(int argc, char *argv[]) {
	/* N must be a multiple of STEP so the vector loop covers the whole array. */
	enum { N = 2000, LANES = 8, STEP = 2 * LANES };
	int16_t A[N];
	int16_t lanes[LANES];
	__m128i m1, m2;
	int i;

	(void)argc;
	(void)argv;

	/* Initialise every element; nothing is left indeterminate. */
	for (i = 0; i + 1 < N; i += 2) {
		A[i] = 1;
		A[i + 1] = 2;
	}
	A[333] = 3;

	/*
	 * A is only guaranteed int16_t alignment, so use unaligned loads rather
	 * than dereferencing a casted __m128i pointer (misaligned access and a
	 * strict-aliasing violation).
	 */
	m1 = _mm_loadu_si128((const __m128i *)&A[0]);
	m2 = _mm_loadu_si128((const __m128i *)&A[LANES]);

	/* Two independent accumulators, 16 elements per iteration. */
	for (i = STEP; i + STEP <= N; i += STEP) {
		m1 = _mm_max_epi16(m1, _mm_loadu_si128((const __m128i *)&A[i]));
		m2 = _mm_max_epi16(m2, _mm_loadu_si128((const __m128i *)&A[i + LANES]));
	}
	m1 = _mm_max_epi16(m1, m2);

	/* Copy the vector out to plain int16_t storage before reading lanes. */
	_mm_storeu_si128((__m128i *)lanes, m1);

	/*
	 * Index each lane explicitly: modifying one pointer several times inside
	 * a single argument list is unsequenced and therefore undefined.
	 */
	printf("%hi %hi %hi %hi %hi %hi %hi %hi \n",
	       lanes[0], lanes[1], lanes[2], lanes[3],
	       lanes[4], lanes[5], lanes[6], lanes[7]);
	return 0;
}

0 comments on commit 096a850

Please sign in to comment.