Skip to content

Commit 3b2a8c0

Browse files
authored
created SIMD version of line-rect intersection (#42)
* created SIMD version of line-rect intersection * added the setup file that is needed to compile SIMD stuff * fixed minor bug and changed the structure a bit * renamed min->MIN * documented the AVX2 code and cleaned it up a bit * tackled one review * moved the SIMD implementation in `collisions.c`
1 parent 74fd53b commit 3b2a8c0

File tree

2 files changed

+110
-17
lines changed

2 files changed

+110
-17
lines changed

setup.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,21 @@
33
import shlex
44
import glob
55
import sys
6+
import os
67

7-
extensions = [Extension("geometry", sources=["src_c/geometry.c"])]
8+
9+
compiler_options = {"unix": ["-mavx2"], "msvc": ["/arch:AVX2"]}
10+
11+
compiler_type = "msvc" if os.name == "nt" else "unix"
12+
13+
14+
extensions = [
15+
Extension(
16+
"geometry",
17+
sources=["src_c/geometry.c"],
18+
extra_compile_args=compiler_options[compiler_type],
19+
)
20+
]
821

922

1023
def build() -> None:

src_c/collisions.c

Lines changed: 96 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,26 +1,14 @@
1+
#ifdef __AVX2__
2+
#include <immintrin.h>
3+
#endif
4+
15
#include "include/collisions.h"
26
#include <stdio.h>
37

4-
#ifndef ABS
5-
#define ABS(x) ((x) < 0 ? -(x) : (x))
6-
#endif /* ~ABS */
78
#ifndef DOT2D
89
#define DOT2D(X0, Y0, X1, Y1) ((X0) * (X1) + (Y0) * (Y1))
910
#endif /* ~DOT2D */
1011

11-
#ifndef CODE_BOTTOM
12-
#define CODE_BOTTOM 1
13-
#endif /* CODE_BOTTOM */
14-
#ifndef CODE_TOP
15-
#define CODE_TOP 2
16-
#endif /* CODE_TOP */
17-
#ifndef CODE_LEFT
18-
#define CODE_LEFT 4
19-
#endif /* CODE_LEFT */
20-
#ifndef CODE_RIGHT
21-
#define CODE_RIGHT 8
22-
#endif /* CODE_RIGHT */
23-
2412
static int
2513
pgCollision_LineLine(pgLineBase *A, pgLineBase *B)
2614
{
@@ -204,6 +192,97 @@ static int
204192
pgIntersection_LineRect(pgLineBase *line, SDL_Rect *rect, double *X, double *Y,
205193
double *T)
206194
{
195+
#ifdef __AVX2__
196+
// this function does 4 line-line collisions at once
197+
double Rx = (double)rect->x;
198+
double Ry = (double)rect->y;
199+
double Rw = (double)rect->w;
200+
double Rh = (double)rect->h;
201+
202+
// here we start to setup the variables
203+
__m256d x1_256d = _mm256_set1_pd(line->x1);
204+
__m256d y1_256d = _mm256_set1_pd(line->y1);
205+
__m256d x2_256d = _mm256_set1_pd(line->x2);
206+
__m256d y2_256d = _mm256_set1_pd(line->y2);
207+
__m256d x3_256d = _mm256_set_pd(Rx, Rx, Rx, Rx + Rw);
208+
__m256d y3_256d = _mm256_set_pd(Ry, Ry, Ry + Rh, Ry);
209+
__m256d x4_256d = _mm256_set_pd(Rx + Rw, Rx, Rx + Rw, Rx + Rw);
210+
__m256d y4_256d = _mm256_set_pd(Ry, Ry + Rh, Ry + Rh, Ry + Rh);
211+
212+
// here we calculate the differences between the the coords of the points
213+
__m256d x1_m_x2_256d = _mm256_sub_pd(x1_256d, x2_256d);
214+
__m256d y3_m_y4_256d = _mm256_sub_pd(y3_256d, y4_256d);
215+
__m256d y1_m_y2_256d = _mm256_sub_pd(y1_256d, y2_256d);
216+
__m256d x3_m_x4_256d = _mm256_sub_pd(x3_256d, x4_256d);
217+
218+
// we calculate the denominator of the equations
219+
__m256d den_256d =
220+
_mm256_sub_pd(_mm256_mul_pd(x1_m_x2_256d, y3_m_y4_256d),
221+
_mm256_mul_pd(y1_m_y2_256d, x3_m_x4_256d));
222+
223+
// if the denominator is 0 then the line is parallel to the other line
224+
// in this occasion this can't be true here as a line will never be
225+
// parallel to all four sides of a rectangle
226+
__m256d den_zero_256d =
227+
_mm256_cmp_pd(den_256d, _mm256_setzero_pd(), _CMP_EQ_OQ);
228+
229+
// we dont want to cause any floating point errors by dividing by 0
230+
// so we set the ones that are equal to 0 to 1
231+
den_256d = _mm256_or_pd(den_zero_256d, den_256d);
232+
233+
// we calculate the rest of the differences between the coords of the
234+
// points
235+
__m256d x1_m_x3_256d = _mm256_sub_pd(x1_256d, x3_256d);
236+
__m256d y1_m_y3_256d = _mm256_sub_pd(y1_256d, y3_256d);
237+
238+
// calculate the t values
239+
__m256d t_256d = _mm256_sub_pd(_mm256_mul_pd(x1_m_x3_256d, y3_m_y4_256d),
240+
_mm256_mul_pd(y1_m_y3_256d, x3_m_x4_256d));
241+
t_256d = _mm256_div_pd(t_256d, den_256d);
242+
243+
// calculate the u values
244+
__m256d u_256d = _mm256_sub_pd(_mm256_mul_pd(x1_m_x2_256d, y1_m_y3_256d),
245+
_mm256_mul_pd(y1_m_y2_256d, x1_m_x3_256d));
246+
u_256d =
247+
_mm256_mul_pd(_mm256_div_pd(u_256d, den_256d), _mm256_set1_pd(-1.0));
248+
249+
// we check this condition t >= 0 && t <= 1 && u >= 0 && u <= 1
250+
__m256d ones_256d = _mm256_set1_pd(1.0);
251+
__m256d zeros_256d = _mm256_set1_pd(0.0);
252+
__m256d t_zero_256d = _mm256_cmp_pd(t_256d, zeros_256d, _CMP_GE_OQ);
253+
__m256d t_one_256d = _mm256_cmp_pd(t_256d, ones_256d, _CMP_LE_OQ);
254+
__m256d u_zero_256d = _mm256_cmp_pd(u_256d, zeros_256d, _CMP_GE_OQ);
255+
__m256d u_one_256d = _mm256_cmp_pd(u_256d, ones_256d, _CMP_LE_OQ);
256+
__m256d t_u_256d = _mm256_and_pd(_mm256_and_pd(t_zero_256d, t_one_256d),
257+
_mm256_and_pd(u_zero_256d, u_one_256d));
258+
259+
// if no lines touch the rectangle then this will be true
260+
if (_mm256_movemask_pd(t_u_256d) == 0x0) {
261+
return 0;
262+
}
263+
264+
double t = DBL_MAX;
265+
266+
// here we know that there is at least one intersection so
267+
// we search for the smallest t value that still meets the above conditions
268+
int i = 0;
269+
for (i = 0; i < 4; i++) {
270+
if (((double *)&t_u_256d)[i]) {
271+
t = MIN(t, ((double *)&t_256d)[i]);
272+
}
273+
}
274+
275+
// outputs
276+
if (T)
277+
*T = t;
278+
if (X)
279+
*X = line->x1 + t * (line->x2 - line->x1);
280+
if (Y)
281+
*Y = line->y1 + t * (line->y2 - line->y1);
282+
283+
return 1;
284+
#else
285+
207286
double x = (double)rect->x;
208287
double y = (double)rect->y;
209288
double w = (double)rect->w;
@@ -238,6 +317,7 @@ pgIntersection_LineRect(pgLineBase *line, SDL_Rect *rect, double *X, double *Y,
238317
}
239318

240319
return ret;
320+
#endif /* ~__AVX2__ */
241321
}
242322

243323
static int

0 commit comments

Comments
 (0)