|
| 1 | +#ifdef __AVX2__ |
| 2 | +#include <immintrin.h> |
| 3 | +#endif |
| 4 | + |
1 | 5 | #include "include/collisions.h" |
2 | 6 | #include <stdio.h> |
3 | 7 |
|
4 | | -#ifndef ABS |
5 | | -#define ABS(x) ((x) < 0 ? -(x) : (x)) |
6 | | -#endif /* ~ABS */ |
7 | 8 | #ifndef DOT2D |
8 | 9 | #define DOT2D(X0, Y0, X1, Y1) ((X0) * (X1) + (Y0) * (Y1)) |
9 | 10 | #endif /* ~DOT2D */ |
10 | 11 |
|
11 | | -#ifndef CODE_BOTTOM |
12 | | -#define CODE_BOTTOM 1 |
13 | | -#endif /* CODE_BOTTOM */ |
14 | | -#ifndef CODE_TOP |
15 | | -#define CODE_TOP 2 |
16 | | -#endif /* CODE_TOP */ |
17 | | -#ifndef CODE_LEFT |
18 | | -#define CODE_LEFT 4 |
19 | | -#endif /* CODE_LEFT */ |
20 | | -#ifndef CODE_RIGHT |
21 | | -#define CODE_RIGHT 8 |
22 | | -#endif /* CODE_RIGHT */ |
23 | | - |
24 | 12 | static int |
25 | 13 | pgCollision_LineLine(pgLineBase *A, pgLineBase *B) |
26 | 14 | { |
@@ -204,6 +192,97 @@ static int |
204 | 192 | pgIntersection_LineRect(pgLineBase *line, SDL_Rect *rect, double *X, double *Y, |
205 | 193 | double *T) |
206 | 194 | { |
| 195 | +#ifdef __AVX2__ |
| 196 | + // this function does 4 line-line collisions at once |
| 197 | + double Rx = (double)rect->x; |
| 198 | + double Ry = (double)rect->y; |
| 199 | + double Rw = (double)rect->w; |
| 200 | + double Rh = (double)rect->h; |
| 201 | + |
| 202 | + // here we start to setup the variables |
| 203 | + __m256d x1_256d = _mm256_set1_pd(line->x1); |
| 204 | + __m256d y1_256d = _mm256_set1_pd(line->y1); |
| 205 | + __m256d x2_256d = _mm256_set1_pd(line->x2); |
| 206 | + __m256d y2_256d = _mm256_set1_pd(line->y2); |
| 207 | + __m256d x3_256d = _mm256_set_pd(Rx, Rx, Rx, Rx + Rw); |
| 208 | + __m256d y3_256d = _mm256_set_pd(Ry, Ry, Ry + Rh, Ry); |
| 209 | + __m256d x4_256d = _mm256_set_pd(Rx + Rw, Rx, Rx + Rw, Rx + Rw); |
| 210 | + __m256d y4_256d = _mm256_set_pd(Ry, Ry + Rh, Ry + Rh, Ry + Rh); |
| 211 | + |
| 212 | + // here we calculate the differences between the the coords of the points |
| 213 | + __m256d x1_m_x2_256d = _mm256_sub_pd(x1_256d, x2_256d); |
| 214 | + __m256d y3_m_y4_256d = _mm256_sub_pd(y3_256d, y4_256d); |
| 215 | + __m256d y1_m_y2_256d = _mm256_sub_pd(y1_256d, y2_256d); |
| 216 | + __m256d x3_m_x4_256d = _mm256_sub_pd(x3_256d, x4_256d); |
| 217 | + |
| 218 | + // we calculate the denominator of the equations |
| 219 | + __m256d den_256d = |
| 220 | + _mm256_sub_pd(_mm256_mul_pd(x1_m_x2_256d, y3_m_y4_256d), |
| 221 | + _mm256_mul_pd(y1_m_y2_256d, x3_m_x4_256d)); |
| 222 | + |
| 223 | + // if the denominator is 0 then the line is parallel to the other line |
| 224 | + // in this occasion this can't be true here as a line will never be |
| 225 | + // parallel to all four sides of a rectangle |
| 226 | + __m256d den_zero_256d = |
| 227 | + _mm256_cmp_pd(den_256d, _mm256_setzero_pd(), _CMP_EQ_OQ); |
| 228 | + |
| 229 | + // we dont want to cause any floating point errors by dividing by 0 |
| 230 | + // so we set the ones that are equal to 0 to 1 |
| 231 | + den_256d = _mm256_or_pd(den_zero_256d, den_256d); |
| 232 | + |
| 233 | + // we calculate the rest of the differences between the coords of the |
| 234 | + // points |
| 235 | + __m256d x1_m_x3_256d = _mm256_sub_pd(x1_256d, x3_256d); |
| 236 | + __m256d y1_m_y3_256d = _mm256_sub_pd(y1_256d, y3_256d); |
| 237 | + |
| 238 | + // calculate the t values |
| 239 | + __m256d t_256d = _mm256_sub_pd(_mm256_mul_pd(x1_m_x3_256d, y3_m_y4_256d), |
| 240 | + _mm256_mul_pd(y1_m_y3_256d, x3_m_x4_256d)); |
| 241 | + t_256d = _mm256_div_pd(t_256d, den_256d); |
| 242 | + |
| 243 | + // calculate the u values |
| 244 | + __m256d u_256d = _mm256_sub_pd(_mm256_mul_pd(x1_m_x2_256d, y1_m_y3_256d), |
| 245 | + _mm256_mul_pd(y1_m_y2_256d, x1_m_x3_256d)); |
| 246 | + u_256d = |
| 247 | + _mm256_mul_pd(_mm256_div_pd(u_256d, den_256d), _mm256_set1_pd(-1.0)); |
| 248 | + |
| 249 | + // we check this condition t >= 0 && t <= 1 && u >= 0 && u <= 1 |
| 250 | + __m256d ones_256d = _mm256_set1_pd(1.0); |
| 251 | + __m256d zeros_256d = _mm256_set1_pd(0.0); |
| 252 | + __m256d t_zero_256d = _mm256_cmp_pd(t_256d, zeros_256d, _CMP_GE_OQ); |
| 253 | + __m256d t_one_256d = _mm256_cmp_pd(t_256d, ones_256d, _CMP_LE_OQ); |
| 254 | + __m256d u_zero_256d = _mm256_cmp_pd(u_256d, zeros_256d, _CMP_GE_OQ); |
| 255 | + __m256d u_one_256d = _mm256_cmp_pd(u_256d, ones_256d, _CMP_LE_OQ); |
| 256 | + __m256d t_u_256d = _mm256_and_pd(_mm256_and_pd(t_zero_256d, t_one_256d), |
| 257 | + _mm256_and_pd(u_zero_256d, u_one_256d)); |
| 258 | + |
| 259 | + // if no lines touch the rectangle then this will be true |
| 260 | + if (_mm256_movemask_pd(t_u_256d) == 0x0) { |
| 261 | + return 0; |
| 262 | + } |
| 263 | + |
| 264 | + double t = DBL_MAX; |
| 265 | + |
| 266 | + // here we know that there is at least one intersection so |
| 267 | + // we search for the smallest t value that still meets the above conditions |
| 268 | + int i = 0; |
| 269 | + for (i = 0; i < 4; i++) { |
| 270 | + if (((double *)&t_u_256d)[i]) { |
| 271 | + t = MIN(t, ((double *)&t_256d)[i]); |
| 272 | + } |
| 273 | + } |
| 274 | + |
| 275 | + // outputs |
| 276 | + if (T) |
| 277 | + *T = t; |
| 278 | + if (X) |
| 279 | + *X = line->x1 + t * (line->x2 - line->x1); |
| 280 | + if (Y) |
| 281 | + *Y = line->y1 + t * (line->y2 - line->y1); |
| 282 | + |
| 283 | + return 1; |
| 284 | +#else |
| 285 | + |
207 | 286 | double x = (double)rect->x; |
208 | 287 | double y = (double)rect->y; |
209 | 288 | double w = (double)rect->w; |
@@ -238,6 +317,7 @@ pgIntersection_LineRect(pgLineBase *line, SDL_Rect *rect, double *X, double *Y, |
238 | 317 | } |
239 | 318 |
|
240 | 319 | return ret; |
| 320 | +#endif /* ~__AVX2__ */ |
241 | 321 | } |
242 | 322 |
|
243 | 323 | static int |
|
0 commit comments