-
Notifications
You must be signed in to change notification settings - Fork 1
/
cuda.c
2540 lines (2230 loc) · 100 KB
/
cuda.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* CUDA stuff.
* Computing chi^2 on GPU for a given combination of free parameters
*
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <curand_kernel.h>
#include "asteroid.h"
__device__ void ODE_func (double y[], double f[], double mu[])
/* Three ODEs for the tumbling evolution of the three Euler angles, phi, theta, and psi.
* Derived in a manner similar to Kaasalainen 2001, but for the setup of Samarasinha and A'Hearn 1991
* (a > b > c; Il > Ii > Is; either a or c can be the axis of rotation). This is so called "L-convention"
* (Samarasinha & Mueller 2015).
*
* The setup of Kaasalainen 2001 results
* in large derivatives, and low accuracy and instability of ODEs for small I1.
*
* Here Ip=(1/Ii+1/Is)/2; Im=(1/Ii-1/Is)/2.
*/
{
#ifdef TORQUE
// Simplest constant (in the co-moving frame) torque model applied to a tumbling atseroid. Solving 6 ODEs - three Euler equations of motion
// in the presence of constant torque, and three more to get the Euler angles phi, psi, theta.
// mu[0]: (Is-Il)/Ii
// mu[1]: (Il-Ii)/Is
// mu[2]: (Ii-Is)/Il
// mu[3]: Ki/Ii
// mu[4]: Ks/Is
// mu[5]: Kl/Il
// y[0]: Omega_i
// y[1]: Omega_s
// y[2]: Omega_l
// y[3]: phi
// y[4]: theta
// y[5]: psi
// Three Euler equations of motion:
// d Omega_i /dt:
f[0] = mu[0]*y[1]*y[2] + mu[3];
// d Omega_s /dt:
f[1] = mu[1]*y[2]*y[0] + mu[4];
// d Omega_l /dt:
f[2] = mu[2]*y[0]*y[1] + mu[5];
// Three ODEs for the three Euler angles
// dphi/dt:
f[3] = (y[0]*sin(y[5]) + y[1]*cos(y[5])) / sin(y[4]);
// dtheta/dt:
f[4] = y[0]*cos(y[5]) - y[1]*sin(y[5]);
// dpsi/dt:
f[5] = y[2] - f[3]*cos(y[4]);
#else
// No torque case (so we don't need to use the Euler's equations)
// mu[0,1,2] -> L, Ip, Im
// double phi = y[0];
// double theta = y[1];
// double psi = y[2];
// dphi/dt:
f[0] = mu[0]*(mu[1]-mu[2]*cos(2.0*y[2]));
// dtheta/dt:
f[1] = mu[0]*mu[2]*sin(y[1])*sin(2.0*y[2]);
// dpsi/dt (assuming Il=1):
f[2] = cos(y[1])*(mu[0]-f[0]);
#endif // TORQUE
}
__device__ CHI_FLOAT chi2one(double *params, struct obs_data *sData, int N_data, int N_filters, CHI_FLOAT *delta_V, int Nplot, struct chi2_struct *sp,
#ifdef ANIMATE
unsigned char * d_rgb,
#endif
int sTypes[][N_SEG])
// Computung chi^2 for a single model parameters combination, on GPU, by a single thread
// NUDGE is not supported in SEGMENT mode!
{
int i, m;
double Ep_b, Ep_c, Ep_a, Sp_b, Sp_c, Sp_a;
CHI_FLOAT chi2a;
double sum_y2[N_FILTERS];
double sum_y[N_FILTERS];
double sum_w[N_FILTERS];
for (m=0; m<N_filters; m++)
{
sum_y2[m] = 0.0;
sum_y[m] = 0.0;
sum_w[m] = 0.0;
}
/* Tumbling model description:
*
* Triaxial ellipsoid with physical axes a, b, c. a and c are extremal ,
* b is always intermediate. (c < b < a=1) Photometric a,b,c can be totally different.
*
* The corresponding moments of inertia are Il (a), Ii (b), Is(c); Il < Ii < Is.
*
* The frame of reference is that of Samarasinha and A'Hearn 1991: b-c-a (i-s-l) stands for x-y-z
*
* Input parameters with fixed constraints (5-6):
* - <M>: angular momentum vector described by params.theta_M and params.phi_M
* - phi_0: initial Euler angle for precession, 0...2*pi
* - L: angular momentum L value, radians/day; if P is perdiod in hours, L=48*pi/P
* - c_tumb: log10 of the physical (tumbling) value of the smallest axis c size; c < b < a=1
* - A (only in TREND mode): scaling parameter "A" for de-trending the brightness curve, in magnitude/radian units (to be multiplied by the phase angle alpha to get magnitude correction)
* TORQUE parameters:
* - Ti, Ts, Tl: derivatives for the corresponding Omega_i,s,l angles; units are rad/day
*
* Derived values:
* - Ii_inv = 1/Ii; inverse principle moment of inertia Ii
* - Is_inv = 1/Is; inverse principle moment of inertia Is
*
* Parameters which are constrained by other parameters (3):
* - b_tumb: log10 of the physical (tumbling) value of the intermediate axis b size; constrained by c: log10(b)=log10(c)...0
* - Es: dimensionless total energy, constrained by Ii: SAM: Es<1/Ii; LAM: Es>1/Ii
* - psi_0: initial Euler angle of rotation of the body, constrained (only for SAM) by Ii, Is, Einv=1/Es: psi_max=atan(sqrt(Ii*(Is-Einv)/Is/(Einv-Ii))); psi_0=[-psi_max,psi_max]
*
* Derived values:
* - theta_0: initial Euler nutation angle; 0 ... pi range, derived from Ii, Is, Es, psi_0
*
* Time integration to compute the three Euler angles (phi, psi, theta) values for all observed data points
* - Initial conditions: phi_0, psi_0, theta_0
* - Parameters needed: L, Ip=0.5*(Ii_inv+Is_inv); Im=0.5*(Ii_inv-Is_inv);
* - time step - macro parameter TIME_STEP (days)
*
*/
// We work in the inertial observed (Solar barycentric) frame of reference X, Y, Z.
// By applying sequentially the three Euler angles of the asteroid (which we compute by solving the three ODEs numerically),
// we derive the asteroid's internal axes (coinciding with b, c, a for x, y, z, or i, s, l)
// orienation in the barycentric frame of reference. This allows us to compute the orientation of the asteroid->sun and asteroid->earth
// vectors in the asteroid's frame of reference, which is then used to compute it's apparent brightness for the observer.
#ifdef NUDGE
int M = 0;
float t_mod[M_MAX], V_mod[M_MAX];
float t_old[2];
float V_old[2];
#endif
#ifdef MINIMA_TEST
int N_minima = 0;
float Vmin[MAX_MINIMA];
float t_old[2];
float V_old[2];
#endif
// Loop for multiple data segments
// (Will use one segment, for all the data, when SEGMENT is not defined)
for (int iseg=0; iseg<N_SEG; iseg++)
{
// Defining these for better readability:
#define P_theta_M params[sTypes[T_theta_M][iseg]]
#define P_phi_M params[sTypes[T_phi_M][iseg]]
#define P_phi_0 params[sTypes[T_phi_0][iseg]]
#define P_L params[sTypes[T_L][iseg]]
#define P_A params[sTypes[T_A][iseg]]
#define P_Ti params[sTypes[T_Ti][iseg]]
#define P_Ts params[sTypes[T_Ts][iseg]]
#define P_Tl params[sTypes[T_Tl][iseg]]
#define P_T2i params[sTypes[T_T2i][iseg]]
#define P_T2s params[sTypes[T_T2s][iseg]]
#define P_T2l params[sTypes[T_T2l][iseg]]
#define P_Tt params[sTypes[T_Tt][iseg]]
#define P_c_tumb params[sTypes[T_c_tumb][iseg]]
#define P_b_tumb params[sTypes[T_b_tumb][iseg]]
#define P_Es params[sTypes[T_Es][iseg]]
#define P_psi_0 params[sTypes[T_psi_0][iseg]]
#define P_c params[sTypes[T_c][iseg]]
#define P_b params[sTypes[T_b][iseg]]
#define P_theta_R params[sTypes[T_theta_R][iseg]]
#define P_phi_R params[sTypes[T_phi_R][iseg]]
#define P_psi_R params[sTypes[T_psi_R][iseg]]
#define P_kappa params[sTypes[T_kappa][iseg]]
// Calculations which are time independent:
// In tumbling mode, the vector M is the angular momentum vector (fixed in the inertial - barycentric - frame of reference)
// It is defined by the two angles (input parameters) - params.theta_M and params.phi_M
// It's a unit vector
double M_x = sin(P_theta_M) * cos(P_phi_M);
double M_y = sin(P_theta_M) * sin(P_phi_M);
double M_z = cos(P_theta_M);
// In the new inertial frame of reference with the <M> vector being the z-axis, we arbitrarily choose the x-axis, <XM>, to be [y x M].
// It is fixed in the inertial frame of reference, so can be computed here:
// Made a unit vector
double XM = sqrt(M_z*M_z+M_x*M_x);
double XM_x = M_z / XM;
// double XM_y = 0.0;
double XM_z = -M_x / XM;
// The third axis, YM, is derived as [M x XM]; a unit vector by design:
double YM_x = M_y*XM_z;
double YM_y = M_z*XM_x - M_x*XM_z;
double YM_z = -M_y*XM_x;
// We set Il (moment of inertia corresponding to the largest axis, a) to 1.
// Shortest axis (c), largest moment of inertia:
double Is = (1.0 + P_b_tumb*P_b_tumb) / (P_b_tumb*P_b_tumb + P_c_tumb*P_c_tumb);
// Intermediate axis (b), intermediate moment of inertia:
double Ii = (1.0 + P_c_tumb*P_c_tumb) / (P_b_tumb*P_b_tumb + P_c_tumb*P_c_tumb);
double Is_inv = 1.0 / Is;
double Ii_inv = 1.0 / Ii;
// Now we have a=1>b>c, and Il=1<Ii<Is
// Axis of rotation can be either "a" (LAM) or "c" (SAM)
// Initial Euler angles values:
double phi = P_phi_0;
// Initial value of the Euler angle theta is determined by other parameters:
// It's ok to use only plus sign in from of sqrt, because theta changes in [0..pi] interval
double theta = asin(sqrt((P_Es-1.0)/(sin(P_psi_0)*sin(P_psi_0)*(Ii_inv-Is_inv)+Is_inv-1.0)));
double psi = P_psi_0;
#ifdef TORQUE
double mu[6];
// Parameters for the ODEs (don't change with time):
// Using the fact that Il = 1:
mu[0] = (Is-1.0)*Ii_inv;
mu[1] = (1.0-Ii)*Is_inv;
mu[2] = Ii - Is;
mu[3] = P_Ti;
mu[4] = P_Ts;
mu[5] = P_Tl;
// Initial values for the three components of the angular velocity vector in the asteroid's frame of reference
// Initially this vector's orientation is determined by the initial Euler angles, theta, psi, and phi
double Omega_i = P_L * Ii_inv * sin(theta) * sin(psi);
double Omega_s = P_L * Is_inv * sin(theta) * cos(psi);
double Omega_l = P_L * cos(theta);
#else
double mu[3];
double Ip = 0.5*(Ii_inv + Is_inv);
double Im = 0.5*(Ii_inv - Is_inv);
mu[0] = P_L;
mu[1] = Ip;
mu[2] = Im;
#endif
#ifdef MIN_DV
double Vmin = 1e20;
double Vmax = -1e20;
#endif
int i1, i2;
#ifdef SEGMENT
i1 = sp->start_seg[iseg];
if (iseg < N_SEG-1)
i2 = sp->start_seg[iseg+1];
else
i2 = N_data;
#else
i1 = 0;
i2 = N_data;
#endif
#ifdef ANIMATE
int i1_rgb = d_i1;
int i2_rgb = d_i2;
#endif
// The loop over all data points in the current segment
for (i=i1; i<i2; i++)
{
// Derive the three Euler angles theta, phi, psi here, by solving three ODEs numerically
if (i > i1)
{
int N_steps;
double h;
OBS_TYPE t1 = sData[i-1].MJD;
OBS_TYPE t2 = sData[i].MJD;
#ifdef TORQUE2
// The split point (in time) between the two torque regimes (can vary between sData[i1].MJD and sData[i2-1].MJD):
OBS_TYPE t_split = P_Tt*(sData[i2-1].MJD - sData[i1].MJD) + sData[i1].MJD;
int Nsplit;
if (t_split >= t1 && t_split < t2)
// We are in the split time interval (when torque changes inside the interval), so need to run the ODE loop twice -
// before and after the torque change
Nsplit = 2;
else
Nsplit = 1;
for (int isplit=0; isplit<Nsplit; isplit++)
{
if (Nsplit == 2)
{
if (isplit == 0)
{
t2 = t_split;
}
else
{
t1 = t_split;
t2 = sData[i].MJD;
// Right after the split point, changing the torque parameters to th second set:
mu[3] = P_T2i;
mu[4] = P_T2s;
mu[5] = P_T2l;
}
}
#endif
// How many integration steps to the current (i-th) observed value, from the previous (i-1) one:
// Forcing the maximum possible time step of TIME_STEP days (macro parameter), to ensure accuracy
N_steps = (t2 - t1) / TIME_STEP + 1;
// Current equidistant time steps (h<=TIME_STEP):
h = (t2 - t1) / N_steps;
// Initial values for ODEs variables = the old values, from the previous i cycle:
#ifdef TORQUE
const int N_ODE = 6;
double y[6];
y[0] = Omega_i;
y[1] = Omega_s;
y[2] = Omega_l;
y[3] = phi;
y[4] = theta;
y[5] = psi;
#else
const int N_ODE = 3;
double y[3];
y[0] = phi;
y[1] = theta;
y[2] = psi;
#endif
#ifdef PLOT_OMEGA
double K1[N_ODE];
#endif
// RK4 method for solving ODEs with a fixed time step h
for (int l=0; l<N_steps; l++)
{
double f[N_ODE], K2[N_ODE], K3[N_ODE], K4[N_ODE];
#ifndef PLOT_OMEGA
double K1[N_ODE];
#endif
ODE_func (y, K1, mu);
int j;
for (j=0; j<N_ODE; j++)
f[j] = y[j] + 0.5*h*K1[j];
ODE_func (f, K2, mu);
for (j=0; j<N_ODE; j++)
f[j] = y[j] + 0.5*h*K2[j];
ODE_func (f, K3, mu);
for (j=0; j<N_ODE; j++)
f[j] = y[j] + h*K3[j];
ODE_func (f, K4, mu);
for (j=0; j<N_ODE; j++)
y[j] = y[j] + 1/6.0 * h *(K1[j] + 2*K2[j] + 2*K3[j] + K4[j]);
}
// New (current) values of the ODEs variables derived from solving the ODEs:
#ifdef TORQUE
Omega_i = y[0];
Omega_s = y[1];
Omega_l = y[2];
phi = y[3];
theta = y[4];
psi = y[5];
#ifdef PLOT_OMEGA
if (Nplot > 0)
{
double phi_dot = K1[3];
double theta_dot = K1[4];
double psi_dot = K1[5];
// Computing angular velocity vector components in inertial coordinate system XYZ
double Omega_X = psi_dot*sin(theta)*sin(phi) + theta_dot*cos(phi);
double Omega_Y =-psi_dot*sin(theta)*cos(phi) + theta_dot*sin(phi);
double Omega_Z = psi_dot*cos(theta) + phi_dot;
// Converting to spherical inertial coordinate system:
// Absolute magnitude Omega:
d_Omega[0][i] = sqrt(Omega_X*Omega_X + Omega_Y*Omega_Y + Omega_Z*Omega_Z);
// Polar angle theta_Omega:
d_Omega[1][i] = acos(Omega_Z/d_Omega[0][i]);
// Azimuthal angle phi_Omega:
d_Omega[2][i] = atan2(Omega_Y, Omega_X);
// In comoving spherical coordinate system:
// Absolute magnitude Omega:
d_Omega[3][i] = sqrt(Omega_i*Omega_i + Omega_s*Omega_s + Omega_l*Omega_l);
// Polar angle theta_Omega:
d_Omega[4][i] = acos(Omega_l/d_Omega[3][i]);
// Azimuthal angle phi_Omega:
d_Omega[5][i] = atan2(Omega_s, Omega_i);
}
#endif
#ifdef LAST
if (Nplot>0 && i==Nplot-1)
// Preserving the final values of L nd E:
{
double L_last = sqrt(Omega_i*Omega_i*Ii*Ii + Omega_s*Omega_s*Is*Is + Omega_l*Omega_l);
double E_last = 1 + 1/(L_last*L_last) * (sin(psi)*sin(psi)*(Ii_inv-Is_inv)+Is_inv-1) * (Omega_i*Omega_i*Ii*Ii + Omega_s*Omega_s*Is*Is);
d_L_last = L_last;
d_E_last = E_last;
}
#endif
#else
phi = y[0];
theta = y[1];
psi = y[2];
#endif
#ifdef TORQUE2
} // isplit loop
#endif
}
// At this point we know the three Euler angles for the current moment of time (data point) - phi, theta, psi.
double cos_phi = cos(phi);
double sin_phi = sin(phi);
// Components of the node vector N=[M x a], derived by rotating vector XM towards vector YM by Euler angle phi
// It is unit by design
// Using XM_y = 0
double N_x = XM_x*cos_phi + YM_x*sin_phi;
double N_y = YM_y*sin_phi;
double N_z = XM_z*cos_phi + YM_z*sin_phi;
// Vector p=[N x M]; a unit one
double p_x = N_y*M_z - N_z*M_y;
double p_y = N_z*M_x - N_x*M_z;
double p_z = N_x*M_y - N_y*M_x;
double cos_theta = cos(theta);
double sin_theta = sin(theta);
// Vector of rotation <a> (the longest axes of the ellipsoid; x3; z; l) is derived by rotating <M> by Euler angle theta towards <p>,
// with the node vector <N> being the rotation vector (Rodrigues formula); a unit vector
double a_x = M_x*cos_theta + p_x*sin_theta;
double a_y = M_y*cos_theta + p_y*sin_theta;
double a_z = M_z*cos_theta + p_z*sin_theta;
// Vector w=[a x N]; a unit one
double w_x = a_y*N_z - a_z*N_y;
double w_y = a_z*N_x - a_x*N_z;
double w_z = a_x*N_y - a_y*N_x;
double sin_psi = sin(psi);
double cos_psi = cos(psi);
// Second axis of the ellipsoid, b (x1; x; i); a unit vector; derived by rotating <N> by Euler angle psi towards <w>,
// with vector <a> being the rotation axis
double b_x = N_x*cos_psi + w_x*sin_psi;
double b_y = N_y*cos_psi + w_y*sin_psi;
double b_z = N_z*cos_psi + w_z*sin_psi;
// Third ellipsoid axis c (x2; y; s) - the shortest one; c=[a x b]; unit vector by design
double c_x = a_y*b_z - a_z*b_y;
double c_y = a_z*b_x - a_x*b_z;
double c_z = a_x*b_y - a_y*b_x;
#if defined(ROTATE) || defined(BW_BALL)
// Optional rotation of the brightness ellipsoid relative to the kinematic ellipsoid, using angles theta_R, phi_R, psi_R
// Using the same setup, as for the main Euler rotation, above (substituting XM->b, YM->c, M->a)
// The meaning of the vectors b,c,a is changing here. At the end these are the new (rotated) basis.
cos_phi = cos(P_phi_R);
sin_phi = sin(P_phi_R);
N_x = b_x*cos_phi + c_x*sin_phi;
N_y = b_y*cos_phi + c_y*sin_phi;
N_z = b_z*cos_phi + c_z*sin_phi;
p_x = N_y*a_z - N_z*a_y;
p_y = N_z*a_x - N_x*a_z;
p_z = N_x*a_y - N_y*a_x;
cos_theta = cos(P_theta_R);
sin_theta = sin(P_theta_R);
// Vector a is changing meaning - now it is the rotated one:
a_x = a_x*cos_theta + p_x*sin_theta;
a_y = a_y*cos_theta + p_y*sin_theta;
a_z = a_z*cos_theta + p_z*sin_theta;
#endif
#ifdef ROTATE
w_x = a_y*N_z - a_z*N_y;
w_y = a_z*N_x - a_x*N_z;
w_z = a_x*N_y - a_y*N_x;
sin_psi = sin(P_psi_R);
cos_psi = cos(P_psi_R);
// Vector b is changing meaning - now it is the rotated one:
b_x = N_x*cos_psi + w_x*sin_psi;
b_y = N_y*cos_psi + w_y*sin_psi;
b_z = N_z*cos_psi + w_z*sin_psi;
// Vector c is changing meaning - now it is the rotated one:
c_x = a_y*b_z - a_z*b_y;
c_y = a_z*b_x - a_x*b_z;
c_z = a_x*b_y - a_y*b_x;
#endif // ROTATE
// Now following Muinonen & Lumme, 2015 to compute the visual brightness of the asteroid.
// Attention! My (Samarasinha and A'Hearn 1991) frame of reference is b-c-a, but the Muinonen's frame is a-b-c
// On 17.10.2018 the bug was fixed, and now I properly convert the Muinonen's equations to the b-c-a frame
#ifdef INTERP
// Using Sun and Earth coordinates interpolated in situ
double rr[3];
double E_x1,E_y1,E_z1, S_x1,S_y1,S_z1;
// Quadratic interpolation:
rr[0] = (sData[i].MJD-sp->MJD0[1]) * (sData[i].MJD-sp->MJD0[2]) / (sp->MJD0[0]-sp->MJD0[1]) / (sp->MJD0[0]-sp->MJD0[2]);
rr[1] = (sData[i].MJD-sp->MJD0[0]) * (sData[i].MJD-sp->MJD0[2]) / (sp->MJD0[1]-sp->MJD0[0]) / (sp->MJD0[1]-sp->MJD0[2]);
rr[2] = (sData[i].MJD-sp->MJD0[0]) * (sData[i].MJD-sp->MJD0[1]) / (sp->MJD0[2]-sp->MJD0[0]) / (sp->MJD0[2]-sp->MJD0[1]);
E_x1 = sp->E_x0[0]*rr[0] + sp->E_x0[1]*rr[1] + sp->E_x0[2]*rr[2];
E_y1 = sp->E_y0[0]*rr[0] + sp->E_y0[1]*rr[1] + sp->E_y0[2]*rr[2];
E_z1 = sp->E_z0[0]*rr[0] + sp->E_z0[1]*rr[1] + sp->E_z0[2]*rr[2];
S_x1 = sp->S_x0[0]*rr[0] + sp->S_x0[1]*rr[1] + sp->S_x0[2]*rr[2];
S_y1 = sp->S_y0[0]*rr[0] + sp->S_y0[1]*rr[1] + sp->S_y0[2]*rr[2];
S_z1 = sp->S_z0[0]*rr[0] + sp->S_z0[1]*rr[1] + sp->S_z0[2]*rr[2];
// Normalizing the vectors E and S:
double E = sqrt(E_x1*E_x1 + E_y1*E_y1 + E_z1*E_z1);
E_x1= E_x1 / E;
E_y1= E_y1 / E;
E_z1= E_z1 / E;
double S = sqrt(S_x1*S_x1 + S_y1*S_y1 + S_z1*S_z1);
S_x1= S_x1 / S;
S_y1= S_y1 / S;
S_z1= S_z1 / S;
#else
// Using Sun and Earth coordinates interpolated previously on CPU
#define E_x1 sData[i].E_x
#define E_y1 sData[i].E_y
#define E_z1 sData[i].E_z
#define S_x1 sData[i].S_x
#define S_y1 sData[i].S_y
#define S_z1 sData[i].S_z
#endif
// Earth vector in the new (b,c,a) basis
// Switching from Muinonen coords (abc) to Samarasinha coords (bca)
Ep_b = b_x*E_x1 + b_y*E_y1 + b_z*E_z1;
Ep_c = c_x*E_x1 + c_y*E_y1 + c_z*E_z1;
Ep_a = a_x*E_x1 + a_y*E_y1 + a_z*E_z1;
// Sun vector in the new (b,c,a) basis
// Switching from Muinonen coords (abc) to Samarasinha coords (bca)
Sp_b = b_x*S_x1 + b_y*S_y1 + b_z*S_z1;
Sp_c = c_x*S_x1 + c_y*S_y1 + c_z*S_z1;
Sp_a = a_x*S_x1 + a_y*S_y1 + a_z*S_z1;
#ifdef BC
double b = P_b;
double c = P_c;
#else
double b = P_b_tumb;
double c = P_c_tumb;
#endif
#ifdef ANIMATE
if (i>=i1_rgb && i<i2_rgb)
// Module to compute the image of the asteroid using all threads in this block
compute_rgb(d_rgb, b, c,
E_x1, E_y1, E_z1,
Ep_b, Ep_c, Ep_a,
Sp_b, Sp_c, Sp_a,
b_x, b_y, b_z,
c_x, c_y, c_z,
a_x, a_y, a_z,
i, i1_rgb);
continue;
#endif
// Now that we converted the Earth and Sun vectors to the internal asteroidal basis (a,b,c),
// we can apply the formalism of Muinonen & Lumme, 2015 to calculate the brightness of the asteroid.
double Vmod;
#if defined(BW_BALL)
/* The simplest non-geometric brightness model - "black and white ball".
* The "a" axis end hemisphere is dark (albedo kappa<1), the oppostire hemisphere is bright (albedo=1).
* Assuming the phase angle = 0 (sun is behind the observer) for simplicity.
*/
// Zeta is the angle between the rotated axis "a1" and the direction to the observer (Ep)
double cos_zeta = Ep_a;
// Relative bw ball brightness (1 when only the bright hemisphere is visible; kappa when only the dark one):
// Vmod = -2.5*log10(0.5*(P_kappa*(1+cos_alpha) + (1-cos_alpha)));
// Bug fix on May 29, 2019:
Vmod = -2.5*log10(0.5*(P_kappa*(1+cos_zeta) + 0.5*(1-cos_zeta)));
#elif defined(RECT)
/* Simplified (phase is fixed at 0) rectangular prism brightness model.
* Here a, b, c correspond to half-lengths of the longest, intermediate, and shortest sides.
* Phase is fixed at 0, so all we are computing is the surface area of the prism projected on the plane of sky.
* Except for special cases, projected rectangular prism will consist of the three projected sides
* (which are parallelograms in projection): (a,b), (b,c), and (a,c).
* */
// Scaling the unit vectors b,c by the lengths provided by b and c (a=1):
b_x = b * b_x;
b_y = b * b_y;
b_z = b * b_z;
c_x = c * c_x;
c_y = c * c_y;
c_z = c * c_z;
// Projected axes (onto the plane of sky):
double ap_x = a_y*Ep_a - a_z*Ep_c;
double ap_y = a_z*Ep_b - a_x*Ep_a;
double ap_z = a_x*Ep_c - a_y*Ep_b;
double bp_x = b_y*Ep_a - b_z*Ep_c;
double bp_y = b_z*Ep_b - b_x*Ep_a;
double bp_z = b_x*Ep_c - b_y*Ep_b;
double cp_x = c_y*Ep_a - c_z*Ep_c;
double cp_y = c_z*Ep_b - c_x*Ep_a;
double cp_z = c_x*Ep_c - c_y*Ep_b;
// Vector products to be used in surface area of the projected prism computation:
double ab_x = ap_y*bp_z - ap_z*bp_y;
double ab_y = ap_z*bp_x - ap_x*bp_z;
double ab_z = ap_x*bp_y - ap_y*bp_x;
double ac_x = ap_y*cp_z - ap_z*cp_y;
double ac_y = ap_z*cp_x - ap_x*cp_z;
double ac_z = ap_x*cp_y - ap_y*cp_x;
double bc_x = bp_y*cp_z - bp_z*cp_y;
double bc_y = bp_z*cp_x - bp_x*cp_z;
double bc_z = bp_x*cp_y - bp_y*cp_x;
// Brightness is assumed to be proportional to the surface area of the projected rectangular prism (so no phase effects):
double ab = ab_x*ab_x+ab_y*ab_y+ab_z*ab_z;
double ac = ac_x*ac_x+ac_y*ac_y+ac_z*ac_z;
double bc = bc_x*bc_x+bc_y*bc_y+bc_z*bc_z;
if (ab < 0.0)
ab = 0.0;
if (bc < 0.0)
bc = 0.0;
if (ac < 0.0)
ac = 0.0;
Vmod = -2.5*log10(4*(sqrt(ab) + sqrt(bc) + sqrt(ac)));
#else
/* The defaul brightness model (triaxial ellipsoid, constant albedo), from Muinonen & Lumme, 2015
*/
double cos_alpha_p, sin_alpha_p, scalar_Sun, scalar_Earth, scalar;
double cos_lambda_p, sin_lambda_p, alpha_p, lambda_p;
// The two scalars from eq.(12) of Muinonen & Lumme, 2015; assuming a=1
// Switching from Muinonen coords (abc) to Samarasinha coords (bca)
scalar_Sun = sqrt(Sp_b*Sp_b/(b*b) + Sp_c*Sp_c/(c*c) + Sp_a*Sp_a);
scalar_Earth = sqrt(Ep_b*Ep_b/(b*b) + Ep_c*Ep_c/(c*c) + Ep_a*Ep_a);
// From eq.(13):
// Switching from Muinonen coords (abc) to Samarasinha coords (bca)
cos_alpha_p = (Sp_b*Ep_b/(b*b) + Sp_c*Ep_c/(c*c) + Sp_a*Ep_a) / (scalar_Sun * scalar_Earth);
sin_alpha_p = sqrt(1.0 - cos_alpha_p*cos_alpha_p);
alpha_p = atan2(sin_alpha_p, cos_alpha_p);
// From eq.(14):
scalar = sqrt(scalar_Sun*scalar_Sun + scalar_Earth*scalar_Earth + 2*scalar_Sun*scalar_Earth*cos_alpha_p);
cos_lambda_p = (scalar_Sun + scalar_Earth*cos_alpha_p) / scalar;
sin_lambda_p = scalar_Earth*sin_alpha_p / scalar;
lambda_p = atan2(sin_lambda_p, cos_lambda_p);
// Asteroid's model visual brightness, from eq.(10):
// Simplest case of isotropic single-particle scattering, P(alpha)=1:
Vmod = -2.5*log10(b*c * scalar_Sun*scalar_Earth/scalar * (cos(lambda_p-alpha_p) + cos_lambda_p +
sin_lambda_p*sin(lambda_p-alpha_p) * log(1.0 / tan(0.5*lambda_p) / tan(0.5*(alpha_p-lambda_p)))));
#endif // if BW_BALL
#ifdef TREND
// Solar phase angle:
double alpha = acos(Sp_b*Ep_b + Sp_c*Ep_c + Sp_a*Ep_a);
// De-trending the brightness curve:
Vmod = Vmod - P_A*alpha;
#endif
if (Nplot > 0)
{
#ifndef MINIMA_TEST
d_Vmod[i] = Vmod + delta_V[0]; //???
#endif
}
else
{
// Filter:
int m = sData[i].Filter;
// Difference between the observational and model magnitudes:
double y = sData[i].V - Vmod;
// printf("%f %f\n",sData[i].V ,Vmod);
sum_y2[m] = sum_y2[m] + y*y*sData[i].w;
sum_y[m] = sum_y[m] + y*sData[i].w;
sum_w[m] = sum_w[m] + sData[i].w;
}
#ifdef NUDGE
// Determining if the previous time point was a local minimum
if (i < i1 + 2)
{
t_old[i-i1] = sData[i].MJD;
V_old[i-i1] = Vmod;
}
else
{
if (V_old[1]>V_old[0] && V_old[1]>=Vmod)
// We just found a brightness minimum (V maximum), between i-2 ... i
{
bool local=0;
for (int ii=0; ii<sp->N_obs; ii++)
// If the model minimum at t_old[1] is within DT_MAX2 days from any observed minimum in sp structure, we mark it as local.
// It can now contribute to the merit function calculations later in the kernel.
if (fabs(t_old[1]-sp->t_obs[ii]) < DT_MAX2)
local = 1;
if (local)
// Only memorising model minima in the vicinity of observed minima (within DT_MAX2 days) - along the time axis:
{
M++; // Counter of model minima in the vicinity of observed minima in t dimension
if (M > M_MAX)
{
// Too many local minima - a fail:
return 1e30;
}
// Using parabolic approximatioin to find the precise location of the local model minimum in the [i-2 ... i] interval
//Fitting a parabola to the three last points:
double a = ((Vmod-V_old[1])/(sData[i].MJD-t_old[1]) - (V_old[1]-V_old[0])/(t_old[1]-t_old[0])) / (sData[i].MJD-t_old[0]);
double b = (V_old[1]-V_old[0])/(t_old[1]-t_old[0]) - a*(t_old[1]+t_old[0]);
double c = V_old[1] - a*t_old[1]*t_old[1] - b*t_old[1];
// Maximum point for the parabola:
t_mod[M-1] = -b/2.0/a;
V_mod[M-1] = a*t_mod[M-1]*t_mod[M-1] + b*t_mod[M-1] + c;
}
}
// Shifting the values:
t_old[0] = t_old[1];
V_old[0] = V_old[1];
t_old[1] = sData[i].MJD;
V_old[1] = Vmod;
}
#endif
#ifdef MINIMA_TEST
if (Nplot > 0)
{
// Determining if the previous time point was a local minimum
if (i < i1 + 2)
{
t_old[i-i1] = sData[i].MJD;
V_old[i-i1] = Vmod;
}
else
{
if (V_old[1]>V_old[0] && V_old[1]>=Vmod)
// We just found a brightness minimum (V maximum), between i-2 ... i
{
//!!! Assumes that the input data always starts from the same point (all_new.dat file):
double t = 58051.044624 + t_old[1];
// Only accepting minima within the time intervals (well) covered by observations:
if (t>=58051.044624 && t<=58051.117754 ||
t>=58051.977665 && t<=58052.185066 ||
t>=58053.078873 && t<=58053.528586 ||
t>=58054.093274 && t<=58054.514202 ||
t>=58055.234145 && t<=58055.354832 ||
t>=58056.181290 && t<=58056.278901)
{
N_minima++;
if (N_minima > MAX_MINIMA)
return -1;
Vmin[N_minima-1] = V_old[1] + delta_V[0];
}
}
// Shifting the values:
t_old[0] = t_old[1];
V_old[0] = V_old[1];
t_old[1] = sData[i].MJD;
V_old[1] = Vmod;
}
}
#endif
#ifdef MIN_DV
if (sData[i].MJD > DV_MARGIN && sData[i].MJD < sData[N_data-1].MJD-DV_MARGIN)
if (Vmod > Vmax)
Vmax = Vmod;
if (Vmod < Vmin)
Vmin = Vmod;
#endif
} // data points loop
} // for (iseg) loop
#ifdef MINIMA_TEST
if (Nplot > 0)
{
if (N_minima == 0)
return 0.0;
float Vbest[7];
// Finding the 7 deepest minima (largest Vmod maxima)
int N_min = 7;
if (N_minima < N_min)
N_min = N_minima;
for (int j=0; j<N_min; j++)
{
float Vmax = -1e30;
int kmax = -1;
for (int k=0; k<N_minima; k++)
{
if (Vmin[k] > Vmax)
{
Vmax = Vmin[k];
kmax = k;
}
} // k loop
if (kmax == -1)
return -1;
Vbest[j] = Vmax; // Memorizing the minimum
Vmin[kmax] = -2e30; // Erasing the minimum we found, so we can search for the next deepest minimum
} // j loop
// Computing the score: number of model minima which are deeper than the same ranking deepest observed minima:
// Full range: from 0 (worst) to 7 (best).
int score = 0;
if (N_minima == 0)
return (CHI_FLOAT)score;
if (Vbest[0]>=25.715) // Feature D
score++;
if (N_minima == 1)
return (CHI_FLOAT)score;
if (Vbest[1]>=25.254) // Feature E
score++;
if (N_minima == 2)
return (CHI_FLOAT)score;
if (Vbest[2]>=25.234) // Feature C
score++;
if (N_minima == 3)
return (CHI_FLOAT)score;
if (Vbest[3]>=25.212) // Feature A
score++;
if (N_minima == 4)
return (CHI_FLOAT)score;
if (Vbest[4]>=24.940) // Feature B
score++;
if (N_minima == 5)
return (CHI_FLOAT)score;
if (Vbest[5]>=24.846) // Feature F
score++;
if (N_minima == 6)
return (CHI_FLOAT)score;
if (Vbest[6]>=24.834) // Feature L
score++;
// Returning score (instead of the usual chi2):
return (CHI_FLOAT)score;
} // if Nplot>0
#endif // MINIMA_TEST
if (Nplot > 0)
return 0.0;
// Computing chi^2
CHI_FLOAT chi2m;
chi2a=0.0;
#ifdef RMSD
CHI_FLOAT SUM_w = 0.0;
#endif
for (m=0; m<N_filters; m++)
{
// Chi^2 for the m-th filter:
chi2m = sum_y2[m] - sum_y[m]*sum_y[m]/sum_w[m];
chi2a = chi2a + chi2m;
// Average difference Vdata-Vmod for each filter (used for plotting):
// In SEGMENT mode, computation is done here, over all the segments, as the model scaling (with its size) is fixed across all the segments
delta_V[m] = sum_y[m] / sum_w[m];
#ifdef RMSD
SUM_w = SUM_w + sum_w[m];
#endif
}
#ifdef RMSD // Computing RMSD:
chi2a = sqrt(chi2a / SUM_w);
#else // Normal case: computing chi^2:
chi2a = chi2a / (N_data - N_PARAMS - N_filters);
#endif
#ifdef NUDGE
// Here we will modify the chi2a value based on how close model minima are to the corresponding observed minima (in 2D - both t and V axes),
// and will punish if the number of model local minima gets too high.
float S_M = 0.0;
float P_tot = 1.0;
for (int imod=0; imod < M; imod++)
// Loop over all detected local model minima
{
for (int iobs=0; iobs < sp->N_obs; iobs++)
// Loop over all the observed minima in sp structure
{
float dt = fabs(t_mod[imod]-sp->t_obs[iobs]);
if (dt < DT_MAX2)
// Only local model minima are processed
{
if (dt > DT_MAX)
// dt is between DT_MAX and DT_MAX2; we use this transition area to punish for too many model minima; it doesn't contribute to nudging
{
// x=0 when data minimum enters the remote (DT_MAX2) vicinity of the iobs observed minimum, and becomes 1 when it enters the close (DT_MAX) vicinity:
float x = (DT_MAX2 - dt) / (DT_MAX2 - DT_MAX);
S_M = S_M + x*x*(-2.0*x+3.0); // Computing the effective number of model minima, using a cubic spline
}
else
// Inside the DT_MAX area
{
S_M = S_M + 1.0;
// !!! Only works properly if N_filters=1 !!!
float dV = V_mod[imod] + delta_V[0] - sp->V_obs[iobs];
#ifdef V1S
// One-sided treatment of dV: if model minimum is below the observed one, keep dV=0 (don't punish). Only punish when dV>0.
// This should promote minima which are at least as deep as the observed ones
if (dV > 0.0)
dV = 0.0;
#endif
// 2D distance of the model minimum from the observed one, with different scales for t and V axes, normalized to DT_MAX and DV_MAX, respectively:
float x = sqrt(dt*dt/DT_MAX/DT_MAX + dV*dV/DV_MAX/DV_MAX);
if (x < 1.0)
// The model minimum is inside the 2D vicinity area near the observed minimum
{
// float P_i = x*x*(-2.0*x+3.0); // Using a cubic spline for a smooth reward function
// Using inverted Lorentzian function instead, with the core radius L_RC=0..1 (L_RC2=L_RC^2)
// It is not perfect (at x=1 the derivative is not perfectly smooth, but good enogh for small L_RC)
float P_i = L_A * x*x/(x*x + L_RC2);
// Computing the cumulative reward function based on how close model minima are to observed ones.
// 0<P_MIN<1 sets how strong the reward is (the closer to 0, the stronger)
P_tot = P_tot * (P_MIN*(1.0 + P_MIN2*P_i));
}
}
}
}
}
P_tot = powf(P_tot, 1.0/sp->N_obs); // Normalizing the reward to the number of observed minima
// P_tot is the reward factor for how close all observed minima are to model minima. It varies between P_MIN (likely a perfect match) to 1 (no match)
if (P_tot < P_MIN)
// This might happen if there is more than one model minimum per observed one; we don't want to encourage that:
P_tot = P_MIN;
if (chi2a > CHI2_1)
P_tot = 1.0;
else if (chi2a > CHI2_0)
{
float x = (CHI2_1 - chi2a) / (CHI2_1 - CHI2_0);
float beta = x*x*(-2*x+3); // Using cubic spline for a smooth transition from the chi2a>CHI2_1 mode (P_tot=1) to chi2a<CHI2_0 mode (full P_tot)
P_tot = powf(P_tot, beta);
}
float P_M;
if (S_M < M_MAX2)
P_M = 1.0;
else if (S_M < M_MAX)
{
float x = (S_M-M_MAX2) / (M_MAX-M_MAX2);
// ??? I could introduce a const parameter to regulate the strength of the punishment
P_M = 1.0 + 3*x*x*(-2*x+3); // Using cubic spline to smoothen the punishment function for too many model minima; varies from 1 (no punishment) to 4 (maximum punishment)
}
else
P_M = 4.0; // Will need to change if the strength of punishment is an ajustable parameter
// !!! Need to fix edge effects!!!
// Also, should only reward when chi2 is good enough
// Applying the reward and the punishment to chi2:
chi2a = chi2a * P_tot * P_M;
#endif
#ifdef MIN_DV
double x = (Vmax-Vmin-DV_MIN1)/(DV_MIN2-DV_MIN1);
double P;
if (x < 0.0)
P = 1.0;
else if (x < 1.0)