From 60ec64c5648d67c164c6a0d3bc8300f36c12d7a5 Mon Sep 17 00:00:00 2001 From: jscparker Date: Sat, 8 Dec 2018 00:25:01 +0000 Subject: [PATCH] add arbitrary --- arbitrary/README.arbitrary | 87 + arbitrary/e_deriv_tst_1.adb | 596 +++ arbitrary/e_derivs.adb | 1051 ++++ arbitrary/e_derivs.ads | 239 + arbitrary/e_function_demo_1.adb | 2033 ++++++++ arbitrary/e_jacobi_eigen.adb | 304 ++ arbitrary/e_jacobi_eigen.ads | 124 + arbitrary/e_jacobi_eigen_demo_1.adb | 531 ++ arbitrary/e_jacobi_eigen_tst_1.adb | 127 + arbitrary/e_real_demo_1.adb | 702 +++ arbitrary/e_real_io_tst_2.adb | 200 + arbitrary/e_real_tst_1.adb | 435 ++ arbitrary/e_real_tst_2.adb | 459 ++ arbitrary/extended_real-e_rand.adb | 109 + arbitrary/extended_real-e_rand.ads | 24 + .../extended_real-elementary_functions.adb | 2033 ++++++++ .../extended_real-elementary_functions.ads | 97 + arbitrary/extended_real-io.adb | 704 +++ arbitrary/extended_real-io.ads | 107 + arbitrary/extended_real.adb | 4268 +++++++++++++++++ arbitrary/extended_real.ads | 752 +++ 21 files changed, 14982 insertions(+) create mode 100644 arbitrary/README.arbitrary create mode 100644 arbitrary/e_deriv_tst_1.adb create mode 100644 arbitrary/e_derivs.adb create mode 100644 arbitrary/e_derivs.ads create mode 100644 arbitrary/e_function_demo_1.adb create mode 100644 arbitrary/e_jacobi_eigen.adb create mode 100644 arbitrary/e_jacobi_eigen.ads create mode 100644 arbitrary/e_jacobi_eigen_demo_1.adb create mode 100644 arbitrary/e_jacobi_eigen_tst_1.adb create mode 100644 arbitrary/e_real_demo_1.adb create mode 100644 arbitrary/e_real_io_tst_2.adb create mode 100644 arbitrary/e_real_tst_1.adb create mode 100644 arbitrary/e_real_tst_2.adb create mode 100644 arbitrary/extended_real-e_rand.adb create mode 100644 arbitrary/extended_real-e_rand.ads create mode 100644 arbitrary/extended_real-elementary_functions.adb create mode 100644 arbitrary/extended_real-elementary_functions.ads create mode 100644 arbitrary/extended_real-io.adb create mode 100644 arbitrary/extended_real-io.ads create mode 100644 arbitrary/extended_real.adb create mode 100644 arbitrary/extended_real.ads diff --git a/arbitrary/README.arbitrary b/arbitrary/README.arbitrary new file mode 100644 index 0000000..ee8e10c --- /dev/null +++ b/arbitrary/README.arbitrary @@ -0,0 +1,87 @@ +-- +-- directory Arbitrary contains +-- A collection of arbitrary precision floating-point routines: +-- arithmetic, elementary functions, IO, and demos. +-- +-- package Extended_Real provides: +-- An arbitrary precision floating-point data type: e_Real. +-- +-- Lower limit is 28 decimals, no upper limit is enforced. +-- +-- All internal arithmetic is done on 64-bit Integers, so its most +-- efficient on 64-bit CPU's. The package is Pure. +-- Floating point attributes (Ada 95) are implemented as function calls. +-- The package exports standard floating point operators: +-- "*", "+", "/", "**", "Abs", "<", ">", "<=" , ">=", etc. +-- The standard operators make it easy to modify existing code to use +-- extended precision arithmetic. Procedure calls Mult(X,Y) and Square(X) +-- are also provided. They do multiplication "in-place", (overwrite +-- X with the result) and are somewhat faster than the equivalent X := X*Y, +-- and X := X*X. +-- +-- package Extended_Real.Elementary_Functions provides: +-- Sin, Cos, Sqrt, Arcsin, Arccos, Arctan, Log, Exp, Reciprocal (1/x), +-- Reciprocal_Nth_Root (x to the power of -1/N), Divide, and "**" for +-- extended arguments and exponents. Routines are Ada 95'ish. +-- +-- package Extended_Real.IO provides: +-- Text to extended-precision e_Real translation routines, and +-- e_Real to Text translation routines. +-- +-- package e_Derivs provides: +-- Extended precision routines for taking high order derivatives of +-- functions. Functions constructed from "*", "+", "/", "**", Sin, +-- Cos, Sqrt, Arcsin, Arccos, Arctan, Log, Exp, Compose = f(g(x)), +-- and Reciprocal can be differentiated to order specified by user. +-- +-- package Extended_Real.Rand provides: +-- a basic Random Number Generator. +-- +-- procedure e_real_demo_1.adb is: +-- an introductory routine that demonstrates use of Extended_Real. +-- +-- procedure e_function_demo_1.adb is: +-- an introductory routine that demonstrates use of +-- Extended_Real.Elementary_Functions. +-- +-- procedure e_jacobi_eigen_demo_1.adb demonstrates: +-- extended-precision eigen-decomposition on Hilbert's matrix using +-- package e_Jacobi_Eigen. +-- +-- package e_Jacobi_Eigen is: +-- a Jabobi iterative eigen-decomposition routine. +-- (Demonstrates how easy it is to upgrade a floating point routine +-- to extended precision.) e_Jacobi_Eigen uses Extended_Real. +-- +-- A decent optimization on gcc/GNAT is usually provided by: +-- gnatmake -gnatnp -O3 -march=native -funroll-loops xxx.adb +-- +-- Always do a preliminary run which exercises Assertions, and other Checks: +-- gnatmake -Wall -gnatwa -gnatVa -gnata -gnato -fstack-check -gnateE xxx.adb +-- +-- Because precision is arbitrary, Extended_Real is not specially +-- optimized for any particular precision. The present design works best +-- in the limit of 100's of decimal digits. +-- +-- Common applications: +-- 0. Estimation of error in lower precision floating-point calculations. +-- 1. Evaluation of constants for math routines and Table-driven algorithms. +-- 2. Evaluation of series solutions of special function, especially when +-- the terms are constructed of large factorials and exponentials. +-- 3. Evaluation of recurrance relations for special functions. +-- +-- Generics greatly reduce the work you have to do in modifying programs +-- to use extended floating point: +-- +-- 1. place generic formal declarations +-- of the required extended arithmetic functions at the the top of the +-- package or subprogram to be modified. +-- +-- 2. use the unary "-" and "+" routines that convert Real to Extended: +-- +-- so that declarations +-- Number : Generic_Formal_Type := +1.234; +-- and statements like +-- Z := (+4.567834E+012) * X; +-- will be acceptible to both Real and Extended types. +-- diff --git a/arbitrary/e_deriv_tst_1.adb b/arbitrary/e_deriv_tst_1.adb new file mode 100644 index 0000000..38db439 --- /dev/null +++ b/arbitrary/e_deriv_tst_1.adb @@ -0,0 +1,596 @@ + + +with e_Derivs; +with Ada.Numerics.Generic_Elementary_Functions; +with Extended_Real; +with Extended_Real.Elementary_Functions; +with Extended_Real.IO; +with Text_IO; use Text_IO; + +procedure e_deriv_tst_1 is + + type Real_8 is digits 15; + + package mth is new Ada.Numerics.Generic_Elementary_Functions (Real_8); + use mth; + package ext is new Extended_Real (Real_8); + use ext; + package fnc is new ext.Elementary_Functions (Sqrt, Log, Exp, Arcsin); + use fnc; + package eio is new ext.IO; -- extented IO + use eio; + + + subtype Real is e_Real; + + Max_Order_Of_Deriv : constant Positive := 40; + + package dif is new e_Derivs (Max_Order_Of_Deriv, Real, Real_8); + use dif; + + w, Phase : Real := +1.0; + Time, t : Real; + + FullDuration : constant Real := +1.0; + No_Of_Steps : constant Integer := 100; + + DeltaT : Real := FullDuration / (+Real_8(No_Of_Steps)); + + G, H : Derivatives := (others => +0.0); + + Deriv0, Deriv1, Deriv2, Deriv3, Deriv4, Deriv5, Deriv6 : Real; + Error : Array(Deriv_Index) of Real := (others => +0.0); + Max_Error : Real; + + Three : constant Real := +3.0; + +begin + +-- REMEMBER, Functions return REDUCED derivatives. + +--********************************************************************* +-- Test 1. H = Exp_d(Sin_d(t)) +--********************************************************************* +-- H = f(g(t)) = Exp(Sin(t)) +-- d^1 H = Cos(t)*Exp(Sin(t)) +-- d^2 H = (-Sin(t) + Cos(t)**2) * Exp(Sin(t)) +-- d^3 H = (-Cos(t) -3*Sin(t)*Cos(t) + Cos(t)**3) * Exp(Sin(t)) +-- d^4 H = ((-Cos(t) -3*Sin(t)*Cos(t) + Cos(t)**3) * Cos(t) + +-- (Sin(t) - 3*Cos(t)**2 + 3*Sin(t)**2 - 3*Sin(t)*Cos(t)**2))*Exp(Sin(t)) + + Max_Error := Zero; + + for I in 0..No_Of_Steps loop + + Time := (+Real_8 (I)) * DeltaT; + t := Time; + + G := Sin_d (t); + H := Compose (Exp_d (G(0)), G); -- Exp (Sin (t)) + + Deriv0 := Exp (Sin (t)); + + Deriv1 := Cos(t) * Exp(Sin(t)); + + Deriv2 := (-Sin(t) + Cos(t)**2) * Exp(Sin(t)); + + Deriv3 := (-Cos(t) - Three*Sin(t)*Cos(t) + Cos(t)**3) * Exp(Sin(t)); + + Deriv4 := ((-Cos(t) -Three*Sin(t)*Cos(t) + Cos(t)**3) * Cos(t) + + (Sin(t) - Three*Cos(t)**2 + Three*Sin(t)**2 + - Three*Sin(t)*Cos(t)**2))*Exp(Sin(t)); + + Un_Reduce (H); + + Error(0) := Abs (H(0) - Deriv0); + Error(1) := Abs (H(1) - Deriv1); + Error(2) := Abs (H(2) - Deriv2); + Error(3) := Abs (H(3) - Deriv3); + Error(4) := Abs (H(4) - Deriv4); + + for I in 0..4 loop + if Error(I) > Max_Error then + Max_Error := Error(I); + end if; + end loop; + + end loop; + new_line; put("Max error, test 1: "); + put (e_Real_Image (Max_Error, Aft => 20)); + +--********************************************************************* +-- Test 2. H = Sin_d(t**3) +--********************************************************************* +-- H = f(g(t)) = Sin(t**3) +-- d^1 H = 3*t**2 * Cos(t**3) +-- d^2 H = 6*t * Cos(t**3) - 9*t**4 * Sin(t**3) + + Max_Error := Zero; + + for I in 0..No_Of_Steps loop + + Time := (+Real_8 (I)) * DeltaT; + + G := Time ** 3; + H := Compose (Sin_d (G(0)), G); -- Sin (Time**3) + + Deriv0 := Sin(Time**3); + Deriv1 := (+3.0)*Time**2 * COS(Time**3); + Deriv2 := (+6.0)*Time*Cos(Time**3) - (+9.0)*Time**4*Sin(Time**3); + + Un_Reduce (H); + + Error(0) := Abs (H(0) - Deriv0); + Error(1) := Abs (H(1) - Deriv1); + Error(2) := Abs (H(2) - Deriv2); + + for I in 0..2 loop + if Error(I) > Max_Error then + Max_Error := Error(I); + end if; + end loop; + + end loop; + new_line; put("Max error, test 2: "); + put (e_Real_Image (Max_Error, Aft => 20)); + +--********************************************************************* +-- Test 3. H = Sin_d(t)**5 +--********************************************************************* +-- H = g(t)**5 = Sin(t)**5 +-- d^1 H = 5 * Cos(t) * Sin(t)**4 +-- d^2 H = 20 * Cos(t)**2 * Sin(t)**3 - 5 * Sin(t)**5 +-- d^3 H = -40 * Cos(t) * Sin(t)**4 + 60 * Cos(t)**3 * Sin(t)**2 +-- - 25 * Cos(t) * Sin(t)**4 + + Max_Error := Zero; + + for I in 0..No_Of_Steps loop + + Time := (+Real_8 (I)) * DeltaT; t := Time; + + H := Sin_d(Time) ** 5; + + Deriv0 := Sin(t)**5; + Deriv1 := (+5.0) * Cos(t) * Sin(t)**4 ; + Deriv2 := (+20.0) * Cos(t)**2 * Sin(t)**3 - (+5.0) * Sin(t)**5; + Deriv3 := (-40.0) * Cos(t) * Sin(t)**4 + (+60.0) * Cos(t)**3 * Sin(t)**2 + - (+25.0) * Cos(t) * Sin(t)**4; + + Un_Reduce (H); + + Error(0) := Abs (H(0) - Deriv0); + Error(1) := Abs (H(1) - Deriv1); + Error(2) := Abs (H(2) - Deriv2); + Error(3) := Abs (H(3) - Deriv3); + + for I in 0..3 loop + if Error(I) > Max_Error then + Max_Error := Error(I); + end if; + end loop; + + end loop; + new_line; put("Max error, test 3: "); + put (e_Real_Image (Max_Error, Aft => 20)); + +--********************************************************************* +-- Test 4. H = t**4 +--********************************************************************* +-- H = = t**4 +-- d^1 H = 4 * t**3 +-- d^2 H = 12 * t**2 +-- d^3 H = 24 * t +-- d^4 H = 24 +-- d^5 H = 0 +-- d^6 H = 0 + + Max_Error := Zero; + + for I in 0..No_Of_Steps loop + + Time := (+Real_8 (I)) * DeltaT; t := Time; + + H := t ** 4; + + Deriv0 := t ** 4; + Deriv1 := (+4.0) * t**3; + Deriv2 := (+12.0) * t**2; + Deriv3 := (+24.0) * t; + Deriv4 := +24.0; + Deriv5 := Zero; + Deriv6 := Zero; + + Un_Reduce (H); + + Error(0) := Abs (H(0) - Deriv0); + Error(1) := Abs (H(1) - Deriv1); + Error(2) := Abs (H(2) - Deriv2); + Error(3) := Abs (H(3) - Deriv3); + Error(4) := Abs (H(4) - Deriv4); + Error(5) := Abs (H(5) - Deriv5); + Error(6) := Abs (H(6) - Deriv6); + + for I in 0..6 loop + if Error(I) > Max_Error then + Max_Error := Error(I); + end if; + end loop; + + end loop; + new_line; put("Max error, test 4: "); + put (e_Real_Image (Max_Error, Aft => 20)); + +--********************************************************************* +-- Test 5. H = Sin_d (w * t + phase) +--********************************************************************* +-- H = Sin (w * t + phase) +-- d^1 H = w**1 * Cos (w * t + phase) +-- d^2 H = -w**2 * Sin (w * t + phase) +-- d^3 H = -w**3 * Cos (w * t + phase) +-- d^4 H = w**4 * Sin (w * t + phase) +-- d^5 H = w**5 * Cos (w * t + phase) +-- d^6 H = -w**6 * Sin (w * t + phase) + + Max_Error := Zero; + w := +0.92345; + phase := +0.34567; + + for I in 0..No_Of_Steps loop + + Time := (+Real_8 (I)) * DeltaT; t := Time; + + H := Sin_d (t, w, phase); + + Deriv0 := Sin (w * t + phase); + Deriv1 := w**1 * Cos (w * t + phase); + Deriv2 := -w**2 * Sin (w * t + phase); + Deriv3 := -w**3 * Cos (w * t + phase); + Deriv4 := w**4 * Sin (w * t + phase); + Deriv5 := w**5 * Cos (w * t + phase); + Deriv6 := -w**6 * Sin (w * t + phase); + + Un_Reduce (H); + + Error(0) := Abs (H(0) - Deriv0); + Error(1) := Abs (H(1) - Deriv1); + Error(2) := Abs (H(2) - Deriv2); + Error(3) := Abs (H(3) - Deriv3); + Error(4) := Abs (H(4) - Deriv4); + Error(5) := Abs (H(5) - Deriv5); + Error(6) := Abs (H(6) - Deriv6); + + for I in 0..6 loop + if Error(I) > Max_Error then + Max_Error := Error(I); + end if; + end loop; + + end loop; + new_line; put("Max error, test 5: "); + put (e_Real_Image (Max_Error, Aft => 20)); + +--********************************************************************* +-- Test 6. H = Log_d (w * t + phase). +-- This also test Reciprocal : Real -> Derivative. +--********************************************************************* + + Max_Error := Zero; + w := +0.22345; + phase := +0.34567; + + for I in 0..No_Of_Steps loop + + Time := (+Real_8 (I)) * DeltaT; t := Time; + + H := Log_d (t, w, phase); + + Deriv0 := Log (w * t + phase); + Deriv1 := w**1 / (w * t + phase); + Deriv2 := -w**2 / (w * t + phase)**2; + Deriv3 := (+2.0) * w**3 / (w * t + phase)**3; + Deriv4 := (-6.0) * w**4 / (w * t + phase)**4; + Deriv5 := (+24.0) * w**5 / (w * t + phase)**5; + Deriv6 := (-120.0) * w**6 / (w * t + phase)**6; + + Un_Reduce (H); + + Error(0) := Abs (H(0) - Deriv0); + Error(1) := Abs (H(1) - Deriv1); + Error(2) := Abs (H(2) - Deriv2); + Error(3) := Abs (H(3) - Deriv3); + Error(4) := Abs (H(4) - Deriv4); + Error(5) := Abs (H(5) - Deriv5); + Error(6) := Abs (H(6) - Deriv6); + + for I in 0..6 loop + if Error(I) > Max_Error then + Max_Error := Error(I); + end if; + end loop; + + end loop; + new_line; put("Max error, test 6: "); + put (e_Real_Image (Max_Error, Aft => 20)); + +--********************************************************************* +-- Test 7. H = Cos_d (w * t + phase) +--********************************************************************* + Max_Error := Zero; + w := +0.92345; + phase := +0.34567; + + for I in 0..No_Of_Steps loop + + Time := (+Real_8 (I)) * DeltaT; t := Time; + + H := Cos_d (t, w, phase); + + Deriv0 := Cos (w * t + phase); + Deriv1 := -w**1 * Sin (w * t + phase); + Deriv2 := -w**2 * Cos (w * t + phase); + Deriv3 := w**3 * Sin (w * t + phase); + Deriv4 := w**4 * Cos (w * t + phase); + Deriv5 := -w**5 * Sin (w * t + phase); + Deriv6 := -w**6 * Cos (w * t + phase); + + Un_Reduce (H); + + Error(0) := Abs (H(0) - Deriv0); + Error(1) := Abs (H(1) - Deriv1); + Error(2) := Abs (H(2) - Deriv2); + Error(3) := Abs (H(3) - Deriv3); + Error(4) := Abs (H(4) - Deriv4); + Error(5) := Abs (H(5) - Deriv5); + Error(6) := Abs (H(6) - Deriv6); + + for I in 0..6 loop + if Error(I) > Max_Error then + Max_Error := Error(I); + end if; + end loop; + + end loop; + new_line; put("Max error, test 7: "); + put (e_Real_Image (Max_Error, Aft => 20)); + +--********************************************************************* +-- Test 8. H = Sin_d / Cos_d; +--********************************************************************* + Max_Error := Zero; + + for I in 0..No_Of_Steps loop + + Time := (+Real_8 (I)) * DeltaT; t := Time; + + H := Sin_d (t) / Cos_d (t); + + Deriv0 := Sin(t) / Cos(t); + Deriv1 := (+1.0) / Cos(t)**2; + Deriv2 := (+2.0) * Sin(t) / Cos(t)**3; + Deriv3 := (+2.0) / Cos(t)**2 + (+6.0) * Sin(t)**2 / Cos(t)**4; + Deriv4 := (+4.0) * Sin(t) / Cos(t)**3 + (+12.0) * Sin(t) / Cos (t)**3 + + (+24.0) * Sin(t)**3 / Cos(t)**5; + + Un_Reduce (H); + + Error(0) := Abs (H(0) - Deriv0); + Error(1) := Abs (H(1) - Deriv1); + Error(2) := Abs (H(2) - Deriv2); + Error(3) := Abs (H(3) - Deriv3); + Error(4) := Abs (H(4) - Deriv4); + + for I in 0..4 loop + if Error(I) > Max_Error then + Max_Error := Error(I); + end if; + end loop; + + end loop; + new_line; put("Max error, test 8: "); + put (e_Real_Image (Max_Error, Aft => 20)); + +--********************************************************************* +-- Test 9. H = Sqrt_d; +--********************************************************************* + Max_Error := Zero; + DeltaT := One / (+(No_Of_Steps + 1)); + + for I in 2..No_Of_Steps loop + + Time := (+Real_8 (I)) * DeltaT; t := Time; + + H := Sqrt_d (t); + + Deriv0 := Sqrt(t); + Deriv1 := (+0.5) / Sqrt (t); + Deriv2 := -(+0.5)*(+0.5)/ ((t) * Sqrt (t)); + Deriv3 := (+0.5)*(+0.5)*(+0.5)*(+3.0)/ (t * t * Sqrt (t)); + --Deriv4 := -(+0.5)*(+0.5)*(+0.5)*(+0.5)*(+3.0)*(+5.0)/ (t * t * t * Sqrt (t)); + + Un_Reduce (H); + + Error(0) := Abs (H(0) - Deriv0); + Error(1) := Abs (H(1) - Deriv1); + Error(2) := Abs (H(2) - Deriv2); + Error(3) := Abs (H(3) - Deriv3); + --Error(4) := Abs (H(4) - Deriv4); + + for I in 0..3 loop + if Error(I) > Max_Error then + Max_Error := Error(I); + end if; + end loop; + + end loop; + new_line; put("Max error, test 9: "); + put (e_Real_Image (Max_Error, Aft => 20)); + +--********************************************************************* +-- Test 10. H = Arcsin_d; tests Compose and "/". +--********************************************************************* + Max_Error := Zero; + DeltaT := One / (+Real_8(No_Of_Steps + 7)); + + for I in 1..No_Of_Steps loop + + Time := (+Real_8 (I)) * DeltaT; + t := Time; + + H := Arcsin_d (t); + + Deriv0 := Arcsin(t); + Deriv1 := One / Sqrt (One - t*t); + Deriv2 := t / ((One - t*t) * Sqrt (One - t*t)); + Deriv3 := One / ((One - t*t) * Sqrt (One - t*t)) + + (+3.0)*t*t / ((One - t*t)*(One - t*t) * Sqrt (One - t*t)); + + Deriv4 := (+9.0)* t / ((One - t*t)*(One - t*t)*Sqrt (One - t*t)) + + (+3.0)*(+5.0)*t*t*t / ((One - t*t)*(One - t*t)*(One - t*t)*Sqrt (One - t*t)); + + Un_Reduce (H); + + Error(0) := Abs (H(0) - Deriv0); + Error(1) := Abs (H(1) - Deriv1); + Error(2) := Abs (H(2) - Deriv2); + Error(3) := Abs (H(3) - Deriv3); + --Error(4) := Abs (H(4) - Deriv4); + + for I in 0..3 loop + if Error(I) > Max_Error then + Max_Error := Error(I); + end if; + end loop; + + end loop; + new_line; put("Max error, test 10: "); + put (e_Real_Image (Max_Error, Aft => 20)); + + +--********************************************************************* +-- Test 11. H = Arctan_d; tests Compose and "/". +--********************************************************************* + Max_Error := Zero; + DeltaT := One / (+(No_Of_Steps + 7)); + + for I in 1..No_Of_Steps loop + + Time := (+Real_8(I)) * DeltaT; + t := Time; + + H := Arctan_d (t); + + Deriv0 := Arcsin (t / Sqrt (One + t*t)); + Deriv1 := One / (One + t*t); + Deriv2 := -(+2.0) * t / ((One + t*t)**2); + Deriv3 := -(+2.0) / ((One + t*t)**2) + + (+8.0)*t*t / ((One + t*t)**3); + + Un_Reduce (H); + + Error(0) := Abs (H(0) - Deriv0); + Error(1) := Abs (H(1) - Deriv1); + Error(2) := Abs (H(2) - Deriv2); + Error(3) := Abs (H(3) - Deriv3); + + for I in 0..3 loop + if Error(I) > Max_Error then + Max_Error := Error(I); + end if; + end loop; + + end loop; + new_line; put("Max error, test 11: "); + put (e_Real_Image (Max_Error, Aft => 20)); + +--********************************************************************* +-- Test 12. H = Exp_d (Log_d (t)) +-- +-- Test the reduced derivs. Un-reduced get too large. +--********************************************************************* +-- H = f(g(t)) = Exp_d (Log_d (t)) +-- d^1 H = One +-- d^2 H = Zero + + Max_Error := Zero; + + for I in 0..No_Of_Steps loop + + Time := (+Real_8 (I)) * (+0.5) + (+5.0); + t := Time; + + G := Log_d (t); + H := Compose (Exp_d (G(0)), G); -- Exp (Log (t)) + + Deriv0 := t; + + Deriv1 := One; + + Deriv2 := Zero; + + --Un_Reduce (H); + + Error(0) := Abs (H(0) - Deriv0); + Error(1) := Abs (H(1) - Deriv1); + Error(2) := Abs ((+2.0)*H(2) - Deriv2); + + for I in 3..Max_Order_Of_Deriv loop + Error(I) := Abs (H(I)); + end loop; + + for I in 0..Max_Order_Of_Deriv loop + if Error(I) > Max_Error then + Max_Error := Error(I); + end if; + end loop; + + end loop; + new_line; put("Max error, test 12: "); + put (e_Real_Image (Max_Error, Aft => 20)); + +--********************************************************************* +-- Test 13. H = Sin_d (Arcsin_d (t)) +-- +-- Test the reduced derivs. Un-reduced get too large. +--********************************************************************* +-- H = f(g(t)) = Sin_d (Arcsin_d (t)) +-- d^1 H = One +-- d^2 H = Zero + + Max_Error := Zero; + + for I in 0..No_Of_Steps loop + + Time := (+Real_8 (I + 1)) * DeltaT / (+Real_8 (I + 2)); + t := Time; + + G := Arcsin_d (t); + H := Compose (Sin_d (G(0)), G); -- Sin_d (Arcsin_d (t)) + + Deriv0 := t; + + Deriv1 := One; + + Deriv2 := Zero; + + --Un_Reduce (H); + + Error(0) := Abs (H(0) - Deriv0); + Error(1) := Abs (H(1) - Deriv1); + Error(2) := Abs ((+2.0)*H(2) - Deriv2); + + for I in 3..Max_Order_Of_Deriv loop + Error(I) := Abs (H(I)); + end loop; + + for I in 0..Max_Order_Of_Deriv loop + if Error(I) > Max_Error then + Max_Error := Error(I); + end if; + end loop; + + end loop; + new_line; put("Max error, test 13: "); + put (e_Real_Image (Max_Error, Aft => 20)); + +end; diff --git a/arbitrary/e_derivs.adb b/arbitrary/e_derivs.adb new file mode 100644 index 0000000..fcc318e --- /dev/null +++ b/arbitrary/e_derivs.adb @@ -0,0 +1,1051 @@ + +----------------------------------------------------------------------- +-- package body e_Derivs, high-order automatic differentiation of functions +-- Copyright (C) 2008-2018 Jonathan S. Parker +-- +-- Permission to use, copy, modify, and/or distribute this software for any +-- purpose with or without fee is hereby granted, provided that the above +-- copyright notice and this permission notice appear in all copies. +-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +--------------------------------------------------------------------------- + +with Ada.Numerics; + +package body e_Derivs is + + pragma Assert (Deriv_Index'Last > 1); + + Real_Val, Factorial : Derivatives; + -- global constant arrays, initialized after the begin statement. + + Zero : constant Real := (+0.0); + One : constant Real := (+1.0); + Two : constant Real := (+2.0); + Half : constant Real := (+0.5); + + -------------------------- + -- Make_Constant_Arrays -- + -------------------------- + + -- Try to minimize inexact arithmetic. + + procedure Make_Constant_Arrays is + begin + + Real_Val(0) := Zero; + for I in 1..Deriv_Index'Last loop + Real_Val(I) := Real_Val(I-1) + One; + end loop; + + Factorial(0) := One; + for I in 1..Deriv_Index'Last loop + Factorial(I) := Real_Val(I) * Factorial(I-1); + end loop; + + end Make_Constant_Arrays; + + --------------- + -- Un_Reduce -- + --------------- + + -- Multiply by (Order of Derivative) factorial to make reduced derivative + -- into ordinary Derivative. + + procedure Un_Reduce (F : in out Derivatives) is + begin + for I in 2..Deriv_Index'Last loop + F(I) := F(I) * Factorial(I); + end loop; + end Un_Reduce; + + ------------------ + -- Make_Reduced -- + ------------------ + + -- Divide by (Order of Derivative) factorial to make true derivative into + -- Leibnitz reduced Derivative. + + procedure Make_Reduced (F : in out Derivatives) is + begin + for I in 2..Deriv_Index'Last loop + F(I) := F(I) / Factorial(I); + end loop; + end Make_Reduced; + + --------- + -- "+" -- + --------- + + function "+" (F, G : in Derivatives) return Derivatives is + Result : Derivatives; + begin + for I in Deriv_Index loop + Result(I) := F(I) + G(I); + end loop; + return Result; + end "+"; + + --------- + -- "-" -- + --------- + + function "-" (F, G : in Derivatives) return Derivatives is + Result : Derivatives; + begin + for I in Deriv_Index loop + Result(I) := F(I) - G(I); + end loop; + return Result; + end "-"; + + --------- + -- "*" -- + --------- + + -- Derivatives, up to n, of H(t) := F(t) * G(t). Notation: (d/dt)**n = d^n. + -- so write (d/dt)**n (F(t) * G(t)) == d^n (F*G) = d^n (H). + -- Returns the Leibnitz Reduced derivative of H. This routine assumes + -- that F and G are already in reduced form (i.e. (d/dt)**k F(t) / k! is + -- the k-th reduced derivative of F, and is stored in the k-th place of + -- array F). Notice that this means that the j-th reduced derivative of + -- H is BackwardDotProduct (F, G) := SUM(k in 0..j){F(k) * G(j-k)}. The + -- pascal coeffients are cancelled out by the factorials in the reduced + -- derivatives. If you need all derivatives from 0 to n, then this + -- routine is the most efficient way. + -- If you just need the n-th derivative it might be faster to avoid + -- Leibnitz reduced derivatives. + -- + function "*" (F, G : in Derivatives) return Derivatives is + H : Derivatives := (others => Zero); + Sum : Real := Zero; + begin + for Order in Deriv_Index loop + Sum := Zero; + for I in 0..Order loop + Sum := Sum + F(I) * G(Order - I); + end loop; + H(Order) := Sum; + end loop; + + return H; + + end "*"; + + ------------- + -- Compose -- + ------------- + + -- Routine gets derivatives of composition of F and G: H(t) := (F(G(t)). + -- Want all derivatives up to n. Notation: the j-th derivative of H is + -- written d^j (H) := (d/dt)**j H(t), and the j-th derivative of F is written + -- Fj. To get recursive formula for d^n (H), use + -- d^n (H) := d^(n-1) (G1 * F1(G(t))). + -- Next use the formula for the (n-1)th derivative of the product of + -- G1 and F1(G(t)). then have a formula for the n-th derivative + -- of F(G(t)) in terms of the (n-1)th derivative of the composition of 2 + -- functions, F1 and G: F1(G(t)). Can do it with recursive calls, but + -- here calculate the intermediate quantities explicitly, so that inner + -- inner loops are optimized. + -- Suppose want d^n H(t) := d^n (F(G(t)). Start with + -- + -- d^0 Fn-0(G(t)) (j=n-0) + -- d^0 Fn-1(G(t)) d^1 Fn-1(G(t)) (j=n-1) + -- d^0 Fn-2(G(t)) d^1 Fn-2(G(t)) d^2 Fn-2(G(t)) (j=n-2) + -- + -- .... + -- + -- d^0 F1(G(t)) d^1 F1(G(t)) d^2 F1(G(t)) ... d^n-1 F1(G(t)) (j=1) + -- d^0 F0(G(t)) d^1 F0(G(t)) d^2 F0(G(t)) ... d^n-1 F0(G(t)) (j=0) + -- + -- with a missing d^n F0(G(t)) in the final row and column. The final row + -- is of course the desired result. + -- + -- Each row above is calculated from the previous row by the recursive formula + -- given above. For example, once have the j=1 row can get the + -- desired result using: + -- + -- d^n (H) = d^(n-1) (G1 * F1(G(t))) := + -- + -- n-1 + -- = SUM { d^(n-1-k) G1 * d^k (F1(G(t)) * Coeff(n-1,k) } + -- k=0 + -- + -- Above, Coeff(n-1,j) is the Pascal's triangle coefficient (n-1)!/k!(n-1-k)!. + -- Can remove this entirely from the sum by using Leibnitz reduced form: + -- divide each argument of d^j by j!. Then multiply the final sum by (n-1)! + -- to get the true derivative. Avoid the final multiplication by (n-1)! if + -- you want the reduced derivative of H. (Still need to multiply the result + -- by a factor of 1/n to make d^n (H) the reduced derivative of H.) + -- This saves much time if you only rarely convert from the reduced form to the + -- true derivatives. To do it this way, must put G1 into reduced + -- form. are given F and G in reduced form. Must shift G array + -- down one place, and multiply the components by (k+1) (k is the index) to + -- adjust the 1/(k+1)! term to 1/k!. If d^k (F1(G(t)) is the reduced + -- derivative of the argument F1(G(t), then the formula for the reduced + -- derivatives of H := F(G(t)) becomes, + -- + -- n-1 + -- d^n (H) = SUM { d^(n-1-k) G1 * d^k (F1(G(t)) } / n + -- k=0 + -- + -- == SUM { G1(n-1-k) * DF(1,k) } / n + -- + -- Below the array DF(j,k) will hold d^k Fj(G(t)). For example, the final row + -- above will be placed into DF(1,k), where k is in 0..n-1. Row j of DF(j,k) + -- is calculated from row j+1 by setting H := Fj(G(t)) using + -- + -- k-1 + -- d^k (H) = SUM { d^(k-1-i) G1 * d^i (Fj+1(G(t)) } * (j+1) / k + -- i=0 + -- + -- The j+1 term comes from the fact that Fj is also reduced, and when we + -- took the true derivative of it, had to divide and multiply by j+1 to + -- keep it a reduced derivative. Notice that the affect of this repeated + -- multiplication of Fj+1 by j+1 is simply to unreduce F. If F had been + -- introduced as unreduced to begin with, then the formula would be identical + -- but without the j+1 factor. So its both more efficient and more accurate + -- to unreduce F at the beginning rather than along the way. If do that + -- then the final formula for the reduced derivative d^k Fj(G(t)) becomes: + -- + -- d^k (H) := SUM { G1(k-1-i) * DF(j+1,i) } / k := DF(j,k) + -- + + function Compose + (F, G : in Derivatives) + return Derivatives + is + G1, F0, H : Derivatives; + DF : array (Deriv_Index, Deriv_Index) of Real; + Col, J, K : Deriv_Index; + Sum : Real; + begin + -- The reduced derivatives of F and G are placed in arrays F and G. Array + -- F holds the reduced derivatives of function F evaluated at G(t), not + -- the derivatives of F(G(t)), which want to calculate. now need the + -- array of reduced derivatives of the 1st derivative of G, called G1, and + -- need to unreduce F, as explained above. + + for I in 1..Deriv_Index'Last loop + G1(I-1) := G(I) * Real_Val(I); + end loop; + for I in Deriv_Index loop + F0(I) := F(I) * Factorial(I); + end loop; + + --************************************************************************ + -- Get the first Column of DF, the ones with the 0-th derivatives of Fk(G(t)). + -- The col j is constant at 0, and k varies from Max_Order_Of_Deriv to 0. + --************************************************************************ + Col := 0; + for Row in Deriv_Index loop + k := Deriv_Index'Last - Row; + DF(k, Col) := F0(k); + end loop; + + --************************************************************************ + -- Get the next rows of DF, 1..Max_Order_Of_Deriv. Row and column refer to the + -- Rows and Cols of the table above. k and j are the indices of DF, which + -- match the k and j in d^k Fj(G(t)). + --************************************************************************ + for Row in 1..Deriv_Index'Last loop + j := Deriv_Index'Last - Row; + for Col in 1..Row loop + k := Col; + + --************************************************************************ + -- Calculate d^k Fj(G(t)) := DF(j,k) given DF(j+1, k). + -- SUM { G1(k-1-i) * DF(j+1,k) } / k. + --************************************************************************ + Sum := Zero; + for i in 0..k-1 loop + Sum := Sum + G1(k-1-i) * DF(j+1,i); + end loop; + DF(j,k) := Sum / Real_Val (k); -- "/" always more accurate, and k>0 + --DF(j,k) := Sum * Inverse_Of(k); + + end loop; + end loop; + + --************************************************************************ + -- The desired derivatives are the final row := Max_Order_Of_Deriv, (j=0) + -- of DF. This is a reduced form. + --************************************************************************ + for I in Deriv_Index loop + H(I) := DF(0, I); + end loop; + + return H; + + end Compose; + + ---------------- + -- Reciprocal -- + ---------------- + + -- Returns reduced derivatives of 1 / F(t), given reduced derivs of F. + -- Just use composition of A(t) = 1/t with F(t): H(t) = A(F(t)) = 1/F(t), + -- so H(t) = Compose (Reciprocal, F). + -- + function Reciprocal (F : in Derivatives) return Derivatives is + begin + if F(0) = Zero then + raise d_Argument_Error; + end if; + + return Compose (Reciprocal (F(0)), F); + + end Reciprocal; + + --------- + -- "/" -- + --------- + + -- Returns reduced derivatives of F(t) / G(t), given reduced derivs of F, G. + -- Just use: H(t) = F(t) * Reciprocal (G(t)). + -- + function "/" (F, G : in Derivatives) return Derivatives is + begin + if G(0) = Zero then + raise d_Argument_Error; + end if; + + return F * Reciprocal (G); + + end "/"; + + ---------- + -- "**" -- + ---------- + + -- Get reduced derivatives of F(t) to an integer Power: F(t)**n. + -- H(t) = En(F(t)) where En(t') = (t')**n, so that H(t) = F(t)**n. So use + -- Compose (En, F), where En = F(0)**n. + -- (or H(t) = Pow_n(F(t)). + + function "**" + (F : in Derivatives; + Exponent : in Natural) + return Derivatives + is + begin + + return Compose (F(0)**Exponent, F); + + end "**"; + + ---------- + -- "**" -- + ---------- + + -- Get reduced derivatives of Time to a non-negative integer Power. + -- Follow the Ada standard: Zero**0 is One by definition. + -- Anything to the 0 is defined to be 1. + -- (Reduced derivs are just pascals triangle * t**n.) + + function "**" + (Time : in Real; + Exponent : in Natural) + return Derivatives + is + Derivs : Derivatives := (others => Zero); + Power_Of_t : Derivatives := (others => Zero); + Coeff : Derivatives; + Exp_Of_t, Exp : Integer := Exponent; + Highest_Order : Deriv_Index := Deriv_Index'Last; + begin + + if Exponent = 0 then + Derivs (0) := One; -- Even Zero**0 is 1. All derivs are Zero. + return Derivs; + end if; + + -- Exponent > 0, so Time=0 implies Time**N = Zero, but the N-th deriv is N! + -- The N-th deriv in reduced form is One. All other derivs are Zero. + + if Time = Zero then + if Exponent <= Integer (Deriv_Index'Last) then + Derivs (Deriv_Index (Exponent)) := One; -- All other init to Zero. + end if; + return Derivs; -- already reduced. + end if; + + -- Now know that Exponent > 0, and Time /= Zero. + -- Of course the high powers of time can under/overflow. + + Exp_Of_t := Exponent; + Power_Of_t(0) := Time ** Exp_Of_t; + Highest_Order := Deriv_Index'Last; + + for I in 1..Deriv_Index'Last loop + Exp_Of_t := Exp_Of_t - 1; + if Exp_Of_t > 0 then + Power_Of_t(I) := Time ** Exp_Of_t; -- slow but better accuracy. + else + Power_Of_t(I) := One; + Highest_Order := I; + exit; + end if; + end loop; + + + -- Coeff has been initialized to Zero: + + Coeff (0) := One; + Exp := Exponent; + + for I in 1..Highest_Order loop + if Exp > 0 then + Coeff(I) := Coeff(I-1) * (+Real_8 (Exp)); + else + exit; + end if; + Exp := Exp - 1; + end loop; + + Derivs(0) := Power_Of_t(0); + for I in 1..Highest_Order loop + Derivs(I) := Power_Of_t(I) * Coeff(I); + end loop; + + Make_Reduced (Derivs); + + return Derivs; + + end "**"; + + ---------------- + -- Reciprocal -- + ---------------- + + -- Get reduced derivatives of One / Time. Remember, high order derivs go as + -- (1/Time)**Order. Easy get underflow/overflow if Time too large or small. + -- + function Reciprocal + (Time : in Real) + return Derivatives + is + Result : Derivatives; + Coeff : Real := One; + begin + + if Time = Zero then + raise d_Argument_Error; + end if; + + Result (0) := One / Time; + for I in 1..Deriv_Index'Last loop + Coeff := -Coeff; + Result (I) := Coeff / Time ** (Integer(I+1)); --detectable accuracy imprv. + --Result (I) := -Result (I-1) / Time; + end loop; + -- Notice that neglect the I! coefficient of Result, which is + -- lost when Result(I) is divided by I! to get reduced deriv. + + return Result; + + end Reciprocal; + + ------------ + -- Sqrt_d -- + ------------ + + function Sqrt_d + (Time : in Real; + Frequency : in Real := One_d; + Phase : in Real := Zero_d) + return Derivatives + is + Result : Derivatives; + Arg : Real; + begin + + Arg := Time * Frequency + Phase; + + -- Can't take first deriv at 0.0: + + if Arg = Zero then + raise d_Argument_Error; + end if; + + Result (0) := Sqrt (Arg); + + Result(1) := Result (0) * Half * Frequency / Arg; + + for I in Deriv_Index'First+2 .. Deriv_Index'Last loop + Result(I) := -Result (I-1) * (Real_Val (I-1) - Half) * Frequency; + Result(I) := Result (I) / (Arg * Real_Val (I)); + end loop; + + -- reduced by the / Real_Val (I) above. + + return Result; + + end Sqrt_d; + + -------------- + -- Arctan_d -- + -------------- + + -- Reduced derivs of Arctan. + -- + function Arctan_d + (Time : in Real) + return Derivatives + is + Result : Derivatives; + One_Plus_Arg_Squared : Derivatives := (others => Zero); + One_Over : Derivatives; + Arg : constant Real := Time; + begin + + -- Reduced derivatives (divide by order of derivative factorial): + One_Plus_Arg_Squared (Deriv_Index'First) := One + Arg*Arg; + One_Plus_Arg_Squared (Deriv_Index'First+1) := Two * Arg; + One_Plus_Arg_Squared (Deriv_Index'First+2) := One; -- reduced by / 2! + + One_Over := Reciprocal (One_Plus_Arg_Squared (0)); + + Result := Compose (One_Over, One_Plus_Arg_Squared); + + for I in reverse Deriv_Index'First+1 .. Deriv_Index'Last loop + Result (I) := Result (I-1) / Real_Val (I); + end loop; + + Result (Deriv_Index'First) := Arctan (Arg); + + return Result; + + end Arctan_d; + + -------------- + -- Arcsin_d -- + -------------- + + -- Reduced derivs of Arcsin. Arg must be in (-1.0, 1.0). Result + -- of Arcsin is in range -pi/2 .. pi/2. + -- + function Arcsin_d + (Time : in Real) + return Derivatives + is + Result : Derivatives; + One_Minus_Arg_Squared : Derivatives := (others => Zero); + Reciprocal_Square_Root : Derivatives; + Arg : constant Real := Time; + begin + + -- Can't take derivs at -1.0 or 1.0: + + if (Arg <= -One) or (Arg >= One) then + raise d_Argument_Error; + end if; + + -- Reduced derivatives (divide by order of derivative factorial): + One_Minus_Arg_Squared (Deriv_Index'First) := One - Arg*Arg; + One_Minus_Arg_Squared (Deriv_Index'First+1) := -Two * Arg; + One_Minus_Arg_Squared (Deriv_Index'First+2) := -One; + + Reciprocal_Square_Root := Reciprocal (Sqrt_d (One_Minus_Arg_Squared (0))); + + Result := Compose (Reciprocal_Square_Root, One_Minus_Arg_Squared); + + for I in reverse Deriv_Index'First+1 .. Deriv_Index'Last loop + Result (I) := Result (I-1) / Real_Val (I); + end loop; + + Result (Deriv_Index'First) := ArcSin (Arg); + + return Result; + + end Arcsin_d; + + -------------- + -- Arccos_d -- + -------------- + + -- Reduced derivs of Arccos. Arg must be in (-1.0, 1.0). Result + -- of Arccos is in range 0.0 .. pi. Arccos = Pi/2 - Arcsin, so call + -- Arcsin. + + function Arccos_d + (Time : in Real) return Derivatives + is + Result : Derivatives; + Arg : constant Real := Time; + begin + + -- Can't take derivs at -1.0 or 1.0: + + if (Arg <= -One) or (Arg >= One) then + raise d_Argument_Error; + end if; + + Result := Arcsin_d (Arg); + + for I in Deriv_Index'First+1 .. Deriv_Index'Last loop + Result (I) := -Result (I); + end loop; + + Result (Deriv_Index'First) := ArcCos (Arg); + + return Result; + + end Arccos_d; + + ----------- + -- Sin_d -- + ----------- + + -- Get reduced derivatives of Sin + -- + function Sin_d + (Time : in Real; + Frequency : in Real := One_d; + Phase : in Real := Zero_d) + return Derivatives + is + SI, CO : Real; + FreqPower : Real := One; + Index_Is_Even : Boolean := True; + Sinusoid : Derivatives; + begin + SI := Sin (Frequency*Time + Phase); + CO := Cos (Frequency*Time + Phase); + + Index_Is_Even := True; + FreqPower := One; + for I in Deriv_Index loop + if Index_Is_Even then + Sinusoid(I) := FreqPower * SI; + FreqPower := FreqPower * Frequency; + else + Sinusoid(I) := FreqPower * CO; + FreqPower := -FreqPower * Frequency; + end if; + Index_Is_Even := not Index_Is_Even; + end loop; + + Make_Reduced (Sinusoid); + + return Sinusoid; + + end Sin_d; + + ----------- + -- Cos_d -- + ----------- + + -- Get reduced derivatives of Cos + -- + function Cos_d + (Time : in Real; + Frequency : in Real := One_d; + Phase : in Real := Zero_d) + return Derivatives + is + SI, CO : Real; + FreqPower : Real := One; + Index_Is_Even : Boolean := True; + CoSinusoid : Derivatives; + begin + SI := Sin (Frequency*Time + Phase); + CO := Cos (Frequency*Time + Phase); + + Index_Is_Even := True; + FreqPower := One; + for I in Deriv_Index loop + if Index_Is_Even then + CoSinusoid(I) := FreqPower * CO; + FreqPower := -FreqPower * Frequency; + else + CoSinusoid(I) := FreqPower * SI; + FreqPower := FreqPower * Frequency; + end if; + Index_Is_Even := not Index_Is_Even; + end loop; + + Make_Reduced (CoSinusoid); + + return CoSinusoid; + + end Cos_d; + + ----------- + -- Exp_d -- + ----------- + + -- Get reduced derivatives Exp, the envelope. + -- + function Exp_d + (Time : in Real; + Frequency : in Real := One_d; + Phase : in Real := Zero_d) + return Derivatives + is + Val, Arg : Real; + FreqPower : Real := One; + Expon : Derivatives; + begin + Arg := Frequency*Time + Phase; + -- May want to do some special casing with this. If large negative + -- for example, set Val to Zero, etc. + + Val := Exp (Arg); + + FreqPower := One; + for I in Deriv_Index loop + Expon(I) := FreqPower * Val; + FreqPower := FreqPower * Frequency; + end loop; + + Make_Reduced (Expon); + + return Expon; + + end Exp_d; + + ----------- + -- Log_d -- + ----------- + + -- Get reduced derivatives of natural Log (base e). + -- + function Log_d + (Time : in Real; + Frequency : in Real := One_d; + Phase : in Real := Zero_d) + return Derivatives + is + Arg, FreqPower : Real; + Result : Derivatives; + begin + Arg := Frequency*Time + Phase; + + if Arg <= Zero then + raise d_Argument_Error; + end if; + + Result := Reciprocal (Arg); + -- Derivatives of 1/t evaluated at t=arg + + for I in reverse 1..Deriv_Index'Last loop + Result(I) := Result(I-1) / Real_Val(I); + end loop; + -- Derivatives of 1/t are higher derivatives of Log(t). + -- Need to properly reduce tho: turn (I-1)! into I!. + + Result(0) := Log (Arg); + + FreqPower := One; + for I in 1..Deriv_Index'Last loop + FreqPower := FreqPower * Frequency; + Result(I) := FreqPower * Result(I); + end loop; + + return Result; + + end Log_d; + + ---------------- + -- Taylor_Sum -- + ---------------- + + -- Want Sum = B_0 + B_1*t + B_2*t**2 + ... + B_n*t**n. + -- or in Horner's form: Sum = B_0 + t*(B_1 + ... + t*(B_n-1 + t*B_n)))))). + -- This is easily written as matrix equation, with Sum = S_0: + -- + -- S_n = B_n; S_n-1 = B_n-1 + t*S_n; S_1 = B_1 + t*S_2; S_0 = B_0 + t*S_1; + -- + -- In matrix form, vector S is the solution to matrix equation M*S = B, + -- where B = (B_0,...,B_n), S = (S_0,...,S_n) and matrix M is equal to + -- the unit matrix I minus t*O1, where O1 is all 1's on the 1st upper off- + -- diagonal. + -- + -- M is not diag. domin. if t>>1, so: + -- This form is chosen because the solution vector S can + -- be improved numerically by iterative refinement with Newton's + -- method: + -- S{k+1} = S{k} + M_inverse * (B - M*S{k}) + -- + -- where S = M_inverse * B is the calculation of S given above. If the + -- said calculation of S is numerically imperfect, then the iteration above + -- may produce improved values of S. If the Coefficients of + -- the polynomial B are numerically poor, then this effort will be wasted. + -- Its often the case with + -- + -- Iterative refinement + -- + -- If No_Of_Iterations=0 then usual solution is returned. + -- If No_Of_Iterations=1 then solution is refined iteratively once. + -- + -- This version refines residual rather than actual solution, (so that + -- can subtract small quantities from small, rather than large.) + -- If y = exact error in 1st iteration: y = S{1} - S{inf}, then y is the + -- solution of M*y = d_1 where d_1 = B - M*S{1}. + -- Let M_inv denote approximate inverse of M. Iterate for y using + -- + -- Delta_Y_{k+1} == Y_{k+1} - Y_k = M_inv*(d_1 - M*Y_k). + -- + -- Remember Y = SUM (Delta_Y_k's). Here's the actual method: + -- + -- Delta_Y_1 = M_inv*d_1 + -- Let d_2 == d_1 - M*Delta_Y_1 + -- Delta_Y_2 = M_inv*(d_1 - M*Delta_Y_1) = M_inv*d_2 + -- Let d_3 == d_2 - M*Delta_Y_2 + -- Delta_Y_3 = M_inv*(d_1 - M*Delta_Y_1 - M*Delta_Y_2) = M_inv*d_3 + -- + -- so: d_k = d_{k-1} - M*Delta_Y_{k-1}; Delta_Y_k = M_inv*d_k + -- + -- Sum the Delta_Y_k's to get the correction to S(1): Y = SUM (Delta_Y_k's). + -- notice are iterating for the error in the error in the error... + -- First method doesn't seem to work. + -- Second method works best in simple tests, so use it. + -- + function Taylor_Sum + (t : in Real; + F : in Derivatives; + Max_Index : in Deriv_Index := Deriv_Index'Last; + No_Of_Iterations : in Natural := 0) + return Real + is + S_first : Derivatives; + D_k, Delta_Y_k : Derivatives; + Y_f : Derivatives := (others => Zero); --init impor. + B : Derivatives renames F; + + -- Get Product = M*S(k): + + function M_times + (S : in Derivatives; + t : in Real; + Coeff_Last : in Deriv_Index := Deriv_Index'Last) + return Derivatives + is + Product : Derivatives; + begin + for n in Coeff_Last .. Deriv_Index'Last loop + Product(n) := Zero; + end loop; + Product(Coeff_Last) := S(Coeff_Last); + if Deriv_Index'First < Coeff_Last then + for n in reverse Deriv_Index'First..Coeff_Last-1 loop + Product(n) := S(n) - t * S(n+1); + end loop; + end if; + return Product; + end; + + -- Solve for S in the matrix equation M*S = B + + function M_inv_times + (B : in Derivatives; + t : in Real; + Coeff_Last : in Deriv_Index := Deriv_Index'Last) + return Derivatives + is + S : Derivatives; + begin + for n in Coeff_Last .. Deriv_Index'Last loop + S(n) := Zero; + end loop; + S(Coeff_Last) := B(Coeff_Last); + if Deriv_Index'First < Coeff_Last then + for n in reverse Deriv_Index'First..Coeff_Last-1 loop + S(n) := B(n) + t * S(n+1); + end loop; + end if; + return S; + end; + + function Difference + (A : in Derivatives; + B : in Derivatives; + Coeff_Last : in Deriv_Index := Deriv_Index'Last) + return Derivatives + is + S : Derivatives; + begin + for n in Coeff_Last .. Deriv_Index'Last loop + S(n) := Zero; + end loop; + for n in reverse Deriv_Index'First..Coeff_Last loop + S(n) := A(n) - B(n); + end loop; + return S; + end; + + begin + -- Get S{1} = S_first, 1st soln, as defined above. + + S_first := M_inv_times (B, t, Max_Index); --does init ok + + If No_Of_Iterations > 0 then + + -- get d_k = B - M*S_first, the 1st residual: + + D_k := Difference (B, M_times (S_first, t, Max_Index), Max_Index); -- D_1 + + -- get Delta_Y = M_inv*D_k: + + Delta_Y_k := M_inv_times (D_k, t, Max_Index); + Y_f := Delta_Y_k; + + for Iteration in 1..No_Of_Iterations loop + + -- get d_k = d_k - M*Delta_Y_k + + D_k := Difference(D_k, M_times (Delta_Y_k, t, Max_Index), Max_Index); + + -- get Delta_Y = M_inv*D_k: + + Delta_Y_k := M_inv_times (D_k, t, Max_Index); + + -- Increment solution for full correction to S_first, Y_f: + + for I in Deriv_Index'First..Max_Index loop + Y_f(I) := Y_f(I) + Delta_Y_k(I); + end loop; + + end loop; + + end if; + + --for I in Deriv_Index'First..Max_Index loop + -- S(I) := Y_f(I) + S_first(I); + --end loop; + + --if Max_Index < Deriv_Index'Last then + --for I in Max_Index+1 ..Deriv_Index'Last loop + -- S(I) := Zero; + --end loop; + --end if: + + return Y_f(Deriv_Index'First) + S_first(Deriv_Index'First); + + end Taylor_Sum; + + ------------------ + -- Taylor_Sum_2 -- + ------------------ + + -- Want Sum = B_0 + B_1*t + B_2*t**2 + ... + B_n*t**n. + -- or in Horner's form: Sum = B_0 + t*(B_1 + ... + t*(B_n-1 + t*B_n)))))). + -- This is easily written as matrix equation, with Sum = S_0: + -- + -- S_n = B_n; S_n-1 = B_n-1 + t*S_n; S_1 = B_1 + t*S_2; S_0 = B_0 + t*S_1; + -- + -- In matrix form, vector S is the solution to matrix equation M*S = B, + -- where B = (B_0,...,B_n), S = (S_0,...,S_n) and matrix M is equal to + -- the unit matrix I minus t*O1, where O1 is all 1's on the 1st upper off- + -- diagonal. + -- + function Taylor_Sum_2 + (t : in Real; + F : in Derivatives; + Max_Index : in Deriv_Index := Deriv_Index'Last) return Real is + + S_first, Bu : Derivatives; + B : Derivatives renames F; + + -- Solve for S in the matrix equation M*S = B + + function M_inv_times + (B : in Derivatives; + t : in Real; + Coeff_Last : in Deriv_Index := Deriv_Index'Last) + return Derivatives + is + S : Derivatives; + begin + for n in Coeff_Last .. Deriv_Index'Last loop + S(n) := Zero; + end loop; + S(Coeff_Last) := B(Coeff_Last); + if Deriv_Index'First < Coeff_Last then + for n in reverse Deriv_Index'First..Coeff_Last-1 loop + S(n) := B(n) + t * S(n+1) / Real_val(n+1); + end loop; + end if; + return S; + end; + + begin + + Bu := B; + un_reduce(Bu); + + -- Get S{1} = S_first, 1st soln, as defined above. + + S_first := M_inv_times (Bu, t, Max_Index); --does init ok + + return S_first(Deriv_Index'First); + + end Taylor_Sum_2; + + function Derivative_Of + (F : in Derivatives; + Order_Of_Deriv : in Deriv_Index) + return Derivatives is + + Result : Derivatives := F; + + begin + + if Order_Of_Deriv = 0 then return Result; end if; + + for Order in 1..Order_Of_Deriv loop + + -- shft rgt + for n in Deriv_Index'First..Deriv_Index'Last-1 loop + Result(n) := Result(n+1); + end loop; + Result(Deriv_Index'Last) := Zero; + + + -- reduce + for n in Deriv_Index'First+1..Deriv_Index'Last-Order loop + Result(n) := Result(n) * Real_Val(n+1); + end loop; + + end loop; + + return Result; + + end Derivative_Of; + + -- Integration const is set to 0. + + function Integral_Of + (F : in Derivatives) + return Derivatives is + + Result : Derivatives := F; + + begin + + -- shft left + for n in reverse Deriv_Index'First+1..Deriv_Index'Last loop + Result(n) := Result(n-1); + end loop; + Result(Deriv_Index'First) := Zero; + + -- reduce + for n in Deriv_Index'First+2..Deriv_Index'Last loop + Result(n) := Result(n) / Real_Val (n); + end loop; + + return Result; + + end Integral_Of; + +begin + + Make_Constant_Arrays; + +end e_Derivs; diff --git a/arbitrary/e_derivs.ads b/arbitrary/e_derivs.ads new file mode 100644 index 0000000..6e0b17e --- /dev/null +++ b/arbitrary/e_derivs.ads @@ -0,0 +1,239 @@ + +-- PACKAGE e_Derivs +-- +-- Automatic differentiation routines for calculating arbitrary order +-- derivatives of arithmetical expressions constructed out of operators +-- "+", "-", "*", "/", "**", Compose, Sin, Cos, Exp, Log, Reciprocal, +-- Sqrt, Arccos, Arsin, Arctan, or any user defined function. Also +-- includes routines for summing the Taylor series for the value of the +-- Function, its Integrals, and Derivatives at arguments near to those +-- at which the original Function, and its derivs were evaluated. +-- +-- Warnings: +-- High order differentiation is prone to overflows/underflows and +-- loss of accuracy, especially near singularities. For example +-- the high order derivs of 1/t, log(t) or Sqrt(t) go as high powers +-- of 1/t, so if t is small or large enough, then exceptions happen. +-- Also note that numerical accuracy can degrade rapidly with order of +-- derivative. Nothing clever has been done to improve numerical accuracy. +-- If you use 15 digit floats, (with Real'Epsilon around 1.0e-15), then error +-- many times this magnitude accumulates, so that multiplying by, say, +-- 16! to get Un-reduced (true) derivatives can destroy all accuracy. +-- +-- Results are returned as Leibniz Reduced Derivatives (n-th derivative +-- divided by n!), which are usually better behaved numerically than +-- the high order Un-reduced derivatives. +-- +-- Here is how it works: you place the first N derivatives of some +-- function F(t) in array F, and the first N derivatives of another function +-- G(t) in array G, then the operator "*" will calculate the 1st N +-- derivatives of the product F(t)*G(t) and place the result into array H when +-- you write H := F * G. Similarly, H := Compose (F, G) places the first N +-- derivatives of the composition of the 2 functions F(G(t)) into array H, +-- provided F is the set of derivatives of F evaluated at G(t). Routines +-- are provided to help calculate high order derivatives of elementary +-- functions such as Sin and Log. (The routines have names like Sin_d and +-- Log_d, respectively.) +-- You do not have to use the functions provided. If you determine the +-- derivatives of a function numerically, (for example a function that is the +-- solution of a numerically integrated differential equ.) and place them in an +-- array of type Derivatives, then this works just as well. +-- +-- The program does not parse expressions for you. You must translate +-- expressions like F(G(t)) into: Compose (F, G). However, most other steps in +-- the translations use the original expression in unmodified form. Use +-- Parentheses to indicate precedence of the operators. +-- +-- The derivatives are input and output in Leibniz Reduced form. +-- (To get Leibnitz reduced derivatives you divide the N-th derivative by N!.) +-- All computation is done internally in reduced form, +-- because this is usually the most efficient way. You can translate +-- between Reduced and Unreduced forms of derivatives with procedures +-- Make_Reduced, and Un_Reduce. +-- +-- Notice that the reduced derivatives of H(t) are also the Taylor coefficients +-- of function H(t). More precisely, the Taylor's series for H(t), (if we know +-- (d/dt)**N H(t_0), the set of derivatives of H evaluated at point t_0), is +-- the sum over coefficents (t - t_0)**N * (d/dt)**N H(t_0) / N!. The functions +-- in this package can be used to directly calculate (d/dt)**N H(t_0) / N!, +-- if H is composed of functions whose derivatives are known. For example, +-- if we know the derivatives of F and of G, then we can calculate Taylor +-- series for H(t) = F(G(t)). So the routines in this package might be thought +-- of as routines for gettin Taylor series coefficients of complicated functions, +-- given Taylor Series of the simpler functions from which they are constructed. +-- For example suppose you numerically integrate 2 differential equations for +-- F and G, returning the high order derivatives of F and G along the way. +-- Then if you want (say) the integral of F*G, you calculate the Taylor +-- series of H = F*G at various points t_0, and then calculate the area under +-- the Taylor Series polynomial by elementary means. Similarly, you can +-- numerically integrate differential equations by the Taylor series method +-- if these routines are used to calculate the derivatives of the +-- function that defines the differential equation. +-- +-- Exceptions: +-- Some functions raise d_Argument_Error if arguments are out of range. +-- +generic + + Max_Order_Of_Deriv : Positive; + + type Real is private; + + type Real_8 is digits <>; + + -- Never have to enter these explicitly: + with function Sin (X : Real) return Real is <>; + with function Cos (X : Real) return Real is <>; + with function Exp (X : Real) return Real is <>; + with function Log (X : Real) return Real is <>; -- base e + with function Sqrt (X : Real) return Real is <>; + with function Arcsin (X : Real) return Real is <>; + with function Arccos (X : Real) return Real is <>; + with function Arctan (X : Real) return Real is <>; + + with function "+" (X : Real_8) return Real is <>; + -- use for translating 15 digit float (Real_8) to extended (Real). + -- used here only for making One_d, Half, etc. + + with function "-" (X : Real) return Real is <>; + with function "+" (X, Y : Real) return Real is <>; + with function "-" (X, Y : Real) return Real is <>; + with function "*" (X, Y : Real) return Real is <>; + with function "/" (X, Y : Real) return Real is <>; + with function "<=" (X, Y : Real) return Boolean is <>; + with function ">=" (X, Y : Real) return Boolean is <>; + with function "=" (X, Y : Real) return Boolean is <>; + with function "**" (X : Real; Exponent : Natural) return Real is <>; + --need Equality test for Reciprocal. + +package e_Derivs is + + One_d : constant Real := (+1.0); + Zero_d : constant Real := (+0.0); + + subtype Deriv_Index is Integer range 0..Max_Order_Of_Deriv; + + type Derivatives is array(Deriv_Index) of Real; + + function "+" (F, G : in Derivatives) return Derivatives; + -- Returns reduced derivatives of F(t) + G(t), given reduced derivs of F, G. + + function "-" (F, G : in Derivatives) return Derivatives; + -- Returns reduced derivatives of F(t) - G(t), given reduced derivs of F, G. + + function "*" (F, G : in Derivatives) return Derivatives; + -- Returns reduced derivatives of F(t) * G(t), given reduced derivs of F, G. + + function "/" (F, G : in Derivatives) return Derivatives; + -- Returns reduced derivatives of F(t) / G(t), given reduced derivs of F, G. + + function "**" + (F : in Derivatives; + Exponent : in Natural) + return Derivatives; + + function Compose (F, G : in Derivatives) return Derivatives; + -- Returns reduced derivatives of F(G(t)), given reduced derivs of F, G. + -- F must hold the reduced derivs of F(t'), evaluated at t'=G(t). G holds + -- reduced derivs of G(t) pre-evaluated at whatever t the user desires. + -- So the derivatives of F(G(t)) are returned by: Compose(F(t'),G). + + function Reciprocal (F : in Derivatives) return Derivatives; + -- Returns reduced derivatives of 1 / F(t), given reduced derivs of F. + + procedure Make_Reduced (F : in out Derivatives); + -- Make reduced derivatives out of ordinary derivs, by dividing by Order!. + + procedure Un_Reduce (F : in out Derivatives); + -- Make ordinary derivatives out of reduced derivs, by multiplying by Order! + + d_Argument_Error : exception; + -- A little checking of arguments is done, but most is left to the + -- functions of type Real that the procedures here are built out of. + + + --************************************************************************** + -- Use taylor series to evaluate the Function, its Integrals and Derivatives + -- at argument t_1 /= t_0 (where t_0 is the pt. at which the original Function + -- and its derivs were evaluated). + --************************************************************************** + + function Taylor_Sum + (t : in Real; + F : in Derivatives; + Max_Index : in Deriv_Index := Deriv_Index'Last; + No_Of_Iterations : in Natural := 0) + return Real; + -- Given F and reduced derivs of F at t_0, routine sums taylor series for + -- F(t_1), where t_1 = t + t_0. (Summing polys is always highly error prone.) + + function Taylor_Sum_2 + (t : in Real; + F : in Derivatives; + Max_Index : in Deriv_Index := Deriv_Index'Last) return Real; + -- Slightly different algor. + + function Derivative_Of + (F : in Derivatives; + Order_Of_Deriv : in Deriv_Index) + return Derivatives; + -- Returns reduced derivatives of (d/dt)^k F(t), given reduced derivs of F, + -- where k = Order_Of_Deriv. + + function Integral_Of + (F : in Derivatives) + return Derivatives; + -- Integration const is set to 0. To get area under curve F(t), + -- say in [t_0 - dt, t_0 + dt], let F be reduced derivs of F(t) at t_0, + -- area = Taylor_Sum(dt, Integral_Of(F)) - Taylor_Sum(-dt, Integral_Of(F)). + + --************************************************************************** + -- The following functions operate on argument of type Real rather than type + -- Derivative. They return the high order *reduced* derivatives of common + -- functions to make it easier to construct more complicated functions. + -- Their arguments are first order polynomials of "time", also for convenience. + -- I.e., Sin_d returns reduced derivs of Sin (constant1 * time + constant2). + -- Could use the Composition operator, but that would be inefficient. + --************************************************************************** + + function "**" + (Time : in Real; + Exponent : in Natural) return Derivatives; + + function Reciprocal (Time : in Real) return Derivatives; + + function Sin_d + (Time : in Real; + Frequency : in Real := One_d; + Phase : in Real := Zero_d) return Derivatives; + + function Cos_d + (Time : in Real; + Frequency : in Real := One_d; + Phase : in Real := Zero_d) return Derivatives; + + function Exp_d + (Time : in Real; + Frequency : in Real := One_d; + Phase : in Real := Zero_d) return Derivatives; + + function Log_d + (Time : in Real; + Frequency : in Real := One_d; + Phase : in Real := Zero_d) return Derivatives; + + function Sqrt_d + (Time : in Real; + Frequency : in Real := One_d; + Phase : in Real := Zero_d) return Derivatives; + + function Arctan_d + (Time : in Real) return Derivatives; + + function Arcsin_d + (Time : in Real) return Derivatives; + + function Arccos_d + (Time : in Real) return Derivatives; -- TEST THIS. + +end e_Derivs; diff --git a/arbitrary/e_function_demo_1.adb b/arbitrary/e_function_demo_1.adb new file mode 100644 index 0000000..c989cb5 --- /dev/null +++ b/arbitrary/e_function_demo_1.adb @@ -0,0 +1,2033 @@ +with Extended_Real; +with Extended_Real.Elementary_Functions; +with Extended_Real.IO; +with Ada.Numerics.Generic_Elementary_Functions; +with Text_IO; use Text_IO; + +procedure e_function_demo_1 is + + type Real is digits 15; + + package mth is new Ada.Numerics.Generic_Elementary_Functions (Real); + use mth; + package ext is new Extended_Real (Real); + use ext; + package fnc is new Ext.Elementary_Functions (Sqrt, Log, Exp, Arcsin); + use fnc; + package eio is new Ext.IO; + use eio; + package rio is new Text_IO.Float_IO (Real); + use rio; + + + Delta_2 : e_Real; + Z0, Z1, Z2, Z3, Z4, Difference1 : e_Real; + Junk1 : constant e_Real := E_Quarter_Pi * Make_Extended (0.273); + Junk2 : constant e_Real := E_Quarter_Pi * Make_Extended (0.233); + Junk3 : constant e_Real := E_Inverse_Sqrt_2; -- 1/SQRT(2) + + + Test_Vector_Seed : Real; + + N : Positive; + + Two_Digit : constant e_Digit := Make_e_Digit (2.0); + Half : constant e_Real := Make_Extended (0.5); + + Limit : constant Integer := 1200; + -- Number of test runs for some procedures. Can't exceed 1200 because + -- some tests use (10.0**0.25)**Limit. + + e_Real_Decimals : constant := Desired_Decimal_Digit_Precision; + + + ----------- + -- Pause -- + ----------- + + procedure Pause (s0,s1,s2,s3,s4,s5,s6,s7,s8,s9 : string := "") is + Continue : Character := ' '; + begin + new_line; + if S0 /= "" then put_line (S0); end if; + if S1 /= "" then put_line (S1); end if; + if S2 /= "" then put_line (S2); end if; + if S3 /= "" then put_line (S3); end if; + if S4 /= "" then put_line (S4); end if; + if S5 /= "" then put_line (S5); end if; + if S6 /= "" then put_line (S6); end if; + if S7 /= "" then put_line (S7); end if; + if S8 /= "" then put_line (S8); end if; + if S9 /= "" then put_line (S9); end if; + new_line; + begin + put ("Type a character to continue: "); + get_immediate (Continue); + exception + when others => null; + end; + new_line; + end Pause; + + ---------------------------------- + -- Print_Extended_Real_Settings -- + ---------------------------------- + + procedure Print_Extended_Real_Settings + is + Bits_In_Radix : constant := Desired_No_Of_Bits_In_Radix; + begin + new_line(1); + put (" Desired_Decimal_Digit_Precision ="); + put (Integer'Image(Desired_Decimal_Digit_Precision)); + new_line(1); + new_line(1); + put ("Number of decimal digits of precision requested: "); + put (Integer'Image(Desired_Decimal_Digit_Precision)); + new_line(1); + put ("Number of digits in use (including 2 guard digits): "); + put (e_Integer'Image(e_Real_Machine_Mantissa)); + new_line(1); + put ("These digits are not decimal; they have Radix: 2**("); + put (e_Integer'Image(Bits_In_Radix)); put(")"); + new_line(1); + put ("In other words, each of these digits is in range: 0 .. 2**("); + put (e_Integer'Image(Bits_In_Radix)); put(")"); put (" - 1."); + new_line(1); + put ("Number of decimal digits per actual digit is approx: 9"); + new_line(2); + put("Guard digits (digits of extra precision) are appended to the end of"); + new_line(1); + put("each number. There are always 2 guard digits. This adds up to 18"); + new_line(1); + put("decimal digits of extra precision. The arithmetic operators, (""*"","); + new_line(1); + put("""/"", ""+"" etc) usually produce results that are correct to all"); + new_line(1); + put("digits except the final (guard) digit."); + + Pause; + + new_line(2); + put("If a number is correct to all digits except the final (guard) digit,"); + new_line(1); + put("expect errors of the order:"); + new_line(2); + put(e_Real_Image (e_Real_Model_Epsilon / (One+One)**Bits_In_Radix, aft => 10)); + new_line(2); + put("If you lose 2 digits of accuracy (i.e. both guard digits) instead"); + new_line(1); + put("of 1 (as in the above case) then you lose another 9 decimal digits"); + new_line(1); + put("of accuracy. In this case expect errors of the order:"); + new_line(2); + put(e_Real_Image (e_Real_Model_Epsilon, aft => 10)); + new_line(1); + + new_line(1); + put ("The above number, by the way, is: e_Real_Model_Epsilon."); + new_line(2); + put ("Most computationally intensive floating pt. calculations will"); + new_line(1); + put ("lose 2 guard digits of accuracy at the minimum."); + + Pause; + + end Print_Extended_Real_Settings; + + --------------------------- + -- Test_Cos_and_Arccos_4 -- + --------------------------- + + -- Test rel. error in large Z1 limit. + + procedure Test_Cos_and_Arccos_4 is + I1 : constant Integer := -Limit; + I2 :constant Integer := 0; + Max_Error : e_Real; -- init essential + + begin + Z1 := Junk1; + + for I in I1..I2 loop + Z1 := (Z1 / (Z1 + (+(1.77827941**I)))); + Z2 := Cos (Arccos (Z1)); + + Difference1 := Abs ((Z2 - Z1) / Z1); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Cos_and_Arccos_4; + + + -- Test rel. error in small Z1 limit. + + procedure Test_Cos_and_Arccos_3 is + I1 : constant Integer := -Limit; + I2 :constant Integer := 0; + Max_Error : e_Real; -- init essential + + begin + Z1 := Junk1; + + for I in I1..I2 loop + Z1 := Abs(Z1); + Z1 := -(Z1 / (Z1 + (+(1.77827941**I))**2)); + Z2 := Cos (Arccos (Z1)); + + Difference1 := Abs ((Z2 - Z1) / Z1); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Cos_and_Arccos_3; + + + -- Can't test relative error in small Z1 limit, only absolute, + -- because Cos (Arccos (Z1)) simply doesn't reproduce Z1 for finite + -- numbers of digits. Recall Arcos just calls Arcsin. + + procedure Test_Cos_and_Arccos_2 is + I1 : constant Integer := -Limit/2; + I2 :constant Integer := Limit/2; + Max_Error : e_Real; -- init essential + + begin + Z1 := Junk1; + + for I in I1..I2 loop + Z1 := (Z1 / (Z1 + (+(1.77827941**I)))); + Z2 := Cos (Arccos (Z1)); + + Difference1 := Abs (Z2 - Z1); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Cos_and_Arccos_2; + + -- Can't test relative error in small Z1 limit, only absolute, + -- because Cos (Arccos (Z1)) simply doesn't reproduce Z1 for finite + -- numbers of digits. Recall Arcos just calls Arcsin. + + procedure Test_Cos_and_Arccos_1 is + I1 : constant Integer := -Limit/2; + I2 :constant Integer := Limit/2; + Max_Error : e_Real; -- init essential + begin + Z1 := Junk1; + + for I in I1..I2 loop + Z1 := Abs(Z1); + Z1 := -(Z1 / (Z1 + (+(1.77827941**I))**2)); + Z2 := Cos (Arccos (Z1)); + + Difference1 := Abs (Z2 - Z1); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Cos_and_Arccos_1; + + + -- Superficial test of Sin(X) and Arcsin(X). + + procedure Test_Sin_and_Arcsin_1 is + I1 : constant Integer := -Limit; + I2 : constant Integer := 0; + Max_Error : e_Real; -- init essential + begin + Z1 := Junk1; + + for I in I1..I2 loop + Z1 := Z1 / (Z1 + (+(1.77827941**I))); + Z2 := Sin (Arcsin (Z1)); + + Difference1 := Abs ((Z2 - Z1) / Z1); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Sin_and_Arcsin_1; + + + procedure Test_Sin_and_Arcsin_2 is + I1 : constant Integer := -Limit; + I2 : constant Integer := Limit+5; + Max_Error : e_Real; -- init essential + begin + Z1 := Junk1; + + for I in I1..I2 loop + Z1 := Z1 / (Z1 + (+(1.77827941**I))); + Z2 := Sin (Arcsin (Z1)); + + Difference1 := Abs ((Z2 - Z1) / Z1); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Sin_and_Arcsin_2; + + + + + procedure Test_Sin_and_Arcsin_3 is + Max_Error : e_Real; -- init essential + I1 : constant Integer := -Limit/2; + I2 : constant Integer := Limit+5; + begin + Z1 := Junk1; + + for I in I1..I2 loop + Z1 := Abs(Z1); + Z1 := -Z1 / (Z1 + (+(1.77827941**I))); + Z2 := Sin (Arcsin (Z1)); + + Difference1 := Abs (Z2/Z1 - One); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Sin_and_Arcsin_3; + + + -- test: Sin(X)**2 + Cos(X)**2 - 1. + + procedure Test_Sin_and_Cos_0 + (Test_Vector_Seed : Real) + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 10_000; + begin + Z1 := Junk1 * (+Test_Vector_Seed); + + for I in 1 .. No_of_Trials loop + Z1 := Junk2 + Z1; + + Z2 := Sin(Z1)**2 + Cos(Z1)**2; + + Difference1 := Abs (One - Z2); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Sin_and_Cos_0; + + -- test: Sin(X)**2 + Cos(X)**2 - 1. + + procedure Test_Sin_and_Cos_2 + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 10_000; + Shift : constant e_Digit := Make_e_Digit (2.0**(-27)); + begin + Z1 := e_Inverse_Sqrt_2; + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Shift * (Shift * e_Real_Model_Epsilon); + + Z2 := Sin(Z1)**2 + Cos(Z1)**2; + + Difference1 := Abs (One - Z2); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Sin_and_Cos_2; + + + procedure Test_Tan_and_Arctan_2 + (Test_Vector_Seed : Real) + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 4_000; + begin + Z1 := Abs (Junk1 * (+Test_Vector_Seed)); + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Junk2; + Z2 := Arctan (Z1); + --Z4 := Sin(Z2) / Cos(Z2); -- Z1 - Sin(Z2) / Cos(Z2) not ok if cos=0 + Z3 := Sin(Z2); + Z4 := Cos(Z2) * Z1; + Difference1 := Abs ((Z3 - Z4) / Z3); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Tan_and_Arctan_2; + + + procedure Test_Tan_and_Arctan + (Test_Vector_Seed : Real) + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 1000; + Digit_Trials : constant e_Digit := Make_e_Digit (Real (No_of_Trials)); + dZ : constant e_Real := Two_Digit * e_Quarter_pi / Digit_Trials; + begin + + for I in 1 .. No_of_Trials loop + Z1 := Make_e_Digit (Real(I)) * dZ; + + --Z2 := Sin(Z1) / Cos(Z1); + Z2 := Divide (Sin(Z1), Cos(Z1)); + Z4 := Arctan (Z2); + Difference1 := Abs ((Z4 - Z1) / (Z1)); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + Z1 := Abs (Junk1 * (+Test_Vector_Seed)); + for I in 0 .. No_of_Trials loop + Z1 := Z1 / (Z1 + One); + Z1 := Two_Digit * Z1 * e_Quarter_pi; + + --Z2 := Sin(Z1) / Cos(Z1); + Z2 := Divide (Sin(Z1), Cos(Z1)); + Z4 := Arctan (Z2); + Difference1 := Abs ((Z4 - Z1) / (Z1)); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + Z1 := One / Abs (Junk1 * (+Test_Vector_Seed)); + for I in 0 .. No_of_Trials loop + Z1 := Z1 / (Z1 + One); + Z1 := Two_Digit * Z1 * e_Quarter_pi; + + --Z2 := Sin(Z1) / Cos(Z1); + Z2 := Divide (Sin(Z1), Cos(Z1)); + Z4 := Arctan (Z2); + Difference1 := Abs ((Z4 - Z1) / (Z1)); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + Z1 := Abs (Junk1 * (+Test_Vector_Seed)); + for I in 0 .. No_of_Trials loop + Z1 := Z1 / (Z1 + (+(1.77827941**I))); + Z1 := Two_Digit * Z1 * e_Quarter_pi; + --Z1 := Z1 / (Z1 + One); + + --Z2 := Sin(Z1) / Cos(Z1); + Z2 := Divide (Sin(Z1), Cos(Z1)); + Z4 := Arctan (Z2); + Difference1 := Abs ((Z4 - Z1) / (Z1)); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + + Z1 := Abs (Junk1 * (+Test_Vector_Seed)); + for I in -No_of_Trials .. 0 loop + Z1 := Z1 / (Z1 + (+(1.77827941**I))); + Z1 := Two_Digit * Z1 * e_Quarter_pi; + --Z1 := Z1 / (Z1 + One); + + --Z2 := Sin(Z1) / Cos(Z1); + Z2 := Divide (Sin(Z1), Cos(Z1)); + Z4 := Arctan (Z2); + Difference1 := Abs ((Z4 - Z1) / (Z1)); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Tan_and_Arctan; + + + procedure Test_Sin_and_Cos_1 + (Test_Vector_Seed : Real) + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 10_000; + begin + Z1 := Junk1 * (+Test_Vector_Seed); + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Junk2; + + Z2 := Two_Digit * Sin(Z1) * Cos(Z1); + Z3 := Sin (Two_Digit * Z1); + + --Difference1 := Abs (Z3 / Z2 - One); + Difference1 := Abs ((Z3 - Z2) / Z2); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Sin_and_Cos_1; + + -------------- + -- Test_Log -- + -------------- + + procedure Test_Log + (Test_Vector_Seed : Real) + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 10_000; + begin + Z1 := Junk1 * (+Test_Vector_Seed); + Z0 := Junk2 * (+Test_Vector_Seed); + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Z0; + Z2 := Log (Z1); + Z2 := Exp (Z2); -- should bring it back to Z1 + + Difference1 := Abs (One - Z2 / Z1); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Log; + + + ---------------- + -- Test_Log_2 -- + ---------------- + + -- Verified that Log(2) is correct by independent calc of Log_2. + -- Now check other arguments using Log(A*B) = ... + + procedure Test_Log_2 (Test_Vector_Seed : Real) is + Max_Error : e_Real; -- init essential + Log_Z0 : e_Real; + No_of_Trials : constant Integer := 10_000; + begin + Z1 := Junk1 * (+Test_Vector_Seed); + Z0 := Junk2 * (+Test_Vector_Seed); + + Log_Z0 := Log (Z0); + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Z0; + Z2 := Log (Z1*Z0); + Z3 := Log (Z1) + Log_Z0; + + Difference1 := Abs (One - Z3 / Z2); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + + end Test_Log_2; + + + ----------------------------- + -- Test_Exp_w_Integer_Args -- + ----------------------------- + + -- Verified that Exp is correct by independent calc of Log_2. + -- Now check other arguments using Exp(A+B) = ... + + procedure Test_Exp_w_Integer_Args + (Test_Vector_Seed : Integer := 0) + is + Max_Error : e_Real; -- init essential + Exp_Z0 : e_Real; + No_of_Trials : constant Integer := 500_000; + begin + Z0 := (+Real (2 + Test_Vector_Seed)); + Z1 := (+0.0); + + Exp_Z0 := Exp (Z0); + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Z0; + Z2 := Exp (Z1 + Z0); + Z3 := Exp (Z1) * Exp_Z0; + + Difference1 := Abs (Z3/Z2 - One); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Exp_w_Integer_Args; + + -------------- + -- Test_Exp -- + -------------- + + procedure Test_Exp + (Test_Vector_Seed : Real) + is + Max_Error, Exp_Z0 : e_Real; -- init essential + No_of_Trials : constant Integer := 10_000; + begin + Z1 := Junk1 * (+Test_Vector_Seed); + Z0 := Junk3 * (+Test_Vector_Seed); + Exp_Z0 := Exp (Z0); + + for I in 1..No_of_Trials loop + Z1 := Z1 + Z0; + Z2 := Exp (Z1 + Z0); + Z3 := Exp (Z1) * Exp_Z0; + + Difference1 := Abs (Z3/Z2 - One); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Exp; + + ----------------- + -- Test_Sqrt_2 -- + ----------------- + + -- uses ** to calculate SQRT(X). Square and + -- compare with X; square and divide by X to compare w. 1.0. + + procedure Test_Sqrt_2 + (Test_Vector_Seed : Real) + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 10_000; + begin + Z1 := Junk1 * (+Test_Vector_Seed); + Z0 := Junk2 * (+Test_Vector_Seed); + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Z0; + Z2 := Z1**(Half); + + Difference1 := Abs (One - (Z2 * Z2) / Z1); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Sqrt_2; + + ---------------------------- + -- Test_Reciprocal_Sqrt_2 -- + ---------------------------- + + -- uses ** to calculate SQRT(X). Square and + -- compare with X; square and divide by X to compare w. 1.0. + + procedure Test_Reciprocal_Sqrt_2 + (Test_Vector_Seed : Real) + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 10_000; + begin + Z1 := Junk1 * (+Test_Vector_Seed); + Z0 := Junk2 * (+Test_Vector_Seed); + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Z0; + Z2 := Z1**(-Half); + + --Difference1 := Abs (One/Z1 - (Z2 * Z2)) * Z1; + Difference1 := Abs (One - (Z2 * Z2) * Z1); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Reciprocal_Sqrt_2; + + -------------------------- + -- Test_Reciprocal_Sqrt -- + -------------------------- + + -- uses Newton's method to calculate SQRT(X). Square and + -- compare with X; square and divide by X to compare w. 1.0. + + procedure Test_Reciprocal_Sqrt + (Test_Vector_Seed : Real) + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 50_000; + begin + Z1 := Junk1 * (+Test_Vector_Seed); + Z0 := Junk2 * (+Test_Vector_Seed); + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Z0; + Z2 := Reciprocal_Sqrt (Z1); + + Difference1 := Abs (One - (Z2 * Z2) * Z1); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Reciprocal_Sqrt; + + --------------- + -- Test_Sqrt -- + --------------- + + -- uses Newton's method to calculate SQRT(X). Square and + -- compare with X; square and divide by X to compare w. 1.0. + + procedure Test_Sqrt + (Test_Vector_Seed : Real) + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 50_000; + begin + Z1 := Junk1 * (+Test_Vector_Seed); + Z0 := Junk2 * (+Test_Vector_Seed); + + for I in 1..No_of_Trials loop + Z1 := Z1 + Z0; + Z2 := Sqrt (Z1); + Z2 := Z2 * Z2; + + --Difference1 := Abs ((Z1 - Z2) / Z1); + Difference1 := Abs (One - Z2 / Z1); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Sqrt; + + --------------------- + -- Test_Reciprocal -- + --------------------- + + -- uses Newton's method to calculate Inverse of (X). + -- compare with X; mult. by X to compare w. 1.0. + + procedure Test_Reciprocal + (Test_Vector_Seed : Real) + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 50_000; + begin + Z1 := Junk1 * (+Test_Vector_Seed); + Z0 := Junk2 * (+Test_Vector_Seed); + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Z0; + Z2 := Reciprocal(Z1); + + Difference1 := Abs (One - Z1 * Z2); + --Difference1 := Abs (One / Z1 - Z2) * Z1; + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Reciprocal; + + ---------------------- + -- Test_Divide_stnd -- + ---------------------- + + -- uses Newton's method to calculate X/Y + -- compare with X; mult. by Y to compare w. X. + + procedure Test_Divide_stnd + (Test_Vector_Seed : Real) + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 50_000; + begin + Z2 := Junk1 * (+Test_Vector_Seed); + Z1 := Junk1 * (+Test_Vector_Seed); + Z0 := Junk2 * (+Test_Vector_Seed); + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Z0; + Z2 := Z2 + Z2 + Junk1; + Z3 := Z2 / Z1; + --Z3 := Divide (Z2, Z1); + + Difference1 := Abs (Z2 - Z3 * Z1) / Z2; + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + end loop; + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Z0; + Z2 := Z2 + Z2 + Junk1; + Z3 := Z1 / Z2; + --Z3 := Divide (Z1, Z2); + + Difference1 := Abs (Z1 - Z3 * Z2) / Z1; + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Divide_stnd; + + + ----------------- + -- Test_Divide -- + ----------------- + + -- uses Newton's method to calculate X/Y + -- compare with X; mult. by Y to compare w. X. + + procedure Test_Divide + (Test_Vector_Seed : Real) + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 50_000; + begin + Z2 := Junk1 * (+Test_Vector_Seed); + Z1 := Junk1 * (+Test_Vector_Seed); + Z0 := Junk2 * (+Test_Vector_Seed); + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Z0; + Z2 := Z2 + Z2 + Junk1; + --Z3 := Z2 / Z1; + Z3 := Divide (Z2, Z1); + + --Difference1 := Abs (Z2 - Z3 * Z1) / Z2; + Difference1 := Divide (Abs (Z2 - Z3 * Z1), Z2); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + end loop; + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Z0; + Z2 := Z2 + Z2 + Junk1; + --Z3 := Z1 / Z2; + Z3 := Divide (Z1, Z2); + + --Difference1 := Abs (Z1 - Z3 * Z2) / Z1; + Difference1 := Divide (Abs (Z1 - Z3 * Z2), Z1); + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Divide; + + --------------- + -- Test_Root -- + --------------- + + -- uses Newton's method to calculate Inverse of Nth root of (X). + -- compare with X; ** and mult by X to compare w. 1.0. + + procedure Test_Root + (Test_Vector_Seed : Real; N : Positive) + is + Max_Error : e_Real; -- init essential + No_of_Trials : constant Integer := 50_000; + begin + Z1 := Junk1 * (+Test_Vector_Seed); + Z0 := Junk2 * (+Test_Vector_Seed); + + for I in 1..No_of_Trials loop + Z1 := Z0 + Z1; + + Z2 := Reciprocal_Nth_Root (Z1, N); + Z3 := Z2 ** N; -- so should be just Reciprocal of Z1 + + Difference1 := One - Z1 * Z3; + + if Are_Not_Equal (Difference1, Zero) then + if Difference1 > Max_Error then + Max_Error := Difference1; + end if; + end if; + + end loop; + + put ("Estimated Max Error: "); + put (e_Real_Image (Max_Error, Aft => Integer'Min (40, e_Real_Decimals))); + new_line(1); + + end Test_Root; + +begin + + Print_Extended_Real_Settings; + + new_line; + Pause ( + "Some tests of Sin and Cos. ", + "10_000 trials are performed for each number printed below.", + "Test 1. calculate: Sin**2 + Cos**2 - 1." + ); + new_line(2); + + Test_Sin_and_Cos_2; + + Test_Vector_Seed := 1.2345657891234E+24; -- near max arg for Cos + Test_Sin_and_Cos_0 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+8; + Test_Sin_and_Cos_0 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_Sin_and_Cos_0 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+0; + Test_Sin_and_Cos_0 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-3; + Test_Sin_and_Cos_0 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-7; + Test_Sin_and_Cos_0 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-31; + Test_Sin_and_Cos_0 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-93; + Test_Sin_and_Cos_0 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-233; + Test_Sin_and_Cos_0 (Test_Vector_Seed); + + + new_line; + put ("Sin (2X) - 2*Sin (X)*Cos (X):"); + new_line(2); + + + Test_Vector_Seed := 1.2345657891234E+24; + Test_Sin_and_Cos_1 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+10; + Test_Sin_and_Cos_1 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_Sin_and_Cos_1 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+0; + Test_Sin_and_Cos_1 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-3; + Test_Sin_and_Cos_1 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-6; + Test_Sin_and_Cos_1 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-8; + Test_Sin_and_Cos_1 (Test_Vector_Seed); + + + -- Arcsin_Stuff + + + Pause ("Some tests of Sin and Arcsin. "); + new_line(2); + + Test_Sin_and_Arcsin_1; + Test_Sin_and_Arcsin_2; + Test_Sin_and_Arcsin_3; + + new_line; + put_Line ("Some tests of Cos and Arccos. "); + new_line; + + Test_Cos_and_Arccos_1; + Test_Cos_and_Arccos_2; + Test_Cos_and_Arccos_3; + Test_Cos_and_Arccos_4; + + Pause + ("Test of Sin (Arctan(X)) / Cos (Arctan(X)) = X", + "4_000 trials are performed for each number printed below:" + ); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_Tan_and_Arctan_2 (Test_Vector_Seed); + + Test_Vector_Seed := 2.2345657891234E00; + Test_Tan_and_Arctan_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E00; + Test_Tan_and_Arctan_2 (Test_Vector_Seed); + + Test_Vector_Seed := 5.2345657891234E-1; + Test_Tan_and_Arctan_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-3; + Test_Tan_and_Arctan_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-10; + Test_Tan_and_Arctan_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-99; + Test_Tan_and_Arctan_2 (Test_Vector_Seed); + + Pause + ("Test of Arctan (Sin(X) / Cos(X)) = X.", + "5_000 trials are performed for each number printed below:" + ); + + Test_Vector_Seed := 1.2345657891234E+54; + Test_Tan_and_Arctan (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+34; + Test_Tan_and_Arctan (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+10; + Test_Tan_and_Arctan (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_Tan_and_Arctan (Test_Vector_Seed); + + Test_Vector_Seed := 2.2345657891234E00; + Test_Tan_and_Arctan (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E00; + Test_Tan_and_Arctan (Test_Vector_Seed); + + Test_Vector_Seed := 5.2345657891234E-1; + Test_Tan_and_Arctan (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-3; + Test_Tan_and_Arctan (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-10; + Test_Tan_and_Arctan (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-99; + Test_Tan_and_Arctan (Test_Vector_Seed); + + + Pause + ("Test of Exp (Log (X)) = X for small and large arguments.", + "10_000 trials are performed for each number printed below:" + ); + + Test_Vector_Seed := 1.2345657891234E+99; + Test_Log (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+34; + Test_Log (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+10; + Test_Log (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_Log (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E00; + Test_Log (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-3; + Test_Log (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-10; + Test_Log (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-34; + Test_Log (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-99; + Test_Log (Test_Vector_Seed); + + + + Pause ("Test of Log (X*Y) = Log (X) + Log (Y).", + "10_000 trials are performed for each number printed below:" + ); + + Test_Vector_Seed := 1.2345657891234E+99; + Test_Log_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+34; + Test_Log_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+18; + Test_Log_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+10; + Test_Log_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_Log_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+0; + Test_Log_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-3; + Test_Log_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-10; + Test_Log_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-34; + Test_Log_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-99; + Test_Log_2 (Test_Vector_Seed); + + + -- Exp_Stuff + + Pause ( + "Test Exp (X+Y) = Exp (X) * Exp (Y).", + "10_000 trials are performed for each number printed below:" + ); + + + Test_Vector_Seed := +0.2345657891234E+1; + Test_Exp(Test_Vector_Seed); + + Test_Vector_Seed := +1.2345657891234E+4; + Test_Exp(Test_Vector_Seed); + + Test_Vector_Seed := +1.2345657891234E+3; + Test_Exp(Test_Vector_Seed); + + Test_Vector_Seed := -1.2345657891234E+3; + Test_Exp(Test_Vector_Seed); + + Test_Vector_Seed := -1.2345657891234E-4; + Test_Exp(Test_Vector_Seed); + + Test_Vector_Seed := +1.2345657891234E-4; + Test_Exp(Test_Vector_Seed); + + Test_Vector_Seed := -1.2345657891234E+1; + Test_Exp(Test_Vector_Seed); + + Test_Vector_Seed := +1.2345657891234E+1; + Test_Exp(Test_Vector_Seed); + + Test_Vector_Seed := -1.2345657891234E+0; + Test_Exp(Test_Vector_Seed); + + Test_Vector_Seed := -1.2345657891234E-1; + Test_Exp(Test_Vector_Seed); + + Test_Vector_Seed := +1.2345657891234E-1; + Test_Exp(Test_Vector_Seed); + + Test_Vector_Seed := -1.2345657891234E-10; + Test_Exp(Test_Vector_Seed); + + Test_Vector_Seed := +1.2345657891234E-16; + Test_Exp(Test_Vector_Seed); + + Test_Vector_Seed := -1.2345657891234E-16; + Test_Exp(Test_Vector_Seed); + + + new_line; + pause ("Start simple test of Log and Exp routines. "); + new_line; + + put_line("compare exp(0.25) with actual:"); + put (Exp(0.25)); new_line; + put (Make_Real(Exp(Make_Extended(0.25)))); new_line; + + put_line("compare exp(0.2) with actual:"); + put (Exp(0.2)); new_line; + put (Make_Real(Exp(Make_Extended(0.2)))); new_line; + + put_line("compare exp(0.28) with actual:"); + put (Exp(0.28)); new_line; + put (Make_Real(Exp(Make_Extended(0.28)))); new_line; + + put_line("compare exp(1.0) with actual:"); + put (Exp(1.0)); new_line; + put (Make_Real(Exp(Make_Extended(1.0)))); new_line; + + put_line("compare exp(20.0) with actual:"); + put (Exp(20.0)); new_line; + put (Make_Real(Exp(Make_Extended(20.0)))); new_line; + + put_line("compare exp(-20.0) with actual:"); + put (Exp(-20.0)); new_line; + put (Make_Real(Exp(Make_Extended(-20.0)))); new_line; + + put_line("compare log(2.0) with actual:"); + put (Log(2.0)); new_line; + put (Make_Real(Log(Make_Extended(2.0)))); new_line; + + put_line("compare log(1.01) with actual:"); + put (Log(1.01)); new_line; + put (Make_Real(Log(Make_Extended(1.01)))); new_line; + + put_line("compare log(1.0E34) with actual:"); + put (Log(1.0E34)); new_line; + put (Make_Real(Log(Make_Extended(1.0E34)))); new_line; + new_line(1); + + new_line(1); + Pause ("Check Sin and Cos at a few specific arguments."); + new_line(1); + + put ("Check Sin (0.25*Pi) = Sqrt(0.5). Estimated error = "); + Delta_2 := Sin (E_Quarter_Pi) - Sqrt(Make_Extended(0.5)); + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Sin (0.50*Pi) = 1.0. Estimated error = "); + Delta_2 := One - Sin (Two_Digit * E_Quarter_Pi); + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Sin (0.75*Pi) = Sqrt(0.5). Estimated error = "); + Delta_2 := Sin (Make_e_Digit(5.0)*E_Quarter_Pi) + e_Inverse_Sqrt_2; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Sin (1.25*Pi) =-Sqrt(0.5). Estimated error = "); + Delta_2 := Sin (Make_e_Digit(5.0)*E_Quarter_Pi) + e_Inverse_Sqrt_2; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Sin (1.5*Pi) = (-1.0). Estimated error = "); + Delta_2 := Sin (Make_e_Digit(6.0)*E_Quarter_Pi) + One; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Sin (1.75*Pi) =-Sqrt(0.5). Estimated error = "); + Delta_2 := Sin (Make_e_Digit (7.0)*E_Quarter_Pi) + e_Inverse_Sqrt_2; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Sin (2.25*Pi) = Sqrt(0.5). Estimated error = "); + Delta_2 := Sin (Make_e_Digit(9.0)*E_Quarter_Pi) - e_Inverse_Sqrt_2; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Sin (2.5*Pi) = 1.0. Estimated error = "); + Delta_2 := Sin (Make_e_Digit(10.0) * E_Quarter_Pi) - One; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Sin (2.75*Pi) = Sqrt(0.5). Estimated error = "); + Delta_2 := Sin (Make_e_Digit(11.0)*E_Quarter_Pi) - e_Inverse_Sqrt_2; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Sin (3.0*Pi) = 0.0. Estimated error = "); + Delta_2 := Sin (Make_e_Digit(12.0)*E_Quarter_Pi); + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Cos (3.0*Pi) = (-1.0). Estimated error = "); + Delta_2 := Cos (Make_e_Digit(12.0)*E_Quarter_Pi) + One; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Cos (2.75*Pi) =-Sqrt(0.5). Estimated error = "); + Delta_2 := Cos (Make_e_Digit(11.0)*E_Quarter_Pi) + e_Inverse_Sqrt_2; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Cos (2.5*Pi) = 0.0. Estimated error = "); + Delta_2 := Cos (Make_e_Digit(10.0)*E_Quarter_Pi); + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Cos (2.25*Pi) = Sqrt(0.5). Estimated error = "); + Delta_2 := Cos (Make_e_Digit(9.0)*E_Quarter_Pi) - e_Inverse_Sqrt_2; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Cos (2.0*Pi) = 1.0. Estimated error = "); + Delta_2 := Cos (Make_e_Digit(8.0)*E_Quarter_Pi) - One; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Cos (1.75*Pi) = Sqrt(0.5). Estimated error = "); + Delta_2 := Cos (Make_e_Digit(7.0)*E_Quarter_Pi) - e_Inverse_Sqrt_2; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Cos (1.5*Pi) = 0.0. Estimated error = "); + Delta_2 := Cos (Make_e_Digit(6.0)*E_Quarter_Pi); + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Cos (1.25*Pi) =-Sqrt(0.5). Estimated error = "); + Delta_2 := Cos (Make_e_Digit(5.0)*E_Quarter_Pi) + e_Inverse_Sqrt_2; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Cos (1.0*Pi) =-1.0. Estimated error = "); + Delta_2 := Cos (Make_e_Digit(4.0)*E_Quarter_Pi) + One; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Cos (0.5*Pi) = 0.0. Estimated error = "); + Delta_2 := Cos (Two_Digit*E_Quarter_Pi); + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Cos (0.25*Pi) = Sqrt(0.5). Estimated error = "); + Delta_2 := Cos (E_Quarter_Pi) - Sqrt(Make_Extended(0.5)); + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Cos (-0.25*Pi) = Sqrt(0.5) Estimated error = "); + Delta_2 := Cos (-E_Quarter_Pi) - Sqrt(Make_Extended(0.5)); + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Cos (129.25*Pi) =-Sqrt(0.5). Estimated error = "); + Delta_2 := Cos (Make_e_Digit(517.0)*E_Quarter_Pi) + e_Inverse_Sqrt_2; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check Sin (129.75*Pi) =-Sqrt(0.5). Estimated error = "); + Delta_2 := Sin (Make_e_Digit (519.0)*E_Quarter_Pi) + e_Inverse_Sqrt_2; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + new_line(1); + Pause ("Some more spot checks."); + new_line(1); + + Z0 := E_Pi; + for I in 1..10 loop + Z0 := Z0 * Z0; + end loop; + Z1 := E_Pi**(2**10); + + put ("Check Pi**1024 - Pi*Pi*Pi*..*Pi*Pi*Pi: Estimated error = "); + Delta_2 := One - Z1/Z0; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Check 2.0 * (1/SQRT(2.0)**2) - 1.0: Estimated error = "); + Delta_2 := e_Inverse_Sqrt_2**2 - (+0.5); + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Arccos(1/sqrt(2)) - Arcsin(1/sqrt(2)): Estimated error = "); + Delta_2 := Arccos(E_Inverse_Sqrt_2) - Arcsin(E_Inverse_Sqrt_2); + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Arccos(-1/sqrt(2)) - 1.5 Pi/4: Estimated error = "); + Delta_2 := Arccos(-e_Inverse_Sqrt_2) - Two_Digit*e_Quarter_Pi - e_Quarter_Pi; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Arcsin(-1/sqrt(2)) + Pi/4: Estimated error = "); + Delta_2 := Arcsin(-e_Inverse_Sqrt_2) + e_Quarter_Pi; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Arccos(0) - Pi/2: Estimated error = "); + Delta_2 := Arccos(Zero) - Two_Digit*e_Quarter_Pi; + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + put ("Arccos(-1) - Pi: Estimated error = "); + Delta_2 := (Arccos(-One) - e_Pi); + put (e_Real_Image (Delta_2, Aft => Integer'Min (10, e_Real_Decimals))); + new_line(1); + + + Pause ( + "Test Exp (X+Y) = Exp (X) * Exp (Y) using integer valued Arguments.", + "500_000 trials are performed for each number printed below:" + ); + + Test_Exp_w_Integer_Args; + + + new_line; + Pause ( + "Test of Sqrt routine.", + "50_000 tests are performed for each number printed below:" + ); + + Test_Vector_Seed := 1.2345657891234E+134; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+74; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+34; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+27; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+8; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+1; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-1; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-4; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-8; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 9.9345657891234E-14; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 9.9345657891234E-34; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 9.9345657891234E-70; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-173; + Test_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-273; + Test_Sqrt (Test_Vector_Seed); + + new_line; + Pause ( + "Test of Sqrt routine using the ""**"" operator.", + "10_000 tests are performed for each number printed below:" + ); + + Test_Vector_Seed := 1.2345657891234E+134; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+74; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+34; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+27; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+8; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+1; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-1; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-4; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-8; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 9.9345657891234E-14; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 9.9345657891234E-34; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 9.9345657891234E-70; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-173; + Test_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-273; + Test_Sqrt_2 (Test_Vector_Seed); + + new_line; + Pause ( + "Test of the standard ""/"" routine.", + "Test calculates A - (A/B)*B.", + "Use of both ""/"" and ""*"" usually means a loss of 2 guard digits accuracy.", + "100_000 tests are performed for each number printed below:" + ); + + Test_Vector_Seed := 1.2345657891234E+273; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+123; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+34; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+10; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+08; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+1; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-1; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-3; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-8; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-18; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-28; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-39; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-78; + Test_Divide_stnd(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-178; + Test_Divide_stnd(Test_Vector_Seed); + + new_line; + Pause ( + "Test of Newton-Raphson Divide (A,B) routine.", + "Test calculates A - Divide (A,B)*B. The Newton-Raphson Divide is designed", + "to minimize A - Divide (A,B)*B, and does so consistently better than the", + "stnd ""/"" (schoolboy algorithm). It doesn't do the division more accurately.", + "It is however quite a bit faster than the standard ""/"" operator.", + "100_000 tests are performed for each number printed below:" + ); + + Test_Vector_Seed := 1.2345657891234E+273; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+123; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+34; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+10; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+08; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+1; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-1; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-3; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-8; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-18; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-28; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-39; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-78; + Test_Divide(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-178; + Test_Divide(Test_Vector_Seed); + + + new_line; + Pause ( + "Test of Reciprocal routine Reciprocal(A) = 1/A.", + "Test calculates 1.0 - A*Reciprocal(A). The Newton-Raphson iteration is designed", + "to minimize 1.0 - A*Reciprocal(A), and does so consistently better than using", + "the standard ""/"" to get 1.0/A. It doesn't do the division more accurately.", + "It is however quite a bit faster than the standard ""/"" operator.", + "50_000 tests are performed for each number printed below:" + ); + + Test_Vector_Seed := 1.2345657891234E+273; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+123; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+34; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+10; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+08; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+1; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-1; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-3; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-8; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-18; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-28; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-39; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-78; + Test_Reciprocal(Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-178; + Test_Reciprocal(Test_Vector_Seed); + Pause ( + "Test of Reciprocal_Sqrt routine.", + "50_000 tests are performed for each number printed below:" + ); + + Test_Vector_Seed := 1.2345657891234E+273; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+173; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+34; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+27; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+8; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+1; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-1; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-4; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-8; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 9.9345657891234E-14; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 9.9345657891234E-34; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 9.9345657891234E-70; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-173; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-273; + Test_Reciprocal_Sqrt (Test_Vector_Seed); + + new_line; + Pause ( + "Test of 1 / Sqrt routine using the ""**"" operator.", + "10_000 tests are performed for each number printed below:" + ); + + Test_Vector_Seed := 1.2345657891234E+273; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+173; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+34; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+27; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+8; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E+1; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-1; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-4; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-8; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 9.9345657891234E-14; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 9.9345657891234E-34; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 9.9345657891234E-70; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-173; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345657891234E-273; + Test_Reciprocal_Sqrt_2 (Test_Vector_Seed); + + + Pause ( + "Test of Reciprocal_Nth_Root routine.", + "5_000 tests are performed for each number printed below:" + ); + + for N in 1..4 loop + + new_line; + put ("N = "); put(Integer'Image(N)); new_line; + + Test_Vector_Seed := 1.2345657891234E+123; + Test_Root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E+34; + Test_Root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E+27; + Test_root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E+18; + Test_root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E+8; + Test_root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E+3; + Test_root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E+1; + Test_root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E-1; + Test_root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E-3; + Test_Root(Test_Vector_Seed, N); + + Test_Vector_Seed := 9.9345657891234E-14; + Test_Root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E-17; + Test_Root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E-33; + Test_Root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E-70; + Test_Root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E-112; + Test_Root(Test_Vector_Seed, N); + + end loop; + + for M in 17..18 loop + + N := 123*M; + + new_line; + put ("N = "); put(Integer'Image(N)); new_line; + + Test_Vector_Seed := 1.2345657891234E+34; + Test_Root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E+27; + Test_root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E+8; + Test_root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E-8; + Test_Root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E-17; + Test_Root(Test_Vector_Seed, N); + + Test_Vector_Seed := 1.2345657891234E-33; + Test_Root(Test_Vector_Seed, N); + + end loop; + + -- Reciprocal(Z2) is generally 2 or more times faster than "/" + + -- declare Char : Character := ' '; begin + --Z1 := Junk1 * (+1.23456789123E+12); + --Z2 := Junk2 * (+1.234567891234E+00); + --new_line; + --put_Line ("Benchmark of division. Enter a real 1.0 to start: "); + --get (Char); + --put_line("Start bench 3, 5_000_000 divisions: "); + --for I in 1..5_000_000 loop + --Z1 := Z1 / Z2; + --end loop; + --put_line("End of bench 3."); + --new_line; + --put_Line ("Benchmark of alt. division. Enter a real 1.0 to start: "); + --get (Char); + --put_line("Start bench 4, 5_000_000 divisions: "); + --for I in 1..5_000_000 loop + --Z1 := Z1 * Reciprocal(Z2); + --end loop; + --put_line("End of bench 4."); + --end; + +end; + diff --git a/arbitrary/e_jacobi_eigen.adb b/arbitrary/e_jacobi_eigen.adb new file mode 100644 index 0000000..2ff1ccd --- /dev/null +++ b/arbitrary/e_jacobi_eigen.adb @@ -0,0 +1,304 @@ + +----------------------------------------------------------------------- +-- package body e_Jacobi_Eigen, extended precision Jacobi eigen-decomposition +-- Copyright (C) 2008-2018 Jonathan S. Parker +-- +-- Permission to use, copy, modify, and/or distribute this software for any +-- purpose with or without fee is hereby granted, provided that the above +-- copyright notice and this permission notice appear in all copies. +-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +--------------------------------------------------------------------------- + + +package body e_Jacobi_Eigen is + + Reciprocal_Epsilon : constant Real := One / e_Real_Model_Epsilon; + + --------------------------------- + -- Get_Jacobi_Rotation_Factors -- + --------------------------------- + + -- all underflows are OK here. + -- no overflows are OK here. + -- so we test for Q / P overflows by calculating P / Q + + procedure Get_Jacobi_Rotation_Factors + (P, Q : in Real; + s : out Real; + tau : out Real; + Delta_D : out Real) + is + t, Gamma : Real; + -- function "/" (x, y : Real) return Real renames Divide; + -- faster than stnd "/", but scarcely matters. + begin + + s := Zero; + tau := Zero; + Delta_D := Zero; + + if Abs (Q) > e_Real_Safe_Min and then + Abs (P) > e_Real_Safe_Min and then + Abs (Q) < e_Real_Safe_Max and then + Abs (P) < e_Real_Safe_Max then + + -- Following Gamma is usually the more accurate. + + Gamma := P / (Abs (Q) + Sqrt (P*P + Q*Q)); + + if Q < Zero then Gamma := -Gamma; end if; + + -- usual case overwhelmingly. If you scale matrix to unit Norm, + -- then no overflows because Matrix norm is preserved, preventing + -- large P, Q if they are not large to begin with. (So the above + -- tests for < e_Real_Safe_Max would be unnecessary.) + -- (Requires scaling of matrix prior to decomposition to be + -- absolutely sure.) + + -- Should be able to do the following w/o any tests of p,q if don't care + -- about quality of answer in P < e_Safe_Small, Q < e_Safe_Small limit. + -- + --Gamma := P / (Abs(Q) + Sqrt (P*P + Q*Q) + e_Safe_Small); + --if Q < Zero then Gamma := -Gamma; end if; + + elsif Abs (Q) > Abs (P) then + + -- Abs (P) > 0 was a tested before arrival, which implies Abs (Q) > 0. + + t := P / Q; + Gamma := t / (One + Sqrt (One + t*t)); + -- Underflow OK; overflow not allowed. + + elsif Abs (P) >= Abs (Q) then + + t := Q / P; + Gamma := One / (Abs (t) + Sqrt (One + t*t)); + -- Underflow OK; overflow not allowed. + + if t < Zero then Gamma := -Gamma; end if; + + else + + return; -- must have hit some inf's. Use stnd rotation init'd above. + + end if; + + declare + c : Real; + begin + c := Reciprocal_Sqrt (One + Gamma*Gamma); -- Cosine (ok) + --c := Sqrt (One / (One + Gamma*Gamma)); -- Cosine + s := c * Gamma; -- Sine + tau := s / (One + c); -- -cos_minus_1_over_sin + Delta_D := P * Gamma; + end; + + end Get_Jacobi_Rotation_Factors; + + --------------------- + -- Eigen_Decompose -- + --------------------- + + procedure Eigen_Decompose + (A : in out Matrix; + Q_tr : out Matrix; + Eigenvals : out Col_Vector; + No_of_Sweeps_Performed : out Natural; + Total_No_of_Rotations : out Natural; + Start_Col : in Index := Index'First; + Final_Col : in Index := Index'Last; + Eigenvectors_Desired : in Boolean := False) + is + D : Col_Vector renames Eigenvals; + Z, B : Col_Vector; + + Max_Allowed_No_of_Sweeps : constant Positive := 256; -- badly scaled need lots + No_of_Preliminary_Sweeps : constant Positive := 14; + + --Reciprocal_Epsilon : constant Real := One / e_Real_Model_Epsilon; + -- Good stnd setting for the effective eps is: Real'Epsilon * Two**(-2). + -- Usually, Real'Epsilon := 2.0**(-50) for 15 digit Reals. + + Matrix_Size : constant Real_8 := Real_8(Final_Col) - Real_8(Start_Col) + 1.0; + No_of_Off_Diag_Elements : constant Real_8 := 0.5*Matrix_Size*(Matrix_Size-1.0); + Mean_Off_Diagonal_Element_Size : Real; + + s, g, h, tau : Real; -- Rutishauser variable names. + Q, Delta_D : Real; + Sum, Pivot, Threshold : Real; + begin + + -- Initialize all out parameters. D renames Eigenvals. + -- Q_tr starts as Identity; is rotated into set of Eigenvectors of A. + + Q_tr := (others => (others => Zero)); + for j in Index loop + Q_tr(j,j) := One; + end loop; + + Z := (others => Zero); + B := (others => Zero); + D := (others => Zero); + for j in Start_Col .. Final_Col loop -- assume A not all init + D(j) := A(j,j); + B(j) := A(j,j); + end loop; + + No_of_Sweeps_Performed := 0; + Total_No_of_Rotations := 0; + + if Matrix_Size <= 1.0 then return; end if; -- right answer for Size=1. + + + Sweep: for Sweep_id in 1 .. Max_Allowed_No_of_Sweeps loop + + Sum := Zero; + for N in Start_Col .. Final_Col-1 loop --sum off-diagonal elements + for I in N+1 .. Final_Col loop + Sum := Sum + Abs (A(N,I)); + end loop; + end loop; + Mean_Off_Diagonal_Element_Size := Sum / (+No_of_Off_Diag_Elements); + + + exit Sweep when Mean_Off_Diagonal_Element_Size < Min_Allowed_Real; + + + if Sweep_id > No_of_Preliminary_Sweeps then + Threshold := Zero; + else + Threshold := One * Mean_Off_Diagonal_Element_Size; + end if; + + for N in Start_Col .. Final_Col-1 loop + for I in N+1 .. Final_Col loop + + Pivot := A(N,I); + + -- Have to zero out sufficiently small A(I,N) to get convergence, + -- ie, to get Off_Diag_Sum -> 0.0. + -- After 4 sweeps all A(I,N) are small so that + -- A(I,N) / Epsilon will never overflow. The test is + -- A(I,N) / Epsilon <= Abs D(I) and A(I,N) / Epsilon <= Abs D(N). + + if + (Sweep_id > No_of_Preliminary_Sweeps) and then + (Reciprocal_Epsilon * Abs (Pivot) <= Abs D(N)) and then + (Reciprocal_Epsilon * Abs (Pivot) <= Abs D(I)) + then + + A(N,I) := Zero; + + elsif Abs (Pivot) > Threshold then + + Q := Half * (D(I) - D(N)); + + Get_Jacobi_Rotation_Factors (Pivot, Q, s, tau, Delta_D); + -- Pivot=A(N,I) + + D(N) := D(N) - Delta_D; -- Locally D is only used for threshold test. + D(I) := D(I) + Delta_D; + Z(N) := Z(N) - Delta_D; -- Z is reinitialized to 0 each sweep, so Z + Z(I) := Z(I) + Delta_D; -- sums the small d's 1st. Helps a tad. + + A(N,I) := Zero; + + for j in Start_Col .. N-1 loop + g := A(j,N); + h := A(j,I); + A(j,N) := g-s*(h+g*tau); + A(j,I) := h+s*(g-h*tau); + end loop; + for j in N+1 .. I-1 loop + g := A(N,j); + h := A(j,I); + A(N,j) := g-s*(h+g*tau); + A(j,I) := h+s*(g-h*tau); + end loop; + for j in I+1 .. Final_Col loop + g := A(N,j); + h := A(I,j); + A(N,j) := g-s*(h+g*tau); + A(I,j) := h+s*(g-h*tau); + end loop; + + if Eigenvectors_Desired then + for j in Start_Col .. Final_Col loop + g := Q_tr(N,j); + h := Q_tr(I,j); + Q_tr(N,j) := g-s*(h+g*tau); + Q_tr(I,j) := h+s*(g-h*tau); + end loop; + end if; + + Total_No_of_Rotations := Total_No_of_Rotations + 1; + + end if; -- if (Sweep_id > No_of_Preliminary_Sweeps) + + end loop; --I loop (Col) + end loop; --N loop (Row) + + for j in Start_Col .. Final_Col loop -- assume A not all initialized + B(j) := B(j) + Z(j); + D(j) := B(j); + Z(j) := Zero; + end loop; + + end loop Sweep; --Sweep_id loop + + end Eigen_Decompose; + + --------------- + -- Sort_Eigs -- + --------------- + + procedure Sort_Eigs + (Eigenvals : in out Col_Vector; + Q_tr : in out Matrix; -- rows are the eigvectors + Start_Col : in Index := Index'First; + Final_Col : in Index := Index'Last; + Sort_Eigvecs_Also : in Boolean := False) + is + Max_Eig, tmp : Real; + Max_id : Index; + + begin + + if Start_Col < Final_Col then + for i in Start_Col .. Final_Col-1 loop + + Max_Eig := Eigenvals(i); Max_id := i; + + for j in i+1 .. Final_Col loop + if Eigenvals(j) > Max_Eig then + Max_Eig := Eigenvals(j); Max_id := j; + end if; + end loop; + + tmp := Eigenvals(i); + Eigenvals(i) := Max_Eig; + Eigenvals(Max_id) := tmp; + + -- swap rows of Q_tr: + + if Sort_Eigvecs_Also then + for k in Start_Col .. Final_Col loop + tmp := Q_tr(i,k); + Q_tr(i,k) := Q_tr(Max_id,k); + Q_tr(Max_id,k) := tmp; + end loop; + end if; + + end loop; + end if; + + end Sort_Eigs; + +end e_Jacobi_Eigen; + diff --git a/arbitrary/e_jacobi_eigen.ads b/arbitrary/e_jacobi_eigen.ads new file mode 100644 index 0000000..db75f7b --- /dev/null +++ b/arbitrary/e_jacobi_eigen.ads @@ -0,0 +1,124 @@ + +-- PACKAGE e_Jacobi_Eigen +-- +-- Extended precision version of Jacobi's iterative algorithm for +-- eigen-decomposition of square real-valued symmetric matrices. +-- +-- PROCEDURE Eigen_Decompose +-- +-- Works on arbitrary diagonal blocks of input matrix. For other blocks just +-- copy the matrix to desired position; copy overhead is negligable compared +-- to the O(N^3) running time of the decomposition. +-- + +generic + + type e_Real is private; + + type Index is range <>; + type Matrix is array (Index, Index) of e_Real; + + type Real_8 is digits <>; + -- Real_8 is for easy communication with e_Real. + -- Must be digits 15 or more. + -- The function "+" below translates Real_8 to e_Real. + + -- Exported by Extended_Real: + + type e_Integer is range <>; + + with function e_Real_Model_Epsilon return e_Real is <>; + with function e_Real_Machine_Emin return e_Integer is <>; + with function e_Real_Machine_Emax return e_Integer is <>; + with function e_Real_Machine_Radix return Real_8 is <>; + --with function Exponent (X : e_Real) return e_Integer is <>; + + with function "*" (X : e_Real; Y : e_Real) return e_Real is <>; + with function "/" (X : e_Real; Y : e_Real) return e_Real is <>; + with function "-" (X : e_Real; Y : e_Real) return e_Real is <>; + with function "+" (X : e_Real; Y : e_Real) return e_Real is <>; + with function "**" (X : e_Real; I : Integer) return e_Real is <>; + with function "<=" (X : e_Real; Y : e_Real) return Boolean is <>; + with function ">=" (X : e_Real; Y : e_Real) return Boolean is <>; + with function "<" (X : e_Real; Y : e_Real) return Boolean is <>; + with function ">" (X : e_Real; Y : e_Real) return Boolean is <>; + with function "-" (X : e_Real) return e_Real is <>; + with function "+" (X : Real_8) return e_Real is <>; + with function "Abs" (X : e_Real) return e_Real is <>; + --with function "=" (X : e_Real; Y : e_Real) return Boolean is <>; + + -- Exported by Extended_Real.Elementary_Functions: + + with function Sqrt (X : e_Real) return e_Real is <>; + with function Reciprocal_Sqrt (X : e_Real) return e_Real is <>; --1/Sqrt(X) + --with function Divide (X, Y : e_Real) return e_Real is <>; -- X/Y + --with function "**" (X : e_Real; Y : e_Real) return e_Real is <>; + --with function Cos (X : e_Real) return e_Real is <>; + --with function Sin (X : e_Real) return e_Real is <>; + +package e_Jacobi_Eigen is + + subtype Real is e_Real; + + type Col_Vector is array(Index) of Real; + + + -- PROCEDURE Eigen_Decompose + -- + -- Standard Jacobi iterative eigendecomposition. The routine returns + -- eigenvectors and eigenvalues of any real-valued square symmetric matrix. + -- + -- The orthonormal (unordered) eigenvectors are the Columns of Q. + -- The orthonormal (unordered) eigenvectors are returned as the Rows of Q'=Q_tr. + -- Eigenvals (returned in array Eigenvals) are ordered the same as Eigvecs in Q. + -- So A = QEQ'. The diagonal elements of diagonal matrix E are the eigvals. + -- The routine performs the eigen-decomposition on arbitrary square + -- diagonal blocks of matrix A. + -- It is assumed the blocks are symmetric. + -- The upper left corner of the square matrix is (Start_Col, Start_Col). + -- The lower rgt corner of the square matrix is (Final_Col, Final_Col). + -- Matrix A doesn't need to be positive definite, or semi-definite. + -- If Eigenvectors_Desired = False, then Q_tr is not calculated. + -- + -- Routine only sees and operates on the upper triangle of matrix. + -- + -- Input matrix A is destroyed. Save a copy of A if you need it. + -- + -- Eigenvectors of A are returned as the ROWS of matrix: Q_tr + -- + -- so Q_tr * A * Q = Diagonal_Eigs + -- + procedure Eigen_Decompose + (A : in out Matrix; -- destroyed + Q_tr : out Matrix; -- rows of Q_tr are the eigvectors + Eigenvals : out Col_Vector; + No_of_Sweeps_Performed : out Natural; + Total_No_of_Rotations : out Natural; + Start_Col : in Index := Index'First; + Final_Col : in Index := Index'Last; + Eigenvectors_Desired : in Boolean := False); + + + procedure Sort_Eigs + (Eigenvals : in out Col_Vector; + Q_tr : in out Matrix; -- rows of Q_tr are the eigvectors + Start_Col : in Index := Index'First; + Final_Col : in Index := Index'Last; + Sort_Eigvecs_Also : in Boolean := False); + +private + + Zero : constant Real := +0.0; + One : constant Real := +1.0; + Two : constant Real := +2.0; + Half : constant Real := +0.5; + + e_Radix : constant Real := +e_Real_Machine_Radix; + e_Real_Safe_Min : constant Real := e_Radix**Integer(e_Real_Machine_Emin / 4); + e_Real_Safe_Max : constant Real := e_Radix**Integer(e_Real_Machine_Emax / 4); + + Min_Exp : constant Integer := Integer (e_Real_Machine_Emin); + Min_Allowed_Real : constant Real := e_Radix**(Min_Exp/2 + Min_Exp/4); + +end e_Jacobi_Eigen; + diff --git a/arbitrary/e_jacobi_eigen_demo_1.adb b/arbitrary/e_jacobi_eigen_demo_1.adb new file mode 100644 index 0000000..e4ff5fc --- /dev/null +++ b/arbitrary/e_jacobi_eigen_demo_1.adb @@ -0,0 +1,531 @@ + +----------------------------------------------------------------------- +-- procedure e_jacobi_eigen_tst_1, test of extended precision Jacobi +-- Copyright (C) 2008-2009 Jonathan S. Parker. +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU General Public License as published by +-- the Free Software Foundation, either version 3 of the License, or +-- (at your option) any later version. +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- You should have received a copy of the GNU General Public License +-- along with this program. If not, see http://www.gnu.org/licenses/ +-- +-- As a special exception, if other files instantiate generics from +-- this unit, or you link this unit with other files to produce an +-- executable, this unit does not by itself cause the resulting +-- executable to be covered by the GNU General Public License. This +-- exception does not however invalidate any other reasons why the +-- executable file might be covered by the GNU Public License. +----------------------------------------------------------------------- + +-- Test Jacobi Eigendecomposition of real valued square matrices. + +with Ada.Numerics.Generic_Elementary_Functions; +with Extended_Real; +with Extended_Real.Elementary_Functions; +with Extended_Real.IO; +with E_Jacobi_Eigen; +With Text_IO; use Text_IO; + +procedure e_jacobi_eigen_demo_1 is + + type Real_8 is digits 15; + + package mth is new Ada.Numerics.Generic_Elementary_Functions (Real_8); + use mth; + package ext is new Extended_Real (Real_8); + use ext; + package fnc is new ext.Elementary_Functions (Sqrt, Log, Exp, Arcsin); + use fnc; + package eio is new ext.IO; -- extented IO + use eio; + package rio is new Text_IO.Float_IO (Real_8); + use rio; + package iio is new Integer_IO (Integer); + use iio; + + subtype Real is e_Real; + + subtype Index is Integer range 1..36; + + -- the test matrix is square-shaped matrix on: Index x Index. + -- eg Hilbert's matrix is a square matrix with unique elements on the range + -- Index'First .. Index'Last. However, you have the option or using any + -- diagonal sub-block of the matrix defined by Index x Index + + subtype Row_Index is Index; + subtype Col_Index is Index; + + Starting_Col : constant Index := Index'First + 0; + Final_Col : constant Index := Index'Last - 0; + + -- Can't change: + + Starting_Row : constant Index := Starting_Col; + Final_Row : constant Index := Final_Col; + + type Matrix is array(Index, Index) of Real; + + --pragma Convention (Fortran, Matrix); --No! prefers Ada convention. + + package eig is new e_Jacobi_Eigen (e_Real, Index, Matrix, Real_8, e_Integer); + use eig; + + -- Eig exports Col_Vector + + subtype Real_Extended is Real; -- general case, works fine + + Zero : constant Real := +0.0; + One : constant Real := +1.0; + Two : constant Real := +2.0; + + A, A_true, Q_tr : Matrix; + Eigenvals : Col_Vector; + Frobenius_QtrQ_Err, Frobenius_QQtr_Err, Frobenius_QEQ_Err : Real; + + No_of_Sweeps_Done, No_of_Rotations : Natural; + + N : constant Real_8 := Real_8 (Starting_Col) - Real_8 (Final_Col) + 1.0; + + ----------------- + -- Rayleigh_Eig -- + ----------------- + + function Rayleigh_Eig + (A : in Matrix; + Q_tr : in Matrix; + j : in Index) + return Real + is + V : Col_Vector := (others => Zero); + Sum : Real_Extended; + begin + + for Row in Row_Index range Starting_Row .. Final_Row loop + Sum := Zero; + for Col in Col_Index range Starting_Col .. Final_Col loop + Sum := Sum + Real_Extended (A(Row, Col)) * Real_Extended (Q_tr(j, Col)); + end loop; + V (Row) := Real (Sum); + end loop; + + Sum := Zero; + for Col in Col_Index range Starting_Col .. Final_Col loop + Sum := Sum + Real_Extended (V(Col)) * Real_Extended (Q_tr(j, Col)); + end loop; + + return Real (Sum); + + end Rayleigh_Eig; + + -------------------- + -- Frobenius_Norm -- + -------------------- + + function Frobenius_Norm + (A : in Matrix) + return Real + is + Max_A_Val : Real := Zero; + Sum, Scaling, tmp : Real := Zero; + begin + + Max_A_Val := Zero; + for Row in Starting_Row .. Final_Row loop + for Col in Starting_Col .. Final_Col loop + if Max_A_Val < Abs A(Row, Col) then Max_A_Val := Abs A(Row, Col); end if; + end loop; + end loop; + + Max_A_Val := Max_A_Val + Two ** Integer (e_Real_Machine_Emin + 8); + Scaling := One / Max_A_Val; + + Sum := Zero; + for Row in Starting_Row .. Final_Row loop + for Col in Starting_Col .. Final_Col loop + tmp := Scaling * A(Row, Col); + Sum := Sum + tmp * tmp; + end loop; + end loop; + + return Sqrt (Sum) * Max_A_Val; + + end Frobenius_Norm; + + ------------------------------------ + -- Get_Err_in_Reassembled_Q_and_A -- + ------------------------------------ + + -- check that A = V*E*Q_tr + -- E is diagonal with the eigs along the diag. + -- V is orthogonal with the eig vecs as columns. + + + procedure Get_Err_in_Reassembled_Q_and_A + (A : in Matrix; -- true original A + Q_tr : in Matrix; + E : in Col_Vector; + Final_Col : in Col_Index; + Starting_Col : in Col_Index; + Frobenius_QtrQ_Err : out Real; + Frobenius_QQtr_Err : out Real; + Frobenius_QEQ_Err : out Real) + is + Err, S : Real; + Min_Real : constant Real := Two ** Integer (e_Real_Machine_Emin + 8); + + Sum : Real_Extended; + + Identity : Matrix := (others => (others => Zero)); + Product_QQ : Matrix := (others => (others => Zero)); + Product_QEQ : Matrix := (others => (others => Zero)); + + subtype Index_Subrange is Index range Starting_Col .. Final_Col; + + begin + + for r in Index_Subrange loop + Identity(r, r) := One; + end loop; + + -- Find error in I - Q*Q' etc. + -- Notation: Q' == Q_tr == transpose of Q. + + for Col in Index_Subrange loop + for Row in Index_Subrange loop + Sum := Zero; + for j in Index_Subrange loop + Sum := Sum + Real_Extended (Q_tr(j, Row)) * Real_Extended (Q_tr(j, Col)); + end loop; + Product_QQ(Row, Col) := Real (Sum); + end loop; + end loop; + + -- Get Frobenius norm of: Product_QQ - I: + + S := Zero; + for Col in Index_Subrange loop + for Row in Index_Subrange loop + Err := Identity(Row, Col) - Product_QQ(Row, Col); + S := S + Err * Err; + end loop; + end loop; + + -- Get fractional Frobenius : Frobenius (Product_QQ - I) / Frobenius (I): + + Frobenius_QQtr_Err := Sqrt(S) / + Sqrt (Make_Extended (Real_8(Starting_Col)+Real_8(Final_Col)+1.0)); + + + -- Find error in I - Q'*Q. + -- reuse array Product_QQ: + + for Col in Index_Subrange loop + for Row in Index_Subrange loop + Sum := Zero; + for j in Index_Subrange loop + Sum := Sum + Real_Extended (Q_tr(Row, j)) * Real_Extended (Q_tr(Col, j)); + end loop; + Product_QQ(Row, Col) := Real (Sum); + end loop; + end loop; + + -- Get Frobenius norm of: Product_QQ - I: + + S := Zero; + for Col in Index_Subrange loop + for Row in Index_Subrange loop + Err := Identity(Row, Col) - Product_QQ(Row, Col); + S := S + Err * Err; + end loop; + end loop; + + -- Get fractional Frobenius : Frobenius (Product_QQ - I) / Frobenius (I): + + Frobenius_QtrQ_Err := Sqrt(S) / + Sqrt (Make_Extended (Real_8(Starting_Col)+Real_8(Final_Col)+1.0)); + + -- check that A = Q*E*Q_tr + -- E is diagonal with the eigs along the diag. + -- Q is orthogonal with the eig vecs as columns. + + -- explicitly calculate Q*E*Q_tr: + + for Col in Index_Subrange loop + for Row in Index_Subrange loop + Sum := Zero; + for j in Index_Subrange loop + Sum := Sum + + Real_Extended (Q_tr(j, Row)) * -- Q(Row, j) + Real_Extended (E(j)) * -- j-th eig is const along Q col + Real_Extended (Q_tr(j, Col)); + end loop; + Product_QEQ(Row, Col) := Real (Sum); + end loop; + end loop; + + -- resuse array Product_QEQ to get Error Matrix := Product_QEQ - A: + + for Col in Starting_Col .. Final_Col loop + for Row in Starting_Row .. Final_Row loop + Product_QEQ(Row, Col) := A(Row, Col) - Product_QEQ(Row, Col); + end loop; + end loop; + + Frobenius_QEQ_Err := Frobenius_Norm (Product_QEQ) / + (Frobenius_Norm (A) + Min_Real); + + end Get_Err_in_Reassembled_Q_and_A; + + ----------- + -- Pause -- + ----------- + + procedure Pause (s0,s1,s2,s3,s4,s5,s6,s7,s8,s9 : string := "") is + Continue : Character := ' '; + begin + new_line; + if S0 /= "" then put_line (S0); end if; + if S1 /= "" then put_line (S1); end if; + if S2 /= "" then put_line (S2); end if; + if S3 /= "" then put_line (S3); end if; + if S4 /= "" then put_line (S4); end if; + if S5 /= "" then put_line (S5); end if; + if S6 /= "" then put_line (S6); end if; + if S7 /= "" then put_line (S7); end if; + if S8 /= "" then put_line (S8); end if; + if S9 /= "" then put_line (S9); end if; + new_line; + begin + put ("Type a character to continue: "); + get_immediate (Continue); + exception + when others => null; + end; + new_line; + end pause; + + ------------------------ + -- Get_Hilbert_Matrix -- + ------------------------ + + procedure Get_Hilbert_Matrix + (A : out Matrix) + is + Prime_Factors, Denominator : Real; + begin + + --Prime_Factors := 3.0*3.0*3.0*5.0*5.0*7.0*11.0*13.0*17.0*19.0*23.0*29.0*31.0*37.0; + -- so Prime_Factors / D is exactly represented in 15 digit floating point + -- up to D = 39 (allowing an 20x20 matrix). Prime_Factors = 166966608033225.0 + + --Prime_Factors := (+166966608033225.0) * (+580027.0) * Two**(-68); + --Prime_Factors := One; + Prime_Factors := (+0.25); + + for Row in Starting_Col .. Final_Col loop + for Col in Starting_Col .. Final_Col loop + Denominator := +(Real_8(Row) + Real_8(Col) - 2.0*Real_8(Starting_Col) + 1.0); + A(Row, Col) := Prime_Factors / Denominator; + end loop; + end loop; + end Get_Hilbert_Matrix; + + pragma Inline (Get_Hilbert_Matrix); + + ---------------------------------- + -- Print_Extended_Real_Settings -- + ---------------------------------- + + procedure Print_Extended_Real_Settings + is + Bits_In_Radix : constant := Desired_No_Of_Bits_In_Radix; + begin + new_line(1); + put (" Desired_Decimal_Digit_Precision ="); + put (Integer'Image(Desired_Decimal_Digit_Precision)); + new_line(1); + new_line(1); + put ("Number of decimal digits of precision requested: "); + put (Integer'Image(Desired_Decimal_Digit_Precision)); + new_line(1); + put ("Number of digits in use (including 2 guard digits): "); + put (e_Integer'Image(e_Real_Machine_Mantissa)); + new_line(1); + put ("These digits are not decimal; they have Radix: 2**("); + put (e_Integer'Image(Bits_In_Radix)); put(")"); + new_line(1); + put ("In other words, each of these digits is in range: 0 .. 2**("); + put (e_Integer'Image(Bits_In_Radix)); put(")"); put (" - 1."); + new_line(1); + put ("Number of decimal digits per actual digit is approx: 9"); + new_line(2); + put("Guard digits (digits of extra precision) are appended to the end of"); + new_line(1); + put("each number. There are always 2 guard digits. This adds up to 18"); + new_line(1); + put("decimal digits of extra precision. The arithmetic operators, (""*"","); + new_line(1); + put("""/"", ""+"" etc) usually produce results that are correct to all"); + new_line(1); + put("digits except the final (guard) digit."); + new_line(2); + put("If a number is correct to all digits except the final (guard) digit,"); + new_line(1); + put("expect errors of the order:"); + new_line(2); + put(e_Real_Image (e_Real_Model_Epsilon / (One+One)**Bits_In_Radix, Aft => 20)); + new_line(2); + put("If you lose 2 digits of accuracy (i.e. both guard digits) instead"); + new_line(1); + put("of 1 (as in the above case) then you lose another 9 decimal digits"); + new_line(1); + put("of accuracy. In this case expect errors of the order:"); + new_line(2); + put(e_Real_Image (e_Real_Model_Epsilon, Aft => 20)); + new_line(1); + + Pause; + + end Print_Extended_Real_Settings; + +begin + + Print_Extended_Real_Settings; + + Pause( + "Test 1: Jacobi Eigendecomposition of matrix A.", + " ", + "The Jacobi Eigendecomposition of A is successful if the identities Q*Q' = I,", + "Q'*Q = I and Q*E*Q' = A are satisfied. Here Q' denotes the transpose of Q, and", + "E is any diagonal matrix. (E will hold the Eigvals.) If 15 digit Reals are used,", + "then we expect the error in the calculation of A = Q*E*Q' to be (hopefully)", + "a few parts per 10**15. In other words ||Q*E*Q' - A|| / ||A|| should be a few", + "multiples of 10**(-15). Here ||*|| denotes the Frobenius Norm. Other matrix", + "norms give slightly different answers, so its an order of magnitude estimate." + ); + + -- + -- Get A = Q*E*Q_tr, or E = Q_tr*A*Q. + -- E is diagonal with the eigs along the diag. + -- V is orthogonal with the eig vecs as columns. + + Get_Hilbert_Matrix (A); + + -- Usually A is not symmetric. Eigen_Decompose doesn't care about + -- that. It uses the upper triangle of A, and pretends that A is + -- symmetric. But for subsequent analysis, we symmetrize: + + if false then -- use lower triangle of A to make a fully symmetric A: + + for Col in Starting_Col .. Final_Col loop + for Row in Col .. Final_Row loop + A(Col, Row) := A(Row, Col); -- write lower triangle of A to upper triangle + end loop; + end loop; + + else -- use upper triangle of A to make a fully symmetric A: + + for Col in Starting_Col .. Final_Col loop + for Row in Starting_Col .. Col loop + A(Col, Row) := A(Row, Col); -- write lower triangle of A to upper triangle + end loop; + end loop; + + end if; + + A_true := A; -- Save original A + + Eigen_Decompose + (A => A, -- A is destroyed + Q_tr => Q_tr, + Eigenvals => Eigenvals, + No_of_Sweeps_Performed => No_of_Sweeps_Done, + Total_No_of_Rotations => No_of_Rotations, + Final_Col => Final_Col, + Start_Col => Starting_Col, + Eigenvectors_Desired => True); + + Sort_Eigs + (Eigenvals => Eigenvals, + Q_tr => Q_tr, + Start_Col => Starting_Col, + Final_Col => Final_Col, + Sort_Eigvecs_Also => True); + + + Get_Err_in_Reassembled_Q_and_A + (A => A_True, + Q_tr => Q_tr, + E => Eigenvals, + Final_Col => Final_Col, + Starting_Col => Starting_Col, + Frobenius_QtrQ_Err => Frobenius_QtrQ_Err, + Frobenius_QQtr_Err => Frobenius_QQtr_Err, + Frobenius_QEQ_Err => Frobenius_QEQ_Err); + + -- Froebenius norm fractional error: + -- Max_Error_F = ||Err_Matrix|| / ||A|| + + new_line; + new_line; + put(" No of sweeps performed, and Total_No_of_Rotations / (N*(N-1)/2) ="); + new_line; + put(Real_8 (No_of_Sweeps_Done)); + put(Real_8 (No_of_Rotations) / (N*(N-1.0)/2.0)); + new_line; + put(" Err in I-Q*Q' (Q = orthogonal) is ||I-Q*Q'|| / ||I|| ="); + put(e_Real_Image (Frobenius_QQtr_Err)); + new_line; + put(" Err in I-Q'*Q (Q = orthogonal) is ||I-Q'*Q|| / ||I|| ="); + put(e_Real_Image (Frobenius_QtrQ_Err)); + new_line; + put(" Err in A-Q*E*Q' (E = eigenvals) is ||A-Q*E*Q'|| / ||A|| ="); + put(e_Real_Image (Frobenius_QEQ_Err)); + new_line; + + Pause( + "Test 2: Estimate of error in eigenvalues.", + " ", + "Actual error can be calculated accurately by rerunning the calculation", + "with higher precision, but the following method works very well. Simply ", + "subtract the calculated eigenvalues from the Rayleigh eigs. In other", + "words, calculate Err = E - Q'*A*Q, where the column vectors of Q are the", + "calculated Eigenvectors of A, and the diagonal of E contains the", + "calculated Eigenvalues of A. The diagonal elements of Err follow:" + ); + + new_line (1); + for I in Starting_Col .. Final_Col loop + new_line; + put("Eigenvalue number"); put (Index'Image (I)); + new_line; + put("Eigenvalue = "); + put(e_Real_Image (Eigenvals(I))); + new_line (1); + put("Err in Eigenval = "); + put(e_Real_Image ((Rayleigh_Eig(A_True, Q_tr, I)-Eigenvals(I)))); + end loop; + new_line (2); + + Pause( + "Final comment.", + "Errors were unusually small here. (Got the 1st guard digit right.)", + "The Jacobi Eigendecomposition is good at minimizing accumulation of error.", + "In large flt. pt. calculations, you should expect to get both guard digits", + "wrong (i.e. lose an additional 9 decimal digits of accuracy).", + "That's why 2 guard digits are used." + ); + Pause( + "Also worth noting that no rounding was done in the Eigendecomposition", + "arithmetic. Rounding would lower accuracy and slow down the execution.", + "In other algorithms rounding may be beneficial or necessary. It can be", + "done intermittantly, not at all, or frequently. Calls to routines that", + "perform rounding are inserted by the user, entirely at his discretion." + ); + +end; diff --git a/arbitrary/e_jacobi_eigen_tst_1.adb b/arbitrary/e_jacobi_eigen_tst_1.adb new file mode 100644 index 0000000..c7aaa0b --- /dev/null +++ b/arbitrary/e_jacobi_eigen_tst_1.adb @@ -0,0 +1,127 @@ + +-- Demonstrate Jacobi Eigendecomposition of real valued square matrices. + +with Ada.Numerics.Generic_Elementary_Functions; +with extended_real; +with extended_real.elementary_functions; +with extended_real.io; +with e_jacobi_eigen; +with text_io; use text_io; + +procedure e_jacobi_eigen_tst_1 is + + type Real_8 is digits 15; + + package mth is new Ada.Numerics.Generic_Elementary_Functions (Real_8); + use mth; + package ext is new Extended_Real (Real_8); + use ext; + package eio is new ext.IO; + use eio; + package fnc is new Ext.Elementary_Functions (Sqrt, Log, Exp, Arcsin); + use fnc; + + subtype Real is e_Real; + + subtype Index is Integer range 1..36; + + -- the test matrix is square-shaped matrix on: Index x Index. + -- eg Hilbert's matrix is a square matrix with unique elements on the range + -- Index'First .. Index'Last. However, you have the option or using any + -- diagonal sub-block of the matrix defined by Index x Index + + subtype Row_Index is Index; + subtype Col_Index is Index; + + Starting_Col : constant Index := Index'First + 0; + Final_Col : constant Index := Index'Last - 0; + + -- Can't change: + + Starting_Row : constant Index := Starting_Col; + Final_Row : constant Index := Final_Col; + + type Matrix is array(Index, Index) of Real; + + --pragma Convention (Fortran, Matrix); --No! prefers Ada convention. + + package eig is new e_Jacobi_Eigen (e_Real, Index, Matrix, Real_8, e_Integer); + use eig; + + -- Eig exports Col_Vector + + subtype Real_Extended is Real; -- general case, works fine + + Zero : constant Real := +0.0; + One : constant Real := +1.0; + Two : constant Real := +2.0; + + A, A_true, Q_tr : Matrix; + Eigenvals : Col_Vector; + Frobenius_QtrQ_Err, Frobenius_QQtr_Err, Frobenius_QEQ_Err : Real; + + No_of_Sweeps_Done, No_of_Rotations : Natural; + + N : constant Real_8 := Real_8 (Final_Col) - Real_8 (Starting_Col) + 1.0; + + ------------------------ + -- Get_Hilbert_Matrix -- + ------------------------ + + procedure Get_Hilbert_Matrix + (A : out Matrix) + is + Prime_Factors, Denominator : Real; + begin + + --Prime_Factors := 3.0*3.0*3.0*5.0*5.0*7.0*11.0*13.0*17.0*19.0*23.0*29.0*31.0*37.0; + -- so Prime_Factors / D is exactly represented in 15 digit floating point + -- up to D = 39 (allowing an 20x20 matrix). Prime_Factors = 166966608033225.0 + + Prime_Factors := (+166966608033225.0) * (+580027.0) * Two**(-68); + + for Row in Starting_Col .. Final_Col loop + for Col in Starting_Col .. Final_Col loop + Denominator := +(Real_8(Row) + Real_8(Col) - 2.0*Real_8(Starting_Col) + 1.0); + A(Row, Col) := Prime_Factors / Denominator; + end loop; + end loop; + end Get_Hilbert_Matrix; + + pragma Inline (Get_Hilbert_Matrix); + +begin + + -- + -- Get A = Q*E*Q_tr, or E = Q_tr*A*Q. + -- E is diagonal with the eigs along the diag. + -- V is orthogonal with the eig vecs as columns. + + Get_Hilbert_Matrix (A); + + A_true := A; -- Save original A + + Eigen_Decompose + (A => A, -- A is destroyed + Q_tr => Q_tr, + Eigenvals => Eigenvals, + No_of_Sweeps_Performed => No_of_Sweeps_Done, + Total_No_of_Rotations => No_of_Rotations, + Final_Col => Final_Col, + Start_Col => Starting_Col, + Eigenvectors_Desired => True); + + new_line; + put ("Matrix size (N): "); + put (Real_8'Image(N)); + new_line; + put ("First Eigenvalue: "); + put (e_Real_Image(Eigenvals(Starting_Col))); + new_line; + put ("Smallest Eigenvalue: "); + put (e_Real_Image(Eigenvals(Final_Col))); + new_line; + put ("No_of_Rotations / (N(N-1)/2): "); + put (Real_8'Image(Real_8(No_of_Rotations) / (0.5*N*(N-1.0)))); + +end; diff --git a/arbitrary/e_real_demo_1.adb b/arbitrary/e_real_demo_1.adb new file mode 100644 index 0000000..e29dee1 --- /dev/null +++ b/arbitrary/e_real_demo_1.adb @@ -0,0 +1,702 @@ + +with Text_IO; use Text_IO; +with Ada.Numerics.Generic_Elementary_Functions; +with Extended_Real; +with Extended_Real.Elementary_Functions; +with Extended_Real.IO; + +procedure e_real_demo_1 is + + type Real is digits 15; + + package mth is new Ada.Numerics.Generic_Elementary_Functions(Real); + use mth; + package ext is new Extended_Real (Real); + use ext; + package fnc is new Ext.Elementary_Functions (Sqrt, Log, Exp, Arcsin); + use fnc; + package rio is new Text_IO.Float_IO (Real); + use rio; + package iio is new Text_IO.Integer_IO (E_Integer); + use iio; + package eio is new Ext.IO; + use eio; + + Ten : constant E_Real:= +10.0; + + Last : Natural; + Y : E_Real; + + Blank_Str : constant String(1..1024) := (others => ' '); + Number_String : String(1..1024) := (others => ' '); + Seventy_Digits : constant String(1..74) := + "1.234567890123456789012345678901234567890123456789012345678901234567890E-1"; + + Radix : constant Real := E_Real_Machine_Radix; + + Z1, Z2 : E_Real; + + -- Attempt to make Junk constants of order one with non-zero's all over. + Junk1 : constant E_Real := One / (+3.0); + Junk2 : constant E_Real := (+17.0) / (+19.0); + + Test_Vector_Seed : Real; + + No_Decimal_Digits : constant := Desired_Decimal_Digit_Precision; + + ----------- + -- Pause -- + ----------- + + procedure Pause (s0,s1,s2,s3,s4,s5,s6,s7,s8,s9 : string := "") is + Continue : Character := ' '; + begin + new_line; + if S0 /= "" then put_line (S0); end if; + if S1 /= "" then put_line (S1); end if; + if S2 /= "" then put_line (S2); end if; + if S3 /= "" then put_line (S3); end if; + if S4 /= "" then put_line (S4); end if; + if S5 /= "" then put_line (S5); end if; + if S6 /= "" then put_line (S6); end if; + if S7 /= "" then put_line (S7); end if; + if S8 /= "" then put_line (S8); end if; + if S9 /= "" then put_line (S9); end if; + new_line; + begin + put ("Type a character to continue: "); + get_immediate (Continue); + exception + when others => null; + end; + end Pause; + + ---------------------------------- + -- Print_Extended_Real_Settings -- + ---------------------------------- + + procedure Print_Extended_Real_Settings + is + Bits_In_Radix : constant := Desired_No_Of_Bits_In_Radix; + begin + new_line(1); + put (" Desired_Decimal_Digit_Precision ="); + put (Integer'Image(Desired_Decimal_Digit_Precision)); + new_line(1); + new_line(1); + put ("Number of decimal digits of precision requested: "); + put (Integer'Image(Desired_Decimal_Digit_Precision)); + new_line(1); + put ("Number of digits in use (including 2 guard digits): "); + put (e_Integer'Image(e_Real_Machine_Mantissa)); + new_line(1); + put ("These digits are not decimal; they have Radix: 2**("); + put (e_Integer'Image(Bits_In_Radix)); put(")"); + new_line(1); + put ("In other words, each of these digits is in range: 0 .. 2**("); + put (e_Integer'Image(Bits_In_Radix)); put(")"); put (" - 1."); + new_line(1); + put ("Number of decimal digits per actual digit is approx: 9"); + new_line(2); + put("Guard digits (digits of extra precision) are appended to the end of"); + new_line(1); + put("each number. There are always 2 guard digits. This adds up to 18"); + new_line(1); + put("decimal digits of extra precision. The arithmetic operators, (""*"","); + new_line(1); + put("""/"", ""+"" etc) usually produce results that are correct to all"); + new_line(1); + put("digits except the final (guard) digit."); + new_line(2); + put("If a number is correct to all digits except the final (guard) digit,"); + new_line(1); + put("expect errors of the order:"); + new_line(2); + put(e_Real_Image (e_Real_Model_Epsilon / (One+One)**Bits_In_Radix, aft => 10)); + new_line(2); + put("If you lose 2 digits of accuracy (i.e. both guard digits) instead"); + new_line(1); + put("of 1 (as in the above case) then you lose another 9 decimal digits"); + new_line(1); + put("of accuracy. In this case expect errors of the order:"); + new_line(2); + put(e_Real_Image (e_Real_Model_Epsilon, aft => 10)); + new_line(2); + put("In most large floating pt. calculations you lose both guard digits"); + new_line(1); + put("at the minimum. The best precision you can expect is given by"); + new_line(1); + put("the 2nd number above."); + new_line(1); + + if e_Real_Machine_Mantissa > 7 then + new_line(2); + put ("PRESENTLY USING:"); + put (e_Integer'Image(e_Real_Machine_Mantissa)); + put (" DIGITS."); + new_line(1); + put ("Parts of this demo don't work great with more than 7 digits."); + end if; + + Pause; + + end Print_Extended_Real_Settings; + + + procedure Test_Rem (Test_Vector_Seed : Real) is + Difference, Max_Err : E_Real; + Z1, Z2 : E_Real := Zero; + Z4 : E_Real; + No_of_Trials : constant Integer := 20_000; + begin + + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Junk1; + Z2 := Z2 + (+Test_Vector_Seed) * Junk2; + + -- try to reproduce Z1: + + Z4 := Remainder (Z1, Z2) + Z2 * Unbiased_Rounding (Z1 / Z2); + -- Z4 should equal Z1. + + Difference := Abs (One - Z1 / Z4); + + --put (Make_Real (Z3 / Z2)); put (Make_Real (Z4 / Z2)); New_Line; + -- These quantities should always be in range [-0.5..0.5] + + if Difference > Max_Err then + Max_Err := Difference; + end if; + + end loop; + + put ("Result = "); + put (e_Real_Image (Max_Err)); + new_line; + + end Test_Rem; + + ----------------------------- + -- Test_Digit_Mult_and_Div -- + ----------------------------- + + procedure Test_Digit_Mult_and_Div + (Test_Vector_Seed : Real) + is + Difference, Max_Err : E_Real; -- init to 0 essential + Delta_Digit : constant Real := 50_000.0; + Real_Digit : Real := Radix + Delta_Digit - 1.0; + Digit1 : e_Digit; + Z1, Z2 : E_Real := Zero; + No_of_Trials : constant Integer := 20_000; + begin + Z1 := (+Test_Vector_Seed) * Junk1; + + for I in 1 .. No_of_Trials loop + + Real_Digit := Real_Digit - Delta_Digit; + Digit1 := Make_e_Digit (Real_Digit); + + Z2 := Digit1 * (Z1 / Digit1); + Difference := Abs (One - Z2 / Z1); + + if Difference > Max_Err then + Max_Err := Difference; + end if; + + end loop; + + put ("Result = "); + --put (e_Real_Image (Max_Err); + put (e_Real_Image (Max_Err, Aft => Integer'Min (No_Decimal_Digits, 50))); + new_line; + + end Test_Digit_Mult_and_Div; + + ----------------------- + -- Test_Mult_and_Add -- + ----------------------- + + -- uses Newton's method to calculate 1/(X). Square and invert to + -- compare with X; square and multiply with with X to compare w. 1.0. + + procedure Test_Mult_and_Add + (Test_Vector_Seed : Real) + is + + Difference, Max_Err : E_Real; + No_of_Trials : constant Integer := 20_000; + + function Inverse (X : E_Real) return E_Real is + X_isqr : E_Real; + Iterations : constant Integer := 24; + X_start : constant Real := 1.0 / Make_Real(X); + begin + X_isqr := Make_Extended (X_Start); + for I in 1..Iterations loop + X_isqr := X_isqr + (One - X * X_isqr) * X_isqr; + end loop; + return X_isqr; + end Inverse; + + begin + Z1 := Zero; + + for I in 1..No_of_Trials loop + Z1 := Z1 + (+Test_Vector_Seed) * Junk1; + + Z2 := Inverse (Z1); + Difference := Abs (One - Z1 * Z2); + + if Difference > Max_Err then + Max_Err := Difference; + end if; + + end loop; + + put ("Result = "); + put (e_Real_Image (Max_Err)); + new_line; + + end Test_Mult_and_Add; + + ----------------------- + -- Test_Mult_and_Div -- + ----------------------- + + procedure Test_Mult_And_Div + (Test_Vector_Seed : Real) + is + Difference, Max_Err : E_Real; + No_of_Trials : constant Integer := 20_000; + begin + for I in 1 .. No_of_Trials loop + Z1 := Z1 + Junk2 * (+Test_Vector_Seed); + Z2 := (Z1 * (One / Z1)); + --Z2 := (Z1 / Z1); + Difference := Abs (One - Z2); + + if Difference > Max_Err then + Max_Err := Difference; + end if; + + end loop; + + put ("Result = "); + put (e_Real_Image (Max_Err)); + new_line; + end Test_Mult_And_Div; + + + ------------------------ + -- Test_Make_Extended -- + ------------------------ + + procedure Test_Make_Extended (Test_Vector_Seed : Real) is + Max : Real; + Difference : Real; + Junk1, Junk2 : Real; + No_of_Trials : constant Integer := 20_000; + begin + Junk1 := 0.0; + Max := 0.0; + for I in 1 .. No_of_Trials loop + Junk1 := Junk1 + Test_Vector_Seed; + Junk2 := Make_Real (Make_Extended (Junk1)); + Difference := Junk2 - Junk1; + IF Abs(Difference) > Max THEN + Max := Difference; + END IF; + end loop; + put ("Max error in (X - Make_Real (Make_Extended(X))) = "); + put (Max); + new_line; + end Test_Make_Extended; + + +begin + + Print_Extended_Real_Settings; + + new_line(2); + put ("Next step: consider the following 70 digit number:"); + new_line(2); + put (" " & Seventy_Digits); + + new_line(1); + pause ( + "The 70 digit number printed above will be translated to binary,", + "(e_Real) then translated back to text and printed just below."); + + E_Real_Val (Seventy_Digits, Y, Last); + new_line; + put (E_Real_Image (Y)); + + new_line(1); + pause ( + "The 1st number at the top is correctly reproduced (usually) a few digits", + "beyond the advertized precision. Beyond that a few more wrong digits", + "are retained. The point at which the correct digits stop and the wrong", + "start varies, so it is usually best to keep them all for best accuracy." ); + new_line(1); + + + --pause ("You can round away a guard digit by calling function Machine(X):"); + --new_line; + --put (E_Real_Image (Machine (Y))); + --new_line(1); + + new_line(1); + new_line(1); + pause ( + "Usually you don't want to print all these digits, so use for example,", + "E_Real_Image (X, Aft => 15) to print 15 digits after the decimal.", + "This is not rounded to 15 digits. It just truncates the string.", + "Also, the min value for Aft is 12. It will always print at least 12." + ); + new_line(1); + E_Real_Val (Seventy_Digits, Y, Last); + new_line; + put (E_Real_Image (Y, Aft => 15)); + new_line(1); + + + --procedure E_Real_Val (X : in String; + -- Result: out E_Real; + -- Last : out Natural); + + Pause ("More of the same."); + + new_line(1); + Number_String := (others => ' '); + Number_String(1..50) := "12345678901234567890123456789012345678901234567890"; + new_line; put (" "); put (Number_String(1..60)); + new_line; put (" Translating this string to binary and back to text, I get:"); + E_Real_Val (Number_String, Y, Last); + new_line; put (" " & E_Real_Image (Y)); + + new_line(1); + Number_String := (others => ' '); + Number_String(1..52) := " 12345678901234567890123456789012345678901234567890"; + new_line; put (" "); put (Number_String(1..60)); + new_line; put (" Translating this string to binary and back to text, I get:"); + E_Real_Val (Number_String, Y, Last); + new_line; put (" " & E_Real_Image (Y)); + + new_line(1); + Number_String := (others => ' '); + Number_String(1..53) := "12345678901234567890123456789012345678901234567890 "; + new_line; put (" "); put (Number_String(1..60)); + new_line; put (" Translating this string to binary and back to text, I get:"); + E_Real_Val (Number_String, Y, Last); + new_line; put (" " & E_Real_Image (Y)); + + new_line(1); + Number_String := (others => ' '); + Number_String(1..54) := "12345678901234567890123456789012345678901234567890. "; + new_line; put (" "); put (Number_String(1..60)); + new_line; put (" Translating this string to binary and back to text, I get:"); + E_Real_Val (Number_String, Y, Last); + new_line; put (" " & E_Real_Image (Y)); + + new_line(1); + Number_String := (others => ' '); + Number_String(1..54) := "123456789012345678901234.56789012345678901234567890 "; + new_line; put (" "); put (Number_String(1..60)); + new_line; put (" Translating this string to binary and back to text, I get:"); + E_Real_Val (Number_String, Y, Last); + new_line; put (" " & E_Real_Image (Y)); + + new_line(1); + Number_String := (others => ' '); + Number_String(1..56) := " .12345678901234567890123456789012345678901234567890 "; + new_line; put (" "); put (Number_String(1..60)); + new_line; put (" Translating this string to binary and back to text, I get:"); + E_Real_Val (Number_String, Y, Last); + new_line; put (" " & E_Real_Image (Y)); + + new_line(1); + Number_String := (others => ' '); + Number_String(1..59) := "00000.12345678901234567890123456789012345678901234567890 "; + new_line; put (" "); put (Number_String(1..60)); + new_line; put (" Translating this string to binary and back to text, I get:"); + E_Real_Val (Number_String, Y, Last); + new_line; put (" " & E_Real_Image (Y)); + + new_line(1); + Number_String := (others => ' '); + Number_String(1..55) := "+.12345678901234567890123456789012345678901234567890 "; + new_line; put (" "); put (Number_String(1..60)); + new_line; put (" Translating this string to binary and back to text, I get:"); + E_Real_Val (Number_String, Y, Last); + new_line; put (" " & E_Real_Image (Y)); + + new_line(1); + Number_String := (others => ' '); + Number_String(1..55) := "-.12345678901234567890123456789012345678901234567890 "; + new_line; put (" "); put (Number_String(1..60)); + new_line; put (" Translating this string to binary and back to text, I get:"); + E_Real_Val (Number_String, Y, Last); + new_line; put (" " & E_Real_Image (Y)); + + new_line(1); + Number_String := (others => ' '); + Number_String(1..58) := "-.12345678901234567890123456789012345678901234567890e-0111"; + new_line; put (" "); put (Number_String(1..60)); + new_line; put (" Translating this string to binary and back to text, I get:"); + E_Real_Val (Number_String, Y, Last); + new_line; put (" " & E_Real_Image (Y)); + + new_line(1); + Number_String := (others => ' '); + Number_String(1..57) := "-.12345678901234567890123456789012345678901234567890E0111"; + new_line; put (" "); put (Number_String(1..60)); + new_line; put (" Translating this string to binary and back to text, I get:"); + E_Real_Val (Number_String, Y, Last); + new_line; put (" " & E_Real_Image (Y)); + + new_line(1); + Number_String := (others => ' '); + Number_String(1..59) := ".1234567890123456789012345678901234567890123456789e99999999"; + new_line; put (" "); put (Number_String(1..60)); + new_line; put (" Translating this string to binary and back to text, I get:"); + E_Real_Val (Number_String, Y, Last); + new_line; put (" " & E_Real_Image (Y)); + + new_line; + pause ("Print a few numbers:"); + + new_line; put ("One:"); + new_line; put (E_Real_Image (One)); + new_line; put ("10**400:"); + new_line; put (E_Real_Image (Ten**400)); + new_line; put ("10**-400:"); + new_line; put (E_Real_Image (Ten**(-400))); + new_line; put ("10**-2899:"); + new_line; put (E_Real_Image (Zero + Ten**(-2899))); + new_line; put ("E_Real_Model_Epsilon:"); + new_line; put (E_Real_Image (E_Real_Model_Epsilon)); + new_line; put ("E_Real_Model_Epsilon + 10**-600:"); + new_line; put (E_Real_Image (E_Real_Model_Epsilon + Ten**(-600))); + new_line; put ("1 + E_Real_Model_Epsilon:"); + new_line; put (E_Real_Image (E_Real_Model_Epsilon + One)); + new_line; put ("1 + E_Real_Machine_Epsilon:"); + new_line; put (E_Real_Image (E_Real_Machine_Epsilon + One)); + new_line; put ("0.0:"); + new_line; put (E_Real_Image (Zero)); + new_line; + + + new_line(1); + pause (" The following tests the 2 functions: Make_Extended and Make_Real.", + " These 2 routines translate between ordinary 15 decimal digit floats", + " (Real) and extended precision floats (e_Real). Numbers in the ordinary", + " floating point type (Real) are transformed into extended precision floats", + " (e_Real) by calls to Make_Extended. e_Real's are transformed back to", + " Real's (with a loss of precision) by calls to Make_Real. Below we print:", + " ", + " Result := X - Make_Real (Make_Extended(X))", + " ", + " 2_000 values of X are used each test, and max Abs(Result) is printed."); + new_line; + + Test_Vector_Seed := 1.2345678912345678E-4; + for I in 1..10 loop + Test_Vector_Seed := Test_Vector_Seed + 1.2345678912345678E-4; + Test_Make_Extended (Test_Vector_Seed); + end loop; + Test_Vector_Seed := 1.2345678912345678E+4; + for I in 1..10 loop + Test_Vector_Seed := Test_Vector_Seed * 1.2345678912345678E+4; + Test_Make_Extended (Test_Vector_Seed); + end loop; + + Test_Vector_Seed := 1.234567891234567891E+31; + Test_Make_Extended (Test_Vector_Seed); + + Test_Vector_Seed := 1.234567891234567891E+8; + Test_Make_Extended (Test_Vector_Seed); + + Test_Vector_Seed := 1.234567891234567891E-8; + Test_Make_Extended (Test_Vector_Seed); + + Test_Vector_Seed := 1.234567891234567891E-31; + Test_Make_Extended (Test_Vector_Seed); + + + new_line(1); + pause (" Some tests of ""+"" and ""*"". The following is calculated: ", + " ", + " Result := One - X * Reciprocal (X)", + " ", + " where Reciprocal (X) = 1/X is obtained from Newton's method.", + " 2_000 values of X are used each test, and max Abs(Result) is printed."); + new_line; + + Test_Vector_Seed := 1.2345678912345678E+31; + Test_Mult_and_Add (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E+8; + Test_Mult_and_Add (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-8; + Test_Mult_and_Add (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-31; + Test_Mult_and_Add (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-14; + for I in 1..10 loop + Test_Vector_Seed := Test_Vector_Seed + 1.2345678912345678E-4; + Test_Mult_and_Add (Test_Vector_Seed); + end loop; + Test_Vector_Seed := 1.2345678912345678E+14; + for I in 1..10 loop + Test_Vector_Seed := Test_Vector_Seed * 1.2345678912345678E+4; + Test_Mult_and_Add (Test_Vector_Seed); + end loop; + + pause ( + "Notice that the error is usually much smaller than you might expect", + "from the number of decimal digits requested. For simple operations", + "like ""*"", ""+"", and ""/"" the first of the 2 guard digits is usually,", + "calculated correctly, (and that's an extra 9 decimal digits of precision.", + "In fact 2 guard digits (18 decimal digits) are always used. The 2nd", + "guard digits is for safety, and so that Sqrt's etc are of similar accuracy", + "to operations like ""*""."); + new_line(1); + + + new_line(1); + pause (" Some tests of ""*"" and ""/"". The following is calculated:", + " ", + " Result := One - X * (One / X)", + " ", + " 2_000 values of X are used each test, and max Abs(Result) is printed."); + new_line; + + Test_Vector_Seed := 1.2345678912345678E-4; + for I in 1..10 loop + Test_Vector_Seed := Test_Vector_Seed + 1.2345678912345678E-4; + Test_Mult_and_Div (Test_Vector_Seed); + end loop; + + Test_Vector_Seed := 1.2345678912345678E+4; + for I in 1..10 loop + Test_Vector_Seed := Test_Vector_Seed * 1.2345678912345678E+4; + Test_Mult_and_Div (Test_Vector_Seed); + end loop; + + Test_Vector_Seed := 1.2345678912345678E+31; + Test_Mult_And_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E+8; + Test_Mult_And_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-8; + Test_Mult_And_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-31; + Test_Mult_And_Div (Test_Vector_Seed); + + + new_line; + pause (" Some tests of operations between e_Digit types and e_Real types.", + " e_Digits are special e_Reals that are small and efficient.", + " X is an e_Real and D is an e_Digit. The following is calculated:", + " ", + " Result := One - (D * (X / D)) / X)", + " ", + " 2_000 values of D are used each test, and max Abs(Result) is printed."); + new_line(2); + + Test_Vector_Seed := 1.2345678912345678E+61; + Test_Digit_Mult_and_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E+28; + Test_Digit_Mult_and_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E+27; + Test_Digit_Mult_and_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E+26; + Test_Digit_Mult_and_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E+4; + for I in 1..10 loop + Test_Vector_Seed := Test_Vector_Seed * 1.2345678912345678E+4; + Test_Digit_Mult_and_Div (Test_Vector_Seed); + end loop; + + Test_Vector_Seed := 1.2345678912345678E-4; + Test_Digit_Mult_and_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-7; + Test_Digit_Mult_and_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-18; + Test_Digit_Mult_and_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-28; + Test_Digit_Mult_and_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-31; + Test_Digit_Mult_and_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-91; + Test_Digit_Mult_and_Div (Test_Vector_Seed); + + + new_line; + pause (" Some simple tests of the exponentiation operator ""**"":"); + new_line; + + Z1 := (Make_Extended (7.2345)) ** (-277); + put (Make_Real(Z1)); put(" should be: "); put (7.2345**(-277)); new_line; + Z1 := (Make_Extended (7.2345)) ** 277; + put (Make_Real(Z1)); put(" should be: "); put (7.2345**277); new_line; + Z1 := (Make_Extended (1.2345)) ** 177; + put (Make_Real(Z1)); put(" should be: "); put (1.2345**177); new_line; + Z1 := (One + One + One) ** 97; + put (Make_Real(Z1)); put(" should be: "); put (3.0**97); new_line; + Z1 := (One + One) ** 67; + put (Make_Real(Z1)); put(" should be: "); put (2.0**67); new_line; + Z1 := (One + One) ** (-67); + put (Make_Real(Z1)); put(" should be: "); put (2.0**(-67)); new_line; + Z1 := (One + One) ** (0); + put (Make_Real(Z1)); put(" should be: "); put (2.0**(0)); new_line(1); + + new_line; + pause (" A test of Round_Away_Smallest_Guard_Digit and e_Real_Machine_Epsilon:"); + new_line; + + Z1 := (One + e_Real_Machine_Epsilon) - One; + Z1 := Z1 / e_Real_Machine_Epsilon; + put(" should be 1.0:"); put (Make_Real(Z1)); new_line; + + Z1 := Machine (One + e_Real_Machine_Epsilon) - One; + put(" should be 0.0:"); put (Make_Real(Z1)); new_line; + + Z2 := Make_Extended(0.99999999999999); + Z1 := Machine (Z2 + e_Real_Machine_Epsilon) - Z2; + Z1 := Z1 / e_Real_Machine_Epsilon; + put(" should be 1.0:"); put (Make_Real(Z1)); new_line; + + loop + begin + new_line(2); put ("Enter a number: "); + Number_String := Blank_Str; + --get_line (Number_String, LengthStr); + get_Line (Number_String, Last); + E_Real_Val (Number_String, Y, Last); + new_line; put ("I read this as: "); + new_line; put (E_Real_Image (Y)); + exit; + exception + when others => + put_line("Some error. Try again."); + end; + end loop; + + +end; diff --git a/arbitrary/e_real_io_tst_2.adb b/arbitrary/e_real_io_tst_2.adb new file mode 100644 index 0000000..af05125 --- /dev/null +++ b/arbitrary/e_real_io_tst_2.adb @@ -0,0 +1,200 @@ + +-- Simple test of "*", "/", "+", and "-" using random arguments. +-- Doesn't test endpoints very well. + +with Extended_Real; +with Extended_Real.e_Rand; +with Extended_Real.IO; +with Text_IO; use Text_IO; + +procedure e_real_io_tst_2 is + + type Real is digits 15; + + package ext is new Extended_Real (Real); + use ext; + package eio is new ext.IO; + use eio; + package rnd is new ext.e_Rand; + use rnd; + package rio is new Text_IO.Float_IO(Real); + use rio; + package iio is new Text_IO.Integer_IO(e_Integer); + use iio; + + X, Z1 : e_Real; + Last : Integer; + + Max_Error, Err : e_Real; + + Exp_First, Exp_Last : e_Integer; + + type Integer32 is range -2**31+1..2**31-1; + + Mult_Limit : constant Integer32 := 8_000_000; -- usually > N * 10^6 + -- Number of iterations of multiplication div test. The +/- tests + -- are 8 times this numbers. + + Some_Seed : constant Integer := 7251738; + -- Start at different rand stream. + + -------------------- + -- Get_Random_Exp -- + -------------------- + + -- Make Random e_Integer in range Exp_First..Exp_Last + -- + function Random_Exp + (Exp_First : in e_Integer; + Exp_Last : in e_Integer) + return e_Integer + is + Exp : e_Integer; + X : Real; + begin + + -- returns random Real in range [0, 1): + --X := Real (rnd.Next_Random_Int mod 2**12) * 2.0**(-12); + + -- returns random Real in range (0, 1]: + X := Real (1 + rnd.Next_Random_Int mod 2**24) * 2.0**(-24); + + Exp := Exp_First + e_Integer (X * (Real (Exp_Last) - Real (Exp_First))); + + return Exp; + + end; + + ------------- + -- Get_999 -- + ------------- + + -- Make an e_real full of digits that have the max value that any + -- any digit can have. + -- + function Get_999 return e_Real is + + Max_Digit : constant e_Digit := Make_e_Digit (e_Real_Machine_Radix-1.0); + + Next_Digit : e_Digit; + Delta_Exp : e_Integer; + Result : e_Real; -- Init to 0 important + + begin + + for I in 0 .. e_Real_Machine_Mantissa-1 loop + + Delta_Exp := -I - 1; + Next_Digit := Scaling (Max_Digit, Delta_Exp); + Result := Next_Digit + Result; + + end loop; + + return Result; + + end; + + ---------------------------------- + -- Print_Extended_Real_Settings -- + ---------------------------------- + + procedure Print_Extended_Real_Settings + is + Bits_In_Radix : constant := Desired_No_Of_Bits_In_Radix; + begin + new_line(1); + put (" Desired_Decimal_Digit_Precision ="); + put (Integer'Image(Desired_Decimal_Digit_Precision)); + new_line(1); + new_line(1); + put ("Number of decimal digits of precision requested: "); + put (Integer'Image(Desired_Decimal_Digit_Precision)); + new_line(1); + put ("Number of digits in use (including 2 guard digits): "); + put (e_Integer'Image(e_Real_Machine_Mantissa)); + new_line(1); + put ("These digits are not decimal; they have Radix: 2**("); + put (e_Integer'Image(Bits_In_Radix)); put(")"); + new_line(1); + put ("In other words, each of these digits is in range: 0 .. 2**("); + put (e_Integer'Image(Bits_In_Radix)); put(")"); put (" - 1."); + new_line(1); + put ("Number of decimal digits per actual digit is approx: 9"); + new_line(2); + put("Guard digits (digits of extra precision) are appended to the end of"); + new_line(1); + put("each number. There are always 2 guard digits. This adds up to 18"); + new_line(1); + put("decimal digits of extra precision. The arithmetic operators, (""*"","); + new_line(1); + put("""/"", ""+"" etc) usually produce results that are correct to all"); + new_line(1); + put("digits except the final (guard) digit."); + new_line(2); + put("If a number is correct to all digits except the final (guard) digit,"); + new_line(1); + put("expect errors of the order:"); + new_line(2); + put(e_Real_Image (e_Real_Model_Epsilon / (One+One)**Bits_In_Radix, aft => 10)); + new_line(2); + put("If you lose 2 digits of accuracy (i.e. both guard digits) instead"); + new_line(1); + put("of 1 (as in the above case) then you lose another 9 decimal digits"); + new_line(1); + put("of accuracy. In this case expect errors of the order:"); + new_line(2); + put(e_Real_Image (e_Real_Model_Epsilon, aft => 10)); + new_line(2); + put("The above number, by the way, is e_Real_Model_Epsilon."); + new_line(3); + + end Print_Extended_Real_Settings; + +begin + + rnd.Reset (Some_Seed); + + Print_Extended_Real_Settings; + + put ("The test translates binary to text, then back to binary, and prints"); + new_line(1); + put ("the difference: prints X - e_Real_Val (e_Real_Image (X)) over randomly"); + new_line(1); + put ("generated X's. 8_000_000 X's are used, and the max error is printed."); + new_line(1); + put ("The testing goes on for as many hours as you let it:"); + new_line(2); + + for repetitions in 1 .. 1_000_000 loop + + + Exp_First := e_Real_Machine_Emin+1; -- cause X has exp of -1 intitially. + Exp_Last := e_Real_Machine_Emax-1; + + -- The random num we make by scaling with Exp in [Exp_First, Exp_Last] + -- The function Random returns a rand in the range [0.0 .. 1.0). + -- The smallest exp of Random is Max_Available_Digits - 1. + + Max_Error := Zero; + + for I in Integer32 range 1 .. Mult_Limit loop + + X := Random * Get_999; + X := Scaling (X, Random_Exp (Exp_First, Exp_Last)); + + e_Real_Val (e_Real_Image (X), Z1, Last); + Err := (Z1/X - One); + + if Err > Max_Error then + Max_Error := Err; + end if; + + end loop; + + new_line; + put ("Max Error ="); + put (e_Real_Image (Max_Error)); + + end loop; + +end; diff --git a/arbitrary/e_real_tst_1.adb b/arbitrary/e_real_tst_1.adb new file mode 100644 index 0000000..fd9bb33 --- /dev/null +++ b/arbitrary/e_real_tst_1.adb @@ -0,0 +1,435 @@ +-- +-- Simple tests of Extended_Real. Search for catastrophic +-- errors or portability problems. +-- +with Extended_Real; +with text_io; use text_io; +procedure e_real_tst_1 is + + type Real is digits 15; + + package Extend is new Extended_Real (Real); + use Extend; + package rio is new Text_IO.float_io(Real); + use rio; + package iio is new Text_IO.Integer_io(E_Integer); + use iio; + + Radix : constant Real := E_Real_Machine_Radix; + Radix_Minus_1 : constant Real := Radix - 1.0; + Radix_Minus_1_Squ : constant Real := Radix_Minus_1 * Radix_Minus_1; + + Z1, Z2, Z3, Z4 : E_Real; + + -- Attempt to make Junk constants of order one with non-zero's all over. + Junk1 : constant E_Real := One / (+3.0); + Junk2 : constant E_Real := (+17.0) / (+19.0); + + Test_Vector_Seed : Real; + + --********************************************************************** + procedure Test_Rem (Test_Vector_Seed : Real) is + Dig, Digit : Real; + Min, DeltaExp : E_Integer; + Difference1 : E_Real; + Z1, Z2 : E_Real := Zero; + Z4 : E_Real; + begin + Min := E_Integer'Last; + Dig := 0.0; + + new_line; + for I in 1..100 loop + Z1 := Z1 + Junk1; + Z2 := Z2 + (+Test_Vector_Seed) * Junk2; + + -- try to reproduce Z1: + + Z4 := Remainder (Z1, Z2) + Z2 * Unbiased_Rounding (Z1 / Z2); + -- Z4 should equal Z1. + + Difference1 := (Z4 - Z1) / Z4; + + --put (Make_Real (Remainder (Z1, Z2) / Z2)); new_line; + -- should always be in range [-0.5..0.5] + --put (Make_Real ((Z2*Unbiased_Rounding (Z1 / Z2)) / Z1)); New_Line; + + IF Are_Not_Equal (Difference1, Zero) THEN + + DeltaExp := Exponent(Difference1); + Digit := Make_Real (Fraction (Leading_Part (Difference1,1))); + IF Abs(DeltaExp) < Abs(Min) THEN + Min := DeltaExp; + Dig := Digit; + END IF; + + END IF; + + end loop; + + IF Min = E_Integer'Last THEN + Dig := 0.0; + Min := -E_Integer'Last + 1; + END IF; + + -- The above uses the normalized Exponent, which assumes a fractional 1st + -- Digit. Below we (effectively) multiply the first non-zero digit by Radix, + -- and subtract 1 from the "power of Radix" exponent. + put ("Approximate error ="); + put (Dig*Radix); put (" * Radix**("); put (E_Integer'Image(Min-1)); put(")"); + new_line; + + end Test_Rem; + + + procedure Test_Digit_Mult_and_Div (Test_Vector_Seed : Real) is + Dig, Digit : Real; + Min, DeltaExp : E_Integer; + Difference1 : E_Real; + Real_Digit : Real := Radix + 1999.0; + Digit1 : E_Digit; + Z1, Z2 : E_Real := Zero; + begin + Min := E_Integer'Last; + Z1 := (+Test_Vector_Seed) * Junk1; + + for I in 1..1000 loop + + Real_Digit := Real_Digit - 2000.0; + Digit1 := Make_E_Digit (Real_Digit); + + Z2 := Digit1 * (Z1 / Digit1); + Difference1 := (Z1 - Z2) / Z1; + + IF Are_Not_Equal (Difference1, Zero) THEN + + DeltaExp := Exponent(Difference1); + Digit := Make_Real (Fraction (Leading_Part (Difference1,1))); + IF Abs(DeltaExp) < Abs(Min) THEN + Min := DeltaExp; + Dig := Digit; + END IF; + + END IF; + + end loop; + + IF Min = E_Integer'Last THEN + Dig := 0.0; + Min := -E_Integer'Last + 1; + END IF; + + -- The above uses the normalized Exponent, which assumes a fractional 1st + -- Digit. Below we (effectively) multiply the first non-zero digit by Radix, + -- and subtract 1 from the "power of Radix" exponent. + put ("Approximate error ="); + put (Dig*Radix); put (" * Radix**("); put (E_Integer'Image(Min-1)); put(")"); + new_line; + + end Test_Digit_Mult_and_Div; + + ----------------------- + -- Test_Mult_and_Add -- + ----------------------- + + -- uses Newton's method to calculate 1/(X). Square and invert to + -- compare with X; square and multiply with with X to compare w. 1.0. + + procedure Test_Mult_and_Add (Test_Vector_Seed : Real) is + Dig, Digit : Real; + Min, DeltaExp : E_Integer; + Difference1 : E_Real; + + -- + function Reciprocal (X : E_Real) return E_Real is + X_isqr : E_Real; + X_start : constant Real := 1.0 / Make_Real(X); + Iterations : constant Integer := 9; + begin + X_isqr := Make_Extended (X_Start); + for I in 1..Iterations loop + X_isqr := X_isqr + (One - X * X_isqr) * X_isqr; + end loop; + return X_isqr; + end Reciprocal; + + begin + Min := E_Integer'Last; + Dig := 0.0; + Z1 := Zero; + + for I in 1..20 loop + Z1 := Z1 + (+Test_Vector_Seed) * Junk1; + + Z2 := Reciprocal (Z1); + Difference1 := One - Z1 * Z2; + + IF Are_Not_Equal (Difference1, Zero) THEN + + DeltaExp := Exponent(Difference1); + Digit := Make_Real (Fraction (Leading_Part (Difference1,1))); + IF Abs(DeltaExp) < Abs(Min) THEN + Min := DeltaExp; + Dig := Digit; + END IF; + + END IF; + + end loop; + + IF Min = E_Integer'Last THEN + Dig := 0.0; + Min := -E_Integer'Last + 1; + END IF; + + put ("Approximate error ="); + put (Dig*Radix); put (" * Radix**("); put (E_Integer'Image(Min-1)); put(")"); + new_line; + + end Test_Mult_and_Add; + + --********************************************************************** + procedure Test_Mult_And_Div (Test_Vector_Seed : Real) is + Dig, Digit : Real; + Min, DeltaExp : E_Integer; + Difference : E_Real; + begin + Min := E_Integer'Last; + Dig := 0.0; + for I in 1 .. 10_000 loop + Z1 := Z1 + Junk2 * (+Test_Vector_Seed); + Z2 := (Z1 * (One / Z1)); + Difference := One - Z2; + DeltaExp := Exponent(Difference); + Digit := Make_Real (Fraction (Leading_Part (Difference,1))); + IF Abs(DeltaExp) < Abs(Min) THEN + Min := DeltaExp; + Dig := Digit; + END IF; + end loop; + + put ("Approximate error ="); + put (Dig*Radix); put (" * Radix**("); put (E_Integer'Image(Min-1)); put(")"); + new_line; + end Test_Mult_And_Div; + + --********************************************************************** + procedure Test_Make_Extended (Test_Vector_Seed : Real) is + Max : Real; + Difference : Real; + Junk1, Junk2 : Real; + begin + Junk1 := 0.0; + Max := 0.0; + for I in 1..1000 loop + Junk1 := Junk1 + Test_Vector_Seed; + Junk2 := Make_Real (Make_Extended (Junk1)); + Difference := Junk2 - Junk1; + IF Abs(Difference) > Max THEN + Max := Difference; + END IF; + end loop; + put ("Max error in (X - Make_Real (Make_Extended(X))) = "); + put (Max); + new_line; + end Test_Make_Extended; + + --********************************************************************** +begin + + -- Simple test of "*" and "-": + Z1 := Make_Extended (Radix_Minus_1); + Z2 := Z1 * Z1; + Z3 := Z1 * Z2; + Z4 := Z2 - Make_Extended (Radix_Minus_1_Squ); + put(" Simple test of * and - : "); new_line; + put(Make_Real(Z1)); put(" This should be Radix - 1."); new_line; + put(Make_Real(Z2)); put(" This should be Radix - 1 squared."); new_line; + put(Make_Real(Z3)); put(" This should be Radix - 1 cubed."); new_line; + put(Make_Real(Z4)); put(" This should be near zero, but not quite."); new_line; + new_line; + + + new_line; + put(" Simple test of Remainder:"); + new_line; + + Test_Vector_Seed := 1.2345678912345678E+77; + Test_Rem (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E+14; + Test_Rem (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E+8; + Test_Rem (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-8; + Test_Rem (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-14; + Test_Rem (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-35; + Test_Rem (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-77; + Test_Rem (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-123; + Test_Rem (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-171; + Test_Rem (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-201; + Test_Rem (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-231; + Test_Rem (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-271; + Test_Rem (Test_Vector_Seed); + + + new_line; + put(" Simple test of mixed digit, extended operations:"); + new_line; + + Test_Vector_Seed := 1.2345678912345678E+8; + Test_Digit_Mult_and_Div (Test_Vector_Seed); + + new_line; + put(" Simple test of + and *:"); + new_line; + + Test_Vector_Seed := 1.2345678912345678E+131; + Test_Mult_and_Add (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E+81; + Test_Mult_and_Add (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E+31; + Test_Mult_and_Add (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E+8; + Test_Mult_and_Add (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-8; + Test_Mult_and_Add (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-31; + Test_Mult_and_Add (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-81; + Test_Mult_and_Add (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-131; + Test_Mult_and_Add (Test_Vector_Seed); + + + --********************************************************************** + new_line; put ("Maximum available number of Digits is: "); + put (e_Real_Machine_Mantissa); new_line; + + new_line(2); put ("Some tests of +,*: "); new_line; + Test_Vector_Seed := 1.2345678912345678E-4; + for I in 1..10 loop + Test_Vector_Seed := Test_Vector_Seed + 1.2345678912345678E-4; + Test_Mult_and_Add (Test_Vector_Seed); + end loop; + Test_Vector_Seed := 1.2345678912345678E+4; + for I in 1..10 loop + Test_Vector_Seed := Test_Vector_Seed * 1.2345678912345678E+4; + Test_Mult_and_Add (Test_Vector_Seed); + end loop; + + new_line(2); put ("Some tests of /,*: "); new_line; + Test_Vector_Seed := 1.2345678912345678E-4; + for I in 1..10 loop + Test_Vector_Seed := Test_Vector_Seed + 1.2345678912345678E-4; + Test_Mult_and_Div (Test_Vector_Seed); + end loop; + Test_Vector_Seed := 1.2345678912345678E+4; + for I in 1..10 loop + Test_Vector_Seed := Test_Vector_Seed * 1.2345678912345678E+4; + Test_Mult_and_Div (Test_Vector_Seed); + end loop; + + new_line(2); put ("Some tests of Make_Extended: "); new_line; + Test_Vector_Seed := 1.2345678912345678E-4; + for I in 1..10 loop + Test_Vector_Seed := Test_Vector_Seed + 1.2345678912345678E-4; + Test_Make_Extended (Test_Vector_Seed); + end loop; + Test_Vector_Seed := 1.2345678912345678E+4; + for I in 1..10 loop + Test_Vector_Seed := Test_Vector_Seed * 1.2345678912345678E+4; + Test_Make_Extended (Test_Vector_Seed); + end loop; + + --********************************************************************** + new_line; + put(" Simple test of /:"); + new_line; + + Test_Vector_Seed := 1.2345678912345678E+31; + Test_Mult_And_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E+8; + Test_Mult_And_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-8; + Test_Mult_And_Div (Test_Vector_Seed); + + Test_Vector_Seed := 1.2345678912345678E-31; + Test_Mult_And_Div (Test_Vector_Seed); + + new_line; + put(" Simple test of Make_Extended:"); + new_line; + Test_Vector_Seed := 1.234567891234567891E+31; + Test_Make_Extended (Test_Vector_Seed); + + Test_Vector_Seed := 1.234567891234567891E+8; + Test_Make_Extended (Test_Vector_Seed); + + Test_Vector_Seed := 1.234567891234567891E-8; + Test_Make_Extended (Test_Vector_Seed); + + Test_Vector_Seed := 1.234567891234567891E-31; + Test_Make_Extended (Test_Vector_Seed); + + --********************************************************************** + new_line; + put(" Simple test of exponentiation:"); new_line; + Z1 := (Make_Extended (7.2345)) ** (-277); + put (Make_Real(Z1)); put(" should be: "); put (7.2345**(-277)); new_line; + Z1 := (Make_Extended (7.2345)) ** 277; + put (Make_Real(Z1)); put(" should be: "); put (7.2345**277); new_line; + Z1 := (Make_Extended (1.2345)) ** 177; + put (Make_Real(Z1)); put(" should be: "); put (1.2345**177); new_line; + Z1 := (One + One + One) ** 97; + put (Make_Real(Z1)); put(" should be: "); put (3.0**97); new_line; + Z1 := (One + One) ** 67; + put (Make_Real(Z1)); put(" should be: "); put (2.0**67); new_line; + Z1 := (One + One) ** (-67); + put (Make_Real(Z1)); put(" should be: "); put (2.0**(-67)); new_line; + Z1 := (One + One) ** (0); + put (Make_Real(Z1)); put(" should be: "); put (2.0**(0)); new_line(1); + + new_line; + put(" Quick test of Machine (rounds away smallest guard digit) and Model_Epsilon:"); + new_line; + Z1 := (One + e_Real_Model_Epsilon) - One; + Z1 := Z1 / e_Real_Model_Epsilon; + put(" should be 1.0:"); put (Make_Real(Z1)); new_line; + + Z1 := Machine (One + e_Real_Machine_Epsilon) - One; + put(" should be 0.0:"); put (Make_Real(Z1)); new_line; + + Z2 := Make_Extended(0.99999999999999); + Z1 := Machine (Z2 + e_Real_Model_Epsilon) - Z2; + Z1 := Z1 / e_Real_Model_Epsilon; + put(" should be 1.0:"); put (Make_Real(Z1)); new_line; + +end; diff --git a/arbitrary/e_real_tst_2.adb b/arbitrary/e_real_tst_2.adb new file mode 100644 index 0000000..7e2a471 --- /dev/null +++ b/arbitrary/e_real_tst_2.adb @@ -0,0 +1,459 @@ + +-- Simple test of "*", "/", "+", and "-" using random arguments. +-- Doesn't test endpoints very well. + +with Extended_Real; +with Extended_Real.e_Rand; +with Extended_Real.IO; +with Text_IO; use Text_IO; + +procedure e_real_tst_2 is + + type Real is digits 15; + + package ext is new Extended_Real (Real); + use ext; + package rnd is new ext.e_Rand; + use rnd; + package eio is new ext.IO; + use eio; + package rio is new Text_IO.Float_IO(Real); + use rio; + package iio is new Text_IO.Integer_IO(e_Integer); + use iio; + + X, Y, Z1 : e_Real; + Delta_X, Big_Digits : e_Real; + + Frac_Part : Real; + Exp_Part : e_Integer; + Frac_Error : Real; + Exp_Error : e_Integer; + Exp_First, Exp_Last : e_Integer; + Y_Exp_First, Y_Exp_Last : e_Integer; + Half_Digit : constant e_Digit := Make_e_Digit (0.5); + X_min : constant e_Real + := Scaling (Make_Extended(1.0), e_Real_Machine_Emin); + + type Integer32 is range -2**31+1..2**31-1; + + Mult_Limit : constant Integer32 := 100_000_000; -- usually > N * 10^6 + -- Number of iterations of multiplication div test. The +/- tests + -- are 8 times this numbers. + + Some_Seed : constant Integer := 9795178; + -- Start at different rand stream. + + --------- + -- Min -- + --------- + + function Min (X, Y : e_Integer) return e_Integer is + begin + return e_Integer'Min (X, Y); + end; + + --------- + -- Max -- + --------- + + function Max (X, Y : e_Integer) return e_Integer is + begin + return e_Integer'Max (X, Y); + end; + + -------------------- + -- Get_Random_Exp -- + -------------------- + + -- Make Random e_Integer in range Exp_First..Exp_Last + -- + function Random_Exp + (Exp_First : in e_Integer; + Exp_Last : in e_Integer) + return e_Integer + is + Exp : e_Integer; + X : Real; + begin + + -- returns random Real in range [0, 1): + + X := Real (rnd.Next_Random_Int mod 2**7) * 2.0**(-7); + + Exp := Exp_First + e_Integer (X * (Real (Exp_Last) - Real (Exp_First))); + + return Exp; + + end; + + ------------- + -- Get_999 -- + ------------- + + -- Make an e_real full of digits that have the max value that any + -- any digit can have. + -- + function Get_999 return e_Real is + + Max_Digit : constant e_Digit := Make_e_Digit (e_Real_Machine_Radix-1.0); + Digit_Last : constant e_Integer := e_Real_Machine_Mantissa; + + Next_Digit : e_Digit; + Delta_Exp : e_Integer; + Result : e_Real; -- Init to 0 important + + begin + + for I in e_Integer range 0..Digit_Last loop + + Delta_Exp := -I - 1; + Next_Digit := Scaling (Max_Digit, Delta_Exp); + Result := Next_Digit + Result; + + end loop; + + return Result; + + end; + + -------------------------- + -- Get_Normalized_Delta -- + -------------------------- + + -- Want something like Delta_X / (X + X_Min), but want to avoid + -- extended division and avoid possible exceptions. Here X_min is + -- say about Radix**(e_Real_Machine_Emin-1). + -- RETURNS Abs (Frac_Part). + -- + procedure Get_Normalized_Delta + (X : in e_Real; + Delta_X : in e_Real; + Frac_Part : out Real; + Exp_Part : out e_Integer) + is + X_tmp : e_Real := X; + begin + + IF (Are_Equal (Delta_X, Zero)) THEN + Frac_Part := 0.0; + Exp_Part := e_Integer'First; + return; + END IF; + + -- OK, so Delta_X is not Zero. + -- Find out if X is: + + IF (Are_Equal (X, Zero)) THEN + X_tmp := X_min; + END IF; + + -- OK, so Delta_X and X are not Zero. + -- Now attempt an approximation of Delta_X / (X + X_min): + + Exp_Part := Exponent (Delta_X) - Exponent (X_tmp); + Frac_Part := Abs (Make_Real (Fraction (Delta_X)) + / Make_Real (Fraction (X_tmp))); + + end Get_Normalized_Delta; + + ----------------------- + -- Print_e_Real_data -- + ----------------------- + + -- Want a fast and very approximate translation to decimal, without + -- linking to IO package. + + procedure Print_e_Real_data + (Frac_Part : in Real; + Exp_Part : in e_Integer) + is + Decimal_Digits_in_Mantissa : constant Real := + Real (e_Real_Machine_Mantissa*Desired_No_Of_Bits_In_Radix)/3.322; + Exp_02, Exp_10 : e_Integer; + Frac_Correction : Real; + begin + + -- go from radix**Exp_Part to 2**(30*Exp_10 + Exp_02): + + if Abs Exp_Part >= (e_Integer'Last-1) / 30 then -- Bits_In_Radix<=30 always. + Exp_10 := 0; + Exp_02 := 0; + else + Exp_10 := (Exp_Part * Desired_No_Of_Bits_In_Radix) / 30; + Exp_02 := (Exp_Part * Desired_No_Of_Bits_In_Radix) rem 30; + if Exp_02 < 0 then + Frac_Correction := 0.5 ** (Integer (Abs Exp_02)); + else + Frac_Correction := 2.0 ** (Integer (Abs Exp_02)); + end if; + end if; + + put(Frac_Part * Frac_Correction, aft => 3); + put(" * "); put("10**("); put(e_Integer'Image (Exp_10*9)); put(")"); + + + put(" or: "); + put(Frac_Part, aft => 3); put(" * "); + put("Radix**("); put(e_Integer'Image (Exp_Part)); put(")"); + + exception + when others => + put(" Some printing error here."); new_line; + + end Print_e_Real_data; + + ---------------------------------- + -- Print_Extended_Real_Settings -- + ---------------------------------- + + procedure Print_Extended_Real_Settings + is + Decimal_Digits_in_Mantissa : constant Real := + Real (e_Real_Machine_Mantissa*Desired_No_Of_Bits_In_Radix)/3.322; + begin + new_line(1); + put (" Desired_Decimal_Digit_Precision ="); + put (Integer'Image(Desired_Decimal_Digit_Precision)); + new_line(2); + put ("Currently using "); + put (Integer'Image(Desired_Decimal_Digit_Precision)); + put (" (or more) decimal digits of precision."); + new_line(2); + put ("Number of digits in use (including 2 guard digits):"); + put (e_Integer'Image(e_Real_Machine_Mantissa)); + new_line(1); + put ("The digits are not decimal; they have Radix: 2**("); + put (e_Integer'Image(Desired_No_Of_Bits_In_Radix)); put(")"); + new_line(1); + put ("In other words, each digits is in the range: 0 .. 2**("); + put (e_Integer'Image(Desired_No_Of_Bits_In_Radix)); put(")"); put (" - 1."); + new_line(2); + put("If a number is correct to all digits except the last digit, expect:"); + new_line(2); + put("Fractional part of error: 1.0000000000E+00"); + put(" Error's exponent: "); put(e_Integer'Image(-e_Real_Machine_Mantissa+1)); + new_line(2); + put("An error of this size is of the order: 10**("); + put(Integer'Image (Integer (-Decimal_Digits_in_Mantissa+9.0))); put(")"); + new_line(2); + put("If you lose two digits of accuracy (i.e. both guard digits) instead of 1"); + new_line(1); + put("(as in the above case) then you lose another 9 decimal digits of"); + new_line(1); + put("accuracy. In this case expect errors of the order:"); + new_line(2); + put("Fractional part of error: 1.0000000000E+00"); + put(" Error's exponent: "); put(e_Integer'Image(-e_Real_Machine_Mantissa+2)); + new_line(2); + put("An error of this size is of the order: 10**("); + put(Integer'Image (Integer (-Decimal_Digits_in_Mantissa+18.0))); put(")"); + new_line(2); + end Print_Extended_Real_Settings; + +begin + + rnd.Reset (Some_Seed); + + Print_Extended_Real_Settings; + + put ("Test of +,-,/,* over randomly generated test vectors."); + new_line(1); + put ("The tests search for failures in +,-,/,*. They don't attempt"); + new_line(1); + put ("to measure ultimate error in floating point operations."); + new_line(1); + put ("The testing goes on for as many hours as you let it:"); + new_line(2); + + for repetitions in 1 .. 1_000_000 loop + + + Big_Digits := Get_999; + + Exp_First := e_Real_Machine_Emin / 2 + e_Real_Machine_Mantissa; + Exp_Last := e_Real_Machine_Emax / 2 - 0; + -- Above settings are for division/mult tests: + + -- The random num we make by scaling with Exp in [Exp_First, Exp_Last] + -- should have the property that when squared it is still in range, + -- since the function Random returns a rand in the range [0.0 .. 1.0). + -- The smallest exp of Random is e_Real_Machine_Mantissa - 1. + + --******************************************* + -- NOTE: X = 1 + Rand or Y = 1 + Rand; guarantees loss of 3 digits + -- accuracy. This method doesn't really measure error in * or /. + -- It does however show the importance of guard digits. + --******************************************* + Exp_Error := e_Integer'First; + Frac_Error := 1.0E-100; + + for I in Integer32 range 1 .. Mult_Limit loop + + X := Random; + X := Scaling (X, Random_Exp (Exp_First, Exp_Last)); + + Y := One - Half_Digit*Random; + Y := Scaling (Y, Random_Exp (Exp_First, Exp_Last)); + + Z1 := X; + Mult (Z1, Y); -- Z1 := X * Y; + Z1 := Z1 / Y; -- (so Z1 should equal X) + Delta_X := Z1 - X; + + --Z1 := Y; + --Square (Z1); + --Z1 := Z1 / Y; + --Delta_X := (Z1 - Y); + + Get_Normalized_Delta (X, Delta_X, Frac_Part, Exp_Part); + + IF Exp_Part > Exp_Error THEN Exp_Error := Exp_Part; END IF; + IF Frac_Part > Frac_Error THEN Frac_Error := Frac_Part; END IF; + + end loop; + + new_line; put ("Max Error ="); + Print_e_Real_data (Frac_Error, Exp_Error); + + --******************************************* + Exp_Error := e_Integer'First; + Frac_Error := 1.0E-100; + + for I in Integer32 range 1 .. Mult_Limit loop + + X := Random; + X := Scaling (X, Random_Exp (Exp_First, Exp_Last)); + + Y := One - Half_Digit*Random; + Y := Scaling (Y, Random_Exp (Exp_First, Exp_Last)); + + Z1 := (X * Y) / Y; + Delta_X := Z1 - X; + + Get_Normalized_Delta (X, Delta_X, Frac_Part, Exp_Part); + + IF Exp_Part > Exp_Error THEN Exp_Error := Exp_Part; END IF; + IF Frac_Part > Frac_Error THEN Frac_Error := Frac_Part; END IF; + + end loop; + + new_line; put ("Max Error ="); + Print_e_Real_data (Frac_Error, Exp_Error); + + --******************************************* + Exp_Error := e_Integer'First; + Frac_Error := 1.0E-100; + + for I in Integer32 range 1 .. Mult_Limit loop + + X := Random; + X := Scaling (X, Random_Exp (Exp_First, Exp_Last)); + + Y := Scaling (Big_Digits, Random_Exp (Exp_First, Exp_Last)); + + Z1 := (X * Y) / Y; + Delta_X := Z1 - X; + + Get_Normalized_Delta (X, Delta_X, Frac_Part, Exp_Part); + + IF Exp_Part > Exp_Error THEN Exp_Error := Exp_Part; END IF; + IF Frac_Part > Frac_Error THEN Frac_Error := Frac_Part; END IF; + + end loop; + + new_line; put ("Max Error ="); + Print_e_Real_data (Frac_Error, Exp_Error); + + --******************************************* + + Exp_First := -e_Real_Machine_Emax + 2; + Exp_Last := e_Real_Machine_Emax - 2; + -- Above settings are for +/- tests: + + --******************************************* + Exp_Error := e_Integer'First; + Frac_Error := 1.0E-100; + + for I in Integer32 range 1 .. Mult_Limit * 2**2 loop + + X := Random; + X := Scaling (X, Random_Exp (Exp_First, Exp_Last)); + + Y_Exp_First := Max (Exp_First, Exponent(X)+1); + Y_Exp_Last := Min (Exp_Last, Exponent(X) + e_Real_Machine_Mantissa + 1); + Y := Scaling (Big_Digits, Random_Exp (Y_Exp_First, Y_Exp_Last)); + + Z1 := (X + Y) - X; + Delta_X := Z1 - Y; + + Get_Normalized_Delta (Y, Delta_X, Frac_Part, Exp_Part); + + IF Exp_Part > Exp_Error THEN Exp_Error := Exp_Part; END IF; + IF Frac_Part > Frac_Error THEN Frac_Error := Frac_Part; END IF; + + end loop; + + new_line; put ("Max Error ="); + Print_e_Real_data (Frac_Error, Exp_Error); + + + --******************************************* + Exp_Error := e_Integer'First; + Frac_Error := 1.0E-100; + + for I in Integer32 range 1 .. Mult_Limit * 2**2 loop + + X := Random; + X := Scaling (X, Random_Exp (Exp_First, Exp_Last)); + + Y_Exp_First := Max (Exp_First, Exponent(X) - e_Real_Machine_Mantissa - 1); + Y_Exp_Last := Min (Exp_Last, Exponent(X) - 1); + Y := Scaling (Big_Digits, Random_Exp (Y_Exp_First, Y_Exp_Last)); + + Z1 := (X + Y) - Y; + Delta_X := Z1 - X; + + Get_Normalized_Delta (X, Delta_X, Frac_Part, Exp_Part); + + IF Exp_Part > Exp_Error THEN Exp_Error := Exp_Part; END IF; + IF Frac_Part > Frac_Error THEN Frac_Error := Frac_Part; END IF; + + end loop; + + new_line; put ("Max Error ="); + Print_e_Real_data (Frac_Error, Exp_Error); + + + --******************************************* + Exp_Error := e_Integer'First; + Frac_Error := 1.0E-100; + + for I in Integer32 range 1 .. Mult_Limit * 2**2 loop + + X := Random; + X := Scaling (X, Random_Exp (Exp_First, Exp_Last)); + + Y_Exp_First := Max (Exp_First, Exponent(X) - e_Real_Machine_Mantissa - 1); + Y_Exp_Last := Min (Exp_Last, Exponent(X) - 1); + Y := Random; + Y := Scaling (Y, Random_Exp (Y_Exp_First, Y_Exp_Last)); + + Z1 := (X + Y) - Y; + Delta_X := Z1 - X; + + Get_Normalized_Delta (X, Delta_X, Frac_Part, Exp_Part); + + IF Exp_Part > Exp_Error THEN Exp_Error := Exp_Part; END IF; + IF Frac_Part > Frac_Error THEN Frac_Error := Frac_Part; END IF; + + end loop; + + new_line; put ("Max Error ="); + Print_e_Real_data (Frac_Error, Exp_Error); + + end loop; -- repetitions + + +end; + diff --git a/arbitrary/extended_real-e_rand.adb b/arbitrary/extended_real-e_rand.adb new file mode 100644 index 0000000..341ae95 --- /dev/null +++ b/arbitrary/extended_real-e_rand.adb @@ -0,0 +1,109 @@ + +----------------------------------------------------------------------- +-- package body Extended_Real.E_Rand, extended precision random numbers. +-- Copyright (C) 2008-2018 Jonathan S. Parker +-- +-- Permission to use, copy, modify, and/or distribute this software for any +-- purpose with or without fee is hereby granted, provided that the above +-- copyright notice and this permission notice appear in all copies. +-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +--------------------------------------------------------------------------- + + +package body Extended_Real.E_Rand is + + State : Random_Int := 701; + + ---------------- + -- Get_Random -- + ---------------- + + -- 61 bit rands: + + function Next_Random_Int + return Random_Int + is + X2 : Random_Int; + + S8 : constant := 6; S7 : constant := 20; S6 : constant := 32; + S5 : constant := 30; S4 : constant := 15; S3 : constant := 7; + S2 : constant := 3; S1 : constant := 1; + + -- Error detection is by assertion: + + pragma Assert + (S8=6 and S7=20 and S6=32 and S5=30 and S4=15 and S3=7 and S2=3 and S1=1); + + -- Error correction is by inspection: + -- (if mutated parameters are detected, use data given below to correct). + -- 1 3 7 15 30 32 20 6 + -- 1 3 7 15 30 32 20 6 + -- 1 3 7 15 30 32 20 6 + + begin + + X2 := State; + X2 := X2 XOR (X2 / 2**S8); + X2 := (X2 XOR (X2 * 2**S7))mod 2**61; + X2 := X2 XOR (X2 / 2**S6); + X2 := (X2 XOR (X2 * 2**S5))mod 2**61; + X2 := X2 XOR (X2 / 2**S4); + X2 := (X2 XOR (X2 * 2**S3))mod 2**61; + X2 := X2 XOR (X2 / 2**S2); + X2 := (X2 XOR (X2 * 2**S1))mod 2**61; + State := X2; + + return X2; + + end Next_Random_Int; + + ----------- + -- Reset -- + ----------- + + -- Initiator and state must never be negative or 0! + + procedure Reset + (Initiator : in Positive := 7777777) + is + X : Integer := Initiator mod 2**30; + -- if Ints are 64 bit, keep it under 61 bit, while still portable to 32 bit int. + begin + if X = 0 then X := 1; end if; + State := Random_Int (X); + end Reset; + + ------------ + -- Random -- + ------------ + + function Random + return E_Real + is + Result : E_Real; + begin + + for I in Digit_Index loop + Result.Digit(I) := Digit_Type (Next_Random_Int mod 2**No_Of_Bits_in_Radix); + end loop; + + if Result.Digit(Digit_Index'First) = Digit_Zero then + Result.Digit(Digit_Index'First) := Digit_One; + end if; + + Result.Is_Zero := False; + Result.Is_Positive := True; + Result.Is_Infinite := False; + Result.Exp := -1; + + return Result; + + end Random; + +end Extended_Real.E_Rand; diff --git a/arbitrary/extended_real-e_rand.ads b/arbitrary/extended_real-e_rand.ads new file mode 100644 index 0000000..69109d2 --- /dev/null +++ b/arbitrary/extended_real-e_rand.ads @@ -0,0 +1,24 @@ + +-- PACKAGE E_Rand +-- +-- Minimal random number generator, mainly for basic test of +-- extended arithmetic. +-- +-- Outputs extended precision pseudo-random numbers on [0.0 .. 1.0) + +generic +package Extended_Real.E_Rand is + + function Random return E_Real; + -- returns E_Reals in [0, 1) + + procedure Reset (Initiator : in Positive := 7777777); + -- Resets both function Random and function Next_Random_Int. + + + type Random_Int is mod 2**64; + + function Next_Random_Int return Random_Int; + -- 61 bit generator, period 2**61-1. + +end Extended_Real.E_Rand; diff --git a/arbitrary/extended_real-elementary_functions.adb b/arbitrary/extended_real-elementary_functions.adb new file mode 100644 index 0000000..b8da46f --- /dev/null +++ b/arbitrary/extended_real-elementary_functions.adb @@ -0,0 +1,2033 @@ + +----------------------------------------------------------------------- +-- package body Extended_Real.Elementary_Functions, extended precision functions +-- Copyright (C) 2008-2018 Jonathan S. Parker +-- +-- Permission to use, copy, modify, and/or distribute this software for any +-- purpose with or without fee is hereby granted, provided that the above +-- copyright notice and this permission notice appear in all copies. +-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +--------------------------------------------------------------------------- + +package body Extended_Real.Elementary_Functions is + + Max_Available_Bits : constant e_Integer + := Desired_No_Of_Bits_in_Radix * e_Real_Machine_Mantissa; + -- This equals: Bits_per_Mantissa = Bits_per_Digit * No_of_Digits_per_Mantissa + -- (e_Real_Machine_Mantissa = Mantissa'Length = No_of_Digits_per_Mantissa). + + -- The following are frequently used global constants: + + Radix : constant Real := e_Real_Machine_Radix; + + Half_Digit : constant E_Digit := Make_E_Digit (0.5); + Two_Digit : constant E_Digit := Make_E_Digit (2.0); + Three_Digit : constant E_Digit := Make_E_Digit (3.0); + Four_Digit : constant E_Digit := Make_E_Digit (4.0); + Nine_Digit : constant E_Digit := Make_E_Digit (9.0); + Twelve_Digit : constant E_Digit := Make_E_Digit (12.0); + + --One : constant e_Real := +1.0; -- in Extended_Real spec + --Zero : constant e_Real := +0.0; + Two : constant e_Real := +2.0; + Three : constant e_Real := +3.0; + Half : constant e_Real := +0.5; + Three_Quarters : constant e_Real := +(0.75); + Less_Than_Half_Pi : constant e_Real := +(3.14159265 / 2.0); + + + -- Global memory for important constants. They're initialized + -- on the first call to the functions that return them, and + -- there after are simply returned from memory: + -- Pi, Sqrt_2, etc + + type Pi_mem is record + Val : e_Real; -- automatically initialized to Zero. + Initialized : Boolean := False; + end record; + Pi_Memory : Pi_Mem; + + type Inverse_Pi_mem is record + Val : e_Real; -- automatically initialized to Zero. + Initialized : Boolean := False; + end record; + Inverse_Pi_Memory : Inverse_Pi_Mem; + + type Quarter_Pi_mem is record + Val : e_Real; -- automatically initialized to Zero. + Initialized : Boolean := False; + end record; + Quarter_Pi_Memory : Quarter_Pi_Mem; + + type Inverse_Sqrt_2_mem is record + Val : e_Real; + Initialized : Boolean := False; + end record; + Inverse_Sqrt_2_memory : Inverse_Sqrt_2_mem; + + type Half_Inverse_Log_2_mem is record + Val : e_Real; + Initialized : Boolean := False; + end record; + Half_Inverse_Log_2_memory : Half_Inverse_Log_2_mem; + + type Log_2_mem is record + Val : e_Real; + Initialized : Boolean := False; + end record; + Log_2_memory : Log_2_mem; + + ------------ + -- Arcsin -- + ------------ + + -- The result of the Arcsin function is in the quadrant containing + -- the point (1.0, x), where x is the value of the parameter X. This + -- quadrant is I or IV; thus, the range of the Arcsin function is + -- approximately -Pi/2.0 to Pi/2.0 (-Cycle/4.0 to Cycle/4.0, if the + -- parameter Cycle is specified). + -- Argument_Error is raised by Arcsin when the absolute + -- value of the parameter X exceeds one. + -- + -- Uses Newton's method: Y_k+1 = Y_k + (A - Sin(Y_k)) / Cos(Y_k) + -- to get Arcsin(A). + -- Requires call to Arcsin (Real) and assumes that this call gets + -- the first two radix digits correct: 48 bits usually. (It almost + -- always gets 53 bits correct.) + -- + -- Arcsin(x) = x + x^3/6 + 3*x^5/40 ... + -- (so Arctan(x) = x if x < e_Real_Model_Epsilon) + + function Arcsin (X : e_Real) + return e_Real + is + Y_0 : e_Real; + X_Abs : e_Real := Abs (X); + No_Correct_Bits : E_Integer; + Sign_Is_Negative : Boolean := False; + Scaling_Performed : Boolean := False; + begin + + if Are_Equal (X_Abs, Positive_Infinity) then + raise E_Argument_Error; + end if; + + if One < X_Abs then + raise E_Argument_Error; + end if; + + if X_Abs < e_Real_Model_Epsilon then + return X; -- series solution: arcsin = x + x^3/6 + ... + end if; + + Sign_Is_Negative := False; + if X < Zero then + Sign_Is_Negative := True; + end if; + + if Are_Equal (X_Abs, One) then + Y_0 := Two_Digit * e_Quarter_Pi; + if Sign_Is_Negative then Y_0 := -Y_0; end if; + return Y_0; + end if; + + -- STEP 2. We may have to scale the argument if its near 1. Newton_Raphson + -- doesn't do well there. So we use identity: + -- + -- arcsin(x) - arcsin(y) = arcsin(x*Sqrt(1-y*y) - y*Sqrt(1-x*x)), + -- + -- so setting y = 1 (get the arccos(x) = Pi/2 - Arcsin(x)): + -- + -- arcsin(x) = -arcsin(Sqrt(1-x*x)) + Pi/2. + -- + -- Well, we can't use Sqrt(1-x*x) because there's too much error near + -- X = small. We can use Sqrt(1-X) * Sqrt(1+X) but don't want the 2 sqrts. + -- So! we want Arcsin (Sqrt(1-X) * Sqrt(1+X)) = Y, or + -- Sin(Y) = 2 * Sqrt((1-X)/2) * Sqrt((1+X)/2). Then Sin(Y) = 2*Sin(Z)Cos(Z) + -- where Z = Arcsin(Sqrt((1-X)/2)). So, Y = Arcsin (Sin(Z + Z)) = 2*Z. + -- The final answer is Arcsin(X) = Pi/2 - 2*Arcsin(Sqrt((1-X)/2)). + -- + -- IMPORTANT: We can't scale if X_Abs <= 0.5 because we use this + -- routine with an argument of 0.5 to calculate Pi, and scaling + -- requires a knowledge of Pi. + + Scaling_Performed := False; + if Three_Quarters < X_Abs then + X_Abs := Sqrt (Half - Half_Digit * X_Abs); + Scaling_Performed := True; + end if; + + -- Need starting value for Newton's iteration of Arcsin (X_scaled). + -- Assume positive arg to improve likelihood that + -- external Arcsin fctn. does what we want. + + Y_0 := Make_Extended (Arcsin (Make_Real (X_Abs))); + + No_Correct_Bits := Real'Machine_Mantissa - 2; + + loop + + --Y_0 := Y_0 + (X_Abs - Sin(Y_0)) / Cos(Y_0); + Y_0 := Y_0 + Divide ((X_Abs - Sin(Y_0)) , Cos(Y_0)); + -- Cos is never near 0. + + No_Correct_Bits := No_Correct_Bits * 2; + exit when No_Correct_Bits >= Max_Available_Bits; + + end loop; + + if Scaling_Performed then + Y_0 := Two_Digit * (e_Quarter_Pi - Y_0); + end if; + + if Sign_Is_Negative then + Y_0 := -Y_0; + end if; + + return Y_0; + + end Arcsin; + + ------------ + -- Arccos -- + ------------ + + -- The result of the Arccos function is in the quadrant containing + -- the point (x, 1.0), where x is the value of the parameter X. This + -- quadrant is I or II; thus, the Arccos function ranges from 0.0 to + -- approximately Pi (Cycle/2.0, if the parameter Cycle is + -- specified). + -- + -- Argument_Error is raised by Arccos when the absolute + -- value of the parameter X exceeds one. + -- + -- In all cases use Arccos(X) = Pi/2 - Arcsin(X). + -- When Abs(X) < 0.5 we use Pi/2 - Arcsin(X). When + -- Abs(X) > 0.5 its better to use the following formula for Arcsin: + -- Arcsin(X) = Pi/2 - 2*Arcsin(Sqrt((1-|X|)/2)). (X > 0.0) + -- Arcsin(X) = -Pi/2 + 2*Arcsin(Sqrt((1-|X|)/2)). (X < 0.0) + -- + function Arccos (X : e_Real) + return e_Real + is + Result : e_Real; + X_Abs : constant e_Real := Abs (X); + begin + + if X_Abs > One then + raise Constraint_Error; + end if; + + if Are_Equal (X, Zero) then + return Two_Digit * e_Quarter_Pi; + end if; + + if Are_Equal (X, One) then + return Zero; + end if; + + if Are_Equal (X, -One) then + return e_Pi; + end if; + + if Are_Equal (X, Positive_Infinity) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Negative_Infinity) then + raise E_Argument_Error; + end if; + + if X > Half then + Result := Two_Digit * Arcsin (Sqrt (Half - Half_Digit*X_Abs)); + elsif X < -Half then + Result := Four_Digit* + (e_Quarter_Pi - Half_Digit*Arcsin (Sqrt (Half - Half_Digit*X_Abs))); + else + Result := (e_Quarter_Pi - Arcsin(X)) + e_Quarter_Pi; + end if; + -- e_Quarter_Pi is stored with more correct binary digits than + -- E_Half_Pi because its slightly less than 1.0. + + return Result; + + end Arccos; + + ------------ + -- Arctan -- + ------------ + + -- Newton's method avoids divisions: + -- + -- Result := Result + Cos(Result) * (Cos(Result) * Arg - Sin(Result)) + -- + -- Arctan(x) = Pi/2 - Arctan(1/x) + -- Arctan(x) = -Pi/2 + Arctan(1/x) (x<0) + -- + -- Arctan(x) = x - x^3/3 + x^5/5 - x^7/7 ... + -- (so Arctan(x) = x if x < e_Real_Model_Epsilon) + -- + -- Arctan (X) = Arcsin(X / Sqrt(1 + X**2)). + -- + -- Not really Ada95-ish for Arctan. + -- + function Arctan + (X : e_Real) + return e_Real + is + Y_0, Arg, Cos_Y_0, Sin_Y_0 : e_Real; + X_Abs : constant e_Real := Abs (X); + X_real : Real; + Sign_Is_Negative : Boolean := False; + Argument_Reduced : Boolean := False; + No_Correct_Bits : E_Integer; + begin + if X_Abs < e_Real_Model_Epsilon then + return X; -- series solution: arctan = x - x^3/3 + ... + end if; + + Sign_Is_Negative := False; + if X < Zero then + Sign_Is_Negative := True; + end if; + + -- returns Pi/2 at +/- inf. (Note Reciprocal returns 1/inf as Zero.) + -- inf is regarded as large finite number. Raising exceptions + -- in these cases may also be good idea. Which is right? + + if Are_Equal (X_Abs, Positive_Infinity) then + Y_0 := Two_Digit * e_Quarter_Pi; + if Sign_Is_Negative then Y_0 := -Y_0; end if; + return Y_0; + end if; + + -- function Make_Real underflows to 0.0 as desired for small X_Abs. + + if X_abs < Two then + Argument_Reduced := False; + Arg := X_abs; + else + -- use: Arctan(x) = Pi/2 - Arctan(1/x) + Argument_Reduced := True; + Arg := Reciprocal (X_abs); + end if; + + X_real := Make_Real (Arg); + Y_0 := Make_Extended (Arctan (X_real)); + + No_Correct_Bits := Real'Machine_Mantissa - 2; + + loop + + Sin_Y_0 := Sin (Y_0); + Cos_Y_0 := Cos (Y_0); -- bst accuracy. + --Cos_Y_0 := Sqrt (One - Sin_Y_0*Sin_Y_0); -- fstr, not too bad accuracy-wise. + + Y_0 := Y_0 + (Arg*Cos_Y_0 - Sin_Y_0) * Cos_Y_0; + + No_Correct_Bits := No_Correct_Bits * 2; + exit when No_Correct_Bits >= Max_Available_Bits; + + end loop; + + if Argument_Reduced then + Y_0 := Two_Digit * (e_Quarter_Pi - Half_Digit*Y_0); + end if; + -- Lose a whole guard digit of precision when we double e_Quarter_Pi. + -- (It becomes slightly > 1, pushes a digit off the end of the mantissa.) + -- So do the subtraction 1st. + + if Sign_Is_Negative then + Y_0 := -Y_0; + end if; + + return Y_0; + + end Arctan; + + --------- + -- Log -- + --------- + + -- Uses Newton's method: Y_k+1 = Y_k + (A - Exp(Y_k)) / Exp(Y_k) + -- to get Log(A) = Natural Log, the inverse of Exp. + -- Requires call to Log (Real) and assumes that this call gets + -- the first two radix digits correct: 48 bits usually. (It almost + -- always gets 53 bits correct.) Argument reduction due to Brent: + -- 1st step is to get a rough approximate value of Log(X), called + -- Log_X_approx. Then get Y = Log(X/exp(Log_X_approx)). + -- The final answer is Log(X) = Y + Log_X_approx. (Disabled at present.) + -- This gets Y very near 0, which greatly speeds up subsequent + -- calls to Exp in the Newton iteration above. Actually, + -- the scaling business is commented out below. If you uncomment it, + -- the routine runs 60% faster, in some tests, but is less accurate. + -- We'll err on the side of accuracy and use an unscaled X. But + -- if you use extended floating point with 2 guard digits, it would + -- be sensible to use the scaled X, because the loss in accuracy is + -- negligable compared to the extra precision of another guard digit. + -- Test for X = One: must return Zero. + -- + -- The exception Constraint_Error is raised, signaling a pole of the + -- mathematical function (analogous to dividing by zero), in the following + -- cases, provided that Float_Type'Machine_Overflows is True: + -- by the Log, Cot, and Coth functions, when the value of the + -- parameter X is zero; + + function Log (X : e_Real) + return e_Real + is + Result, X_scaled, Y_0 : e_Real; + X_Exp : E_Integer; + No_Correct_Bits : E_Integer; + Log_X_approx_real : Real; + begin + + if X < Zero then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Zero) then + raise Constraint_Error; + end if; + + if Are_Equal (X, Positive_Infinity) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Negative_Infinity) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, One) then + return Zero; + end if; + + -- STEP 1. First argument reduction step. Get Log_X_approx_real using + -- a call to log(real). If X=0 or inf, then X_scaled=0..get exception. + + X_Exp := Exponent (X); + X_scaled := Fraction (X); -- not zero or infinity. + + Log_X_approx_real := Log (Make_Real(X_scaled)) + Log (Radix) * Real (X_Exp); + X_scaled := X; + + -- Log_X_approx := Make_Extended (Log_X_approx_real); + -- X_scaled := X / Exp (Log_X_approx); + -- The above Scaling is the fastest, since then Exp does no scaling. + -- It is clearly less accurate, tho' tolerably so, especially if you + -- use two guard digits instead of 1. We use the unscaled X here, + -- because it (for example) returns a value of Log(2.0) + -- with an error that is about 50 times smaller than the above. + + + -- STEP 2. Need starting value for Newton's iteration of Log (X_scaled). + + --Y_0 := Make_Extended (Log (Make_Real (X_scaled))); -- slightly > Zero + + Y_0 := Make_Extended (Log_X_approx_real); + + + -- STEP 3. Start the iteration. Calculate the number of iterations + -- required as follows. num correct digits doubles each iteration. + -- 1st iteration gives 4 digits, etc. Each step set desired precision + -- to one digit more than that we expect from the Iteration. + + No_Correct_Bits := Real'Machine_Mantissa - 2; + + loop + + Y_0 := Y_0 + (X_scaled * Exp(-Y_0) - One); + + No_Correct_Bits := No_Correct_Bits * 2; + exit when No_Correct_Bits >= Max_Available_Bits; + + end loop; + + --Result := Y_0 + Log_X_approx; -- for use with scaled version (Step 2) + Result := Y_0; + + return Result; + + end Log; + + -------------------------- + -- Log (arbitrary base) -- + -------------------------- + + -- The exception E_Argument_Error is raised, signaling a parameter + -- value outside the domain of the corresponding mathematical function, + -- in the following cases: + -- by the Log function with specified base, when the value of the + -- parameter Base is zero, one, or negative; + -- + -- The exception Constraint_Error is raised, signaling a pole of the + -- mathematical function (analogous to dividing by zero), in the following + -- cases, provided that Float_Type'Machine_Overflows is True: + -- by the Log, Cot, and Coth functions, when the value of the + -- parameter X is zero. + -- + -- Struggling to remember: Z = Log(X, Base) implies X = Base**Z + -- or X = Exp (Log(Base)*Z) which implies Log(X) = Log(Base) * Z, so + -- Log(X, Base) = Z = Log(X) / Log(Base). + -- + function Log (X : e_Real; Base : e_Real) return e_Real is + Result : e_Real; + begin + + if Are_Equal (Base,Zero) then + raise E_Argument_Error; + end if; + + if Base < Zero then + raise E_Argument_Error; + end if; + + if Are_Equal (Base,One) then + raise E_Argument_Error; + end if; + + if Are_Equal (Base,Two) then + + Result := Two_Digit * (Log(X) * e_Half_Inverse_Log_2); + -- Divide by e_log_2. Multiply by 0.5/Log(2) not 1/log(2), because + -- 0.5/Log(2) is slightly less than 1, hence contains more correct + -- digits. (And multiplication is preferred for efficiency.) + + else + + Result := Log(X) / Log(Base); + + end if; + + return Result; + + end Log; + + ---------- + -- "**' -- + ---------- + + -- Say X**N = Exp (Log (X) * N). + -- + -- Exponentiation by a zero exponent yields the value one. + -- Exponentiation by a unit exponent yields the value of the left + -- operand. Exponentiation of the value one yields the value one. + -- Exponentiation of the value zero yields the value zero. + -- The results of the Sqrt and Arccosh functions and that of the + -- exponentiation operator are nonnegative. + -- + -- Argument_Error is raised by "**" operator, when the value of the left + -- operand is negative or when both operands have the value zero; + -- + function "**" (Left : e_Real; Right : e_Real) return e_Real is + Result : e_Real; + begin + + -- Errors: + + if Left < Zero then + raise E_Argument_Error; + end if; + + if Are_Equal (Left, Zero) and then Are_Equal (Right, Zero) then + raise E_Argument_Error; + end if; + + -- Special Cases. We now know that they aren't both Zero: + + if Are_Equal (Right, Zero) then -- Left is not Zero + return One; + end if; + + if Are_Equal (Left, Zero) then -- Right is not Zero + return Zero; + end if; + + if Are_Equal (Right, One) then + return Left; + end if; + + if Are_Equal (Left, One) then -- Still OK if Right = Zero + return One; + end if; + + -- Should we optimize for integer N? + + Result := Exp (Log (Left) * Right); + + return Result; + + end "**"; + + --------- + -- Exp -- + --------- + + -- Sum Taylor series for Exp(X). + -- Actually, we sum series for Exp(X) - 1 - X, because scaling makes + -- X small, and Exp(X) - 1 - X has more correct digits for small X. + -- [get max arg size and test for it.] + -- + function Exp + (X : e_Real) + return e_Real + is + Order : Real := 0.0; + Delta_Exponent : E_Integer := 0; + Next_Term, Sum : e_Real; + + X_Scaled_2, X_scaled_1 : e_Real; + Total_Digits_To_Use : E_Integer; + + N : Integer; + N_e_Real : e_Real; + + J : constant Integer := 11; + Scale_Factor_2 : constant Integer := 2**J; + -- Must be power of 2 for function call to Make_E_Digit. + -- The optimal value increases with desired precision. But higher + -- order terms in the series are cheap, so its not *too* important. + + Inverse_Two_To_The_J : constant E_Digit + := Make_E_Digit (1.0 / Real (Scale_Factor_2)); + + We_Flipped_The_Sign_Of_X_scaled : Boolean := False; + First_Stage_Scaling_Performed : Boolean := False; + Second_Stage_Scaling_Performed : Boolean := False; + begin + + if Are_Equal (X, Zero) then + return One; + end if; + + if Are_Equal (X, Positive_Infinity) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Negative_Infinity) then + raise E_Argument_Error; + end if; + + -- STEP 1. Reduce argument in 2 stages: 1st Remainder(X, Log(2)), + -- then divide by 2**J. + -- So X_Scaled = Remainder (X, Log_2), or approximately: + -- X_Scaled = X - Unbiased_Rounding (X / Log(2)) * Log(2) = X - N * Log(2) + -- Then Exp(X) = Exp(X_Scaled) * Exp(N*Log(2)) = Exp(X_Scaled) * 2**N. + -- + -- Second stage of argument reduction: divide X_Scaled by 2**J: + -- Exp(X) = Exp(X_Scaled/2**J)**2**J * 2**N. + -- + -- E_log_2 is calculated by recursively calling this routine, but + -- with an argument very near 0.0, so First stage scaling is not + -- performed. (It also calls with arg of approx. log(2) = 0.69, so + -- must not allow 1st stage scaling for args that small.) + + N := 0; + X_Scaled_1 := X; + First_Stage_Scaling_Performed := False; + + if Three_Quarters < Abs (X) then + + N_e_Real := Unbiased_Rounding (Two_Digit * (X_Scaled_1 * E_Half_Inverse_Log_2)); + X_Scaled_1 := Remainder (X_Scaled_1, E_Log_2); + + -- Use X_Scaled_1 := X_Scaled_1 - N_e_Real * E_Log_2; ? + -- Not much faster. Somewhat less accurate. Seems OK for small args. + + if Make_Extended (Real (Integer'Last)) < Abs (N_e_Real) then + raise E_Argument_Error with "Argument too large in Exp."; + end if; + + N := Integer (Make_Real (N_e_Real)); + + First_Stage_Scaling_Performed := True; + + end if; + + -- STEP 1b. We want to get Exp() slightly less than one, to maximize + -- the precision of the calculation. So make sure arg is negative. + + We_Flipped_The_Sign_Of_X_scaled := False; + if not (X_scaled_1 < Zero) then + We_Flipped_The_Sign_Of_X_scaled := True; + X_scaled_1 := -X_scaled_1; + end if; + + + -- STEP 2. 2nd stage of argument reduction. Divide X_scaled by 2**J. + -- Don't scale if arg is already small to avoid complications due + -- to underflow of arg to zero. Arg may already be 0. Its OK. + + if Exponent (X_Scaled_1) >= -2 then -- it'll do, Zero OK here + X_Scaled_2 := Inverse_Two_To_The_J * X_Scaled_1; + Second_Stage_Scaling_Performed := True; + else + X_scaled_2 := X_scaled_1; + Second_Stage_Scaling_Performed := False; + end if; + + + -- STEP 3. Start the sum. Calculate Exp(X_Scaled) - (1 + X_Scaled), + -- because for small X_Scaled, this contains more correct digits than Exp. + -- Start summing the series at order 2 instead of order 0. + -- We have verified above that X_scaled /= Zero. + + Order := 2.0; + Next_Term := Half_Digit * X_Scaled_2 * X_Scaled_2; + Sum := Next_Term; + + loop + + Order := Order + 1.0; + if Order = Radix then + raise E_Argument_Error with "Too many terms needed in Exp taylor sum."; + end if; + + -- Use relative Exponents of Sum and Next_Term to check convergence + -- of the sum. Exponent doesn't work for args of 0, so check. + -- Abs (Next_Term) <= Abs (Sum), so we need only check Next_Term. + + if Are_Equal (Next_Term, Zero) then exit; end if; + + Delta_Exponent := Exponent (Sum) - Exponent (Next_Term); + Total_Digits_To_Use := e_Real_Machine_Mantissa - Delta_Exponent + 1; + exit when Total_Digits_To_Use <= 0; + + Next_Term := (X_Scaled_2 * Next_Term) / Make_E_Digit (Order); + + Sum := Sum + Next_Term; + -- Sum can overflow to infinity? Not with our scaled arguments. + + end loop; + + -- STEP 4. Undo effect of 2nd stage of argument scaling. Recall we + -- divided the arg by 2**J, and found Exp(X_Scaled/2**J). Now to get + -- Exp(X_Scaled), must take Exp(X_Scaled/2**J)**2**J, which means + -- repeated squaring of Exp(X_Scaled/2**J) (J times). Its more complicated + -- than that because we calculated G(X) = Exp(X) - 1 - X (since G contains + -- more correct digits than Exp, expecially for small X.) So we + -- use G(2X) = Exp(2X) - 1 - 2X = (G + (1 + X))*(G + (1 + X)) - 1 - 2X + -- = G*G + 2*G*(1+X) + X*X + -- G(2X) = (G(X)+X)**2 + 2G(X). + -- G(4X) = (G(2X)+2X)**2 + 2G(2X). + -- Repeat J times to unscale G. The following also returns X_scaled*2**J. + + if Second_Stage_Scaling_Performed then + for I in 1..J loop + Sum := (Sum + X_scaled_2)**2 + Two_Digit * Sum; + X_Scaled_2 := Two_Digit * X_Scaled_2; + end loop; + end if; + + -- DO the following whether or not Second_Stage or First_Stage + -- scaling was performed (because the series sum neglected the + -- the 1 and the X. If there were no extra guard digit for + -- subtraction ((X_scaled_1 + Sum) is negative) then it would be best + -- to use (0.5 + (Sum + Xscaled)) + 0.5. Following is OK though to + -- get a number slightly less than one with full precision. + -- Recover Exp = G(X) + 1 + X = Sum + 1 + X = (Sum + X_scaled) + 1: + + Sum := (Sum + X_scaled_1) + One; + + -- Second stage unscaling. We now have Y = Exp(-|X_scaled_1|), which is + -- slightly less than 1.0. Keep + -- in Y < 1.0 form as we unscale: might preserve more precision that + -- way, cause we lose much precision if invert a number that's slightly + -- less than one. + + if First_Stage_Scaling_Performed then + if We_Flipped_The_Sign_Of_X_scaled then + Sum := Sum * Two**(-N); + else + Sum := Sum * Two**(N); + end if; + end if; + + if We_Flipped_The_Sign_Of_X_scaled then + Sum := Reciprocal (Sum); + -- X_scaled was positive. We flipped its sign so must invert the result. + end if; + + return Sum; + + end Exp; + + --------------------------- + -- Sin (arbitrary cycle) -- + --------------------------- + + -- The exception E_Argument_Error is raised, signaling a parameter + -- value outside the domain of the corresponding mathematical function, + -- in the following cases: + -- by any forward or inverse trigonometric function with specified + -- cycle, when the value of the parameter Cycle is zero or negative; + -- The results of the Sin, Cos, Tan, and Cot functions with + -- specified cycle are exact when the mathematical result is zero; + -- those of the first two are also exact when the mathematical + -- result is +/-1.0. + + function Sin + (X : e_Real; + Cycle : e_Real) + return e_Real + is + Fraction_Of_Cycle, Result : e_Real; + begin + -- The input parameter X is units of Cycle. For example X = Cycle + -- is same as X = 2Pi, so could call Sin (2 * Pi * X / Cycle), but we + -- want to apply the remainder function here to directly meet certain + -- requirements on returning exact results. + -- Recall: Remainder = X - Round (X/Cycle) * Cycle = X - N * Cycle + -- which is in the range -Cycle/2 .. Cycle/2. The formula will be + -- + -- Sin (X, Cycle) = Sin (2 * Pi * X / Cycle) + -- = Sin (2 * Pi * (X - N * Cycle) / Cycle) + -- = Sin (2 * Pi * Remainder(X,Cycle) / Cycle) + + if Are_Equal (Cycle, Zero) then + raise E_Argument_Error; + end if; + + if Cycle < Zero then + raise E_Argument_Error; + end if; + + Fraction_Of_Cycle := Remainder (X, Cycle) / Cycle; + + if Are_Equal (Fraction_Of_Cycle, Zero) then + return Zero; + end if; + + if Are_Equal (Abs (Fraction_Of_Cycle), Half) then + return Zero; + end if; + + if Are_Equal (Fraction_Of_Cycle, Make_Extended(0.25)) then + return One; + end if; + + if Are_Equal (Fraction_Of_Cycle, Make_Extended(-0.25)) then + return -One; + end if; + + Result := Sin (Make_E_Digit(8.0) * (e_Quarter_Pi * Fraction_Of_Cycle)); + -- Pi/4 is used instead of Pi/2, because it contains more correct + -- binary digits. + + return Result; + + end Sin; + + --------- + -- Sin -- + --------- + + -- Sum Taylor series for Sin (X). + -- + -- Max argument is at present set by requirement: + -- Exponent(X) < Present_Precision-1 + + function Sin + (X : e_Real) + return e_Real + is + Half_Sqrt_Of_Radix : constant Real := 2.0**(Desired_No_Of_Bits_in_Radix/2-1); + Order : Real := 0.0; + Delta_Exponent : E_Integer := 0; + Next_Term, Sum : e_Real; + + X_Scaled_1, X_Scaled_2, X_Scaled_2_Squared : e_Real; + + Total_Digits_To_Use : E_Integer; + + N_e_Real, Half_N : e_Real; + + J : constant Integer := 8; + Three_To_The_J : constant E_Digit := Make_E_Digit (3.0**J); + + Factorial_Part : E_Digit; + + Sign_Of_Term_Is_Pos : Boolean := True; + Arg_Is_Negative : Boolean := False; + N_Is_Odd : Boolean := False; + + First_Stage_Scaling_Performed : Boolean := False; + Second_Stage_Scaling_Performed : Boolean := False; + begin + if Are_Equal (X, Zero) then + return Zero; + end if; + + if Are_Equal (X, Positive_Infinity) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Negative_Infinity) then + raise E_Argument_Error; + end if; + + Arg_Is_Negative := False; + if X < Zero then + Arg_Is_Negative := True; + end if; + + if Exponent(X) >= e_Real_Machine_Mantissa-1 then + raise E_Argument_Error; + end if; + -- Can't figure out N below. N_e_Real has to be + -- integer valued: 0, 1, ... 2**(Radix*e_Real_Machine_Mantissa) - 1 + -- This determines Max allowed Argument. + -- If Exponent(N_e_Real) is too large, then can't tell if N is even or odd. + + + -- STEP 1. First argument reduction: Get modulo Pi by Remainder(X, Pi). + -- Get X in range -Pi/2..Pi/2. + -- X_scaled_1 := X - N_e_Real * e_Pi; + + if Less_Than_Half_Pi < Abs(X) then + X_scaled_1 := Remainder (Abs(X), e_Pi); + N_e_Real := Unbiased_Rounding ((Abs(X)-X_scaled_1) * e_Inverse_Pi); + First_Stage_Scaling_Performed := True; + else + X_Scaled_1 := Abs(X); + N_e_Real := Zero; + First_Stage_Scaling_Performed := False; + end if; + + -- Need to know if N is even or odd. N is Positive. + -- If Exponent(N_e_Real) is too large, then we can't tell if N is even or + -- odd. So raise Arg error. This determines Max Arg. + + N_Is_Odd := False; + if not Are_Equal (N_e_Real, Zero) then + Half_N := Half_Digit * N_e_Real; + if Truncation (Half_N) < Half_N then + N_Is_Odd := True; + end if; + end if; + + -- STEP 2. Second stage of argument reduction. Divide by 3**5 = 243 + -- to get Arg less than about 0.01?. Call this 3**J in what follows. + -- Later we recursively use J repetitions of the formula + -- Sin(3*Theta) = Sin(Theta)*(3 - 4*Sin(Theta)**2), to get Sin(Theta*3**J) + -- Cos(3*Theta) = -Cos(Theta)*(3 - 4*Cos(Theta)**2), to get Cos(Theta*3**J) + -- to get Sin (Original_Arg). + -- + -- MUST avoid underflow to Zero in this step. So only if X_Scaled is big. + -- Actually, X_scaled = 0 may pass through, but its OK to start out 0. + + if Exponent (X_Scaled_1) >= -2 then -- it'll do + X_Scaled_2 := X_scaled_1 / Three_To_The_J; + Second_Stage_Scaling_Performed := True; + else + X_Scaled_2 := X_scaled_1; + Second_Stage_Scaling_Performed := False; + end if; + + -- STEP 3. Start the sum. Terms are labeled Order = 1, 2, 3 + -- but the series is X - X**3/3! + X**5/5! + ...+- X**(2*Order-1)/(2*Order-1)!. + -- Summed G(X) = Sin(X) - X, which contains more correct digits at + -- the end. We need these extra digits when we unscale the result. + + X_Scaled_2_Squared := X_scaled_2 * X_Scaled_2; + + Order := 2.0; + Sign_Of_Term_Is_Pos := False; + Next_Term := X_Scaled_2 * X_Scaled_2_Squared / Make_E_Digit (6.0); + Sum := -Next_Term; + -- Above we make the 1st term in G, and begin the sum. + + loop + + Sign_Of_Term_Is_Pos := not Sign_Of_Term_Is_Pos; + Order := Order + 1.0; + + -- Can't make Factorial part if, roughly, 2*Order-1.0 > Radix-1, + -- Because max argument of Make_E_Digit is Radix-1. + + if Order >= Radix / 2.0 then + raise E_Argument_Error with "Too many terms needed in Exp taylor sum."; + end if; + + -- Use relative Eponents of Sum and Next_Term to check convergence + -- of the sum. Exponent doesn't work for args of 0, so check. + -- Abs (Next_Term) <= Abs (Sum), so we need only check Next_Term. + + if Are_Equal (Next_Term, Zero) then exit; end if; + + Delta_Exponent := Exponent (Sum) - Exponent (Next_Term); + + Total_Digits_To_Use := e_Real_Machine_Mantissa - Delta_Exponent + 1; + + exit when Total_Digits_To_Use <= 0; + + if Order < Half_Sqrt_Of_Radix then + + Factorial_Part := Make_E_Digit ((2.0*Order-1.0)*(2.0*Order-2.0)); + Next_Term := (X_Scaled_2_Squared * Next_Term) / Factorial_Part; + + else + + Factorial_Part := Make_E_Digit ((2.0*Order-1.0)); + Next_Term := (X_Scaled_2_Squared * Next_Term) / Factorial_Part; + Factorial_Part := Make_E_Digit ((2.0*Order-2.0)); + Next_Term := Next_Term / Factorial_Part; + + end if; + + if Sign_Of_Term_Is_Pos then + Sum := Sum + Next_Term; + else + Sum := Sum - Next_Term; + end if; + + end loop; + + -- STEP 4. Scale the result iteratively. Recall we divided the arg by 3**J, + -- so we recursively use J repetitions of the formula + -- Sin(3*X) = Sin(X)*(3 - 4*Sin(X)**2), to get Sin(X*3**J). + -- Actually, we summed G(X) = Sin(X) - X. So the formula for G(X) is + -- G(3X) = S(3X) - 3X = S(X)*(3 - 4S(X)**2) - 3X, + -- = (G+X)*(3 - 4(G+X)**2) - 3X, + -- = 3G - 4(G+X)**3, (Cancel out the 3X), + -- G(3X) = 3G(X) - 4(G(X)+X)**3. + -- G(9X) = 3G(3X) - 4(G(3X)+3X)**3, etc. + -- Still requires only 2 (full) mults per loop, just like the original formula. + -- Notice below that we output X_scaled * 3**J, which is required next step. + + if Second_Stage_Scaling_Performed then + for I in 1..J loop + Sum := Three_Digit * Sum - Four_Digit * (Sum + X_scaled_2)**3; + X_scaled_2 := Three_Digit * X_scaled_2; + end loop; + end if; + + + -- STEP 5. We have Sin(X - N * Pi). Want Sin(X). If N is odd, then + -- flip sign of Sum. Next, flip sign again if the + -- original argument is neg: Arg_Is_Negative = True. + -- Remember, we summed for Sum = G = Sin - X, whether or not scaling + -- was performed. (X is called X_scaled, no matter what.) + -- So we recover Sin = G + X + + Sum := Sum + X_scaled_1; + + if First_Stage_Scaling_Performed then + if N_Is_Odd then + Sum := -Sum; + end if; + end if; + + if Arg_Is_Negative then + Sum := -Sum; + end if; + + return Sum; + + end Sin; + + --------------------------- + -- Cos (arbitrary cycle) -- + --------------------------- + + -- The exception E_Argument_Error is raised, signaling a parameter + -- value outside the domain of the corresponding mathematical function, + -- in the following cases: + -- by any forward or inverse trigonometric function with specified + -- cycle, when the value of the parameter Cycle is zero or negative; + -- The results of the Sin, Cos, Tan, and Cot functions with + -- specified cycle are exact when the mathematical result is zero; + -- those of the first two are also exact when the mathematical + -- result is +/-1.0. + -- + function Cos (X : e_Real; Cycle : e_Real) return e_Real is + Fraction_Of_Cycle, Result : e_Real; + begin + + -- The input parameter X is units of Cycle. For example X = Cycle + -- is same as X = 2Pi, so could use Cos (2 * Pi * X / Cycle), but we + -- want to apply the remainder function here to directly meet certain + -- requirements on returning exact results. + -- Recall: Remainder = X - Round (X/Cycle) * Cycle = X - N * Cycle + -- which is in the range -Cycle/2 .. Cycle/2. The formula will be + -- + -- Cos (X, Cycle) = Cos (2 * Pi * X / Cycle) + -- = Cos (2 * Pi * (X - N * Cycle) / Cycle) + -- = Cos (2 * Pi * Remainder(X,Cycle) / Cycle) + + if Are_Equal (Cycle, Zero) then + raise E_Argument_Error; + end if; + + if Cycle < Zero then + raise E_Argument_Error; + end if; + + -- Now get twice the fraction of the cycle, and handle special cases: + + Fraction_Of_Cycle := Remainder (X, Cycle) / Cycle; + + if Are_Equal (Fraction_Of_Cycle, Zero) then + return One; + end if; + + if Are_Equal (Abs (Fraction_Of_Cycle), Half) then + return -One; + end if; + + if Are_Equal (Abs (Fraction_Of_Cycle), Make_Extended(0.25)) then + return Zero; + end if; + + Result := Cos (Make_E_Digit(8.0) * (e_Quarter_Pi * Fraction_Of_Cycle)); + -- Use Pi/4 becase it contains more correct binary digits than Pi. + + return Result; + + end Cos; + + --------- + -- Cos -- + --------- + + -- Sum Taylor series for Cos (X). Actually sum series for G = Cos(X) - 1. + -- Reason is, G contains more correct digits than Cos, which is + -- required when we undo effects of argument reduction. + -- Max argument is at present + -- set by requirement: Exponent(X) < Present_Precision-1 + -- + function Cos + (X : e_Real) + return e_Real + is + Half_Sqrt_Of_Radix : constant Real := 2.0**(Desired_No_Of_Bits_in_Radix/2-1); + Order : Real := 0.0; + Delta_Exponent : E_Integer := 0; + Next_Term, Sum, Sum_2, Sum_3 : e_Real; + + X_Scaled_1, X_Scaled_Squared : e_Real; + + Total_Digits_To_Use : E_Integer; + + N_e_Real, Half_N : e_Real; + + J : constant Integer := 8; + Three_To_The_J : constant E_Digit := Make_E_Digit (3.0**J); + + Factorial_Part : E_Digit; + + Sign_Of_Term_Is_Pos : Boolean := True; + N_Is_Odd : Boolean := False; + + First_Stage_Scaling_Performed : Boolean := False; + Second_Stage_Scaling_Performed : Boolean := False; + begin + if Are_Equal (X, Zero) then + return One; + end if; + + if Are_Equal (X, Positive_Infinity) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Negative_Infinity) then + raise E_Argument_Error; + end if; + + if Exponent(X) >= e_Real_Machine_Mantissa-1 then + raise E_Argument_Error; + end if; + -- Can't figure out N below. N_e_Real has to be + -- integer valued: 0, 1, ... 2**(Radix*e_Real_Machine_Mantissa) - 1 + -- This determines Max allowed Argument. + + + -- STEP 1. First stage argument reduction. + -- Take X modulo Pi by Remainder(X, Pi). Get X in range -Pi/2..Pi/2. + -- X_scaled_1 := X - N_e_Real * e_Pi; + + if Less_Than_Half_Pi < Abs(X) then + X_scaled_1 := Remainder (Abs(X), e_Pi); + N_e_Real := Unbiased_Rounding ((Abs(X)-X_scaled_1) * e_Inverse_Pi); + First_Stage_Scaling_Performed := True; + else + X_Scaled_1 := Abs(X); + N_e_Real := Zero; + First_Stage_Scaling_Performed := False; + end if; + + -- Need to know if N is even or odd. N is Positive. + + N_Is_Odd := False; + if not Are_Equal (N_e_Real, Zero) then + Half_N := Half_Digit * N_e_Real; + if Truncation (Half_N) < Half_N then + N_Is_Odd := True; + end if; + end if; + + -- STEP 1b. If X_Scaled is nearing Pi/2 then its too big for Taylor's. + -- Must call Sin (Pi/2 - X_Scaled). IMPORTANT: only do this for + -- X_scaled > Pi/6, because Arcsin(X => 0.5) = Pi/6 is used to calculate + -- Pi, and would get infinite recursive calls when Arcsin calls Cos + -- which calls Sin, while Sin and Cos call e_Quarter_Pi, which + -- calls Arcsin. e_Quarter_Pi is used instead of the less accurate Pi/2. + + if One < X_Scaled_1 then + + Sum := Sin ((e_Quarter_Pi - X_Scaled_1) + e_Quarter_Pi); + if N_Is_Odd then + Sum := -Sum; + end if; + return Sum; + + end if; + + -- STEP 2. Second stage of argument reduction. Divide by 3**8 = 81*81 + -- to get argument less than about .02. Call this 3**J in what follows. + -- Later we recursively use J repetitions of the formula + -- Sin(3*Theta) = Sin(Theta)*(3 - 4*Sin(Theta)**2), to get Sin(Theta*3**J) + -- Cos(3*Theta) = -Cos(Theta)*(3 - 4*Cos(Theta)**2), to get Cos(Theta*3**J). + -- + -- Its OK if X_scaled is 0 at this point, but not if the following + -- forces it to underflow to 0. Therefore only scale large args: + + if Exponent (X_Scaled_1) >= -2 then -- it'll do + X_Scaled_1 := X_scaled_1 / Three_To_The_J; + Second_Stage_Scaling_Performed := True; + else + Second_Stage_Scaling_Performed := False; + end if; + + + -- STEP 3. Start the sum. Terms are labeled Order = 0, 1, 2, 3 + -- but the series is 1 - X**2/2! + X**4/4! + ...+- X**(2*Order)/(2*Order)!. + -- Below we actually calculate Cos(X) - 1. + -- Start summing the series at order 1 instead of order 0. + + Order := 1.0; + X_Scaled_Squared := X_scaled_1 * X_Scaled_1; + Next_Term := Half_Digit * X_Scaled_Squared; + Sum := -Next_Term; + Sign_Of_Term_Is_Pos := False; + + loop + + Sign_Of_Term_Is_Pos := not Sign_Of_Term_Is_Pos; + Order := Order + 1.0; + + -- Can't make Factorial part if, roughly, 2*Order > Radix-1. + + if Order >= (Radix-1.0) / 2.0 then + raise E_Argument_Error with "Too many terms needed in Exp taylor sum."; + end if; + + -- Use relative Eponents of Sum and Next_Term to check convergence + -- of the sum. Exponent doesn't work for args of 0, so check. + -- Abs (Next_Term) <= Abs (Sum), so we need only check Next_Term. + -- If Next_Term is 0, we are finished anyway. + + if Are_Equal (Next_Term, Zero) then exit; end if; + + Delta_Exponent := Exponent (Sum) - Exponent (Next_Term); + Total_Digits_To_Use := e_Real_Machine_Mantissa - Delta_Exponent + 1; + exit when Total_Digits_To_Use <= 0; + + if Order < Half_Sqrt_Of_Radix then + + Factorial_Part := Make_E_Digit ((2.0*Order)*(2.0*Order-1.0)); + Next_Term := (X_Scaled_Squared * Next_Term) / Factorial_Part; + + else -- Do it the slow way. (Should rarely happen.) + + Factorial_Part := Make_E_Digit (2.0*Order); + Next_Term := (X_Scaled_Squared * Next_Term) / Factorial_Part; + Factorial_Part := Make_E_Digit (2.0*Order-1.0); + Next_Term := Next_Term / Factorial_Part; + + end if; + + if Sign_Of_Term_Is_Pos then + Sum := Sum + Next_Term; + else + Sum := Sum - Next_Term; + end if; + + end loop; + + -- STEP 4. Scale the result iteratively. Recall we got Cos(Arg/3**J). Now + -- we want Cos(Arg). So we use J repetitions of the formula + -- Cos(3*Theta) = -Cos(Theta)*(3 - 4*Cos(Theta)**2), to get Cos(Theta*3**J). + -- Recall we summed for Cos(X) - 1, because we retain more correct digits + -- this way for small X. (The 1 would have shifted correct digits off the + -- array.) So we actually have is Sum = G(X) = Cos(X) - 1. So the formula + -- for Cos(3X) is (1+G)*(4(G+1)**2 - 3) = (1+G)*(1 + 8G + 4G**2). Then + -- G(3X) = Cos(3X) - 1 = 9G + 12G*2 + 4G**3. Next, unscale G: + + if Second_Stage_Scaling_Performed then + for I in 1..J loop + --Sum := Sum * (Four_Digit * Sum * Sum - Three); + Sum_2 := Sum*Sum; + Sum_3 := Sum*Sum_2; + Sum := Nine_Digit * Sum + Twelve_Digit * Sum_2 + Four_Digit * Sum_3; + end loop; + end if; + + -- STEP 5. We have Cos(X - N * Pi). Want Cos(X). If N is odd, then + -- flip sign of Sum. First remember we summed for G = Cos - 1, whether or + -- not scaling was performed. Must recover Cos next: + + Sum := Sum + One; -- Get Cos(X) = G(X) + 1 = Sum + 1. + + if First_Stage_Scaling_Performed then + if N_Is_Odd then + Sum := -Sum; + end if; + end if; + + return Sum; + + end Cos; + + ------------------------- + -- Reciprocal_Nth_Root -- + ------------------------- + + -- Uses Newton's method to get A**(-1/N): + -- Y_k+1 = Y_k + (1 - A * Y_k**N) * Y_k / N. + -- Requires call to log(A) and assumes that this call gets + -- the first two radix digits correct: 48 bits usually. (It almost + -- always gets more correct.) + -- REMEMBER, this calculates to the max possible precision; + -- Does not reflect dynamic precision floating point. + -- N must be less than Radix - 1, which is usually 2**24 - 1. + + function Reciprocal_Nth_Root + (X : e_Real; + N : Positive) + return e_Real is + Exponent_Of_X, Scaled_Exp_Of_X, Exp_Mod_N : E_Integer; + Result, X_Fraction, Scaled_X_Fraction, Y_0 : e_Real; + + Shift_Sign, Ratio : Real := 0.0; + Log_Of_Scaled_X_Fraction, Real_X_Fraction : Real := 0.0; + + No_Correct_Bits : E_Integer; + E_Digit_N, Inverse_E_Digit_N : E_Digit; -- already initialized + + Optimization_Is_Possible : Boolean := False; + begin + if Are_Equal (X, Zero) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Positive_Infinity) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Negative_Infinity) then + raise E_Argument_Error; + end if; + + if Real(N) > (Radix-1.0) then + raise E_Argument_Error; + end if; + + -- STEP 0b. An optimization. If N is a power of two, we can speed + -- up the calculation by multiplying by 1/N rather than dividing by + -- N in the newton iteration. + + Optimization_Is_Possible := False; + for I in 0..Desired_No_Of_Bits_In_Radix-2 loop + if 2**I = N then + Optimization_Is_Possible := True; + end if; + end loop; + + if Optimization_Is_Possible then + Inverse_E_Digit_N := Make_E_Digit (1.0 / Real(N)); + else + E_Digit_N := Make_E_Digit (Real(N)); + end if; + + -- STEP 1. Argument reduction. Break X into Fraction and Exponent. + -- Choose to decrement Abs(Exponent) by (Exponent MOD N), + -- and multiply fraction by Radix ** (Exponent MOD N). + -- The reason is of course, we want to divide decremented Exponent by N, + -- so we want Scaled_Exp_Of_X to be an integral multiple of -N. + + Exponent_Of_X := Exponent (X); -- What if X is 0, inf, etc??? + X_Fraction := Fraction (X); + + Exp_Mod_N := Abs (Exponent_Of_X) MOD E_Integer(N); -- N never < 1. + + -- Make sure that Scaled_Exp_Of_X is in range of e_Real, and also + -- make sure that it is scaled to an integral multiple of N. + if Exponent_Of_X < 0 then + Scaled_Exp_Of_X := Exponent_Of_X + Exp_Mod_N; + Shift_Sign := +1.0; + else + Scaled_Exp_Of_X := Exponent_Of_X - Exp_Mod_N; + Shift_Sign := -1.0; + end if; + + -- Scale the fraction to compensate for the above shift in the Exponent: + if Exponent_Of_X < 0 then + Scaled_X_Fraction := Scaling (X_Fraction, - Exp_Mod_N); + else + Scaled_X_Fraction := Scaling (X_Fraction, + Exp_Mod_N); + end if; + + -- STEP 2. Get starting value for Newton's iteration. + -- Want Real number estimate of Scaled_X_Fraction**(-1/N). + -- Get the first 2 radix digits correct (48 bits usually), and call it Y_0. + -- Must worry about exponents too large for type Real in the value + -- Scaled_X_Fraction prior to the **(-1/N) operation, so we do it indirectly. + -- Arg ** (-1/N): use exp (log (Arg**(-1/N))) = exp ( (-1/N)*log (Arg) ). + -- Need estimate: [ X_Fraction * Radix**(-Shift_Sign * Exp_Mod_N) ]**(-1/N). + -- First want natural Log(X_Fraction * Radix**(-Shift_Sign * Exp_Mod_N)) + -- which equals Log(X_Fraction) - Shift_Sign * Log(Radix) * Exp_Mod_N. + -- Next divide this quantity by -N, and take exp(): + -- Exp (-Log (X_Fraction) / N + Shift_Sign * Log(Radix) * Exp_Mod_N / N). + -- This is the estimate we want, and because Exp_Mod_N / N is always + -- < 1.0, the arguments should be well within range of Log and Exp, + -- because Exp (Shift_Sign * Log(Radix) * Exp_Mod_N / N) is less than Radix. + + Real_X_Fraction := Make_Real (X_Fraction); + Ratio := Real (Exp_Mod_N) / Real(N); + Log_Of_Scaled_X_Fraction + := -Log (Real_X_Fraction) / Real(N) + Shift_Sign * Ratio * Log (Radix); + Y_0 := Make_Extended (Exp (Log_Of_Scaled_X_Fraction)); + -- Starting val in Newton's iteration. + + + -- STEP 3. Start the iteration. Calculate the number of iterations + -- required as follows. num correct digits doubles each iteration. + -- 1st iteration gives 4 digits, etc. Each step set desired precision + -- to one digit more than that we expect from the Iteration. + -- + -- It is important to remember that e_Real_Machine_Mantissa includes + -- the 1 or 2 guard digits. The last of these may have a lot of error in + -- the end, the first of these may have some error. That's why they're + -- there. Also remember that when Set_No_Of_Digits_To_Use is + -- called, the precision it sets includes the 2 guard digits, both + -- of which may be wrong, so we add 2 to the setting below, just + -- in case. + + No_Correct_Bits := Real'Machine_Mantissa - 2; + + loop + + if Optimization_Is_Possible then -- multiply by inverse of N: + + Y_0 := Y_0 + Inverse_E_Digit_N * ((One - Scaled_X_Fraction * Y_0**N) * Y_0); + + else -- divide by N: + + Y_0 := Y_0 + (One - Scaled_X_Fraction * Y_0**N) * Y_0 / E_Digit_N; + + end if; + + No_Correct_Bits := No_Correct_Bits * 2; + exit when No_Correct_Bits > Max_Available_Bits; + + end loop; + + Result := Scaling (Y_0, (-Scaled_Exp_Of_X) / E_Integer (N)); + -- Product of Y_0 = Scaled_X_Fraction**(-1/N) with + -- Radix**(-Scaled_Exponent(X) / N) equals X**(-1/N). + + return Result; + + end Reciprocal_Nth_Root; + + ------------ + -- Divide -- + ------------ + + -- Uses Newton's method: Y_k+1 = Y_k + (1 - A*Y_k) * Y_k to get Z / A. + -- Requires call to 1 / Make_Real(A). + + function Divide (Z, X : e_Real) + return e_Real + is + Exponent_Of_X : E_Integer; + Result, X_Fraction, Y_0 : e_Real; + No_Correct_Bits : E_Integer; + begin + + if Are_Equal (X, Zero) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Positive_Infinity) then -- like underflow + return Zero; + end if; + + if Are_Equal (X, Negative_Infinity) then -- like underflow + return Zero; + end if; + + -- Argument reduction. Break X into Fraction and Exponent. + -- Iterate to get inverse of fraction. Negate to get inverse of Exp. + + Exponent_Of_X := Exponent (X); + X_Fraction := Fraction (X); + + -- Get the first 2 radix digits correct (48 bits usually). Remember that the + -- Newton's iteration here produces 1/(X_Fraction). The result will be + -- the product of the newton's iteration and Radix to the power Exp_Scale_Val. + + Y_0 := Make_Extended (1.0 / Make_Real (X_Fraction)); + -- Starting val in Newton's iteration. + + No_Correct_Bits := Real'Machine_Mantissa - 2; + + -- Iterate: + + loop + + --Y_0:= Y_0 *(Two_Digit + (-X_Fraction) * Y_0); -- faster, much less accurate + Mult (Y_0, (Two - X_Fraction * Y_0)); -- faster, much less accurate + + No_Correct_Bits := No_Correct_Bits * 2; + exit when No_Correct_Bits >= Max_Available_Bits / 2 + 1; + -- final correction is outside the loop. + + end loop; + + Result := Z * Y_0; -- Z / X + + Result := Result + (Z - X_Fraction * Result) * Y_0; --bst so far + -- Y_0 := Y_0 + (One - X_Fraction * Y_0) * Y_0; + -- The iteration for Y_0 is the final step for 1/X. Multiplied by Z to get Result. + + Result := Scaling (Result, -Exponent_Of_X); + -- Product of 1/Fraction(X) with Radix**(-Exponent(X)) equals 1/X. + + return Result; + + end Divide; + + ---------------- + -- Reciprocal -- + ---------------- + + -- Uses Newton's method: Y_k+1 = Y_k + (1 - A*Y_k) * Y_k to get 1 / A. + + function Reciprocal (X : e_Real) + return e_Real + is + Exponent_Of_X : E_Integer; + Result, X_Fraction, Y_0 : e_Real; + No_Correct_Bits : E_Integer; + begin + + if Are_Equal (X, Zero) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Positive_Infinity) then -- like underflow + return Zero; + end if; + + if Are_Equal (X, Negative_Infinity) then -- like underflow + return Zero; + end if; + + -- Argument reduction. Break X into Fraction and Exponent. + -- Iterate to get inverse of fraction. Negate to get inverse of Exp. + + Exponent_Of_X := Exponent (X); + X_Fraction := Fraction (X); + + -- Newton's iteration here produces 1/(X_Fraction). Final result will be + -- the product of the newton's iteration and Radix to the power Exp_Scale_Val. + + Y_0 := Make_Extended (1.0 / Make_Real (X_Fraction)); + -- Starting val in Newton's iteration. + + No_Correct_Bits := Real'Machine_Mantissa - 2; + + -- Iterate: + + loop + + --Y_0:= Y_0 *(Two_Digit + (-X_Fraction) * Y_0); -- faster, much less accurate + Mult (Y_0, (Two - X_Fraction * Y_0)); -- faster, much less accurate + + No_Correct_Bits := No_Correct_Bits * 2; + exit when No_Correct_Bits > Max_Available_Bits / 2 + 1; + -- final correction is below. + + end loop; + + Y_0 := Y_0 + (One - X_Fraction * Y_0) * Y_0; -- accurate final step. + + Result := Scaling (Y_0, -Exponent_Of_X); + -- Product of 1/Fraction(X) with Radix**(-Exponent(X)) equals 1/X. + + return Result; + + end Reciprocal; + + --------------------- + -- Reciprocal_Sqrt -- + --------------------- + + -- Uses Newton's method: Y_k+1 = Y_k + (1 - A*Y_k**2) * Y_k / 2 + -- to get 1 / sqrt(A). Multiply by A to get desired result; then refine. + + function Reciprocal_Sqrt (X : e_Real) + return e_Real + is + Result : e_Real; + X_scaled, Y_0 : e_Real; + Exp_Scale_Val : E_Integer; + No_Correct_Bits : E_Integer; + begin + + if X < Zero then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Positive_Infinity) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Negative_Infinity) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Zero) then + raise E_Argument_Error; + end if; + + + -- Break X into Fraction and Exponent. If Exponent is + -- odd then add or subtract 1. (Increase X if X < 1, decrease otherwise.) + -- Only important thing is scale X + -- down to somewhere near 1, and to scale X by an even power of Radix. + -- We break X up because the Newton's method works better on X_scaled than + -- than on X in general. Also we use Sqrt(Make_Real(X_scaled)) to start + -- things off for Newton's method, so we want X_scaled in range of Sqrt(Real). + + Exp_Scale_Val := Exponent (X); -- what if X is 0, inf, etc..??? + + -- Exp is odd. Make it even, but keep things in range of e_Real: + + if Abs (Exp_Scale_Val) mod 2 /= 0 then + if Exp_Scale_Val < 0 then + Exp_Scale_Val := Exp_Scale_Val + 1; + else + Exp_Scale_Val := Exp_Scale_Val - 1; + end if; + end if; + + X_scaled := Scaling (X, -Exp_Scale_Val); + + + -- Take Sqrt by dividing the even Exp_Scale_Val by 2, and by taking + -- the SQRT of X_scaled. Start the iteration off with a call to SQRT + -- in the standard library for type Real. + + Exp_Scale_Val := Exp_Scale_Val / 2; + Y_0 := Make_Extended (Sqrt (1.0 / Make_Real (X_scaled))); + -- Starting val in Newton's iteration. + + No_Correct_Bits := Real'Machine_Mantissa - 2; + + loop + + --Y_0:= Y_0 + Half_Digit * ((One - X_scaled * (Y_0 * Y_0)) * Y_0); + --Y_0:= Y_0*(Half_Digit * (Three - X_scaled * (Y_0 * Y_0))); --inaccurate + Mult (Y_0, Half_Digit * (Three - X_scaled * (Y_0 * Y_0))); + + No_Correct_Bits := No_Correct_Bits * 2; + exit when No_Correct_Bits > Max_Available_Bits / 2 + 1; + -- final correction is below. + + end loop; + + -- both work: + --Y_0 := Y_0 + Half_Digit * ((One - X_scaled * (Y_0 * Y_0)) * Y_0); + Y_0 := Y_0 - Half_Digit * (X_scaled * (Y_0 * Y_0) - One) * Y_0; + Result := Scaling (Y_0, -Exp_Scale_Val); + + return Result; + + end Reciprocal_Sqrt; + + ---------- + -- Sqrt -- + ---------- + + -- Uses Newton's method: Y_k+1 = Y_k + (1 - A*Y_k**2) * Y_k / 2 + -- to get 1 / sqrt(A). Multiply by A to get desired result; then refine. + -- Requires call to Sqrt(Real) and assumes that this call gets + -- the first radix digit correct. (It almost + -- always gets 53 bits correct.) + + function Sqrt (X : e_Real) + return e_Real + is + Result, X_scaled, Y_0 : e_Real; + Exp_Scale_Val : E_Integer; + No_Correct_Bits : E_Integer; + begin + + if X < Zero then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Positive_Infinity) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Negative_Infinity) then + raise E_Argument_Error; + end if; + + if Are_Equal (X, Zero) then + return Zero; + end if; + + --if Are_Equal (X, One) then -- Ada9X. + --return One; + --end if; + + -- Break X into Fraction and Exponent. If Exponent is + -- odd then add or subtract 1. (Increase X if X < 1, decrease otherwise.) + -- Only important thing is scale X + -- down to somewhere near 1, and to scale X by an even power of Radix. + -- We break X up because the Newton's method works better on X_scaled than + -- than on X in general. Also we use Sqrt(Make_Real(X_scaled)) to start + -- things off for Newton's method, so we want X_scaled in range of Sqrt(Real). + + Exp_Scale_Val := Exponent (X); -- what if X is 0, inf, etc..??? + -- This Exp is powers of Radix = 2**30 or 2**29. + + -- Exp is odd. Make it even, but keep things in range of e_Real: + + if Abs (Exp_Scale_Val) mod 2 /= 0 then + if Exp_Scale_Val < 0 then + Exp_Scale_Val := Exp_Scale_Val + 1; + else + Exp_Scale_Val := Exp_Scale_Val - 1; + end if; + end if; + + X_scaled := Scaling (X, -Exp_Scale_Val); + + Exp_Scale_Val := Exp_Scale_Val / 2; + Y_0 := Make_Extended (Sqrt (1.0 / Make_Real (X_scaled))); + -- Starting val in Newton's iteration. + + No_Correct_Bits := Real'Machine_Mantissa - 2; + + loop + + --Y_0:= Y_0 + Half_Digit * ((One - X_scaled * Y_0 * Y_0) * Y_0); + --Y_0:= Y_0*(Half_Digit * (Three - X_scaled * Y_0 * Y_0)); --inaccurate + Mult (Y_0, Half_Digit * (Three - X_scaled * Y_0 * Y_0)); + + No_Correct_Bits := No_Correct_Bits * 2; + exit when No_Correct_Bits > Max_Available_Bits / 2 + 1; + -- Have the final correction outside the loop. + + end loop; + + Result := Y_0 * X_scaled; -- now its SQRT(X_scaled); Y_0 = 1/SQRT(X_scaled) + + Result := Result + Half_Digit * (X_scaled - Result*Result) * Y_0; --important + + Result := Scaling (Result, Exp_Scale_Val); -- equals SQRT(X). + + return Result; + + end Sqrt; + + ------------- + -- Make_Pi -- + ------------- + + -- This is an important independent test of Arcsin (hence Sin and Arcos). + -- Once we verify that Arcsin (hence Sin) is correct, can test other + -- arguments with (eg) Sin (A + B) = etc. + -- Has much greater error than 4*Arcsin(1/Sqrt(2)). + -- Need Pi to full precision for the trigonometic functions. + -- Here is the Salamin-Brent algorithm. + -- A_0 = 1.0, B_0 = 1.0/Sqrt(2.0), and D_0 = Sqrt(2.0) - 0.5. + -- + -- A_k = (A_{k-1} + B_{k-1}) / 2 + -- B_k = Sqrt (A_{k-1} * B_{k-1}) + -- D_k = D_{k-1} - 2**k * (A_k - B_k)**2 + -- + -- Then P_k = (A_k + B_k)**2 / D_k converges quadratically to + -- Pi. All steps must be done at full precision. + -- + -- function Make_Pi return e_Real is + -- A_0, B_0, D_0 : e_Real; + -- A_1, B_1, D_1 : e_Real; + -- C, Result : e_Real; + -- Two_To_The_k : E_Digit; + -- Two_To_The_20 : constant E_Digit := Make_E_Digit (2.0**20); + -- We_Are_Finished : Boolean := False; + -- No_Of_Correct_Digits : E_Integer; + -- Old_Precision : constant E_Integer := Present_Precision; + -- begin + -- + -- A_0 := One; + -- B_0 := E_Inverse_Sqrt_2; + -- D_0 := Two * E_Inverse_Sqrt_2 - Half; + -- -- this give Pi_0 = 3.1877, or error of about 1.47 %. This is smaller + -- -- 1 part in 64, so 6 bits correct. There follows (k=1) 12, (k=2) 24. + -- -- So requires two more iterations to get one digit, 3 to get 2 + -- -- digits. Seems to work better than this estimate. + -- + -- No_Of_Correct_Digits := 1; + -- + -- -- The following loop should get us up to half a million digits. In + -- -- the unlikely case you need more, then another loop follows. + -- -- k in 1..7 gives you 33 Radix 2**24 digits. + -- + -- for k in 1..20 loop + -- + -- Two_To_The_k := Make_E_Digit (2.0**k); + -- + -- A_1 := Half_Digit * (A_0 + B_0); + -- B_1 := Sqrt (A_0 * B_0); + -- C := (A_1 - B_1); + -- D_1 := D_0 - Two_To_The_k * (C * C); + -- + -- if k >= 3 then + -- -- We did 3rd iteration to get 2 correct digits. + -- -- No_Correct.. was initialized to 1. + -- No_Of_Correct_Digits := No_Of_Correct_Digits * 2; + -- end if; + -- -- Should be OK overflow-wise here. Range of E_Integer is 4 times + -- -- the limit set by Max_Available_Precision. + -- + -- if No_Of_Correct_Digits > e_Real_Machine_Mantissa then + -- We_Are_Finished := True; + -- exit; + -- end if; + -- + -- A_0 := A_1; B_0 := B_1; D_0 := D_1; + -- + -- + -- end loop; + -- + -- -- We want to optimize the calculation of D_1 above by multiplying + -- -- by an E_Digit on the left (Two_To_The_k) instead of an e_Real. + -- -- Stop doing this at Two_To_The_k = 2**20 to stay in range of E_Digit. + -- -- Below we finish up if necessary by multiplying twice..still much + -- -- more efficient than e_Real*e_Real. + -- + -- if not We_Are_Finished then -- keep trying + -- for k in 21..40 loop + -- + -- Two_To_The_k := Make_E_Digit (2.0**(k-20)); + -- + -- A_1 := Half_Digit * (A_0 + B_0); + -- B_1 := Sqrt (A_0 * B_0); + -- C := (A_1 - B_1); + -- D_1 := D_0 - Two_To_The_k * (Two_To_The_20 * (C * C)); + -- + -- No_Of_Correct_Digits := No_Of_Correct_Digits * 2; + -- exit when No_Of_Correct_Digits > e_Real_Machine_Mantissa; + -- + -- A_0 := A_1; B_0 := B_1; D_0 := D_1; + -- + -- end loop; + -- end if; + -- + -- C := (A_1 + B_1); + -- Result := C * C / D_1; + -- + -- Set_No_Of_Digits_To_Use (Old_Precision); -- Restore precision. + -- + -- return Result; + -- + -- end Make_Pi; + + ---------------- + -- Make_Log_2 -- + ---------------- + + -- Important independent test of Log(X). Verify that Log(X) is correct + -- at X = 2, and use (eg) Log(XY) = Log(X) + Log(Y) to test other vals. + -- This is for testing other routines: Has greater error than Log(X). + -- Log_2, hopefully, for testing purposes. + -- A_0 = 1.0, B_0 = Two**(2-M); + -- + -- A_k = (A_{k-1} + B_{k-1}) / 2 + -- B_k = Sqrt (A_{k-1} * B_{k-1}) + -- + -- Then Log(2) = Pi / (2 * B_k * m) ??? + -- + -- Here M = N/2+1 where N = 29*e_Real_Machine_Mantissa = number of bits desired. + -- + -- function Make_Log_2 return e_Real is + -- A_0, B_0 : e_Real; + -- A_1, B_1 : e_Real; + -- Result : e_Real; + -- We_Are_Finished : Boolean := False; + -- No_Of_Correct_Digits : E_Integer; + -- N : Integer := 24 * Integer(e_Real_Machine_Mantissa); -- Upper estimate. + -- M : Integer := N/2 + 24; -- Need only N/2 + 1 + -- begin + -- + -- A_0 := One; + -- B_0 := Two**(2-M); -- clean this up with the scaling ftcn. + -- + -- No_Of_Correct_Digits := 1; + -- + -- -- The following loop should get us up to half a million digits. In + -- -- the unlikely case you need more, then another loop follows. + -- + -- for k in 1..16 loop -- I suspect far fewer than 20 iterations required. + -- + -- A_1 := Half_Digit * (A_0 + B_0); + -- B_1 := Sqrt (A_0 * B_0); + -- + -- A_0 := A_1; B_0 := B_1; + -- + -- end loop; + -- + -- Result := Half_Digit * e_Pi / (B_1 * Make_Extended(Real(M))); + -- + -- Set_No_Of_Digits_To_Use (Old_Precision); -- Restore precision. + -- + -- return Result; + -- + -- end Make_Log_2; + + ---------- + -- e_Pi -- + ---------- + + -- Returns Pi to Max Available Precision. Use Arcsin, cause it has + -- much lower error than Make_Pi. + -- Used for scaling trig functions, etc. + + function e_Pi return e_Real is + begin + + if not Pi_memory.Initialized then + --Pi_memory.Val := Make_Pi; + Pi_memory.Val := Four_Digit * e_Quarter_Pi; + -- Only works because arg is so small no scaling by E_pi is done. + + Pi_memory.Initialized := True; + + end if; + + return Pi_memory.Val; + + end e_Pi; + + ------------------ + -- e_Inverse_Pi -- + ------------------ + + -- Returns Pi to Max Available Precision. Use Arcsin, cause it has + -- lower error than Make_Pi. + -- Used for scaling trig functions, etc. + + function e_Inverse_Pi return e_Real is + begin + + if not Inverse_Pi_memory.Initialized then + Inverse_Pi_memory.Val := (+0.25) / e_Quarter_Pi; + Inverse_Pi_memory.Initialized := True; + end if; + + return Inverse_Pi_memory.Val; + + end e_Inverse_Pi; + + ------------------ + -- e_Quarter_Pi -- + ------------------ + + -- Returns Pi/4 to Max Available Precision. + -- Used for scaling trig functions, etc. + + function e_Quarter_Pi return e_Real is + begin + + if not Quarter_Pi_memory.Initialized then + Quarter_Pi_memory.Val := (+1.5) * Arcsin (Half); + Quarter_Pi_memory.Initialized := True; + end if; + + return Quarter_Pi_memory.Val; + + end e_Quarter_Pi; + + ---------------------- + -- e_Inverse_Sqrt_2 -- + ---------------------- + + -- Returns 1/Sqrt(2.0) to Max Available Precision. + -- Used for making Pi. + + function e_Inverse_Sqrt_2 return e_Real is + begin + + if not Inverse_Sqrt_2_memory.Initialized then + Inverse_Sqrt_2_memory.Val := Reciprocal_Nth_Root (Two, 2); + Inverse_Sqrt_2_memory.Initialized := True; + end if; + + return Inverse_Sqrt_2_memory.Val; + + end e_Inverse_Sqrt_2; + + -------------------------- + -- e_Half_Inverse_Log_2 -- + -------------------------- + + -- Returns Exp(1.0) to Max Available Precision. + -- Used for scaling arguments of Exp. + + function e_Half_Inverse_Log_2 return e_Real is + begin + + if not Half_Inverse_Log_2_memory.Initialized then + Half_Inverse_Log_2_memory.Val := Half / E_Log_2; + Half_Inverse_Log_2_memory.Initialized := True; + end if; + + return Half_Inverse_Log_2_memory.Val; + + end e_Half_Inverse_Log_2; + + -------------- + -- e_Log_2 -- + -------------- + + -- Returns Log(2.0) to Max Available Precision. + -- Used for scaling Exp(X). + + function e_Log_2 return e_Real is + begin + + if not Log_2_memory.Initialized then + Log_2_memory.Val := Log (Two); + Log_2_memory.Initialized := True; + end if; + + return Log_2_memory.Val; + + end e_Log_2; + +end Extended_Real.Elementary_Functions; + diff --git a/arbitrary/extended_real-elementary_functions.ads b/arbitrary/extended_real-elementary_functions.ads new file mode 100644 index 0000000..53db2d4 --- /dev/null +++ b/arbitrary/extended_real-elementary_functions.ads @@ -0,0 +1,97 @@ + +-- PACKAGE Extended_Real.Elementary_Functions +-- +-- Taylor series are used to get Exp, Sin and Cos. Once these are calculated, +-- Newton-Raphson iteration gives inverse functions: Log, Arccos, and Arcsin, +-- Arctan. Similarly, starting with the function G(Y) = Y**(-N), +-- Newton-Raphson iteration gives the inverse: F(X) = X**(-1/N), +-- the reciprocal of the Nth root of X. Newton-Raphson is used directly to get +-- Sqrt, Inverse, and Inverse_Nth_root. Inverse(X) is the reciprocal of +-- of X. Its usually faster than the "/" function, (One / X). +-- +-- Newton-Raphson iteration is used to calculate Y = F(a) when F's inverse +-- function G(Y) is known. (G satisfies G(F(a)) = a.) Say we want Y = F(a), +-- We can't calculate F, but given a Y we can calculate G(Y) and we know a. Then +-- use Newton-Raphson to solve for Y in equation G(Y) = a. The iteration is +-- +-- dG/dY(Y_0) = (G(Y_0) - a) / (Y_0 - Y_1), or, +-- +-- Y_1 = Y_0 - (G(Y_0) - a) / dG/dY(Y_0). +-- +-- For example, if want F(a) = Log(a), and we can get G(Y) = Exp(Y), then we +-- iterate for Y = Log(a) using: +-- +-- Y_1 = Y_0 - (Exp(Y_0) - a) / Exp(Y_0). +-- +-- Similarly, if we want F(a) = a**(-1/N) and we know G(Y) = Y**(-N), then: +-- +-- Y_1 = Y_0 - (Y_0**(-N) - a) / (-N*Y**(-N-1), +-- +-- = Y_0 - (1 - a*Y_0**N) * Y / (-N). +-- +-- Argument reduction is necessary in most of the routines. Some of the +-- arg reduction ideas come from Brent, D M Smith, D H Bailey. +-- + +generic + + -- Functions for type Real. Must be correct to 48 bits. + -- You should never have to enter these parameters, as long as + -- they are visible at instantiation. Log is natural. + + with function Sqrt (X : Real) return Real is <>; + with function Log (X : Real) return Real is <>; -- Natural log. (ln) + with function Exp (X : Real) return Real is <>; -- inverse of log + with function Arcsin (X : Real) return Real is <>; -- inverse of sin + with function Arctan (Y : Real; X : Real := 1.0) return Real is <>; + +package Extended_Real.Elementary_Functions is + + function Sqrt (X : e_Real) return e_Real; + function Exp (X : e_Real) return e_Real; + function Log (X : e_Real) return e_Real; + function Log (X : e_Real; Base : e_Real) return e_Real; + function Sin (X : e_Real) return e_Real; + function Sin (X : e_Real; Cycle : e_Real) return e_Real; + function Cos (X : e_Real) return e_Real; + function Cos (X : e_Real; Cycle : e_Real) return e_Real; + function Arcsin (X : e_Real) return e_Real; + function Arccos (X : e_Real) return e_Real; + function "**" (Left : e_Real; Right : e_Real) return e_Real; + + function Arctan (X : e_Real) return e_Real; + -- Output is in range [-Pi/2 .. Pi/2] only. Arctan (Infinity) = Pi/2. + + function Reciprocal (X : e_Real) return e_Real; + -- Newton-Raphson inversion. Usually faster than One / X. + + function Divide (Z, X : e_Real) return e_Real; + -- Newton-Raphson Z / X. + + + function Reciprocal_Nth_Root (X : e_Real; N : Positive) return e_Real; + -- Reciprocal of the N-th root of X: X**(-1/N) = 1 / X**(1/N). One way + -- to get the N-th root of X is to take One / Reciprocal_Nth_Root(X, N). + -- N must be less than Radix - 1, which is usually 2**29 - 1. + -- (This function is non-standard, but is used by some of the other + -- routines, so might as well export it.) + + function Reciprocal_Sqrt (X : e_Real) return e_Real; + + + function e_Quarter_Pi return e_Real; -- returns Pi/4 by arcsin method. + function e_Log_2 return e_Real; -- returns Log(2.0). + function e_Inverse_Pi return e_Real; -- returns 1/Pi by arcsin method. + function e_Inverse_Sqrt_2 return e_Real; -- returns 1/Sqrt(2.0). + function e_Half_Inverse_Log_2 return e_Real; -- returns 0.5/Log(2.0). + -- The above constants are calculated to max available precision. + -- They are all slightly less than one - the the highest precision + -- that this package is capable of. So the above versions are + -- preferred to the ones given below, which are somewhat + -- greater than one. + + function e_Pi return e_Real; -- returns Pi by arcsin method. + + E_Argument_Error : Exception; + +end Extended_Real.Elementary_Functions; diff --git a/arbitrary/extended_real-io.adb b/arbitrary/extended_real-io.adb new file mode 100644 index 0000000..ce8db3f --- /dev/null +++ b/arbitrary/extended_real-io.adb @@ -0,0 +1,704 @@ + +----------------------------------------------------------------------- +-- package body Extended_Real.IO, translations between extended precision and text +-- Copyright (C) 2008-2018 Jonathan S. Parker +-- +-- Permission to use, copy, modify, and/or distribute this software for any +-- purpose with or without fee is hereby granted, provided that the above +-- copyright notice and this permission notice appear in all copies. +-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +--------------------------------------------------------------------------- + +-- Test for E_Real_Machine_Emax etc + +package body Extended_Real.IO is + + -- The following are some global constant sets for ASCII to e_Real + -- Translation. + + Is_Numeral : constant Set + := Set'('0'..'9' => True, others => False); + + Is_Sign : constant Set + := Set'('-' | '+' => True, others => False); + + Is_Decimal_Pt : constant Set + := Set'('.' => True, others => False); + + + -- A Chunk is a digit in Radix 10**6, which is the radix of decimal + -- number we're translating the binary number into..6 decimal digits + -- at a time. The following are used by both the ASCII to e_Real + -- and e_Real to ASCII routines. + + type Chunk_Integer is range -2**31+1 .. 2**31-1; + Chunk : Chunk_Integer; + Chunk_Width : constant := No_Of_Decimal_Digits_Per_Chunk; -- up in spec. + Ten_To_The_Chunk_Width : constant E_Digit + := Make_E_Digit (10.0**Chunk_Width); + + + -- The following are for translating the exponent to a string. + -- Exp_String is used by both "e_Real_Image" and "Exp_Image" + + Exp_String_Width : constant Positive := E_Integer'Width; + -- Should include space for the sign. + + subtype Exp_String is String (1..Exp_String_Width); + + Exp_Full_Text : Exp_String := (others => ' '); + + Log_Base_10_Of_2 : constant := 0.3010299956639812; + + Zero : constant e_Real := +0.0; + Ten : constant e_Real := +10.0; + + + --------------- + -- Exp_Image -- + --------------- + + -- Also returns the sign of Exp_Val: either '+' or '-'. + -- Uses fact that first char of E_Integer'Image is ' ' or '-'. + -- Left justifies the digits of the Exp in a field of blanks. + -- Another option is to right justify in a field of 0's. + + function Exp_Image (Exp_Val : E_Integer) return Exp_String is + Result : Exp_String := (others => ' '); + E_String : constant String := E_Integer'Image (Exp_Val); + begin + + Result (1..E_String'Length) := E_String; + if Result(1) = ' ' then + Result(1) := '+'; + end if; + return Result; + + end Exp_Image; + + ------------------------------ + -- Count_of_Trailing_Blanks -- + ------------------------------ + + function Count_of_Trailing_Blanks (Exp : Exp_String) return Integer is + cnt : Integer := 0; + begin + for i in reverse 1..Exp_String'Length loop + if Exp(i) = ' ' then + cnt := cnt + 1; + else + exit; + end if; + end loop; + return cnt; + end Count_of_Trailing_Blanks; + + ------------------ + -- e_Real_Image -- + ------------------ + + -- Extended Real (e_Real) to Text translation + + function e_Real_Image + (X : in e_Real; + Aft : in Positive := Positive'Last) + return String + is + + -- Make strings that are large enough to return any desired + -- result that is inside the bounds stated in the spec. The + -- extra chunk is there to fill in for lost digits due to leading + -- zeros. + + Max_Result_Length : constant Positive + := Max_Practical_String_Length + 2*Chunk_Width + Exp_String_Width + 3; + + subtype Result_String is String(1..Max_Result_Length); + Result : Result_String := (others => ' '); + Mantissa_String : Result_String := (others => '0'); + + Sign : Character := ' '; -- init important. + Mantissa_Length : Positive; + Result_Length : Positive; + Exp_Stripped_Length : Integer; + + + -- Types for translating from extended e_Real to Decimal. + + Exp_Base_2 : Real; + Exp_Base_10_Shift : Real; + Exp_Shift, Exp_Val : E_Integer; + I_Exp_Shift : Integer; + Leading_Zeros : Natural; + + Y : e_Real := Abs (X); + Leading_Chunk, Trailing_Chunks : e_Real; + + Stage_Last : Positive; -- Initialized in Step 0. + + + -- Read global memory Mantissa_Length. + -- Update global memory "Mantissa_String". Right justify + -- Chunk_Integer'Image(Chunk). Use fact that Mantissa_String is + -- initialized all '0', so no need to add '0's to the left of the + -- right justified Chunk_Integer'Image(Chunk). Recall Mantissa_.. + -- allows an extra chunk at the end to adjust for removal of leading + -- zeros. + + procedure Add_To_Mantissa_String (Chunk : Chunk_Integer; + Stage : Positive) is + Ascii_Chunk : constant String := Chunk_Integer'Image (Chunk); + Len : constant Integer := Ascii_Chunk'Length; + Start, Ending : Integer; + begin + + Start := (Stage - Positive'First + 1)*Chunk_Width - Len + 2; + Ending := (Stage - Positive'First + 1)*Chunk_Width; + -- Right justifies digits in a field of '0's. + + if Start > Mantissa_Length + 2*Chunk_Width then + return; + end if; + if Ending > Mantissa_Length + 2*Chunk_Width then + Ending := Mantissa_Length + 2*Chunk_Width; + end if; + + Mantissa_String (Start..Ending) := Ascii_Chunk(2..Ending-Start+2); + -- Ascii_Chunk always starts with ' ' or '-'. Here its always positive. + -- Its min length (Len) is always 2, even if chunk = '0'. + + end Add_To_Mantissa_String; + + begin + + + -- STEP 0-. Special cases. Zero and Infinity. + + if Are_Equal (X, Zero) then + return "0.0"; + end if; + + if Are_Equal (X, Positive_Infinity) then + return "+inf"; + end if; + + if Are_Equal (X, Negative_Infinity) then + return "-inf"; + end if; + + -- STEP 0. Determine number of decimal digits in Mantissa. + -- Its Min (Aft+1, Max_No_Of_Digits, and Max_Practical_String_Length). + -- It may not be less than 12. + + if Aft = Positive'Last then + Mantissa_Length := Aft; -- Positive'Last is the upper limit. + else + Mantissa_Length := Aft + 1; -- the usual case. + end if; + + if Mantissa_Length < 13 then Mantissa_Length := 13; end if; + + if Mantissa_Length > Max_No_Of_Digits then + Mantissa_Length := Max_No_Of_Digits; + end if; + + if Mantissa_Length > Max_Practical_String_Length then + Mantissa_Length := Max_Practical_String_Length; + end if; + + Stage_Last := (Mantissa_Length - 1) / Chunk_Width + 3; + -- This is ceiling (Mantissa_Length / Chunk_Width) + 1. (+ 2 really) + -- This is the number of steps required to strip digits of number + -- Mantissa_Length from the e_Real in chunks of Chunk_Width. The + -- extra 2 chunk2 are usually required to fill in for digits lost due + -- to leading zeros in the first chunk. (Normalization of the decimal + -- representation.) + + + -- STEP 1. Multiply Item by a power of 10.0 to make it less than 1.0. + -- Why a power of ten? Cause then we can shift the final Radix 10 exp + -- by exactly the value of the exponent. The formula is + -- Shift = -Ceiling (Log_Base_10_Of_2 * Exp_Base_2) + -- Exp_Base_2 is the normalized exp: i.e. 2**(-Exp_Base_2) * Item + -- is slightly less than 1. But we want a power of 10 with this property, + -- not a power of 2. 10**(-N) = 2**(-Exp_Base_2) implies + -- N = Log_Base_10_Of_2 * Exp_Base_2, except that we must make N the + -- appropriate nearest integer. If we do that by rounding N up (taking + -- the ceiling) then 10**(-N) < 2**(-Exp_Base_2) which implies that + -- 10**(-N) * X < 2**(-Exp_Base_2) * X, so 10**(-N) * X < 1.0, + -- as required. + + Exp_Base_2 := Real (No_Of_Bits_In_Radix) * Real (Exponent (X)); + + Exp_Base_10_Shift := -Real'Ceiling (Log_Base_10_Of_2 * Exp_Base_2); + I_Exp_Shift := Integer (Exp_Base_10_Shift); + Exp_Shift := E_Integer (Exp_Base_10_Shift); + + + -- STEP 2. Multiplying X by 10**Shift will make it less than 1. + -- Want to multiply Item by 10**(6 + Shift) to get no more than 6 decimal + -- digits sticking out to the left of the decimal point (and often 0), + -- because we're stripping 6 decimal digits at a time from the left of the + -- decimal point with the truncate function. Remember Y := Abs(X). + -- Loop translates Y into Radix 10**6. + -- Each Chunk is a digit in Radix 10**6. + + Y := Ten ** (No_Of_Decimal_Digits_Per_Chunk + I_Exp_Shift) * Y; + -- Y := Machine(Y); + -- Machine would round away all guard digits. + + for Stage in 1..Stage_Last loop + + Leading_Chunk := Truncation (Y); + Trailing_Chunks := Y - Leading_Chunk; + + Chunk := Chunk_Integer (Make_Real (Leading_Chunk)); + Add_To_Mantissa_String (Chunk, Stage); + + Y := Ten_To_The_Chunk_Width * Trailing_Chunks; + -- Shift another 6 decimal digits to the left of the decimal point. + + end loop; + + + -- STEP 3. Construct the string. Get leading sign. Strip away leading + -- Zeros. Exp is the amount we shifted by above, adjusted for stripped + -- leading zeros. (ie, move decimal point to right of leading zeros + -- and 1st non-zero digit; decrement Exp by 1 for each leading zero etc.) + + if X < Zero then + Sign := '-'; + end if; + -- Set the sign. Sign is initialized ' '. Recall Y = Abs(X). + + -- Count leading zeros: + Leading_Zeros := 0; + for I in 1..20 loop -- Should be no more than 8 if digit 1..10^9. + if Mantissa_String (I) /= '0' then + exit; + else + Leading_Zeros := Leading_Zeros + 1; + end if; + end loop; + -- Right now the virtual decimal point sits to the left of every digit + -- in Mantissa_String, and the Exponent is given by -Shift. We want + -- to shift the decimal point to the right of each leading zero, and to + -- the right of the first non-zero digit. Then DECREASE the exponent + -- by that shift. + + Exp_Val := -Exp_Shift - (1 + E_Integer(Leading_Zeros)); + -- the 1 accounts for the 1st non-zero digit to the left of decimal point. + + Exp_Full_Text := Exp_Image (Exp_Val); + Exp_Stripped_Length := Exp_String_Width - Count_of_Trailing_Blanks (Exp_Full_Text); + + Result_Length := Mantissa_Length + Exp_Stripped_Length + 3; + + Result(1..Result_Length) := + Sign & Mantissa_String (1+Leading_Zeros) & '.' + & Mantissa_String (2+Leading_Zeros..Mantissa_Length+Leading_Zeros) + & 'E' & Exp_Full_Text (1..Exp_Stripped_Length); + + return Result(1..Result_Length); + + end e_Real_Image; + + ---------------------- + -- Integer_Value_Of -- + ---------------------- + + -- Assumes that a decimal point is immediately to the right of the last digit. + -- Translate each chunk (Digit_n) of 8 decimal digits into e_Real, and sum + -- the polynomial in powers of 10**8. (Do it this way because we can then + -- use E_Digit*e_Real operations for efficiency.) Horner's rule is + -- + -- Digit_0 + 10**8*(Digit_1 + 10**8*(Digit_2 + ... + 10**8*(Digit_n))))) + -- + -- Below, the 10**8 is called Ten_To_The_Chunk_Width. Its a Global constant. + -- IMPORTANT to return ZERO if string has 0 length. e_Real_Val requires it. + + function Integer_Value_Of (F : String) return e_Real is + Result, Digit_i : e_Real; -- initialized to zero. (Important). + No_Of_Full_Chunk_Iterations : Natural; + First_Partial_Chunk_Width : Natural; + Start, Ending : Integer; + begin + if F'Length = 0 then + return Zero; + end if; + + No_Of_Full_Chunk_Iterations := F'Length / Chunk_Width; + First_Partial_Chunk_Width := F'Length MOD Chunk_Width; + + -- Special case for highest order Digit_i, the one of width + -- First_Partial_Chunk_Width. If this partial chunk is the only + -- chunk (ie., No_Of_Full_Chunk_Iterations = 0) then there is no + -- multiplication of the result by 10**8. (See formula above.) + + if First_Partial_Chunk_Width > 0 then + Start := F'First; + Ending := F'First + First_Partial_Chunk_Width - 1; + + Digit_i := +Real (Chunk_Integer'Value (F(Start..Ending))); + + if No_Of_Full_Chunk_Iterations > 0 then + Result := Ten_To_The_Chunk_Width * Digit_i; + else + Result := Digit_i; + end if; + end if; + + -- Do the lower order Digits_i's. The lowest order Digit_i has no + -- 10**8 multiplied by it. + + for i in 0..No_Of_Full_Chunk_Iterations-1 loop + + Start := F'First + First_Partial_Chunk_Width + i * Chunk_Width; + Ending := Start + Chunk_Width - 1; + + Digit_i := +Real (Chunk_Integer'Value (F(Start..Ending))); + + if i < No_Of_Full_Chunk_Iterations-1 then + Result := Ten_To_The_Chunk_Width * (Result + Digit_i); + else + Result := Result + Digit_i; + end if; + + end loop; + + return Result; + + end Integer_Value_Of; + + ------------------------- + -- Fractional_Value_Of -- + ------------------------- + + -- Assumes that a decimal point is immediately to the left of the 1st digit. + -- IMPORTANT to return ZERO if string has 0 length. e_Real_Val requires it. + -- + function Fractional_Value_Of (F : String) return e_Real is + Result : e_Real; + begin + + if F'Length = 0 then + return Zero; + end if; + + Result := Integer_Value_Of(F); + -- We have Val as though decimal point were to the right of all digits. + -- Want it to the left of all digits: just multiply by Ten**(-F'Length) + -- We do it this way so can use efficiency of E_Digit*e_Real in routine + -- Integer_Value_Of. But many possible optimizations are neglected. + -- If performance is an issue, then maybe try e_Real / E_Digit method + -- where E_Digit = 10**8. + + Result := Result * Ten**(-F'Length); + + return Result; + + end Fractional_Value_Of; + + ---------------- + -- e_Real_Val -- + ---------------- + + -- Accepts the following formats: + -- INTEGER : 1234567 + -- DECIMAL : 12.34567 or -.1234567 or 1234567. + -- EXPONENTIAL : 1234.567E+002 or .1234567E2 or 123467.E-03 + -- NON_DECIMAL_EXPONENTIAL : -1234567E-003 + -- + -- Notice that + -- 1) The Decimal point may be anywhere to the left of the E. + -- 2) The Leading sign is optional if its '+'. + -- 3) The sign of the exponent is optional if its '+'. + -- + -- Start_Of_Num is the first non-white space. If the first char of the + -- string is not white-space, then this will be Start_Of_Num. End_Of_Num + -- is the non-white char just before the start of more white-space. BUT if + -- the string comes to an end before any white-space re-appears, then the + -- end of the string is taken as End_Of_Num. + -- + procedure e_Real_Val + (X : in String; + Y : out e_Real; + Last : out Natural) + is + No_Of_Decimal_Pts, No_Of_Exp_Symbols, No_Of_Exp_Signs : Natural := 0; + Decimal_Pt_Exists, Exp_Symbol_Exists, Exp_Sign_Exists : Boolean := False; + Leading_Sign_Exists : Boolean := False; + Decimal_Pt_Pos, Exp_Symbol_Pos, Exp_Sign_Pos : Positive; + Start_Of_Aft : Positive; + Num_Is_Positive : Boolean; + Char : Character; + + type Format is + (Int, Decimal_Pt_Only, Exponential, No_Decimal_Pt_Exponential); + The_Format : Format; + + Start_Of_Num, End_Of_Num, Start_Of_Exp : Natural := 0; + Fore_Width, Aft_Width, Exp_Width : Natural := 0; + Exp_Str : Exp_String := (others => ' '); + Exp_Val : Integer := 0; + + Fore, Aft, Result : e_Real; + + begin + + -- Handle null strings: + + if X'Length = 0 then + raise E_Format_Error; + end if; + + + -- STEP 1. Strip away leading whitespace and sign. Start_Of_Num is the + -- first non-white space character. If its '-' or '+', then increment + -- Start_Of_Num by 1. + + for I in X'Range loop + if not Is_White_Space (X(I)) then + Start_Of_Num := I; + exit; + end if; + end loop; + + if Start_Of_Num = 0 then -- The string is all white-space, so: + raise E_Format_Error; + end if; + + -- If there is a leading sign, make a note of it, and then bypass it. + + Num_Is_Positive := True; -- No sign means positive + Char := X(Start_Of_Num); + if Is_Sign (Char) then + Start_Of_Num := Start_Of_Num + 1; + Leading_Sign_Exists := True; + if Char = '+' then + Num_Is_Positive := True; + elsif Char = '-' then + Num_Is_Positive := False; + end if; + end if; + + if Leading_Sign_Exists and then Start_Of_Num > X'Last then + raise E_Format_Error; + end if; + -- when we incremented Start_Of_Num, we went beyond X'Last. So only char + -- is the Sign. + + + -- STEP 2. Scan everything beyond the leading sign. End_Of_Num is + -- initialized to 0. Here is where we update it. + + for I in Start_Of_Num..X'Last loop + + Char := X(I); + + if Is_White_Space (Char) then -- we know Start.. points to non-whitespace + End_Of_Num := I-1; + exit; + end if; + + if Is_Decimal_Pt (Char) then + Decimal_Pt_Exists := True; + Decimal_Pt_Pos := I; + No_Of_Decimal_Pts := No_Of_Decimal_Pts + 1; + elsif Is_Exp_Symbol (Char) then + Exp_Symbol_Exists := True; + Exp_Symbol_Pos := I; + No_Of_Exp_Symbols := No_Of_Exp_Symbols + 1; + elsif Is_Sign (Char) then + Exp_Sign_Exists := True; + Exp_Sign_Pos := I; + No_Of_Exp_Signs := No_Of_Exp_Signs + 1; + elsif not Is_Numeral (Char) then + raise E_Format_Error; + end if; + + end loop; + + if End_Of_Num = 0 then -- Reached the end of string w/o detecting whitespace + End_Of_Num := X'Last; + end if; + + -- Do some error checking: + + if No_Of_Decimal_Pts > 1 then + raise E_Format_Error; + end if; + + if No_Of_Exp_Signs > 1 then + raise E_Format_Error; + end if; + + if No_Of_Exp_Symbols > 1 then + raise E_Format_Error; + end if; + + if Decimal_Pt_Exists and Exp_Symbol_Exists then + if Decimal_Pt_Pos >= Exp_Symbol_Pos then + raise E_Format_Error; + end if; + end if; + + -- if there's an 'E' and a '+', then the sign must directly follow the + -- the 'E'. Neither can be at the beginning or end of the num. if the + -- sign exists, then 'E' must exist, but not vice-versa. + if Exp_Sign_Exists then + if Exp_Sign_Pos = End_Of_Num or else Exp_Sign_Pos = Start_Of_Num then + raise E_Format_Error; + end if; + end if; + + if Exp_Symbol_Exists then + if Exp_Symbol_Pos = End_Of_Num or else Exp_Symbol_Pos = Start_Of_Num then + raise E_Format_Error; + end if; + end if; + + if Exp_Sign_Exists and Exp_Symbol_Exists then + if not (Exp_Sign_Pos = Exp_Symbol_Pos + 1) then + raise E_Format_Error; + end if; + end if; + + if Exp_Sign_Exists and not Exp_Symbol_Exists then + raise E_Format_Error; + end if; + + -- Diagnose the format of the number: + + if (not Decimal_Pt_Exists) and (not Exp_Symbol_Exists) then + The_Format := Int; + elsif Decimal_Pt_Exists and (not Exp_Symbol_Exists) then + The_Format := Decimal_Pt_Only; + elsif Decimal_Pt_Exists and Exp_Symbol_Exists then + The_Format := Exponential; + else + The_Format := No_Decimal_Pt_Exponential; + end if; + + -- STEP 3. Do the arithmetic. The string goes like Fore . Aft E Exp, + -- in the most general case. The following is not really optimized + -- for speed. + + case The_Format is + when Int => + + Fore_Width := End_Of_Num - Start_Of_Num + 1; + if Fore_Width = 0 then + raise E_Format_Error; + end if; + + Result := Integer_Value_Of(X (Start_Of_Num..End_Of_Num)); + + when Decimal_Pt_Only => + + Fore_Width := Decimal_Pt_Pos - Start_Of_Num; + Aft_Width := End_Of_Num - Decimal_Pt_Pos; + Start_Of_Aft := Decimal_Pt_Pos + 1; + + if Fore_Width = 0 and Aft_Width = 0 then + raise E_Format_Error; + end if; + -- Notice that Fore or Aft may be 0. The funcs below return + -- 0.0 in that case. + + Fore := Integer_Value_Of (X (Start_Of_Num..Start_Of_Num + Fore_Width - 1)); + Aft := Fractional_Value_Of (X (Start_Of_Aft..Start_Of_Aft + Aft_Width - 1)); + + Result := Fore + Aft; + + when Exponential => + + if Exp_Sign_Exists then + Start_Of_Exp := Exp_Sign_Pos + 1; + else + Start_Of_Exp := Exp_Symbol_Pos + 1; + end if; + + Start_Of_Aft := Decimal_Pt_Pos + 1; + Fore_Width := Decimal_Pt_Pos - Start_Of_Num; + Aft_Width := Exp_Symbol_Pos - Start_Of_Aft; + Exp_Width := End_Of_Num - Start_Of_Exp + 1; + + if Exp_Width = 0 then + raise E_Format_Error; + end if; + if Exp_Width > Integer'Width then + -- E_Integer is derived Integer. Both Integer and E_Integer are much + -- wider than the allowed range of Exponents, so Contraint-Error will + -- be raised in the arithmetic package, not here. + raise E_Format_Error; + end if; + if Fore_Width = 0 and Aft_Width = 0 then + raise E_Format_Error; + end if; + -- Notice that Fore or Aft may be 0. The funcs below return + -- 0.0 in that case. + + Fore := Integer_Value_Of (X (Start_Of_Num..Start_Of_Num + Fore_Width - 1)); + Aft := Fractional_Value_Of (X (Start_Of_Aft..Start_Of_Aft + Aft_Width - 1)); + Exp_Str(1..Exp_Width) := X (Start_Of_Exp..Start_Of_Exp + Exp_Width - 1); + Exp_Val := Integer'Value(Exp_Str); + if Exp_Sign_Exists and then X(Exp_Sign_Pos) = '-' then + Exp_Val := -Exp_Val; + end if; + + Result := (Fore + Aft) * Ten**Exp_Val; + + when No_Decimal_Pt_Exponential => + + -- unusual case. Say there's no Aft, only Fore + + if Exp_Sign_Exists then + Start_Of_Exp := Exp_Sign_Pos + 1; + else + Start_Of_Exp := Exp_Symbol_Pos + 1; + end if; + + Fore_Width := Exp_Symbol_Pos - Start_Of_Num; + Exp_Width := End_Of_Num - Start_Of_Exp + 1; + if Fore_Width = 0 then + raise E_Format_Error; + end if; + if Exp_Width = 0 then + raise E_Format_Error; + end if; + if Exp_Width > Integer'Width then + raise E_Format_Error; + end if; + + Fore := Integer_Value_Of (X (Start_Of_Num..Start_Of_Num + Fore_Width - 1)); + Exp_Str(1..Exp_Width) := X (Start_Of_Exp..Start_Of_Exp + Exp_Width - 1); + Exp_Val := Integer'Value(Exp_Str); + if Exp_Sign_Exists and then X(Exp_Sign_Pos) = '-' then + Exp_Val := -Exp_Val; + end if; + + Result := Fore * Ten**Exp_Val; + + end case; + + -- update the 2 out parameters: + + if Num_Is_Positive then + Y := Result; + else + Y := -Result; + end if; + + Last := End_Of_Num; + + end e_Real_Val; + +end Extended_Real.IO; + diff --git a/arbitrary/extended_real-io.ads b/arbitrary/extended_real-io.ads new file mode 100644 index 0000000..20015e8 --- /dev/null +++ b/arbitrary/extended_real-io.ads @@ -0,0 +1,107 @@ + +generic +package Extended_Real.IO is + + -- E_real Text translation. + + function e_Real_Image + (X : in e_Real; + Aft : in Positive := Positive'Last) + return String; + -- e_Real to Text translation. + -- Minimum length of Aft is 12. If less, it gets set to 12. + -- The number of digits in the result is the minimum of the 3 quantities: + -- Aft+1, Max_No_Of_Digits, and Max_Practical_String_Length. Aft is + -- input by the user: its the number of decimal digits beyond the + -- decimal point. Max_No_Of_Digits is slightly greater than the + -- Desired_Decimal_Digit_Precision, the generic parameter. (see below.) + + + -- Text to E_real translation. + + -- The following Sets define white space. In translation from ASCII to + -- e_Real only white space may separate the numbers in ASCII format. + -- The present setting is standard white space on UNIX workstations and + -- Crays. The parenthesis are used by complex numbers. + + type Set is Array(Character) of Boolean; + pragma Pack (Set); + + Is_White_Space : constant Set := + (' ' | ',' | '(' | ')' => True, others => False); + + Is_Exp_Symbol : constant Set := + ('E' | 'e' | 'D' | 'd' => True, others => False); + + + procedure e_Real_Val + (X : in String; + Y : out e_Real; + Last : out Natural); + -- Accepts many non Ada standard formats of the sort found in ascii output + -- on common machines. Also accepts formats of the sort one is likely + -- to type in at the key board. Accepts the following formats: + -- + -- INTEGER : 1234567 or +1234567 or -1234567 + -- DECIMAL : 12.34567 or -.1234567 or +.1234567 or 1234567. + -- EXPONENTIAL : 1234.567E+002 or .1234567E002 or 123467.E-03 + -- NON_DECIMAL_EXPONENTIAL : -1234567E-003 + -- + -- Notice that + -- 0) Both the 'E' and the '.' are optional. + -- 1) If an 'E' exists, then the '.' may be anywhere to the left of the 'E'. + -- 2) The Leading sign is optional if its '+'. + -- 3) The sign of the exponent is optional if its '+'. + -- 4) The set Is_Exp_Symbol determines which Exp symbols are acceptible. + -- + -- "Last" is the index of the last character of the Number. The purpose + -- is to allow one to read numbers sequentially from a string by inputting + -- string X(Last+1..X'Length) once the value of Last is determined. + -- + -- We leave it up to the extended arithm. package to raise constraint_error + -- when the Exponent is out of range. + + E_Format_Error : Exception; + +private + + -- Parameters that determine the maximum size of the output string: + + No_Of_Decimal_Digits_Per_Chunk : constant := 6; + -- 8 is good if Radix is big enough (see next Assert). + + pragma Assert (10.0**No_Of_Decimal_Digits_Per_Chunk < Radix_Minus_1); + -- The Decimal value of e_Real is actually calculated in + -- Radix 10.0**No_Of_Decimal_Digits_Per_Chunk. i.e. in chunks of 5-8 digits. + + Bits_Per_e_Real : constant := No_Of_Bits_In_Radix * Mantissa'Length; + + Decimal_Digits_Per_e_Real : constant := 1 + (Bits_Per_e_Real*1000 - 1) / 3322; + -- Ceiling (Bits_Per_e_Real / 3.322). + + Chunks_Per_e_Real : constant := + 1 + (Decimal_Digits_Per_e_Real - 1) / No_Of_Decimal_Digits_Per_Chunk; + -- Ceiling (Decimal_Digits_Per_e_Real / No_Of_Decimal_Digits_Per_Chunk) + + Max_No_Of_Digits : constant Positive := + No_Of_Decimal_Digits_Per_Chunk * Chunks_Per_e_Real - 4; + -- + -- Notice: it keeps some of the digits beyond the number supposedly held + -- in the input e_Real (Item). By keeping these extra digits at the end, + -- you reduce the Max error in translating from binary to ascii back to + -- binary again. + -- + -- Subtract 4 because they always seem to be noise anyway. + + pragma Assert (Max_Exponent <= 2 ** (e_Integer'Size - 5)); + -- Just making sure. Doesn't work for excessively large Max_Exponent. + + pragma Assert (e_Integer'Size <= Integer'Size); + -- Essentially requires 32 bit Integer or greater. + + Max_Practical_String_Length : constant Positive := Max_No_Of_Digits + 19; + -- Maximum size of the output strings. + -- Set to whatever you think is appropriate. + -- Strings this size are actually created, so memory must be sufficient. + +end Extended_Real.IO; diff --git a/arbitrary/extended_real.adb b/arbitrary/extended_real.adb new file mode 100644 index 0000000..23b7e83 --- /dev/null +++ b/arbitrary/extended_real.adb @@ -0,0 +1,4268 @@ + +----------------------------------------------------------------------- +-- package body Extended_Real, extended precision floating point arithmetic +-- Copyright (C) 2008-2018 Jonathan S. Parker +-- +-- Permission to use, copy, modify, and/or distribute this software for any +-- purpose with or without fee is hereby granted, provided that the above +-- copyright notice and this permission notice appear in all copies. +-- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +-- WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +-- MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +-- ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +-- WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +-- ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +-- OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. +--------------------------------------------------------------------------- + +-- Internally the extended numbers are stored in such a way that the +-- value of e_Real number X is +-- +-- Max +-- X = Radix**Exp * SUM {Radix**(-I) * Digit(I)}. +-- I=0 +-- +-- Externally, the user sees e_Real (via the Exponent, and Fraction +-- attribute functions) as tho' it were normalized. In other words, the +-- value of X is +-- +-- Max +-- X = Radix**Exp_n * SUM {Radix**(-I-1) * Digit(I)} +-- I=0 +-- +-- Exp_n is called the "normalized" exponent. If Exp_n is the normalized exponent +-- then, say, a binary number would be written: +-- +-- 0.111011010001 * 2**(Exp_n). +-- +-- In other words the first binary digit in the mantissa is of power 2**(-1). +-- It is important to know this because the function Real'Exponent(x) returns +-- the *normalized* exponent, and the function Real'Fraction(x) returns +-- x * 2**(-Exp_n) where Exp_n is the normalized exponent. So in the above case, +-- 'Fraction would return 0.111011010001. +-- Also, in normalized form, the first binary digit of the mantissa is always +-- non-zero. + + +package body Extended_Real is + + Disable_Program_Error_Tests : constant Boolean := False; + -- A few assertions and the like. + + ---------------------------------------- + -- Shift_Right_2x_No_of_Bits_in_Radix -- + ---------------------------------------- + + function Shift_Right_2x_No_of_Bits_in_Radix + (Digit : in Digit_Type) + return Digit_Type + is + begin + -- Works in general, but use only for flt types: + --return Digit_Type (Real'Floor (Real (Digit) * Inverse_Radix_Squared)); + return Digit / 2**(2*No_of_Bits_in_Radix); + end Shift_Right_2x_No_of_Bits_in_Radix; + + pragma Inline (Shift_Right_2x_No_of_Bits_in_Radix); + + ------------------------------------- + -- Shift_Right_No_of_Bits_in_Radix -- + ------------------------------------- + + function Shift_Right_No_of_Bits_in_Radix + (Digit : in Digit_Type) + return Digit_Type + is + begin + -- Works in general, but use only for flt types: + --return Digit_Type (Real'Floor (Real (Digit) * Inverse_Radix)); + return Digit / 2**No_of_Bits_in_Radix; + end Shift_Right_No_of_Bits_in_Radix; + + pragma Inline (Shift_Right_No_of_Bits_in_Radix); + + + ----------------- + -- Digit_Floor -- + ----------------- + + -- Used by "/" and Make_Extended: + + function Digit_Floor (X : Real) return Digit_Type is + begin + return Digit_Type (Real'Floor (X)); + end Digit_Floor; + + pragma Assert (Real'Machine_Mantissa > No_Of_Bits_In_Radix); -- ie 53 > 30 + + pragma Inline (Digit_Floor); + + ---------------------------------- + -- Minimum_No_Of_Digits_Allowed -- + ---------------------------------- + + -- Return setting of the constant Min_No_Of_Digits. + -- + function Minimum_No_Of_Digits_Allowed return e_Integer is + begin return Min_No_Of_Digits; + end Minimum_No_Of_Digits_Allowed; + + ---------------------------- + -- Number_Of_Guard_Digits -- + ---------------------------- + + -- Return setting of the constant No_Of_Guard_Digits. + -- + function Number_Of_Guard_Digits return e_Integer is + begin return No_Of_Guard_Digits; + end Number_Of_Guard_Digits; + + +-- SECTION IV. +-- +-- Ada94 attributes. More information on the machine model is given +-- above in the introduction. + + + --------------------------- + -- e_Real_Machine_Rounds -- + --------------------------- + + function e_Real_Machine_Rounds return Boolean is + begin return False; + end e_Real_Machine_Rounds; + + ------------------------------ + -- e_Real_Machine_Overflows -- + ------------------------------ + + function e_Real_Machine_Overflows return Boolean is + begin return False; + end e_Real_Machine_Overflows; + + ------------------------- + -- e_Real_Signed_Zeros -- + ------------------------- + + function e_Real_Signed_Zeros return Boolean is + begin return False; + end e_Real_Signed_Zeros; + + ------------------- + -- e_Real_Denorm -- + ------------------- + + function e_Real_Denorm return Boolean is + begin return False; + end e_Real_Denorm; + + ------------------------- + -- e_Real_Machine_Emax -- + ------------------------- + + function e_Real_Machine_Emax return e_Integer is + begin return Max_Exponent; + end e_Real_Machine_Emax; + + ------------------------- + -- e_Real_Machine_Emin -- + ------------------------- + + function e_Real_Machine_Emin return e_Integer is + begin return Min_Exponent; + end e_Real_Machine_Emin; + + ----------------------------- + -- e_Real_Machine_Mantissa -- + ----------------------------- + + -- Number of digits in machine mantissa. + + function e_Real_Machine_Mantissa + return e_Integer is + begin + return Mantissa'Length; + end e_Real_Machine_Mantissa; + + -------------------------- + -- e_Real_Machine_Radix -- + -------------------------- + + function e_Real_Machine_Radix return Real is + begin return Real_Radix; + end e_Real_Machine_Radix; + + ---------------------------- + -- e_Real_Machine_Epsilon -- + ---------------------------- + + function e_Real_Machine_Epsilon return e_Real is + begin + return e_Real_Model_Epsilon_1; + end e_Real_Machine_Epsilon; + + -------------------------- + -- e_Real_Model_Epsilon -- + -------------------------- + + function e_Real_Model_Epsilon return e_Real is + begin + return e_Real_Model_Epsilon_2; + end e_Real_Model_Epsilon; + + ---------------------------- + -- e_Real_Model_Epsilon_1 -- + ---------------------------- + + -- 1 unit in the larger of the 2 guard digits. + + function e_Real_Model_Epsilon_1 return e_Real is + Result : e_Real; -- equals Zero + begin + Result.Is_Zero := False; + Result.Digit (0) := Digit_One; -- 1 here with Exp=0 => Eps=radix**(-1) + Result.Exp := -(e_Real_Machine_Mantissa-1); + return Result; + end e_Real_Model_Epsilon_1; + + ---------------------------- + -- e_Real_Model_Epsilon_2 -- + ---------------------------- + + -- Guard_Digits = 2 always; assume neither of them is correct. + -- if there's 3 digits of Radix 2^30 then eps is 2^(-30). + -- if there's 4 digits of Radix 2^30 then eps is 2^(-60). + -- if there's 5 digits of Radix 2^30 then eps is 2^(-90) or ~ 10**(-30). + + function e_Real_Model_Epsilon_2 return e_Real is + Result : e_Real; -- equals Zero + begin + Result.Is_Zero := False; + Result.Digit (0) := Digit_One; -- 1 here with Exp=0 => Eps=radix**(-1) + Result.Exp := -(e_Real_Machine_Mantissa-2); + return Result; + end e_Real_Model_Epsilon_2; + + -------------- + -- Exponent -- + -------------- + + -- For every value x of a floating point type T, the normalized exponent + -- of x is defined as follows: + -- - the normalized exponent of zero is (by convention) zero; + -- - for nonzero x, the normalized exponent of x is the unique + -- + -- k-1 k + -- integer k such that T'Machine_Radix <= |x| < T'Machine_Radix . + -- + -- For example, if x = 0.1101011 * 2**1 then k = 1 since + -- 2**0 <= x < 2**1. If X = 0.100000 * 2**1 then 2**0 = x < 2**1. + -- + -- BY CONVENTION, the normalized exponent of 0 is 0. (Internally it ain't.) + + function Exponent (X : e_Real) return e_Integer is + Normalized_Exponent : constant e_Integer := X.Exp + 1; + begin + if X.Is_Zero then + return 0; + else + return Normalized_Exponent; + end if; + -- proper choice because internal form of e_Real is not + -- normalized; internally, Exp is smaller than the normalized exp + -- by 1, because the internal mantissa is larger by a factor + -- of Radix than the normalized mantissa. + end Exponent; + + ------------------ + -- Leading_Part -- + ------------------ + + -- Let k be the normalized exponent of x. The function Leading_Part(x) + -- yields the value + -- + -- k-r k-r + -- - |x/T'Machine_Radix |*T'Machine_Radix , + -- when x is nonnegative and r is positive; + -- k-r k-r + -- - |x/T'Machine_Radix |*T'Machine_Radix , + -- when x is negative and r is positive. + -- + -- Constraint_Error is raised when r is zero or negative. + -- A zero result, which can only occur when x is zero, has + -- the sign of X. + -- Returns the leading digits of X in the range 0..Radix_Digits-1, + -- regardless of sign of X. Other digits are set to 0.0. The exponent + -- is unchanged. Only X = Zero returns Zero. Not sure about infinity. + -- Makes sense that Leading_Part of inf is inf. + -- Notice assume Digit_Index'Last = Ultimate digit. The results are + -- consistant with a dynamic Digit_Index'Last, just a little slower. + -- + function Leading_Part + (X : e_Real; + Radix_Digits : e_Integer) + return e_Real + is + No_Of_Digits : constant e_Integer := Ultimate_Digit + 1; + Result : e_Real := X; + begin + -- By convention: + if Radix_Digits <= 0 then + raise Constraint_Error with "Must have Radix_Digits > 0."; + end if; + + if X.Is_Zero then + return Zero; + end if; + + if X.Is_Infinite then + return X; + end if; + + if Radix_Digits >= No_Of_Digits then + return X; + end if; + + -- So now Radix_Digits < No_Of_Digits which implies that + -- Radix_Digits <= Ultimate_Digit + + for I in Digit_Index range Radix_Digits .. Ultimate_Digit loop + Result.Digit (I) := Digit_Zero; + end loop; + -- The above uses the fact that Digit_Index starts at 0. + + return Result; + + end Leading_Part; + + -------------- + -- Fraction -- + -------------- + + -- -k + -- The function yields the value x*T'Machine_Radix , where + -- k is the normalized exponent of x. A zero result, which + -- can only occur when x is zero, has the sign of X. + -- Not sure about inf, so raise contraint error, because the + -- user may make assumptions about the size of the exponent returned + -- by Exponent. + -- + function Fraction (X : e_Real) return e_Real is + X2 : e_Real := X; + begin + if X.Is_Zero then + return Zero; + end if; + + if X.Is_Infinite then + raise Constraint_Error with "Cannot take Fraction of inf."; + end if; + + X2.Exp := -1; + -- Proper choice because format of e_Real + -- is not normalized. The effect of the -1 is + -- to shift the internal format down to the normalized format. + + return X2; + + end Fraction; + + ------------- + -- Compose -- + ------------- + + -- S'Compose (X, Exp) = Fraction (X) * Machine_Radix ** Exp + -- + -- e-k + -- Let v be the value X*T'Machine_Radix , where k is the + -- normalized exponent of X. If v is a machine number of + -- the type T, or if |v|GT'Model_Small, the function yields + -- v; otherwise, it yields either one of the machine + -- numbers of the type T adjacent to v. Constraint_Error + -- is optionally raised if v is outside the base range of + -- S. A zero result has the sign of Fraction when S'Signed_ + -- Zeros is True. + -- + function Compose + (Fraction : e_Real; + Exponent : e_Integer) + return e_Real + is + X2 : e_Real := Fraction; + begin + if Fraction.Is_Zero then + return Zero; + end if; + + if Fraction.Is_Infinite then + raise Constraint_Error with "Cannot compose inf."; + end if; + + X2.Exp := Exponent - 1; + -- The minus 1 comes from the Fraction(X) operation. + + if X2.Exp < Min_Exponent or else X2.Exp > Max_Exponent then + raise Constraint_Error with "Exponent out of range in Compose operation."; + end if; + + return X2; + end Compose; + + ------------- + -- Scaling -- + ------------- + + -- S'Scaling (X, Exp) + -- Exp + -- Let v be the value X*T'Machine_Radix . If v is a + -- machine number of the type T, or if |v|GT'Model_Small, + -- the function yields v; otherwise, it yields either one + -- of the machine numbers of the type T adjacent to v. + -- Constraint_Error is optionally raised if v is outside + -- the base range of S. A zero result has the sign of X + -- when S'Signed_Zeros is True. + -- + function Scaling + (X : e_Real; + Adjustment : e_Integer) + return e_Real + is + X2 : e_Real := X; + begin + if X.Is_Zero then + return Zero; + end if; + + if X.Is_Infinite then + raise Constraint_Error with "Cannot scale inf."; + end if; + + X2.Exp := X.Exp + Adjustment; + + if X2.Exp < Min_Exponent or else X2.Exp > Max_Exponent then + raise Constraint_Error with "Exp out of range in Scaling operation."; + end if; + + return X2; + end Scaling; + + ---------------- + -- Truncation -- + ---------------- + + -- Strip off fraction for both + and - numbers. + -- This sets all digits beyond the decimal point to 0.0. + -- The function yields the value [x] when X > 0.0. ([x] by defn + -- is floor: largest int less than or equal to x.) When x is zero, + -- the result has the sign of X; a zero result otherwise has a + -- positive sign. When X < 0.0, then its [x]+1 unless [x] = x. + -- Notice assume Digit_Index'Last = Ultimate_Digit. + + function Truncation + (X : e_Real) + return e_Real + is + Result : e_Real; + First_Zeroed_Out_Digit : Digit_Index; + No_Of_Digits : constant e_Integer := Ultimate_Digit + 1; + No_Of_Digits_To_Keep : constant e_Integer := X.Exp + 1; + -- Normalized Exponent of X. Remember, if X.Exp = 0,then X has one + -- non-zero digit that's >= 1.0. So (Exp + 1)=1 means keep 1 digit + begin + + if X.Is_Zero then + return Zero; + end if; + + if No_Of_Digits_To_Keep < 1 then + return Zero; + end if; + -- According to the internal format of X, X.Exp = 0 means X >= 1.0. + -- If X.Exp < 0 then X < 1.0. + + if X.Is_Infinite then + return X; + --Print_Text ("Cannot truncate inf."); + --raise Constraint_Error; + end if; + + if No_Of_Digits_To_Keep >= No_Of_Digits then + return X; + end if; + + -- So now No_Of_Digits_To_keep < No_Of_Digits which implies that + -- No_Of_Digits_To_keep <= Ultimate_Digit + -- Remember, Digit_Index starts at 0, and is a subtype of e_Integer. + + Result := X; + First_Zeroed_Out_Digit := Digit_Index'First + No_Of_Digits_To_keep; + + for I in First_Zeroed_Out_Digit .. Ultimate_Digit loop + Result.Digit (I) := Digit_Zero; + end loop; + + return Result; + + end Truncation; + + ------------- + -- Ceiling -- + ------------- + + -- Let [x] = floor(x). + -- Function yields the value [x]+1, unless [x] = x, in which case, + -- function returns x. When x is zero, the + -- result has the sign of x; a zero result otherwise has a + -- negative sign when S'Signed_Zeros is True. + + function Ceiling (X : e_Real) + return e_Real + is + Result : e_Real; + Lowest_Order_Digit : e_Integer; + begin + if X.Is_Zero then + return Zero; + end if; + + if X.Is_Infinite then + return X; + end if; + + -- Step 1. + -- Special case, X.Exp < 0 so X < 1.0. Have to round 0.0 up to 1.0. + -- We know X /= 0.0. if X.Exp < 0, then know |X| < 1.0. If + -- X > 0.0, then result must be 1.0; if X < 0.0 then result + -- is 0.0. So can save some time: + if X.Exp < 0 then + if X.Is_Positive then + return One; + else + return Zero; + end if; + end if; + + -- Step 2. + -- Now know that X.Exp >= 0. Have to do some work: + + Result := Truncation (X); + + if not X.Is_Positive then -- we're done by defn: Ceiling = Trunc. + return Result; + end if; + + -- Now know that X is positive. Must add one if Trunc(x) /= x. + -- We also know that Result.Exp >= 0, by defn of Trunc. + Lowest_Order_Digit := Result.Exp; + + if (Lowest_Order_Digit <= Ultimate_Digit) and then Result < X then + if Result.Digit (Lowest_Order_Digit) < Digit_Radix_Minus_1 then + Result.Digit (Lowest_Order_Digit) + := Result.Digit (Lowest_Order_Digit) + Digit_One; + else + Result := Result - One; + end if; + end if; + + return Result; + + end Ceiling; + + ----------- + -- Floor -- + ----------- + + -- Function yields the value [x]. ([x] by defn is floor: largest int + -- less than or equal to x.) This equals trunc(x) when x > 0.0, + -- and = trunc(x)-1, when X < 0.0, unless Trunc(x) = x. When x = 0.0, + -- result has the sign of x; a zero result otherwise has a + -- negative sign when S'Signed_Zeros is True. + + function Floor (X : e_Real) return e_Real is + Result : e_Real; + Lowest_Order_Digit : e_Integer; + begin + + if X.Is_Zero then + return Zero; + end if; + + if X.Is_Infinite then + return X; + end if; + + -- Step 1. + -- Special case, X.Exp < 0. + -- Know X /= 0.0. if X.Exp < 0, then know |X| < 1.0. If then + -- X > 0.0, then result must be 0.0; if X < 0.0 then result + -- is -1.0. So can save some time: + if X.Exp < 0 then + if X.Is_Positive then + return Zero; + else + return -One; + end if; + end if; + + -- Step 2. + -- Now know that X.Exp >= 0. Have to do some work: + + Result := Truncation (X); + + if X.Is_Positive then -- we're done by defn: Floor = Trunc. + return Result; + end if; + + -- Now know that X is negative. Must subtract one if Trunc(x) > x. + -- We also know that Result.Exp >= 0, by defn of Trunc, unless trunc + -- returned Zero. (But it didn't cause X.Exp started out >= 0). + + Lowest_Order_Digit := Result.Exp; + + if (Lowest_Order_Digit <= Ultimate_Digit) and then Result > X then + if Result.Digit (Lowest_Order_Digit) > Digit_Zero then + Result.Digit (Lowest_Order_Digit) + := Result.Digit (Lowest_Order_Digit) - Digit_One; + else + Result := Result - One; + end if; + end if; + + return Result; + + end Floor; + + ------------------------------------- + -- Round_Away_Smallest_Guard_Digit -- + ------------------------------------- + + -- Round away Digit_Index'Last + + function Round_Away_Smallest_Guard_Digit (X : e_Real) return e_Real is + Result : e_Real := X; + Penultimate : constant Digit_Index := Digit_Index'Last-1; + begin + + if X.Digit(Digit_Index'Last) <= Half_Radix then + Result.Digit (Digit_Index'Last) := Digit_Zero; + else + -- X is not Zero. + Result.Digit (Digit_Index'Last) := Digit_Zero; + if X.Digit(Penultimate) < Digit_Radix_Minus_1 then + Result.Digit(Penultimate) := Result.Digit(Penultimate) + Digit_One; + else + if Result.Is_Positive then + Result := Result + e_Real_Model_Epsilon_1; --1 unit in penultimate digit. + else + Result := Result - e_Real_Model_Epsilon_1; + end if; + end if; + end if; + + return Result; + + end Round_Away_Smallest_Guard_Digit; + + ------------- + -- Machine -- + ------------- + + -- Rounds away smallest of the 2 guard digits of X. + + function Machine (X : e_Real) return e_Real is + Y : e_Real; + begin + Y := Round_Away_Smallest_Guard_Digit (X); + return Y; + end Machine; + + --------------- + -- Copy_Sign -- + --------------- + + -- S'Copy_Sign denotes a function with the following specification: + -- function S'Copy_Sign (Value, Sign : T) return T + -- If the value of Value is nonzero, the function yields a + -- result whose magnitude is that of Value and whose sign + -- is that of Sign; otherwise, it yields the value zero. + -- Constraint_Error is optionally raised if the result is + -- outside the base range of S. A zero result has the sign + -- of Sign when S'Signed_Zeros is True. + -- + function Copy_Sign (Value, Sign : e_Real) return e_Real is + Result : e_Real := Value; + begin + if Value.Is_Zero then + return Zero; + end if; + + -- following holds even if Value is inf: + Result.Is_Positive := Sign.Is_Positive; + return Result; + + end Copy_Sign; + + -------------- + -- Adjacent -- + -------------- + + -- If t=x, the function yields x; otherwise, it yields the + -- machine number of the type T adjacent to x in the + -- direction of t, if that machine number exists. If the + -- result would be outside the base range of S, Constraint_ + -- Error is raised. When T'Signed_Zeros is True, a zero + -- result has the sign of X. When t is zero, its sign has + -- no bearing on the result. + -- + --function Adjacent (X, Towards : e_Real) return e_Real is + --begin + --end Adjacent; + + -------------- + -- Rounding -- + -------------- + + -- The function yields the integral value nearest to x, + -- rounding away from zero if x lies exactly halfway + -- between two integers. A zero result has the sign of X + -- when S'Signed_Zeros is True. + -- + function Rounding (X : e_Real) return e_Real is + Result : e_Real; + Half : constant e_Real := +0.5; + Del : e_Real; + begin + + if X.Is_Zero then + return Zero; + end if; + + if X.Is_Infinite then + return X; + end if; + + Result := Truncation (X); + Del := Abs (X - Result); + + if not (Del < Half) then + if X.Is_Positive then + Result := Result + One; -- because Trunc chopped it toward zero + else + Result := Result - One; + end if; + end if; + + return Result; + + end Rounding; + + ----------------------- + -- Unbiased_Rounding -- + ----------------------- + + -- The function yields the integral value nearest to x, + -- rounding toward the even integer if x lies exactly + -- halfway between two integers. A zero result has the + -- sign of X when S'Signed_Zeros is True. + -- + function Unbiased_Rounding (X : e_Real) return e_Real is + Result : e_Real; + Half : constant e_Real := +0.5; + Del : e_Real; + Least_Significant_Digit : e_Integer; + begin + + if X.Is_Zero then + return Zero; + end if; + + if X.Is_Infinite then + return X; + end if; + + Result := Truncation (X); + Del := Abs (X - Result); + + --if Del < Half then -- result is unmodified + + if Del > Half then + if X.Is_Positive then + Result := Result + One; -- because Trunc chopped it toward zero + else + Result := Result - One; + end if; + end if; + + if Are_Equal (Del, Half) then + + -- Must find out if Result (= Truncation (X) = int) is even or not. + -- If its not even, then add (or subtract) One as above. + -- To find out, must examine lowest order bit of least significant + -- digit. + + Least_Significant_Digit := Exponent (Result) - 1; + -- If Least_Significant_Digit = 0 then Exponent (Result) = +1 cause + -- it returns the normalized Exp. + + if (Least_Significant_Digit REM 2 = 1) then -- its odd + if X.Is_Positive then + Result := Result + One; -- because Trunc chopped it toward zero + else + Result := Result - One; + end if; + end if; + + end if; + + return Result; + + end Unbiased_Rounding; + + --------------- + -- Remainder -- + --------------- + + -- Code, algorithm and comments from Ken Dritz. (Ada 9X Language study note.) + -- Modified to use e_Real's, so errors my own. + -- It does seem to work well .. much more accurate than the simple alternatives + -- I tried for Sin, Cos arguments, (but that is all the testing I've done.) + -- For nonzero y, let v be the value x-n*y, where n is the + -- integer nearest to the exact value of x/y; if + -- |n-x/y|=1/2, then n is chosen to be even. If v is a + -- machine number of the type T, the function yields v; + -- otherwise, it yields zero. Constraint_Error is raised + -- if y is zero. A zero result always has a positive sign. + + function Remainder + (X, Y : e_Real) + return e_Real + is + Residue, Temp, Reducer, Reducer_Head, Reducer_Tail, N : e_Real; + Abs_Y : constant e_Real := Abs (Y); + -- See comments above about the possibility of overflow here on + -- radix-complement machines. + Scaled_Up, Negated : Boolean; + CONST1 : constant e_Real + := Scaling (One, e_Real_Machine_Emin + e_Real_Machine_Mantissa - 2); + CONST2 : constant e_Real + := Scaling (One, e_Real_Machine_Emin + e_Real_Machine_Mantissa - 1); + begin + if Y.Is_Zero then + raise Constraint_Error with "Must have Y /= 0 in function Remainder."; + end if; + + Residue := X; + Negated := False; + + loop + + -- This loop is executed at most once more than the difference between + -- the exponents of X and Y. + + if Copy_Sign (One, Residue) < Zero then + -- The following two statements are to be executed when the sign of + -- Residue is negative, that is, when Residue is less than zero or is + -- a negative zero. Simply comparing Residue to zero is not good + -- enough when T'Signed_Zeros is True. An actual implementation might + -- instead examine the sign bit. In an implementation in which + -- T'Signed_Zeros is False, the condition above can be simplified to + -- Residue < 0.0. + Residue := -Residue; + Negated := not Negated; + end if; + -- At this point, Residue has a positive sign, and Negated records the + -- parity of sign flippings that Residue has undergone. + + exit when Residue < Abs_Y; + + -- At this point, Residue is greater than or equal to Abs_Y. Its + -- exponent is the same as, or greater than, that of Abs_Y. + + Reducer := Compose (Abs_Y, Exponent(Residue)); + + -- Reducer now has the fraction of Abs_Y and the exponent of Residue. + -- Thus, it is a (possibly large) exact multiple of Abs_Y. + + if Reducer > Residue then + + -- Reducer is greater than Residue only when + -- T'Fraction(Abs_Y) is greater than T'Fraction(Residue). + -- Reduce its exponent by one. + Reducer := Scaling(Reducer, -1); + -- It can be proved that underflow cannot occur in the above scaling. + + -- At this point, 1.0 < Residue/Reducer < e_Real(T'Machine_Radix). + N := Unbiased_Rounding (Residue / Reducer); + -- Thus, 1.0 <= N <= e_Real (Machine_Radix). + + -- Now basically want to subtract N*Reducer from Residue exactly, + -- but the product may have one too many digits to be represented + -- exactly. That occurs when the exponent of N*Reducer exceeds that + -- of Reducer; in the present case, that can happen for N as small as + -- two. + + -- The following almost works: + -- Reducer_Head := T'Leading_Part(Reducer, 1); + -- Reducer_Tail := Reducer - Reducer_Head; + -- Residue := (Residue - N*Reducer_Head) - N*Reducer_Tail; + -- It fails only when Reducer is so small that underflow occurs when + -- subtracting Reducer_Head from it. Note that this is only a problem + -- when T'Denorm is False; when T'Denorm is True, the above suffices. + + if Reducer < CONST1 then + -- Reducer is near the underflow threshold, and of course Residue + -- is near Reducer. Scale both of them up by a sufficient amount + -- to prevent underflow when subtracting Reducer_Head from Reducer; + -- scale back down later. + Residue := Scaling(Residue, e_Real_Machine_Mantissa); + Reducer := Scaling(Reducer, e_Real_Machine_Mantissa); + Scaled_Up := True; + else + Scaled_Up := False; + end if; + + Reducer_Head := Leading_Part (Reducer, 1); + Reducer_Tail := Reducer - Reducer_Head; + -- Because cancellation occurs in the above subtraction, the result is + -- exact. + + -- Now the subtraction can be performed in two stages. + Residue := (Residue - N*Reducer_Head) - N*Reducer_Tail; + -- In the present case, although N*Reducer can have too many digits to + -- be representable, it cannot overflow. + + if Scaled_Up then + -- Scale back down. Note that underflow can occur in rare + -- circumstances here (i.e., when T'Denorm is False and the + -- remainder is less than the underflow threshold, which requires + -- that Y be near the underflow threshold and X be near a multiple + -- of Y). The specification calls for zero to be returned, but + -- T'Scaling might not return a zero when it underflows. If it + -- does, and the zero is properly signed, the if-then-else below + -- can be replaced by the else part (or by the equivalent + -- multiplication or division, if it yields a properly signed + -- zero on underflow). + if Abs (Residue) < CONST2 then + Residue := Copy_Sign (Zero, Residue); + else + Residue := Scaling (Residue, -E_Real_Machine_Mantissa); + end if; + end if; + + else + + -- This case is for Reducer <= Residue. + + -- At this point, 1.0 <= Residue/Reducer < e_Real(T'Machine_Radix). + + N := Unbiased_Rounding (Residue / Reducer); + + -- Thus, 1.0 <= N <= e_Real(T'Machine_Radix). + + -- Here, the technique for subtracting N*Reducer exactly from Residue + -- is different. In the present case, N*Reducer may have one too many + -- digits to be represented exactly only when the rounding was upward, + -- hence (N-1.0)*Reducer must necessarily be representable. Also, + -- N*Reducer could even overflow, but (N-1.0)*Reducer cannot. + + if N > One then + -- The optimization represented by the above test is probably + -- worthwhile. + Residue := Residue - (N - One) * Reducer; + end if; + + Residue := Residue - Reducer; + -- The above subtraction can underflow when T'Denorm is False, in + -- which case the desired result is zero. It is assumed that when + -- subtraction underflows, it underflows to zero. + + end if; + + -- Residue may now be negative, but its absolute value is less than or + -- equal to half of Reducer. + + end loop; + + -- At this point, Residue has a positive sign and a magnitude less than that + -- of Abs_Y. If Residue is greater than half of Abs_Y, correct it by + -- subtracting Abs_Y one more time. We do this without computing half of + -- Abs_Y, which could underflow or be inexact. + + Temp := Residue - Abs_Y; + -- The above can underflow. It is assumed here that underflow produces a + -- zero result. Note that Temp now has a negative sign (a zero produced on + -- underflow is presumably a negative zero when T'Signed_Zeros is True). + + if Temp + Residue > Zero then + -- True only if Residue is greater than half of Abs_Y, or if the + -- computation of Temp underflowed to zero. Note that the condition + -- might, on some machines, be more efficiently evaluated as + -- -Temp < Residue, or even as abs Temp < Residue. + Residue := Temp; + end if; + + -- The above step might even be slightly more efficiently evaluated as + -- follows (here Temp is the negation of the value computed above and + -- hence always has a positive sign): + -- Temp := Abs_Y - Residue; + -- if Temp < Residue then + -- Residue := -Temp; + -- end if; + -- This version, which is clearly equivalent but harder to motivate, is + -- used in the binary case at the end of this LSN. + + -- The desired remainder is now Residue, with a possible sign flip + -- (i.e., if Negated is True at this point). + + if Negated then + return -Residue; + else + return Residue; + end if; + + end Remainder; + + +-- SECTION III. +-- +-- Routines for conversion from Real to e_Real and back again. + + + No_Of_Usable_Bits_In_Real : constant := 52; + + pragma Assert (Real'Machine_Mantissa >= No_Of_Usable_Bits_In_Real); + + No_Of_e_Digits_Per_Real : constant + := (No_Of_Usable_Bits_In_Real-1) / No_Of_Bits_In_Radix + 1; + -- Used only by: Make_Extended to convert a Real object into an e_Real. + -- Equals: Ceiling (No_Of_Usable_Bits_In_Digit / No_Of_Bits_In_Radix) + + + ------------------- + -- Make_Extended -- + ------------------- + + function Make_Extended + (X : Real) + return e_Real + is + Result : e_Real; -- Initialized to zero (important). + Abs_X : Real := Abs (X); + Exponent : e_Integer := 0; + begin + + if not X'Valid then + raise Constraint_Error with "Failure in routine Make_Real: Input is inf or NaN."; + end if; + + if X = 0.0 then + return Zero; + elsif X = -0.0 then + return Zero; + else + Result.Is_Zero := False; + end if; + + -- Abs_X = Abs(X): + + if Abs_X > X then + Result.Is_Positive := False; + else + Result.Is_Positive := True; + end if; + + -- Get power of 2 exponent and 1st digit. This is not usually in an + -- inner loop, so do it the brute force way. If Abs(X) < 1.0 + -- then keep multiplying by Radix until 1.0 <= X <= Radix-1. + -- Strip off the fraction to get the first digit. + + if Abs_X < Real_One then -- mult. by Radix until >= 1.0: + + for I in e_Integer loop + Abs_X := Abs_X * Real_Radix; + Exponent := Exponent - 1; + if Abs_X >= Real_One then + exit; + end if; + end loop; + -- Abs_X is now 1.0 <= Abs_X < Radix. When strip off the fractional + -- part with Real_Floor, then Abs_X will be in 1.0..Radix-1. + + elsif Abs_X >= Real_Radix then -- divide by Radix until just right: + + for I in e_Integer loop + Abs_X := Abs_X * Inverse_Radix; + Exponent := Exponent + 1; + if Abs_X < Real_Radix then + exit; + end if; + end loop; + -- Abs_X is now 1.0 <= Abs_X < Radix. When strip off the fractional + -- part with Real_Floor, then Abs_X will be in 1.0..Radix-1. + + else -- Abs_X is in desired range: + + Exponent := 0; + + end if; + + Result.Exp := Exponent; + + -- Now we've got Result.Exp, Result.Is_Positive, Result.Is_Zero all set. + -- Is_Infinite is initialized to False. Next get the first digit: + + Result.Digit(0) := Digit_Floor (Abs_X); + Abs_X := Abs_X - Real (Result.Digit(0)); + + -- Now just the Abs_X < 1.0 fraction remains in Abs_X. + -- Optimization: if Abs_X = 0.0 then return early. Result is + -- already initialized to zero...no need to get next digits. + + if Abs_X = 0.0 then + return Result; + end if; + + -- Get subsequent digits. These digits are in the range + -- 0.0 <= X <= Radix-1. (Run the loop longer by 1 for safety.) + + for I in Digit_Index range Digit_Index'First+1..No_Of_e_Digits_Per_Real loop + Abs_X := Abs_X * Real_Radix; + Result.Digit(I) := Digit_Floor (Abs_X); + Abs_X := Abs_X - Real (Result.Digit(I)); + end loop; + + if Abs_X > Real_One then + raise Constraint_Error with "Error in Make_Extended. Probably bad input."; + end if; + + return Result; + + end Make_Extended; + + --------- + -- "+" -- + --------- + + -- Only works in range of Real (15 digits usually). + -- So X = 2**62 raises Constraint_Error if Real'Digits = 15. + + function "+" (X : Integer) return e_Real + is + X_Real : constant Real := Real (X); + begin + if Abs X_Real > 2.0**(Real'Machine_Mantissa-1) then + raise Constraint_Error with "Can't make extended. Argument too large."; + end if; + return Make_Extended (X_Real); + end "+"; + + ------------------ + -- Make_e_Digit -- + ------------------ + + function Make_e_Digit + (X : Real) + return e_Digit + is + Ext_Result : e_Real; -- Initialized to zero. + Result : e_Digit; -- Initialized to zero. + begin + + Ext_Result := Make_Extended (X); + + if Ext_Result.Digit(1) /= Digit_Zero or Ext_Result.Digit(2) /= Digit_Zero then + raise Constraint_Error with "Error in Make_e_Digit: arg not in range."; + end if; + + if Ext_Result.Is_Zero then + return Result; + end if; + + Result.Exp := Ext_Result.Exp; + Result.Is_Positive := Ext_Result.Is_Positive; + Result.Is_Zero := Ext_Result.Is_Zero; + Result.Digit := Ext_Result.Digit(0); + + return Result; + + end Make_e_Digit; + + --------- + -- "-" -- + --------- + + -- Same as Make_Extended, but changes the sign. + + function "-" (X : Real) return e_Real is + Z : e_Real := Make_Extended (X); + begin + Z.Is_Positive := not Z.Is_Positive; + + if Z.Is_Zero then -- get sign right again + Z.Is_Positive := True; + end if; + return Z; + end; + + --------------- + -- Make_Real -- + --------------- + + -- Most of the arithmetic here is between powers of the machine_radix. + -- Results should be exact out to the last place of Real. But + -- can't be guaranteed. + + function Make_Real (X : e_Real) + return Real + is + Result : Real := Real_Zero; + Mantissa : Real := Real_Zero; + begin + + if X.Is_Zero then + return Real_Zero; + end if; + + if X.Is_Infinite then + raise Constraint_Error with "Failure in routine Make_Real: Number is infinite."; + end if; + + -- Here is the general case. It produces a Mantissa that is a Factor + -- of Radix larger than the Normalized Fraction that appears in + -- Value_Of_Real = Normalized Fraction * Radix**Normalized_Exponent. + -- + --Power_Of_Radix := 1.0; + --Result := Real (X.Digit(0)); + --for I in Digit_Index range 1..No_Of_e_Digits_Per_Real-1 loop + --Power_Of_Radix := Power_Of_Radix * Inverse_Radix; + --Result := Result + Power_Of_Radix * Real (X.Digit(I)); + --end loop; + -- + -- The following is the usual case. This is the inner product form. + -- This sometimes gives the best results because it is more often + -- done in the machine's extended arithmetic, if that's available. + -- The following produces a Mantissa that is a Factor + -- of Radix larger than the Normalized_Fraction that appears in + -- Value_Of_Real = Normalized_Fraction * Radix**Normalized_Exponent. + -- Recall that X.Exp is less than the Normalized exponents by 1. + + Mantissa := Real (X.Digit(0)) + + Real (X.Digit(1)) * Inverse_Radix + + Real (X.Digit(2)) * Inverse_Radix_Squared + + Real (X.Digit(3)) * Inverse_Radix_Squared * Inverse_Radix + + Real (X.Digit(4)) * Inverse_Radix_Squared * Inverse_Radix_Squared; + + + -- Possible overflows are left to the external float package to raise. + -- underflows to Zero are done explicitly. + + if Integer(X.Exp) * No_Of_Bits_In_Radix < Real'Machine_Emin then + Result := 0.0; + else + Result := Mantissa * Real_Radix**Integer(X.Exp); + end if; + + -- Here is the Ada94 way: + -- + --Real_Exponent_Shift := No_Of_Bits_In_Radix * Integer (X.Exp - 1.0); + --Result := Real_Scaling (Mantissa, Real_Exponent_Shift); + + -- The scaling function multiplies + -- Mantissa by Real'Machine_Radix ** Real_Exponent_Shift. + + -- At present leave it up to Real floating point to raise the + -- constraint errors if they exist. + + if X.Is_Positive then + null; + else + Result := -Result; + end if; + + return Result; + + end Make_Real; + + +-- SECTION II. +-- +-- Standard arithmetic operators. + + + --------- + -- Abs -- + --------- + + function "Abs" (X : e_Real) return e_Real is + X2 : e_Real := X; + begin + X2.Is_Positive := True; + return X2; + end "Abs"; + + --------- + -- "-" -- + --------- + + function "-" (X : e_Real) return e_Real is + X2 : e_Real := X; + begin + + X2.Is_Positive := not X2.Is_Positive; + + if X2.Is_Zero then + X2 := Zero; + end if; + return X2; + end "-"; + + ------------------- + -- Abs_Is_Lesser -- + ------------------- + + -- Is Abs(X) less than Abs(Y)? + -- This performs the comparison for all digits: 0..Digit_Index'Last. + -- The user is epected to call "Round" first if he wants the comparison + -- in the range 0..Last_Correct_Digit == 0..Digit_Index'Last-No_Of_Guard_Digits. + + function Abs_Is_Lesser (X, Y : e_Real) + return Boolean + is + begin + + -- Step 0. Handle the infinities. inf < inf raises c.e. but not here. + + if X.Is_Infinite and not Y.Is_Infinite then + return False; -- |X| > |Y| + elsif not X.Is_Infinite and Y.Is_Infinite then + return True; -- |X| < |Y| + end if; + + -- Step 0b. Handle the Zeros. Another case where the Exp does not tell + -- us the magnitude of the number. + + if X.Is_Zero and not Y.Is_Zero then + return True; -- |X| < |Y| + elsif not X.Is_Zero and Y.Is_Zero then + return False; -- |X| > |Y| + elsif X.Is_Zero and Y.Is_Zero then + return False; -- |X| = |Y| + end if; + + -- Step 1. Find the lesser number, Exponent-wise. Must have the + -- number normalized or the following is false. Also must have filtered + -- out the special cases in which Exp is unrelated to the size of + -- numbers: Zero and Infinity. + + if X.Exp < Y.Exp then + return True; + elsif X.Exp > Y.Exp then + return False; + end if; + + -- Step 2. If got this far, then the Exponents are equal. Find the + -- the first unequal digit. The following makes use of the fact that + -- the digits are essentially INTEGER values (all zeros beyond the + -- the decimal point.) + + for I in Digit_Index'First .. Digit_Index'Last loop + if X.Digit(I) < Y.Digit(I) then + return True; + elsif X.Digit(I) > Y.Digit(I) then + return False; + end if; -- if got this far, then the digits are equal + -- so continue on to the next digit and try again. + end loop; + + -- If got this far, then the numbers are equal. + return False; + + end Abs_Is_Lesser; + + -------------------- + -- Abs_Is_Greater -- + -------------------- + + -- Is Abs(X) greater than Abs(Y)? + + function Abs_Is_Greater (X, Y : e_Real) return Boolean is + begin + -- Step 0. Handle the infinities. + + if X.Is_Infinite and not Y.Is_Infinite then + return True; -- |X| > |Y| + elsif not X.Is_Infinite and Y.Is_Infinite then + return False; -- |Y| > |X| + end if; + + -- Step 0b. Handle the Zeros. Another case where the Exp does not tell + -- us the magnitude of the number. + + if X.Is_Zero and not Y.Is_Zero then + return False; -- |X| < |Y| + elsif not X.Is_Zero and Y.Is_Zero then + return True; -- |X| > |Y| + elsif X.Is_Zero and Y.Is_Zero then + return False; -- |X| = |Y| + end if; + + -- Step 1b. Find the larger number, Exponent-wise. Must have the + -- number normalized or the following is false. + + if X.Exp > Y.Exp then + return True; + elsif X.Exp < Y.Exp then + return False; + end if; + + -- Step 2. If got this far, then the Exponents are equal. Find the + -- the first unequal digit. The following makes use of the fact that + -- the digits are essentially INTEGER valued (all zeros beyond the + -- the decimal point.) + + for I in Digit_Index'First .. Digit_Index'Last loop + if X.Digit(I) > Y.Digit(I) then + return True; + elsif X.Digit(I) < Y.Digit(I) then + return False; + end if; -- if got this far, then the digits are equal + -- so continue on to the next digit and try again. + end loop; + + -- If got this far, then the numbers are equal up to Digit_Index'Last. + return False; + + end Abs_Is_Greater; + + --------------- + -- Are_Equal -- + --------------- + + -- X equals Y? Checks all digits except the digits beyond Digit_Index'Last. + -- No rounding is performed. + -- Can use this routine recognize Positive_Infinity and Zero. + + function Are_Equal (X, Y : e_Real) + return Boolean + is + begin + -- both zero, go home early: + + if X.Is_Zero AND Y.Is_Zero then + return True; + end if; + + -- one is zero and the other isn't go home early: + + if X.Is_Zero XOR Y.Is_Zero then + return False; + end if; + + -- Check signs. Return False if they have different signs. + -- We've already checked for Zero's. + + if X.Is_Positive XOR Y.Is_Positive then + return False; + end if; + + -- both infinite but have different signs, then + -- the above step already returned false. + -- Make inf = inf so one can use this functions to recognize inf. + -- Another reasonable option would be to make it false. + + if X.Is_Infinite AND Y.Is_Infinite then + return True; + end if; + + -- One is infinite, the other not: + + if X.Is_Infinite XOR Y.Is_Infinite then + return False; + end if; + + -- ANDing and XORing Is_Zero and Is_Infinite now know + -- that the neither of the numbers is Zero or Infinite. + -- Check equality, Exponent-wise. Must have both + -- numbers normalized or the following doesn't work. Remember that the + -- the only unnormalized nums are Zero, and the 2 infinities. If got + -- this far then neither X nor Y is one of those three. + + if X.Exp /= Y.Exp then + return False; + end if; + + -- got this far, then the Exponents are equal. Find the + -- the first unequal digit. Makes use of the fact that the digits are + -- essentially integer valued. + + for I in Digit_Index loop + if X.Digit(I) /= Y.Digit(I) then + return False; + end if; -- if got this far, then the digits are equal + -- so continue onto the next digit and try again. + end loop; + + --If got this far, then digits, exponent, and sign are equal. + return True; + + end Are_Equal; + + ------------------- + -- Are_Not_Equal -- + ------------------- + + function Are_Not_Equal (X, Y : e_Real) return Boolean is + begin + return NOT Are_Equal (X, Y); + end Are_Not_Equal; + + --------- + -- ">" -- + --------- + + -- Is X > Y? + function ">" (X, Y : e_Real) return Boolean is + begin + + + -- Step 0. Check Zeros. + + if X.Is_Zero AND Y.Is_Zero then + return False; + end if; + + + -- Step 0b. Need some optimizations for the common case in which one + -- attempts to determine Positivity by X > Zero or signs by Zero > Y. + -- The following lets infinities through for the non-zero part. + + if (not X.Is_Zero) AND Y.Is_Zero then + if X.Is_Positive then + return True; -- X is pos. but not zero, then (X > Zero). + else + return False; -- X is neg. but not zero, then not (X > Zero). + end if; + end if; + + if X.Is_Zero AND (not Y.Is_Zero) then + if Y.Is_Positive then + return False; -- Y is pos. but not zero, then not (Zero > Y). + else + return True; -- Y is neg. but not zero, then (Zero > Y). + end if; + end if; + + -- Step 1. Now do things more systematically. + -- Check signs. Notice that these give us efficient way to + -- check sign of a number. If X is negative, this is fast because + -- Zero is classified as positive. So go home early if: + + if X.Is_Positive and not Y.Is_Positive then + return True; + elsif not X.Is_Positive and Y.Is_Positive then + return False; + end if; + + -- Step 1b. Now they are either both positive or both negative. + -- If they are both inf, then raise ce, since don't know: + + if X.Is_Infinite AND Y.Is_Infinite then + raise Constraint_Error with "Constraint_Error in routine >. Arguments are inf."; + end if; + + -- Step 2. Now they are either both positive or both negative. + -- If they are both neg. return true if Abs X < Abs Y. + + if X.Is_Positive and Y.Is_Positive then + return Abs_Is_Greater (X, Y); -- Abs X > Abs Y + else + return Abs_Is_Lesser (X, Y); -- Abs X < Abs Y + end if; + + end ">"; + + + --------- + -- "<" -- + --------- + + function "<" (X, Y : e_Real) return Boolean is + begin + + + -- Step 0. Check Zeros. + + if X.Is_Zero AND Y.Is_Zero then + return False; + end if; + + + -- Step 0b. Need some optimizations for the common case in which one + -- attempts to determine signs by X < Zero or positivity by Zero < Y. + -- The following lets infinities through for the non-zero part. + + if (not X.Is_Zero) AND Y.Is_Zero then + if X.Is_Positive then + return False; -- X is pos. but not zero, then not (X < Zero). + else + return True; -- X is neg. but not zero, then (X < Zero). + end if; + end if; + + if X.Is_Zero AND (not Y.Is_Zero) then + if Y.Is_Positive then + return True; -- Y is pos. but not zero, then (Zero < Y). + else + return False; -- Y is neg. but not zero, then not (Zero < Y). + end if; + end if; + + + -- Step 1. Now do things more sytematically. + -- Check signs. Notice that these give us efficient way to + -- check sign of a number. If X is negative, this is fast because + -- Zero is classified as positive. (If want to find if its Pos., use + -- not (X < Zero). Since they aren't both 0 go home early if: + + if X.Is_Positive and not Y.Is_Positive then + return False; + elsif not X.Is_Positive and Y.Is_Positive then + return True; + end if; + + + -- Step 1b. Now they are either both positive or both negative. + -- If they are both inf, then raise ce: + + if X.Is_Infinite AND Y.Is_Infinite then + raise Constraint_Error with "Error in routine <. Arguments are inf."; + end if; + + + -- Step 2. Now they are either both positive or both negative. + -- If they are both neg. return true if Abs X > Abs Y. + + if X.Is_Positive and Y.Is_Positive then + return Abs_Is_Lesser (X, Y); -- Abs X < Abs Y + else + return Abs_Is_Greater (X, Y); -- Abs X > Abs Y + end if; + + end "<"; + + ---------- + -- ">=" -- + ---------- + + function ">=" + (X, Y : e_Real) + return Boolean + is + begin + return (not (X < Y)); + end ">="; + + ---------- + -- "<=" -- + ---------- + + function "<=" + (X, Y : e_Real) + return Boolean + is + begin + return (not (X > Y)); + end "<="; + + ---------- + -- Add -- + ---------- + + -- Add the numbers. The individual digits may overflow the 0..Radix-1 range + -- but not the range of the base floating point number used to represent the + -- digit. Carrying is done later. + + function Add + (Larger : e_Real; + Smaller : e_Real; + Digit_Shift : Digit_Index) + return e_Real + is + Z : e_Real; + begin + if Digit_Shift > Digit_Index'First then + for I in Digit_Index'First .. Digit_Shift-1 loop + Z.Digit(I) := Larger.Digit(I); + end loop; + end if; + for I in Digit_Shift .. Digit_Index'Last loop + Z.Digit(I) := Larger.Digit(I) + Smaller.Digit(I - Digit_Shift); + end loop; + + return Z; + + end Add; + + --pragma Inline (Add); + + -------------- + -- Subtract -- + -------------- + + -- Subtract the smaller from the larger. If they have the same + -- exponent, (ie Digit_Shift = 0), + -- then use the quantity "First_Unequal_Digit" to optimize + -- the subtraction, by inserting zeros for all of the equal digits. + -- We already verified that Larger and Smaller are not equal. + + procedure Subtract + (Larger : in e_Real; + Smaller : in e_Real; + Digit_Shift : in Digit_Index; + First_Unequal_Digit : in Digit_Index; + Result : out e_Real; + Extra_Guard_Digit : out Digit_Type) + is + I : Digits_Base; + begin + + if Digit_Shift = 0 then -- We can make use of First_Unequal_Digit. + + if First_Unequal_Digit > 0 then + for I in 0 .. First_Unequal_Digit-1 loop + Result.Digit(I) := Digit_Zero; + end loop; + end if; + for I in First_Unequal_Digit .. Digit_Index'Last loop + Result.Digit(I) := Larger.Digit(I) - Smaller.Digit(I); + end loop; + + Extra_Guard_Digit := Digit_Zero; -- important initialization. + + else + + for I in 0 .. Digit_Shift-1 loop + Result.Digit(I) := Larger.Digit(I); + end loop; + for I in Digit_Shift .. Digit_Index'Last loop + Result.Digit(I) := Larger.Digit(I) - Smaller.Digit(I - Digit_Shift); + end loop; + + I := Digit_Index'Last + 1; + Extra_Guard_Digit := -Smaller.Digit(I - Digit_Shift); + -- Here the Larger.Digit(I) = 0.0 because it ran out of digits. + + end if; + + end Subtract; + + --pragma Inline (Subtract); + + ---------------------------- + -- Borrow_For_Subtraction -- + ---------------------------- + + -- Do the Borrowing. This can have much overhead in "-" or "+", so some + -- optimizations are performed. If the digits have become less than zero + -- then must borrow from the next higher order digit: subtract 1 from that + -- digit, and add Radix to digit in question. Start + -- at the least significant digit, Digit_Index'Last, work up to point at which + -- the subtraction began: Digit_Shift. Digit_Shift is the first digit of + -- Z on which a subtraction was performed. After that, unless a borrow is + -- performed, the process may end. Borrowing should be extremely + -- rare, so don't do unless necessary. + -- The Extra_Guard_Digit is a virtual digit at index Digit_Index'Last+1. Very + -- important in improving precision in some cases: particularly subtracting + -- a small number from 1.0. + + procedure Borrow_For_Subtraction + (Z : in out e_Real; + Extra_Guard_Digit : in Digit_Type; + Digit_Shift : in Digit_Index) + is + First_Nonzero_Digit : Digits_Base := Digit_Index'Last+1; + All_The_Digits_Are_Zero : Boolean := True; + + Borrow : constant Digit_Type := -Digit_One; + Guard_Digit : Digit_Type := Extra_Guard_Digit; + I : Digits_Base; + begin + + + -- This is the general case in which many numbers were subtracted: + -- Here it is possible that Borrow > 1.0. + -- if Digit_Shift < Digit_Index'Last then + -- for I in reverse Digit_Shift+1..Digit_Index'Last loop + -- if Z.Digit(I) < 0.0 then + -- Borrow := Real_Floor (Z.Digit(I) * Inverse_Radix); + -- Z.Digit(I) := Z.Digit(I) - Borrow * Radix; -- Borrow is < 0.0. + -- Z.Digit(I-1) := Z.Digit(I-1) + Borrow; + -- end if; + -- end loop; + -- end if; + + + -- We are subtracting only 2 numbers, so borrow at most 1 digit. + + -- Special case: the extra guard digit at Digit_Index'Last+1: + --Borrow := -Digit_One; + I := Digit_Index'Last+1; + if Guard_Digit < Digit_Zero then + Guard_Digit := Guard_Digit + Digit_Radix; + Z.Digit(I-1) := Z.Digit(I-1) + Borrow; + end if; + + if Digit_Shift < Digit_Index'Last then + --Borrow := -Digit_One; + for I in reverse Digit_Shift+1 .. Digit_Index'Last loop + if Z.Digit(I) < Digit_Zero then + Z.Digit(I) := Z.Digit(I) + Digit_Radix; + Z.Digit(I-1) := Z.Digit(I-1) + Borrow; + end if; + end loop; + end if; + + + -- Step 1. Do everything between the 2nd digit and Digit_Shift. + -- If no borrowing is performed, then are done, since these are the + -- digits on which no subtractions were performed initially. (With the + -- exception of digit Digit_Shift: still must check that it + -- is not < 0.0.) + + -- The general case: + -- Borrow_Loop: + -- for I in reverse Digit_Index'First+1 .. Digit_Shift loop + -- if Z.Digit(I) < 0.0 then + -- Borrow := Real_Floor (Z.Digit(I) * Inverse_Radix); + -- Z.Digit(I) := Z.Digit(I) - Borrow * Radix; + -- Z.Digit(I-1) := Z.Digit(I-1) + Borrow; + -- else + -- exit Borrow_Loop; + -- end if; + -- end loop Borrow_Loop; + + + -- We are subtracting only 2 numbers, so borrow at most 1 digit. + + --Borrow := -Digit_One; + Borrow_Loop: + for I in reverse Digit_Index'First+1..Digit_Shift loop + if Z.Digit(I) < Digit_Zero then + Z.Digit(I) := Z.Digit(I) + Digit_Radix; + Z.Digit(I-1) := Z.Digit(I-1) + Borrow; + else + exit Borrow_Loop; + end if; + end loop Borrow_Loop; + + + -- Step 2. If Z.Digit(0) < 0.0 then the result is < 0.0, which means + -- a failure in the "+" or "-" routines below. + + if not Disable_Program_Error_Tests then + if Z.Digit(0) < Digit_Zero then + raise Program_Error with "Some error in Borrow_For_Subtraction."; + end if; + end if; + + + -- Step 3. Normalize the result if the highest order Digit is zero. + -- Shift the exponent accordingly. Recall that Z should not be Zero; + -- checked for that possibility before subtracting. + -- So shift the entire mantissa left by the number of leading zeros, + -- and decrement the exponent by the same amount. If do any left-shifts, + -- then put at the end of the mantissa the extra guard digit dragged + -- along just for this event. + + First_Nonzero_Digit := Digit_Index'Last + 1; + All_The_Digits_Are_Zero := True; + for I in Digit_Index loop + if Z.Digit(I) /= Digit_Zero then + First_Nonzero_Digit := I; + All_The_Digits_Are_Zero := False; + exit; + end if; + end loop; + + if All_The_Digits_Are_Zero then + if Guard_Digit = Digit_Zero then + -- But we checked equality of X and Y. + -- Only time this happened it was a compiler bug. + -- but maybe not an err? + Z := Zero; + if not Disable_Program_Error_Tests then + raise Program_Error with "Might be a bug in Borrow_For_Subtraction."; + end if; + else + -- This is certainly possible: + Z.Digit(0) := Guard_Digit; + Z.Exp := Z.Exp - e_Integer (First_Nonzero_Digit); + end if; + end if; + + if not All_The_Digits_Are_Zero then -- First_Nonzero_Digit < Max_Index+1 + + if First_Nonzero_Digit > 0 then -- shift the mantissa left by this amount + + for I in 0 .. Digit_Index'Last-First_Nonzero_Digit loop + Z.Digit(I) := Z.Digit(I + First_Nonzero_Digit); + end loop; + -- Shift the mantissa left by this amount. + + for I in Digit_Index'Last-First_Nonzero_Digit+1 .. Digit_Index'Last loop + Z.Digit(I) := Digit_Zero; + end loop; + -- Set the rest of the mantissa to 0.0. + + Z.Digit(Digit_Index'Last-First_Nonzero_Digit+1) := Guard_Digit; + -- Even tho' set Digit to 0.0 above, set it right now. + + Z.Exp := Z.Exp - e_Integer (First_Nonzero_Digit); + + end if; + + end if; + + -- If First_Nonzero_Digit = 0, the usual case, then are done. + + end Borrow_For_Subtraction; + + --pragma Inline (Borrow_For_Subtraction); + + ------------------------ + -- Carry_For_Addition -- + ------------------------ + + -- Do the carrying. This can have much overhead, so some + -- optimizations are performed. If the digits have become larger + -- than Radix-1 then must break the digit into 2 parts and add the larger + -- to a higher order digit. This carry distance is at most one digit, + -- rather than the 2 possible in the multiplication routine. Start + -- at the least significant digit, Digit_Index'Last, work up to point at which + -- the addition began: Digit_Shift. Digit_Shift is the first digit of + -- Z on which an addition was performed. After that, unless a carry is + -- performed, the process may be ended. Carrying should be extremely + -- rare, so don't do unless necessary. + + procedure Carry_For_Addition + (Z : in out e_Real; + Digit_Shift : in Digit_Index) + is + Digit_Minus_1 : Digit_Type := Digit_Zero; + We_Are_Finished : Boolean := False; + Must_Normalize : Boolean := False; + Carry : constant Digit_Type := Digit_One; + begin + -- Step 1. Do the carrying among the digits that have been added to each + -- other (Digit_ID in Digit_Shift+1..Digit_Index'Last). Actually, + -- Digit_ID = Digit_Shift had an addition performed to it also..that's + -- dealt with in step 2. + -- + -- This is the general case. Useful for an optimized Sum(many numbers). + -- if Digit_Shift < Digit_Index'Last then + -- for I in reverse Digit_Shift+1..Digit_Index'Last loop + -- if Z.Digit(I) > Radix_Minus_1 then + -- Carry := Real_Floor (Z.Digit(I) * Inverse_Radix); + -- Z.Digit(I) := Z.Digit(I) - Carry * Radix; + -- Z.Digit(I-1) := Z.Digit(I-1) + Carry; + -- end if; + -- end loop; + -- end if; + -- + -- We're summing at most 2 numbers, so Carry is at most 1.0. + + if Digit_Shift < Digit_Index'Last then + for I in reverse Digit_Shift+1 .. Digit_Index'Last loop + if Z.Digit(I) > Digit_Radix_Minus_1 then + --Carry := Digit_One; + Z.Digit(I) := Z.Digit(I) - Digit_Radix; + Z.Digit(I-1) := Z.Digit(I-1) + Carry; + end if; + end loop; + end if; + We_Are_Finished := False; -- We have at least Digit(Digit_Shift) to check. + + + -- Step 2. Do everything between the 2nd digit and Digit_Shift. + -- If no carry is performed, then we're done, since these are the + -- digits on which no additions were performed initially. (With the + -- exception of digit Digit_Shift: still must check that it + -- is not larger than Radix-1.) + -- + -- Carry_Loop: + -- for I in reverse Digit_Index'First+1 .. Digit_Shift loop + -- if Z.Digit(I) > Radix_Minus_1 then + -- Carry := Real_Floor (Z.Digit(I) * Inverse_Radix); + -- Z.Digit(I) := Z.Digit(I) - Carry * Radix; + -- Z.Digit(I-1) := Z.Digit(I-1) + Carry; + -- else + -- We_Are_Finished := True; + -- exit Carry_Loop; + -- end if; + -- end loop Carry_Loop; + + -- When summing at most 2 numbers: + + Carry_Loop: + for I in reverse Digit_Index'First+1..Digit_Shift loop + if Z.Digit(I) > Digit_Radix_Minus_1 then + --Carry := Digit_One; + Z.Digit(I) := Z.Digit(I) - Digit_Radix; + Z.Digit(I-1) := Z.Digit(I-1) + Carry; + else + We_Are_Finished := True; + exit Carry_Loop; + end if; + end loop Carry_Loop; + + -- Step 3. If left the carry_loop early, then go home now. + -- No need to normalize the result (i.e. make sure that the first + -- digit is not 0). No need to increment Z.Exp. This should be the usual + -- case. First however, a debugging test: + + if We_Are_Finished then + if not Disable_Program_Error_Tests then + if Z.Digit(0) <= Digit_Zero then + raise Program_Error with "Some error in Carrying for + operator."; + end if; + end if; + return; + end if; + + -- Step 4. Perform the final carry if Z.Digit(0) > Radix-1 (to a digit + -- that doesn't exist yet, called Digit_Minus_1.) + + -- Must_Normalize := False; + -- if Z.Digit(0) > Radix_Minus_1 then + -- Carry := Real_Floor (Z.Digit(0) * Inverse_Radix); + -- Z.Digit(0) := Z.Digit(0) - Carry * Radix; + -- Digit_Minus_1 := Carry; + -- Must_Normalize := True; + -- end if; + + Must_Normalize := False; + if Z.Digit(0) > Digit_Radix_Minus_1 then + --Carry := Digit_One; + Z.Digit(0) := Z.Digit(0) - Digit_Radix; + Digit_Minus_1 := Carry; -- Digit_Minus_1 is initially 0.0. + Must_Normalize := True; + end if; + + -- Step 5. Normalize the result if Digit(0) was > Radix-1, + -- hence a carry occurred to a larger digit. + -- Is it possible that Digit(0) is 0 and Digit_minus_1 is also 0? + -- No. To get Digit(0) to zero, it would have to = Radix..then a + -- carry to Digit_Minus_1 would make it zero. But then Digit_Minus_1 + -- would be non-zero. + + if Must_Normalize then + + Z.Exp := Z.Exp + 1; + + for I in reverse Digit_Index'First+1 .. Digit_Index'Last loop + Z.Digit(I) := Z.Digit(I-1); + end loop; + Z.Digit(0) := Digit_Minus_1; + + end if; + + -- Test for failure in algorithm: + + if not Disable_Program_Error_Tests then + if Z.Digit(0) <= Digit_Zero then + raise Program_Error with "Some error in Carrying for + operator."; + end if; + end if; + + end Carry_For_Addition; + + --pragma Inline (Carry_For_Addition); + + --------- + -- "+" -- + --------- + + -- Just the identity operator, so you can type A := +1.23E+02 + -- and the statement will be accepted whether or not A is e_Real. + + function "+" (X : e_Real) return e_Real is + begin + return X; + end "+"; + + --------- + -- "+" -- + --------- + + -- Surprizingly complicated. + + function "+"(X, Y : e_Real) return e_Real is + Z : e_Real; + Delta_Exp : e_Integer; + Digit_Shift : Digit_Index := 0; + First_Unequal_Digit : Digit_Index := 0; + + Extra_Guard_Digit : Digit_Type := Digit_Zero; -- Important init. (for subtraction) + + Final_Sign_Is_Positive : Boolean; + Mantissas_Are_Equal : Boolean; + + type Max_Info is (X_Is_Max, Y_Is_Max); + Max_Num_ID : Max_Info := X_Is_Max; + + type Add_Choice is (Add_Them, Subtract_Y_From_X, Subtract_X_From_Y); + Add_Code : Add_Choice; + begin + + -- Step 0. If Either of the numbers is 0.0, then return the other. + + if Y.Is_Zero then + return X; + elsif X.Is_Zero then + return Y; + end if; + + -- Step 0b. If one is infinite, but not the other, return the infinity, + -- sign of the inf unchanged. If both are inf, say inf + inf = +inf. + -- And say inf - inf raises c.e., because don't know what it is. + + if Y.Is_Infinite and not X.Is_Infinite then + return Y; + elsif not Y.Is_Infinite and X.Is_Infinite then + return X; + end if; + + if Y.Is_Infinite and X.Is_Infinite then + if not X.Is_Positive and not Y.Is_Positive then + return Negative_Infinity; + elsif X.Is_Positive and Y.Is_Positive then + return Positive_Infinity; + else + raise Constraint_Error with "Subtraction of inf by inf is undefined."; + end if; + end if; + + + -- Step 1. Find the larger number, Exponent-wise, and return it if it is + -- so much larger than the other that there is no addition to be done. + -- If they are equal, exponent-wise, then say X is the larger. + + if Y.Exp > X.Exp then + Max_Num_ID := Y_Is_Max; + else + Max_Num_ID := X_Is_Max; + end if; + + Delta_Exp := Abs (X.Exp - Y.Exp); + if Delta_Exp > e_Integer(Digit_Index'Last) then -- ie, Delta_Exp >= No_Of_Digits + case Max_Num_ID is + when X_Is_Max => + return X; + when Y_Is_Max => + return Y; + end case; + end if; + + + -- Step 2. When the exponents are equal, and subtraction is going to be + -- done (ie, one of the numbers is negative, the other pos., X>0 XOR Y>0), + -- need more information about which number is smaller. + -- Now correctly find the larger (Abs-wise) even if the Exp's are equal. + -- Only do this in the subtraction case; if then the numbers turn + -- out to be equal, then return Zero. It is important to handle that + -- special case here. + + First_Unequal_Digit := Digit_Index'First; + + if (X.Is_Positive XOR Y.Is_Positive) and then X.Exp = Y.Exp then + + -- Find the first non-equal word in the mantissas: + Mantissas_Are_Equal := True; + for I in Digit_Index'First .. Digit_Index'Last loop + if X.Digit(I) /= Y.Digit(I) then + Mantissas_Are_Equal := False; + First_Unequal_Digit := I; + exit; + end if; + end loop; + + -- We're finished if the Exp's are equal, the Mantissas are equal + -- and we're subtracting one from the other: + + if Mantissas_Are_Equal then + return Zero; + end if; + + -- Find the larger of the Two (Absolute values of course): + + if X.Digit(First_Unequal_Digit) > Y.Digit(First_Unequal_Digit) then + Max_Num_ID := X_Is_Max; + else + Max_Num_ID := Y_Is_Max; + end if; + + end if; + + + -- Step 3. Do add or subtract? Depends on their signs. + + if X.Is_Positive and Y.Is_Positive then + Add_Code := Add_Them; + Final_Sign_Is_Positive := True; + end if; + + if (not X.Is_Positive) and (not Y.Is_Positive) then + Add_Code := Add_Them; -- add 2 neg nums as tho they were pos. + Final_Sign_Is_Positive := False; + end if; + + if (not X.Is_Positive) and Y.Is_Positive then + case Max_Num_ID is + when X_Is_Max => + Add_Code := Subtract_Y_From_X; -- I mean Abs(X) - Abs(Y) + Final_Sign_Is_Positive := False; + when Y_Is_Max => + Add_Code := Subtract_X_From_Y; + Final_Sign_Is_Positive := True; + end case; + end if; + + if X.Is_Positive and (not Y.Is_Positive) then + case Max_Num_ID is + when X_Is_Max => + Add_Code := Subtract_Y_From_X; -- I mean Abs(X) - Abs(Y) + Final_Sign_Is_Positive := True; + when Y_Is_Max => + Add_Code := Subtract_X_From_Y; -- I mean Abs(Y) - Abs(X) + Final_Sign_Is_Positive := False; + end case; + end if; + + + -- Step 4. We're now ready to do the adding (or subtracting). + -- The adding and subtracting are separated from the + -- carrying/borrowing/normalizing process, because i) the + -- adding can then be vectorized or otherwise optimized and + -- ii) in other versions many numbers will be summed (not just X and Y), + -- in which case it really pays off to do the normalizing + -- and carrying just once, after many additions, because the + -- overhead of carrying can be higher than summing. + + Digit_Shift := Digit_Index (Delta_Exp); + + case Add_Code is + when Add_Them => + + case Max_Num_ID is + when X_Is_Max => + Z := Add (Larger => X, Smaller => Y, Digit_Shift => Digit_Shift); + when Y_Is_Max => + Z := Add (Larger => Y, Smaller => X, Digit_Shift => Digit_Shift); + end case; + + when Subtract_Y_From_X => + + Subtract (Larger => X, Smaller => Y, Digit_Shift => Digit_Shift, + First_Unequal_Digit => First_Unequal_Digit, + Result => Z, Extra_Guard_Digit => Extra_Guard_Digit); + + when Subtract_X_From_Y => + + Subtract (Larger => Y, Smaller => X, Digit_Shift => Digit_Shift, + First_Unequal_Digit => First_Unequal_Digit, + Result => Z, Extra_Guard_Digit => Extra_Guard_Digit); + + end case; + + + -- Step 5. Do everything except the carrying or borrowing. + -- We were careful about subtracting the smaller from the larger, so + -- the following steps can be done before or after the carrying, (provided + -- its only 2 numbers are summing). + + if Final_Sign_Is_Positive then + Z.Is_Positive := True; + else + Z.Is_Positive := False; + end if; + + -- Set Z.Exp but remember it still may be raised by 1 (if carrying occurs), + -- or lowered by 1 if borrowing occurs. + case Max_Num_ID is + when X_Is_Max => + Z.Exp := X.Exp; + when Y_Is_Max => + Z.Exp := Y.Exp; + end case; + + -- The Z = 0 case has already been considered. + Z.Is_Zero := False; + + + -- Do the carrying or borrowing, as the case may be. These also + -- normalize the number, and produce an additional correction to the Exp. + + case Add_Code is + when Add_Them => + + Carry_For_Addition (Z, Digit_Shift); + + when Subtract_Y_From_X | Subtract_X_From_Y => + + Borrow_For_Subtraction (Z, Extra_Guard_Digit, Digit_Shift); + + end case; + + + -- Step 6. Handle over and under flow. Here underflow goes to Zero, + -- overflow to Infinity. This analysis is all isolated to the end + -- of the arithmetic routines so that it is more easily modified to + -- raise exceptions if that is what is desired. In order to do it + -- here, must assume that the parmeters Min_Exponent and Max_Exponent + -- limit the dynamic range of the Exp to about 1/4 of that allowed + -- by the base type used to represent the exponent. This is + -- checked in the spec with an assertion. (The reason is, the above + -- code will go outside the accepted range of Exp with out being + -- checked till down here.) This limit is OK because the base type + -- allows excessively large exponents anyway. + + if Z.Exp < Min_Exponent then + Z := Zero; + end if; + + if Z.Exp > Max_Exponent then + if Z.Is_Positive then + Z := Positive_Infinity; + else + Z := Negative_Infinity; + end if; + end if; + + return Z; + + end "+"; + + --------- + -- "-" -- + --------- + + -- Subtract Y from X: + + function "-"(X, Y : e_Real) return e_Real is + Y2 : e_Real := Y; + begin + Y2.Is_Positive := not Y.Is_Positive; + return X + Y2; + end "-"; + + ------------------------------------ + -- Do_Carrying_For_Multiplication -- + ------------------------------------ + + -- The digits have probably overflowed their allotted range of 0..Radix-1. + -- Break the digits into three parts: + -- digit = d2 * 2**(Radix*2) + d1 * 2**(Radix*1) + d0 * 2**(Radix*0) + -- where d_n is in the range 0..Radix-1. + -- Carry d2 to two digits to the left of Digit; carry d1 one digit to the + -- left of Digit. So Carry_Minus_1 = d1, Carry_Minus_2 = d2. + + procedure Do_Carrying_For_Multiplication + (Z : in out e_Real; + Digit_Minus_1 : in out Digit_Type; + Digit_Minus_2 : in out Digit_Type) + is + Carry_Minus_1, Carry_Minus_2 : Digit_Type := Digit_Zero; -- Essential init. + The_Digit : Digit_Type; + I : Digit_Index; + begin + + --******************************************************************* + -- The real valued digits are effectively integers in the range + -- 0..2**63-1. (Std. vers.) Break them into three words: 1st 30 bits (d0), + -- 2nd 30 bits (d1), and last 3 bits (d2). (If Radix is 2**29, then these + -- will be 29 bit, 29 bit, 5 bit word repectively.) The 3rd (5 bit) word + -- will be Carried 2 digits to the left (Carry_Minus_2). The second + -- 29 bit word will be carried 1 digit to the left. The remaining + -- lowest order 29 bit word will be the desired digit. + -- + -- Overhead is large from carrying so don't calculate and + -- carry the 3rd (5 bit) word unless necessary. This carry's usually + -- not necessary for the lowest order digits, because usually + -- the bits in the words are random, so half the time the word is + -- > Radix/2, half smaller. Or, = Radix / 2. + -- Assume = (assume independence). Then + -- < SUM(X*Y) > = SUM <(X*Y)> = SUM = SUM **2. So after + -- 4 sums (I = 3), you break the Radix**2 barrier, on the average. + -- So the optimization matters when the total number of digits is + -- small. When there are many digits, then it doesn't help much + -- but the overhead from trying is small enough that might as well + -- optimize for the common case: relatively small number of total + -- digits. + --******************************************************************* + + for I in reverse Digit_Index'First+2 .. Digit_Index'Last loop + The_Digit := Z.Digit(I); + if The_Digit >= Digit_Radix_Squared then + Carry_Minus_2 := Shift_Right_2x_No_of_Bits_in_Radix (The_Digit); + The_Digit := The_Digit - Carry_Minus_2 * Digit_Radix_Squared; + Z.Digit(I-2) := Z.Digit(I-2) + Carry_Minus_2; + end if; + Carry_Minus_1 := Shift_Right_No_of_Bits_in_Radix (The_Digit); + Z.Digit(I) := The_Digit - Carry_Minus_1 * Digit_Radix; + Z.Digit(I-1) := Z.Digit(I-1) + Carry_Minus_1; + end loop; + + -- Special case I = Digit_Index'First + 1 = 1. + + I := Digit_Index'First + 1; + The_Digit := Z.Digit(I); + if The_Digit >= Digit_Radix_Squared then + Carry_Minus_2 := Shift_Right_2x_No_of_Bits_in_Radix (The_Digit); + The_Digit := The_Digit - Carry_Minus_2 * Digit_Radix_Squared; + Digit_Minus_1 := Digit_Minus_1 + Carry_Minus_2; + end if; + Carry_Minus_1 := Shift_Right_No_of_Bits_in_Radix (The_Digit); + Z.Digit(I) := The_Digit - Carry_Minus_1 * Digit_Radix; + Z.Digit(I-1) := Z.Digit(I-1) + Carry_Minus_1; + + -- Special case I = Digit_Index'First = 0 + + I := Digit_Index'First; + The_Digit := Z.Digit(I); + if The_Digit >= Digit_Radix_Squared then + Carry_Minus_2 := Shift_Right_2x_No_of_Bits_in_Radix (The_Digit); + The_Digit := The_Digit - Carry_Minus_2 * Digit_Radix_Squared; + Digit_Minus_2 := Digit_Minus_2 + Carry_Minus_2; + end if; + Carry_Minus_1 := Shift_Right_No_of_Bits_in_Radix (The_Digit); + Z.Digit(I) := The_Digit - Carry_Minus_1 * Digit_Radix; + Digit_Minus_1 := Digit_Minus_1 + Carry_Minus_1; + + -- Special case I = Digit_Index'First - 1 + + if Digit_Minus_1 > Digit_Radix_minus_1 then + Carry_Minus_1 := Shift_Right_No_of_Bits_in_Radix (Digit_Minus_1); + Digit_Minus_1 := Digit_Minus_1 - Carry_Minus_1 * Digit_Radix; + Digit_Minus_2 := Digit_Minus_2 + Carry_Minus_1; + end if; + + end Do_Carrying_For_Multiplication; + + --pragma Inline (Do_Carrying_For_Multiplication); + + --------------- + -- Normalize -- + --------------- + + -- Normalize the result if the highest order (negative Index) digits are + -- non-zero. (The usual case.) + -- Shift Mantissa and the exponent accordingly. (The canonical form + -- requires that the first digit is non-zero.) + + procedure Normalize + (Z : in out e_Real; + Digit_Minus_1 : in Digit_Type; + Digit_Minus_2 : in Digit_Type) + is + First_Nonzero_Digit : Digit_Index := Digit_Index'First; -- Init. essential + First_Nonzero_Digit_Is_Minus_1 : Boolean := False; -- Init. essential + First_Nonzero_Digit_Is_Minus_2 : Boolean := False; -- Init. essential + All_Digits_Are_Zero : Boolean := False; -- Init. essential + begin + -- Step 0. Infinities and Zero. + + if Z.Is_Infinite then + return; + end if; + + Z.Is_Zero := False; -- Will be toggled if all digits 0. + + -- Step 1. Find the first non-zero digit: + + if Digit_Minus_2 > Digit_Zero then + First_Nonzero_Digit_Is_Minus_2 := True; + elsif Digit_Minus_1 > Digit_Zero then + First_Nonzero_Digit_Is_Minus_1 := True; + + else + + All_Digits_Are_Zero := True; + for I in Digit_Index loop + if Z.Digit(I) /= Digit_Zero then + First_Nonzero_Digit := I; -- So First_Nonzero_Digit <= Digit_Index'Last. + All_Digits_Are_Zero := False; + exit; + end if; + end loop; + + end if; + + if All_Digits_Are_Zero then + Z := Zero; + return; + end if; + + -- Step 2. Shift the array to the right if the Minus_N digits are + -- non Zero. Shift the array to the left if necessary (shouldn't be). + + if First_Nonzero_Digit_Is_Minus_2 then -- Shift right by 2: + + for I in reverse Digit_Index'First+2 .. Digit_Index'Last loop + Z.Digit(I) := Z.Digit(I - 2); + end loop; + Z.Digit(1) := Digit_Minus_1; + Z.Digit(0) := Digit_Minus_2; + Z.Exp := Z.Exp + 2; + + elsif First_Nonzero_Digit_Is_Minus_1 then -- Shift right by 1: + + for I in reverse Digit_Index'First+1 .. Digit_Index'Last loop + Z.Digit(I) := Z.Digit(I - 1); + end loop; + Z.Digit(0) := Digit_Minus_1; + Z.Exp := Z.Exp + 1; + + elsif First_Nonzero_Digit > Digit_Index'First then + -- Shift left by val of First_Non...: + + for I in 0 .. Digit_Index'Last-First_Nonzero_Digit loop + Z.Digit(I) := Z.Digit(I + First_Nonzero_Digit); + end loop; + for I in Digit_Index'Last-First_Nonzero_Digit+1 .. Digit_Index'Last loop + Z.Digit(I) := Digit_Zero; + end loop; + + Z.Exp := Z.Exp - e_Integer (First_Nonzero_Digit); --assumes Digit_Index'First=0 + + end if; + + -- Test for failure in algorithm: + if not Disable_Program_Error_Tests then + if Z.Digit(0) > Digit_Radix_Minus_1 or Z.Digit(0) <= Digit_Zero then + raise Program_Error with "Some error in Normalization for * operator."; + end if; + end if; + + end Normalize; + + --pragma Inline (Normalize); + + ---------------------------- + -- General_Multiplication -- + ---------------------------- + + function General_Multiplication + (X, Y : e_Real) + return e_Real + is + Z : e_Real; + Digit_Minus_1, Digit_Minus_2 : Digit_Type := Digit_Zero; -- Essential init + + Sum : Digit_Type := Digit_Zero; + No_Of_Digits, No_Of_Segments, Remaining_Sums : Digits_Base; + Starting_Digit, Ending_k : Digit_Index; + Start_Sum, End_Sum : Digit_Index; + Allowed_Digits_Per_Carry : constant := Sums_per_Carry + 1; + -- If you sum 9 numbers you only do 8 sums. + begin + + -- Step 0. Handle the Zeros. We'll say 0 * infinity is 0, since inf is + -- really just a large finite number. + + if X.Is_Zero or Y.Is_Zero then + return Zero; + end if; + + Z.Is_Zero := False; -- toggled below if underflow. + + -- Step 1. If one or more is infinite..Notice inf * inf = inf here. + + if Y.Is_Infinite or X.Is_Infinite then + if X.Is_Positive xor Y.Is_Positive then -- opposite signs. + return Negative_Infinity; + else + return Positive_Infinity; + end if; + end if; + + -- Step 2. Handle the signs and exponents: + + Z.Is_Positive := not (X.Is_Positive XOR Y.Is_Positive); + + Z.Exp := X.Exp + Y.Exp; -- Will be further adjusted by Carry/Normalize. + + + No_Of_Digits := Digit_Index'Last + 1; + No_Of_Segments := No_Of_Digits / Allowed_Digits_Per_Carry; + Remaining_Sums := No_Of_Digits REM Allowed_Digits_Per_Carry; + + + -- Z has been initialized to Zero: essential if Remaining_Sums = 0 + + -- First do the stragglers, digits of index (k in 0..Remaining_Sums-1): + + if Remaining_Sums > 0 then + for Digit_ID in Digit_Index loop + + Ending_k := Digit_Index'Min (Digit_ID, Remaining_Sums-1); + + Sum := Digit_Zero; + for k in Digit_Index'First .. Ending_k loop + Sum := Sum + X.Digit(k) * Y.Digit(Digit_ID - k); + end loop; + + Z.Digit(Digit_ID) := Sum; -- init Z. + + end loop; + + Do_Carrying_For_Multiplication (Z, Digit_Minus_1, Digit_Minus_2); + end if; + + -- Now do the segments of length (up to) (usually) 32: + + if No_Of_Segments > 0 then + for Segment in 0 .. No_Of_Segments-1 loop + + Start_Sum := (Segment+0) * Allowed_Digits_Per_Carry + Remaining_Sums; + End_Sum := (Segment+1) * Allowed_Digits_Per_Carry + Remaining_Sums - 1; + --End_Sum := Start_Sum + (Allowed_Digits_Per_Carry - 1); + Starting_Digit := Start_Sum; + + for Digit_ID in Starting_Digit .. Digit_Index'Last loop + + Ending_k := Digit_Index'Min (Digit_ID, End_Sum); + + Sum := Digit_Zero; + for k in Start_Sum .. Ending_k loop + Sum := Sum + X.Digit(k) * Y.Digit(Digit_ID - k); + end loop; + + Z.Digit(Digit_ID) := Z.Digit(Digit_ID) + Sum; + -- Z.Digit(Digit_ID) is close enough to 0 (ie, < Radix-1) that this + -- does not count as a sum in the Sums_Per_Carry rule. + -- That's why Allowed_Sums_Per_Carry = Sums_Per_Carry+1 here. + + end loop; + + Do_Carrying_For_Multiplication (Z, Digit_Minus_1, Digit_Minus_2); + + end loop; + end if; + + -- Must Normalize: shift digit array to make sure that the first digit + -- is non-zero: if Infinity or Zero gets this far, then problem occurs. + -- Should catch in normalize. + + Normalize (Z, Digit_Minus_1, Digit_Minus_2); + + -- Step 4. Handle over and under flow. Here underflow goes to Zero, + -- overflow to Infinity. In order to do it + -- here, must assume that the parmeters Min_Exponent and Max_Exponent + -- limit the dynamic range of the Exp to about 1/4 of that allowed + -- by the base type used to represent the exponent. This is + -- checked in the spec with an assertion. (The reason is, the above + -- code will go well outside the accepted range of Exp with out being + -- checked till down here.) This limit is OK because the base type + -- allows excessively large exponents anyway, up to 2**31-1. + + if Z.Exp < Min_Exponent then + Z := Zero; + end if; + + if Z.Exp > Max_Exponent then + if Z.Is_Positive then + Z := Positive_Infinity; + else + Z := Negative_Infinity; + end if; + end if; + + return Z; + + end General_Multiplication; + + ------------ + -- Square -- + ------------ + + -- Suppose Z, and X are composed of n digits 0..n-1. Then X*X is + -- + -- Xn-1 := X0*Xn-1 + X1*Xn-2 + .. + Xn-1*X0 + -- ... + -- X2 := X0*X2 + X1*X1 + X2*X0 + -- X1 := X0*X1 + X1*X0 + -- X0 := X0*X0 + -- + -- Now follows the upper half of the table, which produces words beyond + -- the precision of the two number X and X that are being multiplied. These + -- don't calculate. (Just make N larger if more precision is needed). + -- + procedure Square + (X : in out e_Real) + is + Digit_Minus_1, Digit_Minus_2 : Digit_Type := Digit_Zero; -- Essential init + + Ultimate_No_of_Digits : constant := Ultimate_Digit + 1; + -- equals: Digit_Index'Last - Digit_Index'First + 1 = Mantissa'Length + + ------------------------------------- + -- Product_if_digits_fewer_than_17 -- + ------------------------------------- + + procedure Product_if_digits_fewer_than_17 + is + pragma Assert (Ultimate_No_of_Digits >= 5); --because no if-then for 1st 5. + pragma Assert (Ultimate_No_of_Digits < 17); + pragma Assert (No_Of_Bits_In_Radix <= 30); + -- (not a) or b is same as a implies b. + -- no need to carry until the end if following evaluate to true: + pragma Assert (not (No_Of_Bits_In_Radix = 30) or Ultimate_No_of_Digits <= 8); + pragma Assert (not (No_Of_Bits_In_Radix = 29) or Ultimate_No_of_Digits <= 32); + pragma Suppress (Index_Check); + A : Mantissa renames X.Digit; + S : constant Digit_Index := Digit_Index'First; + begin + -- Ultimate_No_of_Digits is named number, so optimizer should + -- eliminate unused blocks of code below...not that it matters. + + -- need to do intermediate carrying if No_Of_Bits_In_Radix >= 30 + -- and more than 8 digits. + + -- ! Must be done in the following order ! + + if Ultimate_No_of_Digits >= 16 then + A (S+15) :=(A(S+0)*A(S+15) + A(S+1)*A(S+14) + A(S+2)*A(S+13) + + A(S+3)*A(S+12) + A(S+4)*A(S+11) + A(S+5)*A(S+10) + + A(S+6)*A(S+9) + A(S+7)*A(S+8))*Digit_Two; + end if; + + if Ultimate_No_of_Digits >= 15 then + A (S+14) :=(A(S+0)*A(S+14) + A(S+1)*A(S+13) + A(S+2)*A(S+12) + + A(S+3)*A(S+11) + A(S+4)*A(S+10) + A(S+5)*A(S+9) + + A(S+6)*A(S+8))*Digit_Two + + A(S+7)*A(S+7); + end if; + + if Ultimate_No_of_Digits >= 14 then + A (S+13) :=(A(S+0)*A(S+13) + A(S+1)*A(S+12) + A(S+2)*A(S+11) + + A(S+3)*A(S+10) + A(S+4)*A(S+9) + A(S+5)*A(S+8) + + A(S+6)*A(S+7))*Digit_Two; + end if; + + if Ultimate_No_of_Digits >= 13 then + A (S+12) :=(A(S+0)*A(S+12) + A(S+1)*A(S+11) + A(S+2)*A(S+10) + + A(S+3)*A(S+9) + A(S+4)*A(S+8) + A(S+5)*A(S+7))*Digit_Two + + A(S+6)*A(S+6); + end if; + + if Ultimate_No_of_Digits >= 12 then + A (S+11) :=(A(S+0)*A(S+11) + A(S+1)*A(S+10) + A(S+2)*A(S+9) + + A(S+3)*A(S+8) + A(S+4)*A(S+7) + A(S+5)*A(S+6))*Digit_Two; + end if; + + if Ultimate_No_of_Digits >= 11 then + A (S+10) := (A(S+0)*A(S+10) + A(S+1)*A(S+9) + A(S+2)*A(S+8) + + A(S+3)*A(S+7) + A(S+4)*A(S+6)) * Digit_Two + + A(S+5)*A(S+5); + end if; + + if Ultimate_No_of_Digits >= 10 then + A (S+9) := (A(S+0)*A(S+9) + A(S+1)*A(S+8) + A(S+2)*A(S+7) + + A(S+3)*A(S+6) + A(S+4)*A(S+5)) * Digit_Two; + end if; + + if Ultimate_No_of_Digits >= 9 then + A (S+8) := (A(S+0)*A(S+8) + A(S+1)*A(S+7) + A(S+2)*A(S+6) + + A(S+3)*A(S+5)) * Digit_Two + + A(S+4)*A(S+4); + end if; + + if Ultimate_No_of_Digits >= 8 then + A (S+7) := (A(S+0)*A(S+7) + A(S+1)*A(S+6) + A(S+2)*A(S+5) + + A(S+3)*A(S+4)) * Digit_Two; + end if; + + if Ultimate_No_of_Digits >= 7 then + A (S+6) := (A(S+0)*A(S+6) + A(S+1)*A(S+5) + A(S+2)*A(S+4)) * Digit_Two + + A(S+3)*A(S+3); + end if; + + if Ultimate_No_of_Digits >= 6 then + A (S+5) := (A(S+0)*A(S+5) + A(S+1)*A(S+4) + A(S+2)*A(S+3)) * Digit_Two; + end if; + + A (S+4) := A(S+2)*A(S+2) + + (A(S+0)*A(S+4) + A(S+1)*A(S+3)) * Digit_Two; + + A (S+3) :=(A(S+0)*A(S+3) + A(S+1)*A(S+2)) * Digit_Two; + + A (S+2) := A(S+1)*A(S+1) + + A(S+0)*A(S+2) * Digit_Two; + + A (S+1) := A(S+0)*A(S+1) * Digit_Two; + + A (S+0) := A(S+0)*A(S+0); + + end Product_if_digits_fewer_than_17; + + begin + + -- Step 0. Handle the Zeros. We'll say 0 * infinity is 0, since inf is + -- really just a large finite number. + + if X.Is_Zero then + return; + end if; + + -- Step 2. Do the multiplication. We can only sum 32 elements of the sum + -- before the carrys must be done (in one stnd setting). + + -- We use the inner product version (inner loop is an + -- dot-product.) To get the outer product version (BLAS routine DAXPY), + -- interchange the order of the loops. + -- + -- Here's the idea: + -- for Digit_ID in Digit_Index loop + -- Sum := 0.0; + -- for k in 0..Digit_ID loop + -- Sum := Sum + X(k) * Y(Digit_ID-k); + -- end loop; + -- Z(Digit_ID) := Sum; + -- end loop; + -- + -- Break sum into segments of 32 each in index k. Perform a carry + -- after each sum of 32 elements. + + if (Ultimate_No_of_Digits < 9 and then No_Of_Bits_In_Radix <= 30) or + (Ultimate_No_of_Digits < 17 and then No_Of_Bits_In_Radix <= 29) then + + Product_if_digits_fewer_than_17; + + X.Exp := X.Exp + X.Exp; -- Will be further adjusted by "Normalize.." routine. + + Do_Carrying_For_Multiplication (X, Digit_Minus_1, Digit_Minus_2); + + -- Must Normalize: shift digit array to make sure that the first digit + -- is non-zero: if Infinity or Zero gets this far, then problem occurs. + -- Should catch in normalize. + + Normalize (X, Digit_Minus_1, Digit_Minus_2); + + X.Is_Positive := True; + + else + + X := General_Multiplication (X, X); + return; + + end if; + + + -- Step 4. Handle over and under flow. Here underflow goes to Zero, + -- overflow to Infinity. This is all isolated to the end + -- of the arithmetic routines so that it is easily modified to + -- raise exceptions if that's what is desired. In order to do it + -- here, must assume that the parmeters Min_Exponent and Max_Exponent + -- limit the dynamic range of the Exp to about 1/4 of that allowed + -- by the base type used to represent the exponent. This is + -- checked in the spec with an assertion. (The reason is, the above + -- code will go well outside the accepted range of Exp with out being + -- checked till down here.) This limit is OK because the base type + -- allows excessively large exponents anyway, up to 2**31-1. + + if X.Exp < Min_Exponent then + X := Zero; + end if; + + if X.Exp > Max_Exponent then + X := Positive_Infinity; + end if; + + end Square; + + ------------------- + -- Multiply_Stnd -- + ------------------- + + -- Suppose Z, X, and Y are composed of n digits 0..n-1. Then X*Y is + -- + -- Z0 := X0*Y0 + -- Z1 := X0*Y1 + X1*Y0 + -- Z2 := X0*Y2 + X1*Y1 + X2*Y0 + -- ... + -- Zn-1 := X0*Yn-1 + X1*Yn-2 + .. + Xn-1*Y0 + -- + -- Now follows the upper half of the table, which produces words beyond + -- the precision of the two number X and Y that are being multiplied. These + -- don't calculate. (Just make N larger if more precision is needed). + -- If n is minimal for efficiency reasons, then it would be more efficient + -- in some calculations to make use the following instead of increasing the + -- number of digits to get the required precision. + -- + -- Zn := Xn-1*Y1 + ... + X1*Yn-1 + -- ... + -- Z2n-2 := Xn-1*Yn-1 + -- + function Multiply_Stnd + (X, Y : e_Real) + return e_Real + is + Z : e_Real := X; + Digit_Minus_1, Digit_Minus_2 : Digit_Type := Digit_Zero; -- Essential init + + Ultimate_No_of_Digits : constant := Ultimate_Digit + 1; + -- equals: Digit_Index'Last - Digit_Index'First + 1 = Mantissa'Length + + ------------------------------------- + -- Product_if_digits_fewer_than_17 -- + ------------------------------------- + + procedure Product_if_digits_fewer_than_17 + is + pragma Assert (Ultimate_No_of_Digits >= 5); --because no if-then's for 1st 5. + pragma Assert (Ultimate_No_of_Digits < 17); + pragma Assert (No_Of_Bits_In_Radix <= 30); + -- (not a) or b is same as a implies b. + -- no need to carry until the end if following evaluate to true: + pragma Assert (not (No_Of_Bits_In_Radix = 30) or Ultimate_No_of_Digits <= 8); + pragma Assert (not (No_Of_Bits_In_Radix = 29) or Ultimate_No_of_Digits <= 32); + pragma Suppress (Index_Check); + A : Mantissa renames Z.Digit; + B : Mantissa renames Y.Digit; + S : constant Digit_Index := Digit_Index'First; + begin + -- would need to do intermediate carrying if + -- No_Of_Bits_In_Radix >= 30 and Ultimate_No_of_Digits > 5. + + -- Ultimate_No_of_Digits is named number, so optimizer should + -- eliminate unused blocks of code below... + + if Ultimate_No_of_Digits >= 16 then + A(S+15) := A(S+0)*B(S+15) + A(S+1)*B(S+14) + A(S+2)*B(S+13) + + A(S+3)*B(S+12) + A(S+4)*B(S+11) + A(S+5)*B(S+10) + + A(S+6)*B(S+9) + A(S+7)*B(S+8) + A(S+8)*B(S+7) + + A(S+9)*B(S+6) + A(S+10)*B(S+5) + A(S+11)*B(S+4) + + A(S+12)*B(S+3) + A(S+13)*B(S+2) + A(S+14)*B(S+1) + + A(S+15)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 15 then + A(S+14) := A(S+0)*B(S+14) + A(S+1)*B(S+13) + A(S+2)*B(S+12) + + A(S+3)*B(S+11) + A(S+4)*B(S+10) + A(S+5)*B(S+9) + + A(S+6)*B(S+8) + A(S+7)*B(S+7) + A(S+8)*B(S+6) + + A(S+9)*B(S+5) + A(S+10)*B(S+4) + A(S+11)*B(S+3) + + A(S+12)*B(S+2) + A(S+13)*B(S+1) + A(S+14)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 14 then + A(S+13) := A(S+0)*B(S+13) + A(S+1)*B(S+12) + A(S+2)*B(S+11) + + A(S+3)*B(S+10) + A(S+4)*B(S+9) + A(S+5)*B(S+8) + + A(S+6)*B(S+7) + A(S+7)*B(S+6) + A(S+8)*B(S+5) + + A(S+9)*B(S+4) + A(S+10)*B(S+3) + A(S+11)*B(S+2) + + A(S+12)*B(S+1) + A(S+13)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 13 then + A(S+12) := A(S+0)*B(S+12) + A(S+1)*B(S+11) + A(S+2)*B(S+10) + + A(S+3)*B(S+9) + A(S+4)*B(S+8) + A(S+5)*B(S+7) + + A(S+6)*B(S+6) + A(S+7)*B(S+5) + A(S+8)*B(S+4) + + A(S+9)*B(S+3) + A(S+10)*B(S+2) + A(S+11)*B(S+1) + + A(S+12)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 12 then + A(S+11) := A(S+0)*B(S+11) + A(S+1)*B(S+10) + A(S+2)*B(S+9) + + A(S+3)*B(S+8) + A(S+4)*B(S+7) + A(S+5)*B(S+6) + + A(S+6)*B(S+5) + A(S+7)*B(S+4) + A(S+8)*B(S+3) + + A(S+9)*B(S+2) + A(S+10)*B(S+1) + A(S+11)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 11 then + A(S+10) := A(S+0)*B(S+10) + A(S+1)*B(S+9) + A(S+2)*B(S+8) + + A(S+3)*B(S+7) + A(S+4)*B(S+6) + A(S+5)*B(S+5) + + A(S+6)*B(S+4) + A(S+7)*B(S+3) + A(S+8)*B(S+2) + + A(S+9)*B(S+1) + A(S+10)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 10 then + A(S+9) := A(S+0)*B(S+9) + A(S+1)*B(S+8) + A(S+2)*B(S+7) + + A(S+3)*B(S+6) + A(S+4)*B(S+5) + A(S+5)*B(S+4) + + A(S+6)*B(S+3) + A(S+7)*B(S+2) + A(S+8)*B(S+1) + + A(S+9)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 9 then + A(S+8) := A(S+0)*B(S+8) + A(S+1)*B(S+7) + A(S+2)*B(S+6) + + A(S+3)*B(S+5) + A(S+4)*B(S+4) + A(S+5)*B(S+3) + + A(S+6)*B(S+2) + A(S+7)*B(S+1) + A(S+8)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 8 then + A(S+7) := A(S+0)*B(S+7) + A(S+1)*B(S+6) + A(S+2)*B(S+5) + + A(S+3)*B(S+4) + A(S+4)*B(S+3) + A(S+5)*B(S+2) + + A(S+6)*B(S+1) + A(S+7)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 7 then + A(S+6) := A(S+0)*B(S+6) + A(S+1)*B(S+5) + A(S+2)*B(S+4) + + A(S+3)*B(S+3) + A(S+4)*B(S+2) + A(S+5)*B(S+1) + + A(S+6)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 6 then + A(S+5) := A(S+0)*B(S+5) + A(S+1)*B(S+4) + A(S+2)*B(S+3) + + A(S+3)*B(S+2) + A(S+4)*B(S+1) + A(S+5)*B(S+0); + end if; + + A(S+4) := A(S+0)*B(S+4) + A(S+1)*B(S+3) + A(S+2)*B(S+2) + + A(S+3)*B(S+1) + A(S+4)*B(S+0); + + A(S+3) := A(S+0)*B(S+3) + A(S+1)*B(S+2) + A(S+2)*B(S+1) + + A(S+3)*B(S+0); + + A(S+2) := A(S+0)*B(S+2) + A(S+1)*B(S+1) + A(S+2)*B(S+0); + + A(S+1) := A(S+0)*B(S+1) + A(S+1)*B(S+0); + + A(S+0) := A(S+0)*B(S+0); + + end Product_if_digits_fewer_than_17; + + begin + -- Step 0. Handle the Zeros. We'll say 0 * infinity is 0, since inf is + -- really just a large finite number. + + if X.Is_Zero or Y.Is_Zero then + return Zero; + end if; + + Z.Is_Zero := False; -- toggled below if underflow. + + -- Step 1. If one or more is infinite..Notice inf * inf = inf here. + + if Y.Is_Infinite or X.Is_Infinite then + if X.Is_Positive xor Y.Is_Positive then -- opposite signs. + return Negative_Infinity; + else + return Positive_Infinity; + end if; + end if; + + -- Step 2. Handle the signs and exponents: + + Z.Is_Positive := not (X.Is_Positive XOR Y.Is_Positive); + + Z.Exp := X.Exp + Y.Exp; -- Will be further adjusted by "Carry.." routine. + + + -- Step 3. Do the multiplication. Can only sum (say) 32 elements of + -- the sum before the carrys must be done. + + -- We use the inner product version (inner loop is an + -- dot-product.) To get the outer product version (BLAS routine DAXPY), + -- interchange the order of the loops. + -- + -- Here's the idea: + -- for Digit_ID in Digit_Index loop + -- Sum := 0.0; + -- for k in 0..Digit_ID loop + -- Sum := Sum + X(k) * Y(Digit_ID-k); + -- end loop; + -- Z(Digit_ID) := Sum; + -- end loop; + -- + -- Break sum into segments of 32 each in index k. Perform a carry + -- after each sum of 32 elements. + + if (Ultimate_No_of_Digits < 9 and then No_Of_Bits_In_Radix <= 30) or + (Ultimate_No_of_Digits < 17 and then No_Of_Bits_In_Radix <= 29) then + + Product_if_digits_fewer_than_17; -- Z = Z * Y (Z has been initialized to X). + + Do_Carrying_For_Multiplication (Z, Digit_Minus_1, Digit_Minus_2); + -- the procedure requires initialized (0) Digit_Minus_1, ... + + else + + Z := General_Multiplication (X, Y); + return Z; + + end if; + + -- Must Normalize: shift digit array to make sure that the first digit + -- is non-zero: if Infinity or Zero gets this far, then problem occurs. + -- Should catch in normalize. + + Normalize (Z, Digit_Minus_1, Digit_Minus_2); + + -- Step 4. Handle over and under flow. Here underflow goes to Zero, + -- overflow to Infinity. In order to do it + -- here, must assume that the parmeters Min_Exponent and Max_Exponent + -- limit the dynamic range of the Exp to about 1/4 of that allowed + -- by the base type used to represent the exponent. This is + -- checked in the spec with an assertion. (The reason is, the above + -- code will go well outside the accepted range of Exp with out being + -- checked till down here.) This limit is OK because the base type + -- allows excessively large exponents anyway, up to 2**31-1. + + if Z.Exp < Min_Exponent then + Z := Zero; + end if; + + if Z.Exp > Max_Exponent then + if Z.Is_Positive then + Z := Positive_Infinity; + else + Z := Negative_Infinity; + end if; + end if; + + return Z; + + end Multiply_Stnd; + + pragma Inline (Multiply_Stnd); + + ---------- + -- Mult -- + ---------- + + -- Suppose Z, X, and Y are composed of n digits 0..n-1. Then X*Y is + -- + -- Z0 := X0*Y0 + -- Z1 := X0*Y1 + X1*Y0 + -- Z2 := X0*Y2 + X1*Y1 + X2*Y0 + -- ... + -- Zn-1 := X0*Yn-1 + X1*Yn-2 + .. + Xn-1*Y0 + -- + -- Now follows the upper half of the table, which produces words beyond + -- the precision of the two number X and Y that are being multiplied. These + -- don't calculate. (Just make N larger if more precision is needed). + -- If n is minimal for efficiency reasons, then it would be more efficient + -- in some calculations to make use the following instead of increasing the + -- number of digits to get the required precision. + -- + -- Zn := Xn-1*Y1 + ... + X1*Yn-1 + -- ... + -- Z2n-2 := Xn-1*Yn-1 + -- + procedure Mult + (X : in out e_Real; + Y : in e_Real) + is + Digit_Minus_1, Digit_Minus_2 : Digit_Type := Digit_Zero; -- Essential init + + Ultimate_No_of_Digits : constant := Ultimate_Digit + 1; + -- equals: Digit_Index'Last - Digit_Index'First + 1 = Mantissa'Length + + ------------------------------------- + -- Product_if_digits_fewer_than_17 -- + ------------------------------------- + + procedure Product_if_digits_fewer_than_17 + is + pragma Assert (Ultimate_No_of_Digits >= 5); --because no if-then's for 1st 5. + pragma Assert (Ultimate_No_of_Digits < 17); + pragma Assert (No_Of_Bits_In_Radix <= 30); + -- (not a) or b is same as a implies b. + -- no need to carry until the end if following evaluate to true: + pragma Assert (not (No_Of_Bits_In_Radix = 30) or Ultimate_No_of_Digits <= 8); + pragma Assert (not (No_Of_Bits_In_Radix = 29) or Ultimate_No_of_Digits <= 32); + pragma Suppress (Index_Check); + A : Mantissa renames X.Digit; + B : Mantissa renames Y.Digit; + S : constant Digit_Index := Digit_Index'First; + --Result : Mantissa; + begin + -- would need to do intermediate carrying if + -- No_Of_Bits_In_Radix >= 30 and Ultimate_No_of_Digits > 5. + + -- Ultimate_No_of_Digits is named number, so optimizer should + -- eliminate unused blocks of code below... + + -- ! Must be done in the following order ! + + if Ultimate_No_of_Digits >= 16 then + A (S+15) := A(S+0)*B(S+15) + A(S+1)*B(S+14) + A(S+2)*B(S+13) + + A(S+3)*B(S+12) + A(S+4)*B(S+11) + A(S+5)*B(S+10) + + A(S+6)*B(S+9) + A(S+7)*B(S+8) + A(S+8)*B(S+7) + + A(S+9)*B(S+6) + A(S+10)*B(S+5) + A(S+11)*B(S+4) + + A(S+12)*B(S+3) + A(S+13)*B(S+2) + A(S+14)*B(S+1) + + A(S+15)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 15 then + A (S+14) := A(S+0)*B(S+14) + A(S+1)*B(S+13) + A(S+2)*B(S+12) + + A(S+3)*B(S+11) + A(S+4)*B(S+10) + A(S+5)*B(S+9) + + A(S+6)*B(S+8) + A(S+7)*B(S+7) + A(S+8)*B(S+6) + + A(S+9)*B(S+5) + A(S+10)*B(S+4) + A(S+11)*B(S+3) + + A(S+12)*B(S+2) + A(S+13)*B(S+1) + A(S+14)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 14 then + A (S+13) := A(S+0)*B(S+13) + A(S+1)*B(S+12) + A(S+2)*B(S+11) + + A(S+3)*B(S+10) + A(S+4)*B(S+9) + A(S+5)*B(S+8) + + A(S+6)*B(S+7) + A(S+7)*B(S+6) + A(S+8)*B(S+5) + + A(S+9)*B(S+4) + A(S+10)*B(S+3) + A(S+11)*B(S+2) + + A(S+12)*B(S+1) + A(S+13)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 13 then + A (S+12) := A(S+0)*B(S+12) + A(S+1)*B(S+11) + A(S+2)*B(S+10) + + A(S+3)*B(S+9) + A(S+4)*B(S+8) + A(S+5)*B(S+7) + + A(S+6)*B(S+6) + A(S+7)*B(S+5) + A(S+8)*B(S+4) + + A(S+9)*B(S+3) + A(S+10)*B(S+2) + A(S+11)*B(S+1) + + A(S+12)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 12 then + A (S+11) := A(S+0)*B(S+11) + A(S+1)*B(S+10) + A(S+2)*B(S+9) + + A(S+3)*B(S+8) + A(S+4)*B(S+7) + A(S+5)*B(S+6) + + A(S+6)*B(S+5) + A(S+7)*B(S+4) + A(S+8)*B(S+3) + + A(S+9)*B(S+2) + A(S+10)*B(S+1) + A(S+11)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 11 then + A (S+10) := A(S+0)*B(S+10) + A(S+1)*B(S+9) + A(S+2)*B(S+8) + + A(S+3)*B(S+7) + A(S+4)*B(S+6) + A(S+5)*B(S+5) + + A(S+6)*B(S+4) + A(S+7)*B(S+3) + A(S+8)*B(S+2) + + A(S+9)*B(S+1) + A(S+10)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 10 then + A (S+9) := A(S+0)*B(S+9) + A(S+1)*B(S+8) + A(S+2)*B(S+7) + + A(S+3)*B(S+6) + A(S+4)*B(S+5) + A(S+5)*B(S+4) + + A(S+6)*B(S+3) + A(S+7)*B(S+2) + A(S+8)*B(S+1) + + A(S+9)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 9 then + A (S+8) := A(S+0)*B(S+8) + A(S+1)*B(S+7) + A(S+2)*B(S+6) + + A(S+3)*B(S+5) + A(S+4)*B(S+4) + A(S+5)*B(S+3) + + A(S+6)*B(S+2) + A(S+7)*B(S+1) + A(S+8)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 8 then + A (S+7) := A(S+0)*B(S+7) + A(S+1)*B(S+6) + A(S+2)*B(S+5) + + A(S+3)*B(S+4) + A(S+4)*B(S+3) + A(S+5)*B(S+2) + + A(S+6)*B(S+1) + A(S+7)*B(S+0); + end if; + + if Ultimate_No_of_Digits >= 7 then + A (S+6) := A(S+0)*B(S+6) + A(S+1)*B(S+5) + A(S+2)*B(S+4) + + A(S+3)*B(S+3) + A(S+4)*B(S+2) + A(S+5)*B(S+1) + + A(S+6)*B(S+0); + end if; + + + if Ultimate_No_of_Digits >= 6 then + A (S+5) := A(S+0)*B(S+5) + A(S+1)*B(S+4) + A(S+2)*B(S+3) + + A(S+3)*B(S+2) + A(S+4)*B(S+1) + A(S+5)*B(S+0); + end if; + + + A (S+4) := A(S+0)*B(S+4) + A(S+1)*B(S+3) + A(S+2)*B(S+2) + + A(S+3)*B(S+1) + A(S+4)*B(S+0); + + A (S+3) := A(S+0)*B(S+3) + A(S+1)*B(S+2) + A(S+2)*B(S+1) + + A(S+3)*B(S+0); + + A (S+2) := A(S+0)*B(S+2) + A(S+1)*B(S+1) + A(S+2)*B(S+0); + + A (S+1) := A(S+0)*B(S+1) + A(S+1)*B(S+0); + + A (S+0) := A(S+0)*B(S+0); + + end Product_if_digits_fewer_than_17; + + begin + + -- Step 0. Handle the Zeros. We'll say 0 * infinity is 0, since inf is + -- really just a large finite number. + + if X.Is_Zero or Y.Is_Zero then + X := Zero; + return; + end if; + + -- Step 1. If one or more is infinite..Notice inf * inf = inf here. + + if Y.Is_Infinite or X.Is_Infinite then + if X.Is_Positive xor Y.Is_Positive then -- opposite signs. + X := Negative_Infinity; + else + X := Positive_Infinity; + end if; + return; + end if; + + -- Step 3. Do the multiplication. We can only sum 32 elements of the sum + -- before the carrys must be done. + + -- We use the inner product version (inner loop is an + -- dot-product.) To get the outer product version (BLAS routine DAXPY), + -- interchange the order of the loops. + -- + -- Here's the idea: + -- for Digit_ID in Digit_Index loop + -- Sum := 0.0; + -- for k in 0..Digit_ID loop + -- Sum := Sum + X(k) * Y(Digit_ID-k); + -- end loop; + -- Z(Digit_ID) := Sum; + -- end loop; + -- + -- Break sum into segments of 32 each in index k. Perform a carry + -- after each sum of 32 elements. + + if (Ultimate_No_of_Digits < 9 and then No_Of_Bits_In_Radix <= 30) or + (Ultimate_No_of_Digits < 17 and then No_Of_Bits_In_Radix <= 29) then + + -- Notice we only change X here, not above, in case the else is used. + + X.Is_Positive := not (X.Is_Positive XOR Y.Is_Positive); + + X.Exp := X.Exp + Y.Exp; -- Will be further adjusted by "Normalize.." routine. + + Product_if_digits_fewer_than_17; + + Do_Carrying_For_Multiplication (X, Digit_Minus_1, Digit_Minus_2); + + else + + X := General_Multiplication (X, Y); + return; + + end if; + + Normalize (X, Digit_Minus_1, Digit_Minus_2); + + -- Step 4. Handle over and under flow. Here underflow goes to Zero, + -- overflow to Infinity. This analysis is all isolated to the end + -- of the arithmetic routines so that it is more easily modified to + -- raise exceptions if that's what is desired. In order to do it + -- here, must assume that the parmeters Min_Exponent and Max_Exponent + -- limit the dynamic range of the Exp to about 1/4 of that allowed + -- by the base type used to represent the exponent. This is + -- checked in the spec with an assertion. (The reason is, the above + -- code will go well outside the accepted range of Exp with out being + -- checked till down here.) This limit is OK because the base type + -- allows excessively large exponents anyway, up to 2**31-1. + + if X.Exp < Min_Exponent then + X := Zero; + end if; + + if X.Exp > Max_Exponent then + if X.Is_Positive then + X := Positive_Infinity; + else + X := Negative_Infinity; + end if; + end if; + + end Mult; + + ----------------------- + -- Multiply_In_Place -- + ----------------------- + + function Multiply_In_Place + (X, Y : e_Real) + return e_Real + is + Z : e_Real := X; + begin + Mult (Z, Y); + return Z; + end Multiply_In_Place; + + function "*" (X : e_Real; Y : e_Real) return e_Real renames Multiply_In_Place; + --function "*" (X : e_Real; Y : e_Real) return e_Real renames Multiply_stnd; + -- These 2 about the same. Multiply_In_Place calls procedure Mult. + + --------- + -- "*" -- + --------- + + -- Multiply the first and only digit of X by Y. + + function "*"(X : e_Digit; + Y : e_Real) return e_Real is + + Z : e_Real; + Digit_Minus_1 : Digit_Type := Digit_Zero; --Essential init + Digit_Minus_2 : constant Digit_Type := Digit_Zero; --Essential init + Carry_Minus_1 : Digit_Type := Digit_Zero; --Essential init + I : Digit_Index; + + begin + + + -- Step 0. Sign etc. + -- Notice this assumes 0 * inf = 0. + + if X.Is_Zero or Y.Is_Zero then + return Zero; + end if; + + + -- Step 1. Infinities. Digit can't be inf. + + Z.Is_Positive := not (X.Is_Positive XOR Y.Is_Positive); + + if Y.Is_Infinite then + if Z.Is_Positive then + return Positive_Infinity; + else + return Negative_Infinity; + end if; + end if; + + Z.Is_Zero := False; + Z.Exp := X.Exp + Y.Exp; + + for I in Digit_Index'First .. Digit_Index'Last loop + Z.Digit(I) := X.Digit * Y.Digit(I); + end loop; + + + -- Step 2. Do the carries. This is simpler than the more general version + -- because you carry at most one digit. (The Max is of Digit*Digit is + -- Radix**2 - 2*Radix + 1. This results in a carry of approx. Radix. Add + -- this the the next higher order digit to get a max of Radix**2 - Radix. + -- So still don't overflow the Range over which a single carry is + -- all that's needed: 0..Radix**2 - 1.) + -- Carry_Minus_2 is always zero, and Digit_Minus_2 is always 0. + + for I in reverse Digit_Index'First+1 .. Digit_Index'Last loop + Carry_Minus_1 := Shift_Right_No_of_Bits_in_Radix (Z.Digit(I)); + Z.Digit(I) := Z.Digit(I) - Carry_Minus_1 * Digit_Radix; + Z.Digit(I-1) := Z.Digit(I-1) + Carry_Minus_1; + end loop; + + -- Special case I = Digit_Index'First = 0 + + I := 0; + Carry_Minus_1 := Shift_Right_No_of_Bits_in_Radix (Z.Digit(I)); + Z.Digit(I) := Z.Digit(I) - Carry_Minus_1 * Digit_Radix; + Digit_Minus_1 := Carry_Minus_1; + + + -- Step 3. Must Normalize: shift digit array to make sure that the first + -- digit is non-zero. + + Normalize (Z, Digit_Minus_1, Digit_Minus_2); + + + -- Step 4. Handle over and under flow. Here underflow goes to Zero, + -- overflow to Infinity. + + if Z.Exp < Min_Exponent then + Z := Zero; + end if; + + if Z.Exp > Max_Exponent then + if Z.Is_Positive then + Z := Positive_Infinity; + else + Z := Negative_Infinity; + end if; + end if; + + return Z; + + end "*"; + + ------------- + -- Scaling -- + ------------- + + -- S'Scaling (X, Exp) + -- Exp + -- Let v be the value X*T'Machine_Radix . If v is a + -- machine number of the type T, or if |v|GT'Model_Small, + -- the function yields v; otherwise, it yields either one + -- of the machine numbers of the type T adjacent to v. + -- Constraint_Error is optionally raised if v is outside + -- the base range of S. A zero result has the sign of X + -- when S'Signed_Zeros is True. + -- + function Scaling (X : e_Digit; + Adjustment : e_Integer) return e_Digit is + X2 : e_Digit := X; + begin + if X.Is_Zero then + return X2; + end if; + + X2.Exp := X.Exp + Adjustment; + + if X2.Exp < Min_Exponent or else X2.Exp > Max_Exponent then + raise Constraint_Error with "Exp out of range in Scaling e_Digit operation."; + end if; + + return X2; + end Scaling; + + ------------------- + -- Make_Extended -- + ------------------- + + -- Turn Digit into Extended: + + function Make_Extended (X : e_Digit) return e_Real is + Z : e_Real; -- initialized to Zero. Import. + begin + + Z.Digit(0) := X.digit; + Z.Is_Positive := X.Is_Positive; + Z.Exp := X.Exp; + Z.Is_Zero := X.Is_Zero; + Z.Is_Infinite := False; + return Z; + + end Make_Extended; + + ------------ + -- Sum_Of -- + ------------ + + -- Optimized SUM routine for e_Digit + e_Real -> e_Real. + + function Sum_Of (X : e_Digit; Y : e_Real) return e_Real is + Z : e_Real := Y; + Delta_Exp : e_Integer; + + New_Digit_1 : Digit_Type := Digit_Zero; + New_Digit_2 : Digit_Type := Digit_Zero; + Need_To_Carry : Boolean := False; + + type Max_Info is (X_Is_Max, Y_Is_Max); + Max_Num_ID : Max_Info := X_Is_Max; + + begin + + -- Step 0. If Either of the numbers is 0.0, then return the other. + + if X.Is_Zero then + return Y; + elsif Y.Is_Zero then + return Make_Extended (X); + end if; + + -- Step 0b. If X is infinite, it doesn't matter what do. + + if Y.Is_Infinite then + return Y; + end if; + + -- Step 0c. If one is positive and the other neg., then are lazy. + -- Do it the slow way. + + if X.Is_Positive XOR Y.Is_Positive then + return (Y + Make_Extended (X)); + end if; + + -- Step 1. Now they either both Positive or both Neg. Sum them if its + -- easy and return with the right sign. Start by finding the larger + -- number, Exponent-wise. *Notice Y.Exp = X.Exp is classified as Y_Is_Max.* + + if X.Exp > Y.Exp then + Max_Num_ID := X_Is_Max; + else + Max_Num_ID := Y_Is_Max; + end if; + + Delta_Exp := Abs (X.Exp - Y.Exp); + + if Delta_Exp > e_Integer(Digit_Index'Last) then -- ie, Delta_Exp >= No_Of_Digits + case Max_Num_ID is + when Y_Is_Max => + return Y; + when X_Is_Max => + return Make_Extended (X); + end case; + end if; + + -- Step 2. If the digit X has the smaller exponent, then try + -- an optimization. Otherwise just use the full scale "+". + -- We verified above that Delta_Exp is in range of index of X.Digit. + -- The optimization covers the most common case by far in the "/" routine + -- and most other uses of this function: Y > X, and same sign. + -- We allow at most one carry..if more are required, give up trying. + + if Max_Num_ID = Y_Is_Max then + + New_Digit_1 := X.digit + Y.Digit(Delta_Exp); + if New_Digit_1 > Digit_Radix_Minus_1 then + Need_To_Carry := True; + end if; + + if Need_To_Carry and then (Delta_Exp = 0) then -- would have to normalize. + goto Abort_Optimization; + end if; + + if Need_To_Carry then + New_Digit_1 := New_Digit_1 - Digit_Radix; + New_Digit_2 := Y.Digit(Delta_Exp-1) + Digit_One; -- Carry. + if New_Digit_2 > Digit_Radix_Minus_1 then -- give up trying. + goto Abort_Optimization; + end if; + end if; + + -- If got this far, then are going through with it. + -- just change the 1 or 2 digits of X, call it Z, and return it: + + -- Z := Y; + -- Z is initialized to Y: + + Z.Digit(Delta_Exp) := New_Digit_1; + if Need_To_Carry then + Z.Digit(Delta_Exp-1) := New_Digit_2; + end if; + + -- Recall that X and Y have same sign. This must be the + -- sign of Z. OK, the Z := Y establishes this. + + return Z; + + end if; + + <> + + -- Step 3. If got this far, the optimization failed. Do it the + -- slow way. + + return (Y + Make_Extended (X)); + + end Sum_Of; + + --------- + -- "/" -- + --------- + + -- Calculate Quotient = X / Y. + -- Schoolboy algorithm. Not fast. Get Quotient a digit at a time. + -- Estimate trial Next_Digit by dividing first few digits of X by first few + -- digits of Y. Make this ratio into the proper range for a digit. + -- Multiply this Next_Digit by Y to get Product. (This step is why do it + -- a digit at a time. The above multiplication by Y can be highly optimized + -- then.) Subtract Product from X to get Remainder. Increment Quotient + -- by Next_Digit. Repeat the above steps with X replaced by Remainder. + -- Continue until Remainder has an exponent so small that subsequent + -- Next_Digit's are too small to contribute to Quotient. + -- + -- Few remarks: Next_Digit may not really be the next digit in Quotient. + -- It should usually be, but sometimes its too small, and very rarely, its + -- too large. Nevertheless, you increment Quotient by Next_Digit and + -- converge on the answer. Also, when Next_Digit is too large, Remainder + -- becomes negative, and one *subtracts* the next Next_Digit from quotient, + -- and *adds* the next Product to Remainder, and continues doing this as + -- long as Remainder is negative. In other words can converge on the + -- quotient from either above or below. So are calculating each + -- iteration another pseudo-digit, DigitJ, such that, for example. + -- + -- Y * (Digit0 + Digit1 + Digit3 - Digit4 + Digit5 ...) = X, + -- + -- Add up the digits to get the approximation of X / Y. + -- Some are negative (rare), some positive, but they add up to Quotient. + -- More precisely, + -- + -- X - Y * (Digit0 + Digit1 + Digit3 - Digit4 + Digit5 ...) = Remainder, + -- + -- where X.Exp - Remainder.Exp > Digit_Index'Last. Further + -- Digits contribute negligably to Quotient when this inequality holds. + -- For example, if there are 2 digits, then Digit_Index'Lasts = 1, so that if + -- X.Exp - Remainder.Exp > Digit_Index'Last = 1 then the difference in + -- exponents guarantees that subsequent iterations contribute to digits + -- beyond Digit_Index'Last in the Quotient. + + function "/" (X, Y : e_Real) + return e_Real + is + Delta_Remainder : e_Real; + Remainder_Of_X : e_Real := X; -- init important. + Quotient : e_Real; -- Initialized to zero (important). + Next_Digit : e_Digit; -- Initialized to zero (important). + Next_Digit_Val : Digit_Type; + Real_Next_Digit_Val : Real; + Real_Y, Inverse_Real_Y, Real_Remainder_Of_X : Real; + Decrement_Digit_Exp : Boolean := False; + + Count : e_Integer := 0; + Max_Allowed_Iterations : constant e_Integer := Digit_Index'Last * 2; + begin + -- Step 0. Handle Zeros and Infinities. Do signs at very end. Until + -- the very end pretend that Quotient is positive. + + if Y.Is_Zero then + raise Constraint_Error with "Division by zero."; + end if; + + if X.Is_Zero then + return Zero; + end if; + + if X.Is_Infinite and Y.Is_Infinite then + raise Constraint_Error with "Division of inf by inf is undefined."; + end if; + + if X.Is_Infinite then + if (X.Is_Positive xor Y.Is_Positive) then + return Negative_Infinity; + else + return Positive_Infinity; + end if; + end if; + + if Y.Is_Infinite and (not X.Is_Infinite) then + return Zero; + end if; + + -- Step 1. We initialized Remainder_Of_X to X. Below make it positive, + -- and assume all quantities are positive. Signs are set at end. + -- Inverse_Real_Y is in range [1.0/(Radix-Eps) .. 1.0]. + -- Real_Remainder_Of_X is in range [1.0 .. (Radix-Eps)]. + -- Next_Digit is Real_Floor (Real_Remainder_Of_X*Inverse_Real_Y) where + -- the product has been multiplied by Radix if its less than 1.0. + -- Possible range of Next_Digit: Max is Floor (Radix-eps / 1.0), which + -- is Radix-1. Min is Floor (Radix * (1.0 / (Radix-eps))) = 1.0. + + Real_Y := Real (Y.Digit(0)) + + Real (Y.Digit(1)) * Inverse_Radix + + Real (Y.Digit(2)) * Inverse_Radix_Squared; + + Inverse_Real_Y := Real_One / Real_Y; + + -- Pretend all quantities are positive till end. + Quotient.Is_Positive := True; + Remainder_Of_X.Is_Positive := True; -- This may go negative. + + -- Iterate until the remainder is small enough: + + Iteration: for Iter in e_Integer range 1..Max_Allowed_Iterations loop + + -- Initialized to X, so its not zero. Must lead with a nonzero digit. + -- Important here to sum ALL three leading digits of Remainder_Of_X. + + Real_Remainder_Of_X := Real (Remainder_Of_X.Digit(0)) + + Real (Remainder_Of_X.Digit(1))*Inverse_Radix + + Real (Remainder_Of_X.Digit(2))*Inverse_Radix_Squared; + + Real_Next_Digit_Val := Real_Remainder_Of_X * Inverse_Real_Y; + + -- Need Next_Digit in proper range for Mult_For_Div fctn: 1..Radix-1: + + Decrement_Digit_Exp := False; + if Real_Next_Digit_Val < Real_One then + Real_Next_Digit_Val := Real_Next_Digit_Val * Real_Radix; + Decrement_Digit_Exp := True; + end if; + + Next_Digit_Val := Digit_Floor (Real_Next_Digit_Val); + + if Next_Digit_Val > Digit_Radix_Minus_1 then + Next_Digit_Val := Digit_Radix_Minus_1; + end if; + + --if Next_Digit_Val < Digit_One then -- Should never happen, but test it. + -- Next_Digit_Val := Digit_One; + --end if; + + + -- Step 2. We now have Next_Digit, which is approx. Remainder_Of_X / Y. + -- We are ready to make Next_Digit an e_Digit number. + -- (so "*" can be optimized.) It must be in + -- the range 1..Radix-1, so truncate first. It has the exponent of + -- the Remainder_Of_X.Exp - Y.Exp, but decremented by 1.0 if multiplied + -- Next_Digit by Radix above. + -- Below remember that Next_Digit was initialized to Zero above, + -- and all that is changed in this loop is the digit value, and the Exp. + -- Also want the product of Next_Digit and Y to be >0, regardless of Y. + -- The conversion from Real to e_Digit could be done by the + -- function Make_e_Digit, but lets optimize: + + Next_Digit.Digit := Next_Digit_val; + Next_Digit.Exp := Remainder_Of_X.Exp - Y.Exp; + Next_Digit.Is_Zero := False; + Next_Digit.Is_Positive := True; + + if Decrement_Digit_Exp then + Next_Digit.Exp := Next_Digit.Exp - 1; + end if; + + + -- Step 3. Make the trial product of the next digit with the divisor.. + -- this will be subtracted from the remainder. + + Delta_Remainder := Next_Digit * Y; + Delta_Remainder.Is_Positive := True; + + + -- Step 3. Calculate the new "Quotient" and the new "Remainder_Of_X". + -- Add Extended_Next_Digit to Quotient, if Old Remainder was > 0. + -- Subtract Extended_Next_Digit from Quotient if Old Remainder was < 0. + -- If the Old Remainder = 0 then are done; that was checked on + -- previous pass of loop by line below. It was checked initially by + -- by checking if X = 0. (Remainder was initialized to X, and then made + -- positive). + + if Remainder_Of_X.Is_Positive then + -- Add Next_Digit to Quotient, and subtract Delta_Remainder from Remainder. + Delta_Remainder.Is_Positive := False; + Next_Digit.Is_Positive := True; + else + -- Subtract Next_Digit from Quotient, and add Delta_Remainder to Remainder. + Delta_Remainder.Is_Positive := True; + Next_Digit.Is_Positive := False; + end if; + + Remainder_Of_X := Remainder_Of_X + Delta_Remainder; + Quotient := Sum_Of (Next_Digit, Quotient); + + + -- Step 4. Are finished? + -- Remainder_Of_X.Exp started at X.Exp. Have made it small enough? + -- Remember that the calls above may have returned Infinity. Or if they + -- returned Zero, then the Exponent is 0. (None of these should happen tho.) + + if Remainder_Of_X.Is_Zero then exit Iteration; end if; + + if Remainder_Of_X.Is_Infinite then exit Iteration; end if; + + if (X.Exp-Remainder_Of_X.Exp) > Digit_Index'Last then exit Iteration; end if; + + Count := Iter; + + end loop Iteration; + + -- Max_Allowed_Iterations is twice the number usually required. I've + -- never seen it happen, but just in case: + + if Count = Max_Allowed_Iterations then -- Should raise error? + raise Program_Error with "Convergence problem in division routine."; + end if; + --Print_Text (Integer'Image(Count)); + --Print_Text (" Iterations."); + --text_io.new_line; + + + -- Step 5. Set Sign of Quotient. Handle over and under flow. + -- Here underflow goes to Zero, overflow to Infinity. + -- We can do this here because allowed room to maneuver in range of Exp. + + Quotient.Is_Positive := not (X.Is_Positive xor Y.Is_Positive); + + if Quotient.Exp < Min_Exponent then + Quotient := Zero; + end if; + + if Quotient.Exp > Max_Exponent then + if Quotient.Is_Positive then + Quotient := Positive_Infinity; + else + Quotient := Negative_Infinity; + end if; + end if; + + return Quotient; + + end "/"; + + --------- + -- "*" -- + --------- + + -- Multiply the first and only digit of X by the first and only digit + -- of Y. the reduction in Carrying is the big win here. + + function "*"(X : e_Digit; + Y : e_Digit) return e_Real is + + Z : e_Real; -- Init important. + Digit_Minus_1 : Digit_Type := Digit_Zero; -- Init important. + Digit_Minus_2 : constant Digit_Type := Digit_Zero; -- Init important. + Carry_Minus_1 : Digit_Type := Digit_Zero; -- Init important. + I : Digit_Index; + + begin + + + -- Step 0. Zeros. + + if X.Is_Zero or Y.Is_Zero then + return Zero; + end if; + + Z.Is_Zero := False; + + + -- Step 1. Sign and Infinities. Digits can't be inf. + + Z.Is_Positive := not (X.Is_Positive XOR Y.Is_Positive); + + Z.Exp := X.Exp + Y.Exp; + + Z.Digit(0) := X.Digit * Y.Digit; + + + -- Step 2. Do the carries. This is simpler than the more general version + -- because you carry at most one digit. (The Max is of Digit*Digit is + -- Radix**2 - 2*Radix + 1. This results in a carry of approx. Radix. Add + -- this the the next higher order digit to get a max of Radix**2 - Radix. + -- So still don't overflow the Range over which a single carry is + -- all that's needed: 0..Radix**2 - 1.) + -- Carry_Minus_2 is always zero, and Digit_Minus_2 is always 0. + + I := 0; + Carry_Minus_1 := Shift_Right_No_of_Bits_in_Radix (Z.Digit(I)); + Z.Digit(I) := Z.Digit(I) - Carry_Minus_1 * Digit_Radix; + Digit_Minus_1 := Carry_Minus_1; + + + -- Step 3. Must Normalize: shift digit array to make sure that the first + -- digit is non-zero. + + Normalize (Z, Digit_Minus_1, Digit_Minus_2); + + + -- Step 4. Handle over and under flow. Here underflow goes to Zero, + -- overflow to Infinity. + + if Z.Exp < Min_Exponent then + Z := Zero; + end if; + + if Z.Exp > Max_Exponent then + if Z.Is_Positive then + Z := Positive_Infinity; + else + Z := Negative_Infinity; + end if; + end if; + + return Z; + + end "*"; + + + --------- + -- "/" -- + --------- + + -- The only difference between this "/" and the general "/" is that + -- the (e_Digit * e_Real) operation has been replaced + -- with an (e_Digit * e_Digit) operation. This produces + -- big savings in time. This is used in elementary math function + -- routines, where the efficiency is important. + + function "/" (X : e_Real; Y : e_Digit) return e_Real is + Delta_Remainder : e_Real; + Remainder_Of_X : e_Real := X; -- init important. + Quotient : e_Real; -- Initialized to zero (important). + Next_Digit : e_Digit; -- Initialized to zero (important). + Real_Next_Digit_Val : Real; + Next_Digit_Val : Digit_Type; + Real_Y, Inverse_Real_Y, Real_Remainder_Of_X : Real; + Decrement_Digit_Exp : Boolean := False; + + Count : e_Integer := 0; + Max_Allowed_Iterations : constant e_Integer := Digit_Index'Last * 2; + begin + + + -- Step 0. Handle Zeros and Infinities. Do signs at very end. Until + -- the very end pretend that Quotient is positive. + + if Y.Is_Zero then + raise Constraint_Error with "Division by zero."; + end if; + + if X.Is_Zero then + return Zero; + end if; + + if X.Is_Infinite then + if (X.Is_Positive xor Y.Is_Positive) then + return Negative_Infinity; + else + return Positive_Infinity; + end if; + end if; + + + -- Step 1. We initialized Remainder_Of_X to X. Below make it positive, + -- and assume all quantities are positive. Signs are set at end. + -- Possible range of Next_Digit: Max is Floor (Radix-eps / 1.0), which + -- is Radix-1. Min is Floor (Radix * (1.0 / (Radix-eps))) = 1.0. + -- Remember, X and Y are normalized and we've tested for 0's, so + -- both are in the range 1..Radix-1. + + Real_Y := Real (Y.Digit); -- >= 1.0, since its quantized. + Inverse_Real_Y := Real_One / Real_Y; + + -- Pretend all quantities are positive till end. + Quotient.Is_Positive := True; + Remainder_Of_X.Is_Positive := True; -- This may go negative. + + -- Iterate until the remainder is small enough. + + Iteration: for Iter in e_Integer range 1..Max_Allowed_Iterations loop + + Real_Remainder_Of_X := + Real (Remainder_Of_X.Digit(0)) + + Real (Remainder_Of_X.Digit(1))*Inverse_Radix + + Real (Remainder_Of_X.Digit(1))*Inverse_Radix_Squared; + + Real_Next_Digit_Val := Real_Remainder_Of_X * Inverse_Real_Y; + + -- Need Next_Digit in proper range for Mult_For_Div fctn: 1..Radix-1: + + Decrement_Digit_Exp := False; + if Real_Next_Digit_Val < Real_One then + Real_Next_Digit_Val := Real_Next_Digit_Val * Real_Radix; + Decrement_Digit_Exp := True; + end if; + + Next_Digit_Val := Digit_Floor (Real_Next_Digit_Val); + + if Next_Digit_Val > Digit_Radix_Minus_1 then + Next_Digit_Val := Digit_Radix_Minus_1; + end if; + + --if Next_Digit_Val < Digit_One then -- Should never happen. Perform tests w/o + -- Next_Digit_Val := Digit_One; + --end if; + + + -- Step 2. We now have Next_Digit, which is approx. Remainder_Of_X / Y. + -- We are ready to make Next_Digit an e_Digit number. + -- (so "*" can be optimized.) It must be in + -- the range 1..Radix-1, so truncate first. It has the exponent of + -- the Remainder_Of_X.Exp - Y.Exp, but decremented by 1.0 if multiplied + -- Next_Digit by Radix above. + -- Below remember that Next_Digit was initialized to Zero above, + -- and all that is changed in this loop is the digit value, and the Exp. + -- Also want the product of Next_Digit and Y to be >0, regardless of Y. + -- The conversion from Real to e_Digit could be done by the + -- function Make_e_Digit, but lets optimize: + + Next_Digit.Digit := Next_Digit_Val; + Next_Digit.Exp := Remainder_Of_X.Exp - Y.Exp; + Next_Digit.Is_Zero := False; -- Is this always true? + Next_Digit.Is_Positive := True; + + if Decrement_Digit_Exp then + Next_Digit.Exp := Next_Digit.Exp - 1; + end if; + + + -- Step 3. Make the trial product of the next digit with the divisor.. + -- this will be subtracted from the remainder. + + Delta_Remainder := Next_Digit * Y; + Delta_Remainder.Is_Positive := True; + + + -- Step 3. Calculate the new "Quotient" and the new "Remainder_Of_X". + -- Add Extended_Next_Digit to Quotient, if Old Remainder was > 0. + -- Subtract Extended_Next_Digit from Quotient if Old Remainder was < 0. + -- If the Old Remainder = 0 then are done; that was checked on + -- previous pass of loop by line below. It was checked initially by + -- by checking if X = 0. (Remainder was initialized to X, and then made + -- positive). + + if Remainder_Of_X.Is_Positive then + -- Add Next_Digit to Quotient, and + -- subtract Delta_Remainder from Remainder. + Delta_Remainder.Is_Positive := False; + Next_Digit.Is_Positive := True; + else + -- Subtract Next_Digit from Quotient, and + -- add Delta_Remainder to Remainder. + Delta_Remainder.Is_Positive := True; + Next_Digit.Is_Positive := False; + end if; + + Remainder_Of_X := Remainder_Of_X + Delta_Remainder; + Quotient := Sum_Of (Next_Digit, Quotient); + + + -- Step 4. Are finished? + -- Remainder_Of_X.Exp started at X.Exp. Have made it small enough? + -- Remember that the calls above may have returned Infinity. Or if they + -- returned Zero, then the Exponent is 0. (None of these should happen tho.) + + if Remainder_Of_X.Is_Zero then exit Iteration; end if; + + if Remainder_Of_X.Is_Infinite then exit Iteration; end if; + + if (X.Exp-Remainder_Of_X.Exp) > Digit_Index'Last then exit Iteration; end if; + + Count := Iter; + + end loop Iteration; + + + -- Max_Allowed_Iterations is twice the number usually required. I've + -- never seen it happen, but just in case: + + if Count = Max_Allowed_Iterations then -- Should raise error? + raise Program_Error with "Convergence problem in division routine."; + end if; + --Print_Text (Integer'Image(Count)); + --Print_Text (" Iterations."); + --text_io.new_line; + + + -- Step 4. Set Sign of Quotient. Handle over and under flow. + -- Here underflow goes to Zero, overflow to Infinity. + + Quotient.Is_Positive := not (X.Is_Positive xor Y.Is_Positive); + + if Quotient.Exp < Min_Exponent then + Quotient := Zero; + end if; + + if Quotient.Exp > Max_Exponent then + if Quotient.Is_Positive then + Quotient := Positive_Infinity; + else + Quotient := Negative_Infinity; + end if; + end if; + + return Quotient; + + end "/"; + + ---------- + -- "**" -- + ---------- + + -- Standard algorithm. Write N in binary form: + -- + -- N = 2**m0 + 2**m1 + 2**m2 + ... + 2**mn, + -- + -- where the m0, m1, m2...are the indices of the nonzero binary digits of N. + -- Then + -- + -- X**N = X**(2**m0) * X**(2**m1) * X**(2**m2) * ... * X**(2**mn). + -- + -- We have written X as product of Powers of X. + -- Powers of X are obtained by squaring X, squaring the square of X, + -- etc. + -- + -- 0.0**0 is defined to be 1.0. Anything to the 0 is defined to be 1. + -- + function "**" + (X : e_Real; + N : Integer) + return e_Real + is + Power_Of_X, Product : e_Real; + Last_Bit_ID : constant Integer := Integer'Size - 2; + subtype Bit_Index is Integer range 0..Last_Bit_ID; + type Bit_Array is array(Bit_Index) of Integer; + Bit : Bit_Array := (others => 0); + Exp : Integer := Abs (N); + Final_Bit : Bit_Index; + Exp_Is_Even : Boolean; + begin + -- The following seems to be what the lrm implies, even if X=0.0: + -- If the exponent N is zero then return 1. (If X is inf this makes + -- sense since inf is really just a large finite number.) + + if N = 0 then + return One; + end if; + + -- Next, if the argument X is zero, set the result to zero. + -- If the Exponent is negative then raise constraint error. + + if X.Is_Zero then + if N > 0 then + return Zero; + else + raise Constraint_Error with "Error in ** operation, division by 0.0."; + end if; + end if; + + -- If the argument X is inf, set the result to inf, or 0 if N < 0. + -- If X < 0 then sign depends on whether Exp is even or odd. + -- (IS THIS TRUE if N=0? Will assume NO.) + + if X.Is_Infinite then + Exp_Is_Even := ((Exp rem 2) = 0); + + if X.Is_Positive then + Product := Positive_Infinity; + else + if Exp_Is_Even then + Product := Positive_Infinity; + else + Product := Negative_Infinity; + end if; + end if; + + if N < 0 then + return Zero; + else + return Product; + end if; + end if; + + -- Should try to avoid possible run-time errors (if, for example, + -- N * X.Exp > Max_Exponent)? No: the following + -- algorithm uses extended precision "*", which will overflow to + -- inf and short-circuit the process efficiently. + -- + -- Get binary form of the exponent Exp = Abs(N) + + for I in Bit_Index loop + Bit(I) := Exp REM 2; + Exp := Exp / 2; + Final_Bit := I; + if Exp = 0 then + exit; + end if; + end loop; + + -- Do the arithmetic. + + Product := One; -- X**0 + Power_Of_X := X; -- X**(2**0). Preserves sign of X if N is odd. + if Bit(Bit_Index'First) /= 0 then + --Product := Product * Power_Of_X; + Mult (Product, Power_Of_X); + end if; + for I in Bit_Index'First+1 .. Final_Bit loop + --Power_Of_X := Power_Of_X * Power_Of_X; -- X**(2**I) + Square (Power_Of_X); -- X**(2**I), good speed-up + if Bit(I) = 1 then + --Product := Product * Power_Of_X; + Mult (Product, Power_Of_X); + end if; + end loop; + + -- Under flow to zero. THe "/" operator should correctly do this, but + -- its important to other routines, so make sure its done right here: + + if Product.Is_Infinite and N < 0 then + return Zero; + end if; + + if N < 0 then + Product := One / Product; -- notice we've already checked for X=0. + end if; + + -- need to do the final over/under flow check? No. The "/" and + -- "*" did that for us above. + + return Product; + + end "**"; + +end Extended_Real; + diff --git a/arbitrary/extended_real.ads b/arbitrary/extended_real.ads new file mode 100644 index 0000000..b073d79 --- /dev/null +++ b/arbitrary/extended_real.ads @@ -0,0 +1,752 @@ + +-- PACKAGE Extended_Real +-- +-- package Extended_Real provides: +-- An arbitrary precision floating-point data type: e_Real. +-- +-- Lower limit on precision is 28 decimals. No upper limit is +-- enforced. All internal arithmetic is done on 64-bit Integers, +-- so its most efficient on 64-bit CPU's. The package is Pure. +-- Floating point attributes (Ada 95) are implemented as function +-- calls. The package exports standard floating point operators: +-- "*", "+", "/", "**", "Abs", "<", ">", "<=" , ">=", etc. +-- The standard operators make it easy to modify existing code to +-- use extended precision arithmetic. Procedure calls Mult(X,Y) and +-- Square(X) are also provided. They do multiplication "in-place", +-- (overwrite X with the result) and are somewhat faster than the +-- equivalent X := X*Y, and X := X*X. +-- +-- To set the precision search below for: +-- +-- Desired_Decimal_Digit_Precision +-- +-- package Extended_Real.Elementary_Functions provides: +-- Sin, Cos, Sqrt, Arcsin, Arccos, Arctan, Log, Exp, Reciprocal (1/x), +-- Reciprocal_Nth_Root (x to the power of -1/N), Divide, and "**" for +-- e_Real arguments and e_Real exponents. Routines are Ada 95'ish. +-- +-- package e_Derivs provides: +-- Extended precision routines for taking high order derivatives of +-- functions. Functions made from "*", "+", "/", "**", Sin, Cos, +-- Sqrt, Arcsin, Arccos, Arctan, Log, Exp, Compose = f(g(x)), +-- and Reciprocal can be differentiated to order specified by user. +-- +-- package Extended_Real.IO provides: +-- Text to extended-precision e_Real translation routines, and +-- e_Real to Text translation routines. +-- +-- package Extended_Real.Rand provides: +-- a (very) basic Random Number Generator. Really just for making +-- test vectors. +-- +-- procedure e_real_demo_1.adb is: +-- an introductory routine that demonstrates use of Extended_Real. +-- +-- procedure e_function_demo_1.adb is: +-- an introductory routine that demonstrates use of +-- Extended_Real.Elementary_Functions. +-- +-- procedure e_jacobi_eigen_demo_1.adb demonstrates: +-- extended-precision eigen-decomposition on Hilbert's matrix using +-- package e_Jacobi_Eigen. +-- +-- package e_Jacobi_Eigen is: +-- a Jabobi iterative eigen-decomposition routine +-- that shows how easy it is to upgrade a floating point routine +-- to extended precision. e_Jacobi_Eigen uses package Extended_Real. +-- +-- good optimization on gcc/GNAT: +-- -gnatNp -O3 -march="your machine architecture" -funroll-loops -ffast-math +-- (sometimes:-ftree-vectorize -funroll-all-loops -falign-loops=4, +-- -falign-loops=3, or -frename-registers are worth trying.) +-- +-- latest GNAT (gcc 4.3) try +-- gnatmake -gnatNp -O3 -march=native -mtune=native -funroll-loops -ffast-math +-- +-- Always do a preliminary run which exercizes Assertions, and other Checks: +-- -gnato -gnatV -gnata +-- +-- +-- Because precision is arbitrary, Extended_Real is not specially +-- optimized for any particular precision. The chosen design works best +-- in the limit of 100's of decimal digits. If the package had been +-- designed for 32 decimal digits of precision, then almost every feature +-- of the design would have been different. On the other hand, performance +-- seems to be respectable on 64-bit CPU's even at the lower limit (eg +-- 28 or 38 decimal digits). (Comparison is with Intel's machine-optimized +-- 32 decimal-digit floating point: i.e. Intel Fortran Real*16 on an Intel +-- 64-bit CPU.) 32 digit floating point is probably the most often used +-- (and most often needed) extended precision floating point. +-- Most Fortrans (including the gcc Fortran) don't offer anything higher +-- than 18 digit floating point. +-- +-- Common applications: +-- 0. Estimation of error in lower precision floating-point calculations. +-- 1. Evaluation of constants for math routines and Table-driven algorithms. +-- 2. Evaluation of series solutions of special function, especially when +-- the terms are constructed of large factorials and exponentials. +-- 3. Evaluation of recurrance relations for special functions. +-- +-- Generics greatly reduce the work you have to do in modifying programs +-- to use extended floating point: +-- +-- 1. place generic formal declarations +-- of the required extended arithmetic functions at the the top of the +-- package or subprogram to be modified. +-- +-- 2. use the unary "-" and "+" routines that convert Real to Extended: +-- +-- so that declarations +-- Number : Generic_Formal_Type := +1.234; +-- and statements like +-- Z := (+4.567834E+012) * X; +-- will be acceptible to both Real and Extended types. +-- +-- Underflows to Zero. Overflows to Positive or Negative infinity. I am +-- still trying to decide if the Infinities are worth the trouble, but the +-- trouble isn't great and there seem to be benefits. Sometimes you can +-- put off worrying about overflow in intermediate calculation and test +-- for it at the end by testing for Positive_Infinity. There are no NaNs. +-- +-- At the expense of purity, error messages using text_io can be +-- re-enabled in the body - see top of body of Extended_Real. +-- +--*************************************************************************** +-- +-- SECTION I. +-- +-- Constants and overflow/underflow/constraint_error conventions. +-- +-- To test an arbitrary X : e_Real to see if X is Zero or infinity use the +-- function Are_Equal (X, Zero) etc. Its written to make the test efficiently. +-- +-- Underflows are to (unsigned) Zero; overflows to (signed) infinity: +-- +-- Infinity here means a finite number that is too large to represent +-- in the floating point, and whose inverse is too small to represent in +-- floating point. The following conventions seemed sensible. Treat inf's +-- as Constraint Errors if uneasy with them. Assuming X is a positive e_Real: +-- 0*inf = 0, inf * inf = inf, X / inf = 0, |X| * -inf = -inf, +-- inf + inf = inf, X + inf = inf, -inf * inf = -inf, X - inf = -inf, +-- inf > X = True, -inf < X = True, inf > -inf = True, +-- (inf = -inf) = False +-- +-- Constraint_Error: +-- +-- The following ops have no sensible meaning, so Constraint_Error (ce) is +-- raised. +-- inf - inf => ce, inf / inf => ce, X / 0 => ce, inf / 0 => ce, +-- inf < inf => ce. +-- +--*************************************************************************** +-- SECTION II. +-- +-- Standard arithmetic operators. +-- +-- The arithmetic performed by these routines is supposed to be correct out +-- to the number of decimals specified by Desired_Decimal_Digit_Precision, +-- which you type in at the beginning of the spec. But the arithmetic is +-- actually performed on digits well beyond this limit in order to guarantee +-- this level of accuracy. The values +-- held by these extra digits (guard digits) are usually almost correct. +-- None of the following operators rounds away these guard digits. +-- Rounding is done explicitly by calling Round_Away_Smallest_Guard_Digit(). +-- In particular, none of the following comparison operators ("<", ">=", etc.) +-- rounds away guard digits of operands before performing the comparison. +-- All of them perform their comparisons out to the final guard digit. +-- To reduce much confusion, I decided to leave rounding entirely up to the +-- user, with Round_Away_.... Whether its best to round or not +-- depends on the algorithm. In a lot of algorithms its better to +-- round before you use the "Are_Equal" operator, and better not to round +-- when you use the "<" and ">" operators. +-- +-- "=" or Are_Equal(X, Zero) is the most efficient way to find out if X +-- is Zero. Same for Infinity. X < Zero is the efficient way find Sign of X. +-- X > Zero is efficient way to test positivity of X. Zero < X and +-- Zero > X are also handled efficiently. +-- +--*************************************************************************** +-- SECTION III. +-- +-- Routines for conversion from Real to e_Real and back again. +-- +-- Makes it easy to write generics that can be instantiated with either +-- conventional floating point of this extended floating point. +-- The unary + and - are here to make it easier to convert programs from +-- ordinary floating point to extended, by making it easy to replace +-- +-- X : Generic_Float_Type := 1.2345; --here instantiate w/ Float. +-- X := 4.5678 * Y; --here instantiate w/ Float. +-- +-- with +-- +-- X : Generic_Float_Type := +1.2345; --here instantiate w/ e_Real or Float. +-- X := (+4.5678) * Y; --same here. +-- +-- Now you can instantiate the generic with either e_Real or Float, +-- (but you have to add the unary "+" to the list of generic formals.) +-- +--*************************************************************************** +-- SECTION V. +-- +-- Real * Extended operations. +-- +-- More efficient operations. The "Real" is not your ordinary real, but +-- something in the range 0.0 .. Radix-1, and integer valued, though it +-- can have an negative or positive exponent. So its not very appropriate +-- for general use; +-- +-- The Real * Extended operations can be particularly efficient if +-- the Real number is in the same range as a Digit, ie, 0..Radix-1. +-- So we define a type e_Digit, a single real number with an +-- exponent. These mixed multiplication "*" and "/" ops are used by the +-- ascii to real_extended and real_extended to ascii translators, +-- and by Newton's method calculations of elementary functions. +-- This efficiency only comes if the real number can be represented by +-- a single digit: integer values in the range 0..Radix-1, (times +-- an exponent in a power-of-2 Radix. e.g. 0.5 is OK, 1.0/3.0 is not.) +-- Make_e_Digit will raise a constraint error if the range of the +-- intended real number is wrong. +-- +--********************************************************************** +-- INTERNAL FORMAT OF e_Real +-- +-- Internally the extended numbers are stored in such a way that the +-- value of e_Real number X is +-- +-- Max +-- X = Radix**Exp * SUM {Radix**(-I) * Digit(I)}. +-- I=0 +-- +-- Externally, the user sees e_Real (via the Exponent, and Fraction +-- attribute functions) as tho' it were normalized. In other words, the +-- value of X is +-- +-- Max +-- X = Radix**Exp * SUM {Radix**(-I-1) * Digit(I)} +-- I=0 +-- +-- Exp is called the "normalized" exponent. If Exp is the normalized exponent +-- then, say, a binary number would be written: +-- +-- 0.111011010001 * 2**(Exp). +-- +-- In other words the first binary digit in the mantissa is of power 2**(-1). +-- It is important to know this because the function Real'Exponent(x) returns +-- the *normalized* exponent, and the function Real'Fraction(x) returns +-- x * 2**(-Exp) where Exp is the normalized exponent. So in the above case, +-- 'Fraction would return 0.111011010001. +-- Also, in normalized form, the first binary digit of the mantissa is always +-- non-zero. +--*************************************************************************** + +generic + + type Real is digits <>; + -- Make it 15 digits or greater. This is checked. + -- This is usually the type you are going to replace with e_Real. + +package Extended_Real is + + pragma Pure (Extended_Real); + + pragma Assert (Real'Digits >= 15); + + type e_Real is private; -- The extended precision floating pt type. + + -- Instructions: + -- The only things that need to be adjusted by the user are + -- + -- Desired_Decimal_Digit_Precision + -- and + -- Desired_No_Of_Bits_In_Radix + -- + -- The 2 parameters follow next, along with instructions. + + Desired_Decimal_Digit_Precision : constant := 28; + -- If you request 28 Decimal Digits, you usually get 29 or more. + -- If you request 29 to 37 Decimal Digits, you usually get 38 or more. + -- If you request 38 to 46 Decimal Digits, you usually get 47 or more. + -- If you request 47 to 55 Decimal Digits, you usually get 56 or more. + -- (And so on, in jumps of 9. Assumes Desired_No_Of_Bits_In_Radix = 30.) + -- + -- The simple operators "*", "+", "/" usually give the best precision. + -- They should get the 1st guard digit right, and by themselves: + -- If you request 28 Decimal Digits, they're good to about 37 Decimal Digits. + -- If you request 37 Decimal Digits, they're good to about 46 Decimal Digits. + -- + -- Large complicated floating point computations will usually get both + -- guard digits wrong and additional error will accumulate, so: + -- If you request 28 Decimal Digits, ultimately expect <28 Decimal Digits. + -- + -- Lower limit on Desired_Decimal_Digit_Precision is 28. + + pragma Assert (Desired_Decimal_Digit_Precision >= 28); + + + Desired_No_Of_Bits_In_Radix : constant := 30; + -- Anything under 31 works, but should be adjusted for best performance: + -- 30 is good if Desired_Decimal_Digit_Precision is 28 to 55. + -- 29 is good standard setting (use it when Desired_Decimal_Digit_Precision > 55). + -- 28 is good if Desired_Decimal_Digit_Precision >> 200. (But experiment.) + -- + -- 30 is necessary if you want the minimum decimal digits setting: 28. + -- (If you choose 29 bits in Radix, you will get more decimals than you expect.) + + pragma Assert (Desired_No_Of_Bits_In_Radix <= 30); + + + type e_Int is range -2**31+1 .. 2**31-1; + subtype e_Integer is e_Int'Base; + -- Type of Exponent. Also takes the place of Universal_Integer in the + -- "attribute" functions defined below for e_Real. + -- Keep it 32-bit. Smallest usually fastest. + + pragma Assert (e_Integer'Size <= 32); + -- try fit e_Reals into small space; not essential, but Larger is slower. + + + Zero : constant e_Real; + One : constant e_Real; + Positive_Infinity : constant e_Real; + Negative_Infinity : constant e_Real; + -- To test an arbitrary X : e_Real to see if X is Zero or infinity use the + -- function: Are_Equal (X, Zero), or X = Zero etc. Testing for Zero is fast. + -- Infinity here means a finite number that is too large to represent in the + -- floating point, and whose inverse is too small to represent in floating + -- point Zero is positive. + + + + -- SECTION II. Standard operators. + -- + -- To reduce much confusion, rounding is entirely up to the + -- user, with Round_Away_Guard_Digits(). Whether its best to round or not + -- depends on the algorithm. For example, in some cases it is better to + -- round before you use the "Are_Equal" operator, and better not to round + -- when you use the "<" and ">" operators. (see intro.) + + + function "*" (X, Y : e_Real) return e_Real; -- inline can slow it down. + + function "+" (X, Y : e_Real) return e_Real; -- inline can slow it down. + + function "-" (X, Y : e_Real) return e_Real; + + function "+" (X : e_Real) return e_Real; + function "-" (X : e_Real) return e_Real; + + function "/" (X, Y : e_Real) return e_Real; + + function "**"(X : e_Real; + N : Integer) return e_Real; + + procedure Square (X : in out e_Real); + -- Same as X := X * X; (but usually faster if < 120 decimal digits.) + + procedure Mult + (X : in out e_Real; + Y : in e_Real); + -- Same as X := X * Y; (but usually faster if < 120 decimal digits.) + + function "Abs" (X : e_Real) return e_Real; + + function Are_Equal (X, Y : e_Real) return Boolean; + -- Return true only if + -- equality is exact in the cases of Zero and the 2 infinities. + -- Are_Equal(X, Zero) is the most efficient way to find out if X is Zero. + -- Same for Infinity. X < Zero is the efficient way find Sign of X. + -- X > Zero is efficient way to test positivity of X. Zero < X etc. OK too. + + function "<" (X, Y : e_Real) return Boolean; + function "<=" (X, Y : e_Real) return Boolean; + function ">" (X, Y : e_Real) return Boolean; + function ">=" (X, Y : e_Real) return Boolean; + function "=" (X, Y : e_Real) return Boolean renames Are_Equal; + + function Are_Not_Equal (X, Y : e_Real) return Boolean; -- not Are_Equal + + -- SECTION III. Conversions between Real to e_Real. (see intro.) + + + function Make_Real (X : e_Real) return Real; + + function Make_Extended (X : Real) return e_Real; + function "+" (X : Real) return e_Real renames Make_Extended; + function "-" (X : Real) return e_Real; + -- The above 3 functions are identical, except "-" changes sign of X. + -- Makes it easy to write generics that can be instantiated with either + -- conventional floating point of this extended floating point, via: + -- X : Generic_Float_Type := +1.2345; + + function "+" (X : Integer) return e_Real; + -- Only works in range of Real (15 digits usually). + -- + -- raises Constraint_Error + -- + -- if X is greater than about 10**Real'Digits. + -- So X = 2**62 raises Constraint_Error if Real'Digits = 15. + -- Its really just for making e_Reals out of small ints: +7. + + + -- SECTION IV. Ada9X oriented attributes. + -- + -- Below: Machine attributes and the function calls (like Truncation). + -- More information on the machine model is given in the introduction. + -- The machine model is static, so none of the Machine oriented attributes, + -- and none of the functions reflect varying precision. (see intro.) + -- + -- Written in the spirit of the Ada attributes, but the fit is never + -- perfect. + + function Remainder (X, Y : e_Real) return e_Real; + + function Copy_Sign (Value, Sign : e_Real) return e_Real; + + + function e_Real_Machine_Rounds return Boolean; + function e_Real_Machine_Overflows return Boolean; + function e_Real_Signed_Zeros return Boolean; + function e_Real_Denorm return Boolean; + -- These functions always return False. + + function e_Real_Machine_Emax return e_Integer; + + function e_Real_Machine_Emin return e_Integer; + + function e_Real_Machine_Mantissa return e_Integer; + -- Always returns Mantissa'Length: all the digits including guards. + -- + -- NOT binary digits, NOT decimal digits. + + + function e_Real_Machine_Radix return Real; + -- Usually 2.0**29 or 2.0**30 for Integer digits; 2.0**24 for Flt pt digits. + -- Returns: No_of_Bits_in_Radix (as a Real type). + + function Leading_Part (X : e_Real; + Radix_Digits : e_Integer) return e_Real; + -- Example: to set to zero all but the first digit of X use + -- First_Digit := Leading_Part (X, 1); + + function Exponent (X : e_Real) return e_Integer; + -- By convention return 0 for Zero. Else return nomalized Expon. + -- Returns Max_Exponent+2 for the 2 infinities. + -- NOT decimal, and NOT binary Exponent. + + function Fraction (X : e_Real) return e_Real; + + function Compose (Fraction : e_Real; + Exponent : e_Integer) return e_Real; + + function Scaling ( X : e_Real; + Adjustment : e_Integer) return e_Real; + + + -- Chop off fractional parts. + -- + -- Rounding, Unbiased_Rounding, Ceiling, Floor return e_Real + -- with Zero fractions. + + function Rounding (X : e_Real) return e_Real; + + function Unbiased_Rounding (X : e_Real) return e_Real; + + function Truncation (X : e_Real) return e_Real; + + function Ceiling (X : e_Real) return e_Real; + + function Floor (X : e_Real) return e_Real; + + + -- Round away guard digits. + -- + -- function Machine rounds away the smallest Guard digit. + -- There's no one right way to round away Guard Digits or choose + -- Model_Epsilon's. Doesn't follow the Ada95 model for rounding + -- 64-bit floats. That model doesn't seem to fit very well. + + function e_Real_Model_Epsilon return e_Real; + -- At present this calls: e_Real_Model_Epsilon_2 which is + -- 1 unit in the 3rd smallest digit. (The 3rd smallest digit + -- is the 1st digit that is larger than the 2 guard digits.) + + function e_Real_Machine_Epsilon return e_Real; + -- At present this calls: e_Real_Model_Epsilon_1 which is + -- 1 unit in the 2nd smallest digit. (The 2nd smallest digit + -- is the larger of the 2 guard digits.) + + function Machine (X : e_Real) return e_Real; + -- This calls: + -- Round_Away_Smallest_Guard_Digit + -- + + function Round_Away_Smallest_Guard_Digit (X : e_Real) return e_Real; + + function e_Real_Model_Epsilon_1 return e_Real; + -- One unit in the 2nd smallest digit. + + function e_Real_Model_Epsilon_2 return e_Real; + -- One unit in the 3rd smallest digit. (The smallest digit that + -- is *not* a Guard_Digit. + -- + -- Guard_Digits = 2 always; assume neither of them is correct: + -- if there's 3 digits of Radix 2^30 then eps_2 is 2^(-30). + -- if there's 4 digits of Radix 2^30 then eps_2 is 2^(-60). + -- if there's 5 digits of Radix 2^30 then eps_2 is 2^(-90) or about 10**-27. + -- + -- So Eps_2 is the smallest number s/t eps_2+.999999999999 /= .999999999999 + -- when you remove both guard digits. + + + -- SECTION V. Digit * Extended operations. + -- + -- More efficient operations. "Digit" is not your ordinary real, but + -- something in the range 0.0 .. Radix-1.0, and integral valued, though it + -- can have a negative exponent. So the following is not very appropriate + -- for general use; in the '83 version we export it so that it can be used by + -- elementary function packages. + + + type e_Digit is private; + + function "*" (X : e_Digit; Y : e_Real) return e_Real; + + function "/" (X : e_Real; Y : e_Digit) return e_Real; + + function Sum_Of (X : e_Digit; Y : e_Real) return e_Real; + + function "+" (X : e_Digit; Y : e_Real) return e_Real renames Sum_Of; + + function Scaling (X : e_Digit; Adjustment : e_Integer) return e_Digit; + -- Multiply X by Radix**N where N = Adjustment. + + function Make_Extended (X : e_Digit) return e_Real; + + function Make_e_Digit (X : Real) return e_Digit; + -- X must be a whole number: 0.0, 1.0, 2.0 etc. in the range 0..Radix-1, + -- times some integer power of the Radix. So 0.5 is OK, but not 1/3. + + function Number_Of_Guard_Digits return e_Integer; + -- Constant. To get number of digits that are being correctly calculated + -- (by conservative estimates) use + -- No_Correct_Digits = Present_Precision - Number_Of_Guard_Digits. + + function Minimum_No_Of_Digits_Allowed return e_Integer; + -- Constant. Includes guard digits. + + +private + + -- + -- SECTION VII. Make the Data structure for e_Real. + -- + + -- Using 32-bit ints for the Digits: (don't do it) + + --No_Of_Usable_Bits_In_Digit : constant := 31; -- bad idea; lots of trouble. + --No_Of_Bits_In_Radix : constant := 13; -- can't use 14 or > + + -- Using 64-bit floats for the Digits: (don't bother) + + --No_Of_Usable_Bits_In_Digit : constant := 53; -- if using flt pt Mantissa (slow) + --No_Of_Bits_In_Radix : constant := 24; + + -- Using 64-bit ints for the Digits: + + No_Of_Usable_Bits_In_Digit : constant := 63; -- Integer; must allow neg. vals + No_Of_Bits_In_Radix : constant := Desired_No_Of_Bits_In_Radix; + + -- 30 is good if Desired_Decimal_Digit_Precision is 28 to 55. + -- 29 is good standard setting (especially: Desired_Decimal_Digit_Precision > 55). + -- 28 is good if Desired_Decimal_Digit_Precision is in the 100's. (But experiment.) + + Sums_Per_Carry : constant := 2**(No_Of_Usable_Bits_In_Digit-2*No_Of_Bits_In_Radix)-1; + -- Sums_Per_Carry : This is number of sums you can accumulate during + -- multiplication before the carrys need to be performed. + -- + -- You can do a large number of X*Y < Radix*Radix products, and then sum + -- (Sums_Per_Carry+1) of them before a Carry is necessary in multiplication. + + -- a implies b is same as not (a) or b: + pragma Assert (not (No_Of_Bits_In_Radix = 30) or Sums_Per_Carry <= 8-1); + pragma Assert (not (No_Of_Bits_In_Radix = 29) or Sums_Per_Carry <= 32-1); + pragma Assert (not (No_Of_Bits_In_Radix = 28) or Sums_Per_Carry <= 128-1); + pragma Assert (not (No_Of_Bits_In_Radix = 27) or Sums_Per_Carry <= 512-1); + pragma Assert (not (No_Of_Bits_In_Radix = 26) or Sums_Per_Carry <= 2048-1); + + -- + -- Now that we know: No_Of_Bits_In_Radix, + -- + -- get number of binary digits and extended digits needed to make e_Real. + -- Use the following formula for the number of Binary digits needed + -- to meet Desired Decimal Digit precision D: + -- + -- Binary_Digits >= Ceiling (D * Log_Base_2_Of_10) + 1 + -- + -- where D = Desired_Decimal_Digit_Precision, and + -- where Log_Base_2_Of_10 = 3.321928094887362. + -- Ceiling of Real numbers with static declarations? + -- + -- Ceiling (3.321928094887 * D) <= Ceiling (3.322 * D) + -- = Ceiling((3322.0 * D) / 1000.0) + -- = (3322 * D - 1) / 1000 + 1 + -- D is integer valued, so use integer Ceiling (A / B) = (A - 1) / B + 1. + -- (for positive A). The above + -- steps give us the number of binary digits required: No_Of_B_Digits. + -- Next: min number of Radix 2.0**No_Of_Bits_In_Radix digits: No_Of_e_Digits. + -- To get No_Of_e_Digits divide by No_Of_Bits_In_Radix and take the Ceiling. + -- + + -- B is for binary, E for extended: + + ILog_Base_2_Of_10_x_1000 : constant := 3322; -- Round UP. + D : constant := Desired_Decimal_Digit_Precision - 2; + No_Of_B_Digits : constant := (ILog_Base_2_Of_10_x_1000 * D - 1) / 1000 + 2; + No_Of_e_Digits : constant := (No_Of_B_Digits - 1) / No_Of_Bits_In_Radix + 1; + + -- + -- The following parameter settings give us 2 more words in the Mantissa + -- than required. These two are essential in getting the full desired + -- precision in extensive floating pt calculation, and also in IO, and in + -- functions that are evaluated by Newton's method. + -- (At least one such guard digit is essential anyway, to compensate for + -- leftward shift of the mantissa during normalization.) + -- The index of the digits is a subtype of the Exponent type because + -- there are frequent conversions between the two. + -- + -- An assertion verifies that there are 2 guard digits. + -- + -- e_Integer is used as the type of the index of the extended digits + -- because e_Integer is the type of the exponent (defined below). + -- There's a close relationship between the exponent of the number and the + -- index of the digits of the number. (They are often scaled + -- simultaneously, there is a relationship between their ultimate ranges, + -- so they are given the same type here.) + -- Log_Base_2_Of_10 : constant := 3.321928094887362; + -- + + No_Of_Guard_Digits : constant := 2; + -- Guard_Digits are extra digits of precision at the end of the mantissa. + -- + -- The 2nd Guard_Digit makes + -- the Elementary Math Functions full precision (or almost full). + -- Also the IO routines need 2 Guard_Digits. + + pragma Assert (No_Of_Guard_Digits = 2); + + + -- The following are not decimal digits. + + Ultimate_Correct_Digit : constant e_Integer := No_Of_e_Digits - 1; + Ultimate_Digit : constant e_Integer := No_Of_Guard_Digits + Ultimate_Correct_Digit; + + subtype Digits_Base is e_Integer range 0..Ultimate_Digit+1; + subtype Digit_Index is Digits_Base range 0..Ultimate_Digit; + + pragma Assert (Digit_Index'First = 0); + -- some of the arithmetic in "+" assumes this. + + -- The following are not decimal digits. + + Min_No_Of_Correct_Digits : constant := 3; + -- Things stop working if this is less than 3. + + Min_No_Of_Digits : constant := Min_No_Of_Correct_Digits + 2; + -- The 2 is the min number of guard digits. + + pragma Assert (Ultimate_Digit >= Min_No_Of_Digits - 1); + pragma Assert (Ultimate_Digit >= Min_No_Of_Correct_Digits+No_Of_Guard_Digits-1); + -- Digits go from 0..Ultimate_Digit + + + Max_Exponent : constant e_Integer := 2 ** (e_Integer'Size - 5); + Min_Exponent : constant e_Integer := -Max_Exponent; + -- The exponent is usually 16 or 32 bit int. Limits on its range are set + -- below what the base type allows: no more than 1/4 the dynamic range + -- of the base type. If we use 1/8 of that limit, it allows us to delay + -- overflow check to end of most routines (except "**"). If we use 1/32 + -- of that limit, it allows us to do IO more simply. So to make + -- IO work, at present the requirement is 2 ** (e_Integer'Size - 5). + + pragma Assert (Max_Exponent <= 2 ** (e_Integer'Size - 5)); + + -- function e_Real_Machine_Emin returns Min_Exponent + -- function e_Real_Machine_Emax returns Max_Exponent + + + --subtype Digit_Type is Real; + -- Can use Floats with 53 bit mantissas as Digit_Type. Make 2 changes + -- above (search for No_Of_Usable_Bits_In_Digit and follow instructions) + -- and 2 changes in body (compiler will tell you where). Also comment + -- out next 3 statements. Amazingly, it worked nicely last time I did it. + -- Its slow, and it only makes sense when 64 bit ints are bad or missing. + + + type D_Type is range -2**63+1 .. 2**63-1; + subtype Digit_Type is D_Type'Base; + -- Must allow negative digits. Use 64 bit Integer. + + pragma Assert (Digit_Type'Last = 2**(Digit_Type'Size-1)-1); + pragma Assert (Digit_Type'Size-1 >= No_Of_Usable_Bits_In_Digit); + + Digit_Zero : constant Digit_Type := Digit_Type (0); + Digit_One : constant Digit_Type := Digit_Type (1); + Digit_Two : constant Digit_Type := Digit_Type (2); + + Digit_Radix : constant Digit_Type := Digit_Two**No_Of_Bits_In_Radix; + Half_Radix : constant Digit_Type := Digit_Two**(No_Of_Bits_In_Radix-1); + Digit_Radix_Squared : constant Digit_Type := Digit_Radix * Digit_Radix; + Digit_Radix_Minus_1 : constant Digit_Type := Digit_Radix - Digit_One; + + type Mantissa is array (Digit_Index) of Digit_Type; + + type e_Real is record + Digit : Mantissa := (others => Digit_Zero); + Exp : e_Integer := 0; + Is_Zero : Boolean := True; + Is_Positive : Boolean := True; + Is_Infinite : Boolean := False; + end record; + + --for e_Real'Size use (Digit_Type'Size*Mantissa'Length + e_Integer'Size*2); + -- Make e_Real'Size Integer number of 64-bit words. Usually doesn't matter. + -- Only for integer Digit_Type. Comment out for Float. pt. Digit types. + + Zero : constant e_Real + := e_Real' ((others => Digit_Zero), 0, True, True, False); + + One : constant e_Real + := e_Real' ((0 => Digit_One, others => Digit_Zero), 0, False, True, False); + + Positive_Infinity : constant e_Real + := e_Real' ((others => Digit_Zero), Max_Exponent+4, False, True, True); + + Negative_Infinity : constant e_Real + := e_Real' ((others => Digit_Zero), Max_Exponent+4, False, False, True); + + + -- For efficiency, we need an optimized (Real * Extended) + -- operation. So define type e_Digit, a single real number with + -- an exponent. Its a real number that's restricted to integral values + -- in the range to 0..Radix-1. + + type e_Digit is record + Digit : Digit_Type := Digit_Zero; + Exp : e_Integer := 0; + Is_Zero : Boolean := True; + Is_Positive : Boolean := True; + end record; + + -- Constants used in body. Real is used for easy communication with e_Real. + + Real_Zero : constant Real := Real (0.0); + Real_One : constant Real := Real (1.0); + + Real_Radix : constant Real := 2.0**No_Of_Bits_In_Radix; + Radix_Minus_1 : constant Real := 2.0**No_Of_Bits_In_Radix - 1.0; + Radix_Squared : constant Real := 2.0**(2*No_Of_Bits_In_Radix); + Inverse_Radix : constant Real := 2.0**(-No_Of_Bits_In_Radix); + Inverse_Radix_Squared : constant Real := Inverse_Radix * Inverse_Radix; + +end Extended_Real;