Merge #2107

2107: Scheduler timer bugfix + documentation updates r=phil-levis a=hudson-ayers ### Pull Request Overview This pull request fixes a potential (though as of yet undetected) bug in the kernel's use of the SchedulerTimer trait, where the kernel checked for expiration of the timer, then checked the amount of time remaining, not considering the possibility that the timer could wraparound in period between those checks, causing the kernel to read an invalid time remaining value. This was identified by @bradjc . Rather than just fix the bug, we decided to change the interface slightly to make this misuse impossible -- rather than separate `has_expired()` and `get_remaining_us()` functions, there is now only a `get_remaining_us()` function, which returns `None` if the timer has expired. This PR also includes lots of documentation updates by @bradjc that clarify the `SchedulerTimer` interface further. ### Testing Strategy This pull request was tested by running several apps simultaneously on Imix, incuding whileone, and verifying that timeslice expiration still works as expected. ### TODO or Help Wanted N/A ### Documentation Updated - [x] No updates are required. ### Formatting - [x] Ran `make prepush`. Co-authored-by: Brad Campbell <bradjc5@gmail.com> Co-authored-by: Hudson Ayers <hayers@stanford.edu>
tock · Sep 20, 2020 · ad9387a · ad9387a
2 parents 024738f + 17ef92c
commit ad9387a
Show file tree

Hide file tree

Showing 4 changed files with 107 additions and 86 deletions.
diff --git a/arch/cortex-m/src/systick.rs b/arch/cortex-m/src/systick.rs
@@ -147,10 +147,6 @@ impl kernel::SchedulerTimer for SysTick {
             .write(ControlAndStatus::ENABLE::SET + clock_source);
     }
 
-    fn has_expired(&self) -> bool {
-        SYSTICK_BASE.syst_csr.is_set(ControlAndStatus::COUNTFLAG)
-    }
-
     fn reset(&self) {
         SYSTICK_BASE.syst_csr.set(0);
         SYSTICK_BASE.syst_rvr.set(0);
@@ -193,10 +189,14 @@ impl kernel::SchedulerTimer for SysTick {
             .write(ControlAndStatus::TICKINT::CLEAR + ControlAndStatus::ENABLE::SET + clock_source);
     }
 
-    fn get_remaining_us(&self) -> u32 {
+    fn get_remaining_us(&self) -> Option<u32> {
         // use u64 in case of overflow when multiplying by 1,000,000
         let tics = SYSTICK_BASE.syst_cvr.read(CurrentValue::CURRENT) as u64;
-        let hertz = self.hertz() as u64;
-        ((tics * 1_000_000) / hertz) as u32
+        if SYSTICK_BASE.syst_csr.is_set(ControlAndStatus::COUNTFLAG) {
+            None
+        } else {
+            let hertz = self.hertz() as u64;
+            Some(((tics * 1_000_000) / hertz) as u32)
+        }
     }
 }
diff --git a/arch/rv32i/src/machine_timer.rs b/arch/rv32i/src/machine_timer.rs
@@ -119,14 +119,25 @@ impl kernel::SchedulerTimer for MachineTimer<'_> {
         self.registers.mtimecmp.write(MTimeCmp::MTIMECMP.val(tics));
     }
 
-    fn get_remaining_us(&self) -> u32 {
-        let tics = self.get_alarm().wrapping_sub(self.now()) as u64;
-        let hertz = <Self as Time>::Frequency::frequency() as u64;
-        ((tics * 1_000_000) / hertz) as u32
-    }
-
-    fn has_expired(&self) -> bool {
-        self.now() < self.get_alarm()
+    fn get_remaining_us(&self) -> Option<u32> {
+        // We need to convert from native tics to us, multiplication could overflow in 32-bit
+        // arithmetic. So we convert to 64-bit.
+
+        let diff = self.get_alarm().wrapping_sub(self.now()) as u64;
+        // If next alarm is more than one second away from now, alarm must have expired.
+        // Use this formulation to protect against errors when systick wraps around.
+        // 1 second was chosen because it is significantly greater than the 400ms max value allowed
+        // by start(), and requires no computational overhead (e.g. using 500ms would require
+        // dividing the returned ticks by 2)
+        // However, if the alarm frequency is slow enough relative to the cpu frequency, it is
+        // possible this will be evaluated while now() == get_alarm(), so we special case that
+        // result where the alarm has fired but the subtraction has not overflowed
+        if diff >= <Self as Time>::Frequency::frequency() as u64 || diff == 0 {
+            None
+        } else {
+            let hertz = <Self as Time>::Frequency::frequency() as u64;
+            Some(((diff * 1_000_000) / hertz) as u32)
+        }
     }
 
     fn reset(&self) {

diff --git a/kernel/src/platform/scheduler_timer.rs b/kernel/src/platform/scheduler_timer.rs
@@ -8,8 +8,10 @@ use crate::hil::time::{self, Frequency};
 /// Interface for the system scheduler timer.
 ///
 /// A system scheduler timer provides a countdown timer to enforce process
-/// scheduling time quanta. Implementations should have consistent timing
-/// while the CPU is active, but need not operate during sleep.
+/// scheduling time quanta. Implementations should have consistent timing while
+/// the CPU is active, but need not operate during sleep. Note, many scheduler
+/// implementations also charge time spent running the kernel on behalf of the
+/// process against the process time quantum.
 ///
 /// The primary requirement an implementation of this interface must satisfy is
 /// it must be capable of generating an interrupt when the timer expires. This
@@ -23,28 +25,42 @@ use crate::hil::time::{self, Frequency};
 ///
 /// The `SchedulerTimer` interface is carefully designed to be rather general to
 /// support the various implementations required on different hardware
-/// platforms. The general operation is the kernel will start a timer, in effect
-/// starting the time quantum assigned to a process. While the process is
-/// running, it will arm the timer, indicating to the implementation to ensure
+/// platforms. The general operation is the kernel will start a timer, which
+/// starts the time quantum assigned to a process. While the process is running,
+/// the kernel will arm the timer, telling the implementation it must ensure
 /// that an interrupt will occur when the time quantum is exhausted. When the
-/// process has stopped running the timer will be disarmed, indicating that an
-/// interrupt is no longer required. Note, many scheduler implementations also
-/// charge time spent running the kernel against the process time quantum.
-/// When the kernel needs to know if a process has exhausted its time quantum
-/// it will call `has_expired()`.
+/// process has stopped running, the kernel will disarm the timer, indicating to
+/// the implementation that an interrupt is no longer required. To check if the
+/// process has exhausted its time quantum the kernel will explicitly ask the
+/// implementation. The kernel itself does not expect to get an interrupt to
+/// handle when the time quantum is exhausted. This is because the time quantum
+/// may end while the kernel itself is running, and the kernel does not need to
+/// effectively preempt itself.
+///
+/// The `arm()` and `disarm()` functions in this interface serve as an optional
+/// optimization opportunity. This pair allows an implementation to only enable
+/// the interrupt when it is strictly necessary, i.e. while the process is
+/// actually executing. However, a correct implementation can have interrupts
+/// enabled anytime the scheduler timer has been started. What the
+/// implementation must ensure is that the interrupt is enabled when `arm()` is
+/// called.
 ///
 /// Implementations must take care when using interrupts. Since the
 /// `SchedulerTimer` is used in the core kernel loop and scheduler, top half
 /// interrupt handlers may not have executed before `SchedulerTimer` functions
 /// are called. In particular, implementations on top of virtualized timers may
 /// receive the interrupt fired callback "late" (i.e. after the kernel calls
-/// `has_expired()`). Implementations should ensure that they can reliably check for
-/// timeslice expirations.
+/// `has_expired()`). Implementations should ensure that they can reliably check
+/// for timeslice expirations.
 pub trait SchedulerTimer {
-    /// Start a timer for a process timeslice.
+    /// Start a timer for a process timeslice. The `us` argument is the length
+    /// of the timeslice in microseconds.
     ///
     /// This must set a timer for an interval as close as possible to the given
-    /// interval in microseconds. Interrupts do need to be enabled.
+    /// interval in microseconds. Interrupts do not need to be enabled. However,
+    /// if the implementation cannot separate time keeping from interrupt
+    /// generation, the implementation of `start()` should enable interrupts and
+    /// leave them enabled anytime the timer is active.
     ///
     /// Callers can assume at least a 24-bit wide clock. Specific timing is
     /// dependent on the driving clock. For ARM boards with a dedicated SysTick
@@ -53,15 +69,16 @@ pub trait SchedulerTimer {
     /// 400ms.
     fn start(&self, us: u32);
 
-    /// Reset the scheduler timer.
+    /// Reset the SchedulerTimer.
     ///
     /// This must reset the timer, and can safely disable it and put it in a low
     /// power state. Calling any function other than `start()` immediately after
     /// `reset()` is invalid.
     ///
-    /// Implementations should disable the timer and put it in a lower power
-    /// state, but this will depend on the hardware and whether virtualization
-    /// layers are used.
+    /// Implementations _should_ disable the timer and put it in a lower power
+    /// state. However, not all implementations will be able to guarantee this
+    /// (for example depending on the underlying hardware or if the timer is
+    /// implemented on top of a virtualized timer).
     fn reset(&self);
 
     /// Arm the SchedulerTimer timer and ensure an interrupt will be generated.
@@ -70,6 +87,9 @@ pub trait SchedulerTimer {
     /// guarantees that an interrupt will be generated when the already started
     /// timer expires. This interrupt will preempt the running userspace
     /// process.
+    ///
+    /// If the interrupt is already enabled when `arm()` is called, this
+    /// function should be a no-op implementation.
     fn arm(&self);
 
     /// Disarm the SchedulerTimer timer indicating an interrupt is no longer
@@ -79,31 +99,26 @@ pub trait SchedulerTimer {
     /// an interrupt is no longer required (i.e. the process is no longer
     /// executing). By not requiring an interrupt this may allow certain
     /// implementations to be more efficient by removing the overhead of
-    /// handling the interrupt. The implementation may disable the underlying
-    /// interrupt if one has been set, depending on the requirements of the
-    /// implementation.
+    /// handling the interrupt.
+    ///
+    /// If the implementation cannot disable the interrupt without stopping the
+    /// time keeping mechanism, this function should be a no-op implementation.
     fn disarm(&self);
 
-    /// Return the number of microseconds remaining in the process's timeslice.
-    /// If a process' timeslice has expired, this is not guaranteed to return a valid
-    /// value.
-    fn get_remaining_us(&self) -> u32;
-
-    /// Check if the process timeslice has expired.
+    /// Return the number of microseconds remaining in the process's timeslice
+    /// if the timeslice is still active.
     ///
-    /// Returns `true` if the timer has expired since the last time this
-    /// function or `start()` has been called. This function may not be
-    /// called after it has returned `true` for a given timeslice until
-    /// `start()` is called again (to start a new timeslice). If `has_expired()`
-    /// is called again after returning `true` without an intervening call to
-    /// `start()`, the return the return value is unspecified and
-    /// implementations may return whatever they like.
+    /// If the timeslice is still active, this returns `Some()` with the number
+    /// of microseconds remaining in the timeslice. If the timeslice has
+    /// expired, this returns `None`.
     ///
-    /// The requirement that this may not be called again after it returns
-    /// `true` simplifies implementation on hardware platforms where the
-    /// hardware automatically clears the expired flag on a read, as with the
-    /// ARM SysTick peripheral.
-    fn has_expired(&self) -> bool;
+    /// This function may not be called after it has returned `None` (signifying
+    /// the timeslice has expired) for a given timeslice until `start()` is
+    /// called again (to start a new timeslice). If `get_remaining_us()` is
+    /// called again after returning `None` without an intervening call to
+    /// `start()`, the return value is unspecified and implementations may
+    /// return whatever they like.
+    fn get_remaining_us(&self) -> Option<u32>;
 }
 
 /// A dummy `SchedulerTimer` implementation in which the timer never expires.
@@ -119,12 +134,8 @@ impl SchedulerTimer for () {
 
     fn arm(&self) {}
 
-    fn has_expired(&self) -> bool {
-        false
-    }
-
-    fn get_remaining_us(&self) -> u32 {
-        10000 // chose arbitrary large value
+    fn get_remaining_us(&self) -> Option<u32> {
+        Some(10000) // chose arbitrary large value
     }
 }
 
@@ -178,7 +189,11 @@ impl<A: 'static + time::Alarm<'static>> SchedulerTimer for VirtualSchedulerTimer
         self.alarm.disable();
     }
 
-    fn has_expired(&self) -> bool {
+    fn get_remaining_us(&self) -> Option<u32> {
+        // We need to convert from native tics to us, multiplication could overflow in 32-bit
+        // arithmetic. So we convert to 64-bit.
+
+        let diff = self.alarm.get_alarm().wrapping_sub(self.alarm.now()) as u64;
         // If next alarm is more than one second away from now, alarm must have expired.
         // Use this formulation to protect against errors when systick wraps around.
         // 1 second was chosen because it is significantly greater than the 400ms max value allowed
@@ -187,17 +202,12 @@ impl<A: 'static + time::Alarm<'static>> SchedulerTimer for VirtualSchedulerTimer
         // However, if the alarm frequency is slow enough relative to the cpu frequency, it is
         // possible this will be evaluated while now() == get_alarm(), so we special case that
         // result where the alarm has fired but the subtraction has not overflowed
-        let diff = self.alarm.get_alarm().wrapping_sub(self.alarm.now());
-        diff >= A::Frequency::frequency() || diff == 0
-    }
-
-    fn get_remaining_us(&self) -> u32 {
-        // We need to convert from native tics to us, multiplication could overflow in 32-bit
-        // arithmetic. So we convert to 64-bit.
-
-        let tics = self.alarm.get_alarm().wrapping_sub(self.alarm.now()) as u64;
-        let hertz = A::Frequency::frequency() as u64;
-        ((tics * 1_000_000) / hertz) as u32
+        if diff >= A::Frequency::frequency() as u64 || diff == 0 {
+            None
+        } else {
+            let hertz = A::Frequency::frequency() as u64;
+            Some(((diff * 1_000_000) / hertz) as u32)
+        }
     }
 }
 

diff --git a/kernel/src/sched.rs b/kernel/src/sched.rs
@@ -587,9 +587,11 @@ impl Kernel {
         // no longer wants to execute this process or if it exceeds its
         // timeslice.
         loop {
-            if scheduler_timer.has_expired()
-                || scheduler_timer.get_remaining_us() <= MIN_QUANTA_THRESHOLD_US
-            {
+            let stop_running = match scheduler_timer.get_remaining_us() {
+                Some(us) => us <= MIN_QUANTA_THRESHOLD_US,
+                None => true,
+            };
+            if stop_running {
                 // Process ran out of time while the kernel was executing.
                 process.debug_timeslice_expired();
                 return_reason = StoppedExecutingReason::TimesliceExpired;
@@ -798,7 +800,7 @@ impl Kernel {
                             }
                         }
                         Some(ContextSwitchReason::Interrupted) => {
-                            if scheduler_timer.has_expired() {
+                            if scheduler_timer.get_remaining_us().is_none() {
                                 // This interrupt was a timeslice expiration.
                                 process.debug_timeslice_expired();
                                 return_reason = StoppedExecutingReason::TimesliceExpired;
@@ -877,18 +879,16 @@ impl Kernel {
         // Check how much time the process used while it was executing, and
         // return the value so we can provide it to the scheduler.
         let time_executed_us = timeslice_us.map_or(None, |timeslice| {
-            // Note, we cannot call `.has_expired()` again if it has previously
-            // returned `true`, so we _must_ check the return reason first.
-            if return_reason == StoppedExecutingReason::TimesliceExpired
-                || scheduler_timer.has_expired()
-            {
-                // Note we cannot call `.get_remaining_us()` as it will return
-                // an invalid value after a timeslice expiration. Instead, we
-                // just return the original length of the timeslice.
+            // Note, we cannot call `.get_remaining_us()` again if it has previously
+            // returned `None`, so we _must_ check the return reason first.
+            if return_reason == StoppedExecutingReason::TimesliceExpired {
+                // used the whole timeslice
                 Some(timeslice)
             } else {
-                let remaining = scheduler_timer.get_remaining_us();
-                Some(timeslice - remaining)
+                match scheduler_timer.get_remaining_us() {
+                    Some(remaining) => Some(timeslice - remaining),
+                    None => Some(timeslice), // used whole timeslice
+                }
             }
         });