Skip to content

Commit

Permalink
Significantly improve ADC performance
Browse files Browse the repository at this point in the history
In 5eb9139 and previously
a398d0c, work was done to identify
ADC performance issues.  In the meantime, ST released new errata for
the STM32G474 ADC which more accurately describes problematic
configurations when multiple ADCs are used simultaneously.

 * ES0430 - Rev 8, 2.7.9

Notably, to achieve optimal results, the documented workaround is to
use a synchronous clock mode with a prescaler no more than 1 (!), and
also to trigger all ADCs simultaneously by using a hardware trigger.

Using a prescaler of exactly 1 is hard for moteus, since that would
run the ADC too fast, at 85Mhz.  Instead, now switch to a synchronous
clock with a prescaler of 2, and rely on the fact that we are already
starting all the ADCs at a fixed phase relative to the system cycle
counter.

While we are at it, use some inline assembly to get the ADC sample
starts to be much closer together in time.

The combined result of this change is that nearly all noise that
varies with ADC value is removed except for that near 2048, and that
is only around 3-5 LSB.  Previously depending exactly the phase things
ended up in, there could be significant additional noise every 8 LSB,
and an increased window of noise around the 2048 mark up to 10 LSB or
more.
  • Loading branch information
jpieper committed Jun 3, 2023
1 parent 79b79c0 commit 4db3610
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 49 deletions.
88 changes: 55 additions & 33 deletions fw/bldc_servo.cc
Expand Up @@ -636,40 +636,26 @@ class BldcServo::Impl {
DisableAdc(ADC4);
DisableAdc(ADC5);

// The prescaler must be at least 6x to be able to accurately read
// across all channels. If it is too low, you'll see errors that
// look like quantization, but not in a particularly uniform way
// and not consistently across each of the channels.
auto map_adc_prescale = [](int prescale) {
if (prescale == 1) { return 0; }
if (prescale == 2) { return 1; }
if (prescale == 4) { return 2; }
if (prescale == 6) { return 3; }
if (prescale == 8) { return 4; }
if (prescale == 10) { return 5; }
if (prescale == 12) { return 6; }
if (prescale == 16) { return 7; }
if (prescale == 32) { return 8; }
if (prescale == 64) { return 9; }
if (prescale == 128) { return 10; }
if (prescale == 256) { return 11; }
MJ_ASSERT(false);
return 0;
};

// Per "ES0430 - Rev 8, 2.7.9" the ADCs can only be used
// simultaneously if they are in synchronous mode with a divider
// no more than 1. Yay. We can't use synchronous mode with a
// divider of 1, since that would run the ADCs too fast. Instead,
// we use a divider of 2, and ensure that each ADC is started in
// an exact phase relationship to the global cycle counter.
ADC12_COMMON->CCR =
(map_adc_prescale(config_.adc_prescale) << ADC_CCR_PRESC_Pos) |
(2 << ADC_CCR_CKMODE_Pos) | // synchronous AHB/2
(1 << ADC_CCR_DUAL_Pos); // dual mode, regular + injected
ADC345_COMMON->CCR =
(map_adc_prescale(config_.adc_prescale) << ADC_CCR_PRESC_Pos) |
(2 << ADC_CCR_CKMODE_Pos) | // synchronous AHB/2
(1 << ADC_CCR_DUAL_Pos); // dual mode, regular + injected

constexpr int kAdcPrescale = 2; // from the CKMODE above

EnableAdc(ms_timer_, ADC1, config_.adc_prescale);
EnableAdc(ms_timer_, ADC2, config_.adc_prescale);
EnableAdc(ms_timer_, ADC3, config_.adc_prescale);
EnableAdc(ms_timer_, ADC4, config_.adc_prescale);
EnableAdc(ms_timer_, ADC5, config_.adc_prescale);
EnableAdc(ms_timer_, ADC1, kAdcPrescale, 0);
EnableAdc(ms_timer_, ADC2, kAdcPrescale, 0);
EnableAdc(ms_timer_, ADC3, kAdcPrescale, 0);
EnableAdc(ms_timer_, ADC4, kAdcPrescale, 0);
EnableAdc(ms_timer_, ADC5, kAdcPrescale, 0);

if (family0_) {
adc1_sqr_ = ADC1->SQR1 =
Expand Down Expand Up @@ -764,11 +750,47 @@ class BldcServo::Impl {
// ready. This means we will throw away the result if our control
// timer says it isn't our turn yet, but that is a relatively
// minor waste.
ADC1->CR |= ADC_CR_ADSTART;
// ADC2->CR |= ADC_CR_ADSTART; // we are in dual mode
ADC3->CR |= ADC_CR_ADSTART;
// ADC4->CR |= ADC_CR_ADSTART; // we are in dual mode
ADC5->CR |= ADC_CR_ADSTART;


{
// To start the ADCs, for now we resort to some inline assembly.
// The below is roughly equivalent to:
//
// auto tmp = ADC1->CR;
// tmp |= ADC_CR_ADSTART;
// ADC1->CR = tmp;
// ADC3->CR = tmp;
// ADC5->CR = tmp;
//
// Note: Since ADC1/2 and ADC3/4 are in dual mode, we don't have
// to explitly start ADC2 or ADC4.
//
// We perform this using inline assembly so as to attempt to
// start the trigger process of all 5 ADCs as closely as
// possible. Per STM32G474 errata "ES0430 Rev 8 - 2.7.9", the
// ADCs only have good performance if they are started at
// exactly the same time. Ideally we'd do that through a
// hardware, i.e. timer trigger. However, getting that
// integrated here is a bigger project. For now, this seems to
// give pretty good results.
uint32_t temp_reg1;
uint32_t temp_reg2;
asm volatile (
"mov %[temp_reg1], %[adstart];"
"ldr %[temp_reg2], [%[adc1_cr], #0];"
"orr %[temp_reg1], %[temp_reg2];"
"str %[temp_reg1], [%[adc1_cr], #0];"
"str %[temp_reg1], [%[adc3_cr], #0];"
"str %[temp_reg1], [%[adc5_cr], #0];"
: [temp_reg1]"=&r"(temp_reg1),
[temp_reg2]"=&r"(temp_reg2)
: [adstart]"r"(ADC_CR_ADSTART),
[adc1_cr]"r"(&ADC1->CR),
[adc3_cr]"r"(&ADC3->CR),
[adc5_cr]"r"(&ADC5->CR)
:
);
}

phase_ = (phase_ + 1) & rate_config_.interrupt_mask;
if (phase_) { return; }
Expand Down
12 changes: 0 additions & 12 deletions fw/bldc_servo_structs.h
Expand Up @@ -477,17 +477,6 @@ struct BldcServoConfig {
// need a larger sampling time.
uint16_t adc_aux_cycles = 47;

// The default prescaler values are not determined with much
// principle. 8 was used for all r4.X series boards for some time,
// but for some reason it often causes problems with the initial n1
// series boards but 6, which in theory should cause more problems
// seems to work much better in practice. So, until the n1 ADC
// analog performance is better, we use this kludge.
uint16_t adc_prescale =
(g_measured_hw_family == 0 ? 8 :
g_measured_hw_family == 1 ? 6 :
8);

// We use the same PID constants for D and Q current control
// loops.
SimplePI::Config pid_dq;
Expand Down Expand Up @@ -593,7 +582,6 @@ struct BldcServoConfig {
a->Visit(MJ_NVP(position_derate));
a->Visit(MJ_NVP(adc_cur_cycles));
a->Visit(MJ_NVP(adc_aux_cycles));
a->Visit(MJ_NVP(adc_prescale));
a->Visit(MJ_NVP(pid_dq));
a->Visit(MJ_NVP(pid_position));
a->Visit(MJ_NVP(current_feedforward));
Expand Down
2 changes: 1 addition & 1 deletion fw/moteus_hw.cc
Expand Up @@ -147,7 +147,7 @@ FamilyAndVersion DetectMoteusFamily(MillisecondTimer* timer) {
(17 << ADC_SQR1_SQ1_Pos) | // IN17
(0 << ADC_SQR1_L_Pos); // length 1

EnableAdc(timer, ADC2, 16);
EnableAdc(timer, ADC2, 16, 0);

ADC2->CR |= ADC_CR_ADSTART;
while ((ADC2->ISR & ADC_ISR_EOC) == 0);
Expand Down
71 changes: 69 additions & 2 deletions fw/stm32g4_adc.cc
Expand Up @@ -16,7 +16,7 @@

namespace moteus {

void EnableAdc(MillisecondTimer* timer, ADC_TypeDef* adc, int prescaler) {
void EnableAdc(MillisecondTimer* timer, ADC_TypeDef* adc, int prescaler, int offset) {
// 20.4.6: ADC Deep power-down mode startup procedure
adc->CR &= ~ADC_CR_DEEPPWD;
adc->CR |= ADC_CR_ADVREGEN;
Expand Down Expand Up @@ -47,7 +47,7 @@ void EnableAdc(MillisecondTimer* timer, ADC_TypeDef* adc, int prescaler) {
const uint32_t initial_cyccnt = DWT->CYCCNT;
const uint32_t desired_cyccnt =
static_cast<uint32_t>(initial_cyccnt / prescaler) * prescaler +
prescaler * 64;
prescaler * 64 + offset;

// Clear the instruction and data cache, to help get the same
// results every time.
Expand Down Expand Up @@ -135,4 +135,71 @@ void EnableAdc(MillisecondTimer* timer, ADC_TypeDef* adc, int prescaler) {
adc->CFGR &= ~(ADC_CFGR_CONT);
}

#if 0
// We leave this commented out because it was hard to create, and
// might be useful for debugging again.

// Delay a cycle accurate number of counts. This allows code to vary
// how long it takes in a cycle accurate way without changing its
// layout, which can often have large knock-on effects in other
// locations.
void CycleAccurateDelay(int counts) {
// Clear the instruction and data cache, to help get the same
// results every time.
FLASH->ACR &= ~(FLASH_ACR_ICEN | FLASH_ACR_DCEN);
FLASH->ACR |= FLASH_ACR_ICRST | FLASH_ACR_DCRST;
FLASH->ACR &= ~(FLASH_ACR_ICRST | FLASH_ACR_DCRST);
FLASH->ACR |= (FLASH_ACR_ICEN | FLASH_ACR_DCEN);

uint32_t loop_count;
uint32_t remainder;
asm volatile (
// Align ourselves to improve robustness.
".balign 4;"

// Our ultimate delay loop takes 4 cycles per iteration.
"asr %[loop_count], %[counts], #2;"

// Which means we may have up to 3 extra NOPs to insert. This
// part of the code requires the function to be placed in CCM
// memory, otherwise the FLASH ART destroys our ability to add
// single cycle delays.
"and %[remainder], %[counts], #3;"

"cmp %[remainder], #0;"
"ble 10f;"
"mov %[counts], #0;"
"mov %[counts], #0;"
"10:;"

"cmp %[remainder], #1;"
"ble 11f;"
"mov %[counts], #0;"
"mov %[counts], #0;"
"11:;"

"cmp %[remainder], #2;"
"ble 12f;"
"mov %[counts], #0;"
"mov %[counts], #0;"
"12:;"

// Make sure we are at the beginning of any pre-fetch lines.
".balign 4;"

// Our main loop, which takes 4 cycles.
"1:;"
"subs %[loop_count], %[loop_count], #1;"
"nop;"
"bne 1b;"

"nop;"
: [counts]"+&r"(counts),
[loop_count]"=&r"(loop_count),
[remainder]"=&r"(remainder)
: [cyccnt]"r"(&DWT->CYCCNT)
:
);
}
#endif
}
2 changes: 1 addition & 1 deletion fw/stm32g4_adc.h
Expand Up @@ -29,5 +29,5 @@ inline void DisableAdc(ADC_TypeDef* adc) {
}

// This function needs to be in CCM memory to achieve deterministic timings.
void EnableAdc(MillisecondTimer* timer, ADC_TypeDef* adc, int prescaler) MOTEUS_CCM_ATTRIBUTE;
void EnableAdc(MillisecondTimer* timer, ADC_TypeDef* adc, int prescaler, int offset) MOTEUS_CCM_ATTRIBUTE;
}

0 comments on commit 4db3610

Please sign in to comment.