Skip to content

Commit

Permalink
machine_bitstream: Use the generic version for STM32 as well.
Browse files Browse the repository at this point in the history
The STM32 mp_hal_ticks_cpu has the unfavourable habit that it checks
on each call whether cpu_ticks is enabled. That makes the timing
somewhat unstable. Therefore the calls were renamed
from mp_hal_ticks_cpu to mp_hal_ticks_bitstream and from
mp_hal_ticks_cpu_start to mp_hal_ticks_bitstream_start. In all
ports, the respective defines are added to mphalport.h.
With that change, the STM32 implementation works quite precisely.
For the STM32 M0 MCU, the assembler version is used.

Timing varies quite a bit with changing CPU clock frequencies.

Testing done with:
PYBV11 at 168MHz
PYBD_SF6 at 196MHz
ESP8266 at both 80 and 160MHz
ESP32 at 240MHz
RP2 Pico at 125, 180 and 250MHz
MIMXRT1050 at 600MHz
MIMXRT1020 at 500MHz

Testing was done with symmetrical 1000ns cycles.
The worst figure is for the ESP8266 at 80MHz, being off by about +62ns for
the low phase, -38ns for the high phase, and a total error of 20ns for
the full 2000ns cycle.
  • Loading branch information
robert-hh committed Sep 21, 2021
1 parent 30efa17 commit bfb2f52
Show file tree
Hide file tree
Showing 9 changed files with 162 additions and 218 deletions.
135 changes: 130 additions & 5 deletions drivers/bitstream/machine_bitstream.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,129 @@

#if MICROPY_PY_MACHINE_BITSTREAM

#if __CORTEX_M == 0 && defined(STM32_CORTEXM0)

// No cycle counter on M0, do manual cycle counting instead.

// STM32F091 @ 48MHz
#define NS_CYCLES_PER_ITER_HIGH (6)
#define NS_CYCLES_PER_ITER_LOW (6)
#define NS_OVERHEAD_CYCLES_HIGH (12)
#define NS_OVERHEAD_CYCLES_LOW (18)

// Convert a delay given in nanoseconds into a busy-loop iteration count.
//
// ns:   requested duration in nanoseconds.
// high: true to use the NS_*_HIGH calibration constants, false to use the
//       NS_*_LOW ones.
//
// The duration is first converted to CPU cycles using SystemCoreClock, the
// fixed per-phase overhead is subtracted (clamped so the result cannot go
// below zero), and the remainder is divided (with rounding) by the number of
// cycles one loop iteration takes.  The result is never less than 1.
//
// NOTE(review): all arithmetic is done in uint32_t, so the intermediate
// product (SystemCoreClock / 1000000) * ns wraps for very large ns values.
uint32_t mp_hal_delay_ns_calc(uint32_t ns, bool high) {
    uint32_t overhead_cycles;
    uint32_t cycles_per_iter;
    if (high) {
        overhead_cycles = NS_OVERHEAD_CYCLES_HIGH;
        cycles_per_iter = NS_CYCLES_PER_ITER_HIGH;
    } else {
        overhead_cycles = NS_OVERHEAD_CYCLES_LOW;
        cycles_per_iter = NS_CYCLES_PER_ITER_LOW;
    }

    // Total CPU cycles corresponding to the requested time.
    uint32_t total_cycles = SystemCoreClock / 1000000 * ns / 1000;

    // Never subtract more overhead than there are cycles available.
    uint32_t fixed_cycles = MIN(total_cycles, overhead_cycles);

    uint32_t iterations = MP_ROUND_DIVIDE(total_cycles - fixed_cycles, cycles_per_iter);
    return MAX(1, iterations);
}

// Bit-bang `len` bytes from `buf` on `pin` using cycle-counted busy loops
// (Cortex-M0 variant; there is no cycle counter to poll).
//
// pin:       pin to drive; its GPIO BSRR register is written directly, using
//            pin->pin_mask to set the pin and pin->pin_mask << 16 to reset it.
// timing_ns: four durations in nanoseconds, laid out as
//            [high_time_0, low_time_0, high_time_1, low_time_1].
//            NOTE: this array is overwritten in place with loop iteration
//            counts computed by mp_hal_delay_ns_calc().
// buf, len:  data to send.  Each byte is sent most-significant bit first;
//            every bit is one high phase followed by one low phase.
//
// The whole transfer runs between MICROPY_BEGIN_ATOMIC_SECTION() and
// MICROPY_END_ATOMIC_SECTION().
void machine_bitstream_high_low(mp_hal_pin_obj_t pin, uint32_t *timing_ns, const uint8_t *buf, size_t len) {
    const uint32_t high_mask = pin->pin_mask;
    const uint32_t low_mask = pin->pin_mask << 16;
    volatile uint32_t *bsrr = &pin->gpio->BSRR;

    // Convert ns to loop iterations [high_time_0, low_time_0, high_time_1, low_time_1].
    for (size_t i = 0; i < 4; ++i) {
        timing_ns[i] = mp_hal_delay_ns_calc(timing_ns[i], i % 2 == 0);
    }

    mp_uint_t atomic_state = MICROPY_BEGIN_ATOMIC_SECTION();

    // Measured timing for F091 at 48MHz (cycle=20.83ns)
    // timing_ns = (1,1,1,1)
    //   high: 370
    //   low:  500
    //   low8: 660
    // timing_ns = (2,2,2,2)
    //   high: 490
    //   low:  620
    //   low8: 805

    // --> high is 12 + n*6 cycles
    //     low is  18 + n*6 cycles

    // NeoPixel timing (400, 850, 800, 450) (+/-150ns) gives timing_ns=(1, 4, 4, 1) which in cycles is
    // (12 + 6, 18 + 24, 12 + 24, 18 + 6) = (18, 42, 36, 24)
    // --> (375, 875, 750, 500) nanoseconds.
    // Measured output on logic analyser is (370, 870, 750, 490) (+/-10ns at 100MHz)

    // Note: final low of LSB is longer by 8 cycles (160ns) (due to start of outer loop and fetching next byte).
    // This is slightly outside spec, but doesn't seem to cause a problem.

    __asm volatile (
        // Force consistent register assignment.
        // r6 = len
        "ldr r6, %0\n"
        // r4 = buf
        "ldr r4, %1\n"
        // r5 = timing_ns
        "ldr r5, %2\n"

        // Must align for consistent timing.
        ".align 4\n"

        // Don't increment/decrement before first iteration.
        "b .outer2\n"
        ".outer:\n"
        // ++buf, --len
        " add r4, #1\n"
        " sub r6, #1\n"

        // len iterations
        ".outer2:\n"
        " cmp r6, #0\n"
        " beq .done\n"

        // r0 = *buf
        " ldrb r0, [r4, #0]\n"

        // 8 bits in byte
        " mov r7, #8\n"
        " .inner:\n"
        // *bsrr = high_mask
        " ldr r1, %3\n"
        " ldr r2, %4\n"
        " str r2, [r1, #0]\n"

        // r3 = (r0 >> 4) & 8 (r3 is 8 if bit 7 of r0 is 1 else 0).
        // r3 is a byte offset into timing_ns: 0 selects the "0" timings,
        // 8 selects the "1" timings.  r6 is borrowed to hold the constant 8
        // and is saved/restored through r8.
        " mov r8, r6\n"
        " lsr r3, r0, #4\n"
        " mov r6, #8\n"
        " and r3, r6\n"
        " mov r6, r8\n"

        // r2 = high-phase iteration count (word at byte offset r3 in timing_ns)
        " ldr r2, [r5, r3]\n"
        " .loop1:\n sub r2, #1\n bne .loop1\n"

        // *bsrr = low_mask
        " ldr r2, %5\n"
        " str r2, [r1, #0]\n"

        // r2 = low-phase iteration count (word at byte offset r3 + 4 in timing_ns)
        " add r3, #4\n"
        " ldr r2, [r5, r3]\n"
        " .loop2:\n sub r2, #1\n bne .loop2\n"

        // b <<= 1 (move the next bit into bit 7)
        " lsl r0, r0, #1\n"

        " sub r7, #1\n"
        // end of inner loop
        " beq .outer\n"
        // continue inner loop
        " b .inner\n"

        ".done:\n"
        :
        : "m" (len), "m" (buf), "m" (timing_ns), "m" (bsrr), "m" (high_mask), "m" (low_mask)
        // Every register written by the code above must be listed here,
        // including r4-r6 which hold buf/timing_ns/len.  "cc" is listed
        // because cmp/sub/lsl/and update the flags, and "memory" because the
        // code reads buf[] and timing_ns[] through pointers the compiler
        // cannot see as operands.
        : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "cc", "memory"
        );

    MICROPY_END_ATOMIC_SECTION(atomic_state);
}

#else // > CORTEX_M0

void machine_bitstream_high_low(mp_hal_pin_obj_t pin, uint32_t *timing_ns, const uint8_t *buf, size_t len) {
uint32_t fcpu_mhz = mp_hal_get_cpu_freq() / 1000000;
Expand All @@ -53,19 +176,21 @@ void machine_bitstream_high_low(mp_hal_pin_obj_t pin, uint32_t *timing_ns, const
for (size_t i = 0; i < len; ++i) {
uint8_t b = buf[i];
for (size_t j = 0; j < 8; ++j) {
uint32_t start_ticks = mp_hal_ticks_cpu_start();
uint32_t *t = &timing_ns[b >> 6 & 2];
uint32_t start_ticks = mp_hal_ticks_bitstream_start();
mp_hal_pin_high(pin);
while ((mp_hal_ticks_cpu() - start_ticks) < t[0]) {
uint32_t *t = &timing_ns[b >> 6 & 2];
while ((mp_hal_ticks_bitstream() - start_ticks) < t[0]) {
}
b <<= 1;
mp_hal_pin_low(pin);
while ((mp_hal_ticks_cpu() - start_ticks) < t[1]) {
b <<= 1;
while ((mp_hal_ticks_bitstream() - start_ticks) < t[1]) {
}
}
}

mp_hal_quiet_timing_exit(irq_state);
}

#endif // > CORTEX_M0

#endif // MICROPY_PY_MACHINE_BITSTREAM
2 changes: 1 addition & 1 deletion ports/esp32/main/linker.lf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[mapping:msin]
[mapping:main]
archive: libmain.a
entries:
machine_bitstream (noflash)
5 changes: 3 additions & 2 deletions ports/esp32/mphalport.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,9 @@ __attribute__((always_inline)) static inline uint32_t mp_hal_ticks_cpu(void) {
}

#define mp_hal_ticks_cpu_enable(void)
#define mp_hal_ticks_cpu_start mp_hal_ticks_cpu
#define MP_HAL_BITSTREAM_NS_OVERHEAD (6)
#define mp_hal_ticks_bitstream mp_hal_ticks_cpu
#define mp_hal_ticks_bitstream_start mp_hal_ticks_cpu
#define MP_HAL_BITSTREAM_NS_OVERHEAD (7)

static inline mp_uint_t mp_hal_get_cpu_freq(void) {
return ets_get_cpu_frequency() * 1000000;
Expand Down
7 changes: 4 additions & 3 deletions ports/esp8266/esp_mphal.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ __attribute__((always_inline)) static inline uint32_t mp_hal_ticks_cpu(void) {
}

#define mp_hal_ticks_cpu_enable(void)
#define mp_hal_ticks_cpu_start mp_hal_ticks_cpu
#define mp_hal_ticks_bitstream_start mp_hal_ticks_cpu
#define mp_hal_ticks_bitstream mp_hal_ticks_cpu
#define MP_HAL_BITSTREAM_NS_OVERHEAD (5)

void mp_hal_delay_us(uint32_t);
Expand Down Expand Up @@ -111,11 +112,11 @@ void mp_hal_pin_open_drain(mp_hal_pin_obj_t pin);
#define mp_hal_pin_write(p, v) pin_set((p), (v))

static inline void mp_hal_pin_low(mp_hal_pin_obj_t pin) {
GPIO_REG_WRITE(GPIO_OUT_W1TC_ADDRESS, 1 << pin);
GPIO_REG_WRITE(GPIO_OUT_W1TC_ADDRESS, 1 << pin);
}

static inline void mp_hal_pin_high(mp_hal_pin_obj_t pin) {
GPIO_REG_WRITE(GPIO_OUT_W1TS_ADDRESS, 1 << pin);
GPIO_REG_WRITE(GPIO_OUT_W1TS_ADDRESS, 1 << pin);
}
void *ets_get_esf_buf_ctlblk(void);
int ets_esf_free_bufs(int idx);
3 changes: 2 additions & 1 deletion ports/mimxrt/mphalport.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ static inline mp_uint_t mp_hal_ticks_cpu(void) {
return DWT->CYCCNT;
}

#define mp_hal_ticks_cpu_start mp_hal_ticks_cpu
#define mp_hal_ticks_bitstream_start mp_hal_ticks_cpu
#define mp_hal_ticks_bitstream mp_hal_ticks_cpu
#define MP_HAL_BITSTREAM_NS_OVERHEAD (6)

static inline mp_uint_t mp_hal_get_cpu_freq(void) {
Expand Down
5 changes: 4 additions & 1 deletion ports/rp2/mphalport.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,14 @@ static inline mp_uint_t mp_hal_ticks_cpu_start(void) {
return 0;
}

#define mp_hal_ticks_bitstream mp_hal_ticks_cpu
#define mp_hal_ticks_bitstream_start mp_hal_ticks_cpu_start

// Return the CPU frequency for this port: the value reported by
// clock_get_hz() for the clk_sys clock.
static inline mp_uint_t mp_hal_get_cpu_freq(void) {
return clock_get_hz(clk_sys);
}

#define MP_HAL_BITSTREAM_NS_OVERHEAD (9)
#define MP_HAL_BITSTREAM_NS_OVERHEAD (8)

// C-level pin HAL

Expand Down
3 changes: 2 additions & 1 deletion ports/stm32/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ SUPPORTS_HARDWARE_FP_DOUBLE = 1
else
ifeq ($(MCU_SERIES),$(filter $(MCU_SERIES),f0 l0))
CFLAGS_CORTEX_M += -msoft-float
CFLAGS += -DSTM32_CORTEXM0
else
CFLAGS_CORTEX_M += -mfpu=fpv4-sp-d16 -mfloat-abi=hard
SUPPORTS_HARDWARE_FP_SINGLE = 1
Expand Down Expand Up @@ -272,6 +273,7 @@ EXTMOD_SRC_C += $(addprefix extmod/,\
)

DRIVERS_SRC_C += $(addprefix drivers/,\
bitstream/machine_bitstream.c \
bus/softspi.c \
bus/softqspi.c \
memory/spiflash.c \
Expand Down Expand Up @@ -321,7 +323,6 @@ SRC_C += \
gccollect.c \
help.c \
machine_adc.c \
machine_bitstream.c \
machine_i2c.c \
machine_i2s.c \
machine_spi.c \
Expand Down

0 comments on commit bfb2f52

Please sign in to comment.