From 3f0a9514a80cf73f483d5ac63ef62a7c4aa8da02 Mon Sep 17 00:00:00 2001 From: R Date: Sat, 2 Aug 2025 01:52:01 +0100 Subject: [PATCH 1/3] lib/tiam1808: Fix bug with CP15 cache flush function name This function flushes the D-cache, not the I-cache. --- lib/tiam1808/drivers/cppi41dma.c | 6 +++--- lib/tiam1808/system_config/armv5/gcc/cp15.c | 2 +- lib/tiam1808/tiam1808/armv5/cp15.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/tiam1808/drivers/cppi41dma.c b/lib/tiam1808/drivers/cppi41dma.c index 2dd3eff1f..c42d13887 100644 --- a/lib/tiam1808/drivers/cppi41dma.c +++ b/lib/tiam1808/drivers/cppi41dma.c @@ -1132,7 +1132,7 @@ unsigned int dmaTxCompletion(unsigned short usbDevInst, unsigned int ulEndpoint if(state == DMA_TX_COMPLETED) while ((HWREGH(usbInstance->usbBaseAddress + ulRegister) & 0x2) == 0x02); - CP15ICacheFlushBuff((unsigned int)completed_bd->buffAdd, sizeof(completed_bd->buffAdd)); + CP15DCacheFlushBuff((unsigned int)completed_bd->buffAdd, sizeof(completed_bd->buffAdd)); cppiDmaFreenBuffer((unsigned int *)completed_bd->buffAdd); @@ -1207,13 +1207,13 @@ unsigned int dmaRxCompletion(unsigned short usbDevInst, unsigned int ulEndpoint ->rxEndPoint[ulEndpoint].complettionq); /*Fush the cache to update the BD */ - CP15ICacheFlushBuff((unsigned int)rx_bd, sizeof(hostPacketDesc)); + CP15DCacheFlushBuff((unsigned int)rx_bd, sizeof(hostPacketDesc)); bufferAdd = rx_bd->buffAdd; length = rx_bd->buffLength; /*Flush the cache to update the buffer */ - CP15ICacheFlushBuff(bufferAdd, length); + CP15DCacheFlushBuff(bufferAdd, length); putFreeBd(rx_bd); diff --git a/lib/tiam1808/system_config/armv5/gcc/cp15.c b/lib/tiam1808/system_config/armv5/gcc/cp15.c index 39d5f28dc..079c94d08 100644 --- a/lib/tiam1808/system_config/armv5/gcc/cp15.c +++ b/lib/tiam1808/system_config/armv5/gcc/cp15.c @@ -170,7 +170,7 @@ void CP15ICacheFlush(void) * \return None. * **/ -void CP15ICacheFlushBuff(unsigned int bufPtr, unsigned int size) +void CP15DCacheFlushBuff(unsigned int bufPtr, unsigned int size) { unsigned int ptr; diff --git a/lib/tiam1808/tiam1808/armv5/cp15.h b/lib/tiam1808/tiam1808/armv5/cp15.h index 847e0cf69..368d4b839 100644 --- a/lib/tiam1808/tiam1808/armv5/cp15.h +++ b/lib/tiam1808/tiam1808/armv5/cp15.h @@ -60,7 +60,7 @@ extern void CP15ICacheFlush(void); extern void CP15TtbSet(unsigned int ttb); extern void CP15MMUDisable(void); extern void CP15MMUEnable(void); -extern void CP15ICacheFlushBuff(unsigned int ptr, unsigned int size); +extern void CP15DCacheFlushBuff(unsigned int ptr, unsigned int size); extern void CP15DCacheCleanBuff(unsigned int bufPtr, unsigned int size); extern uint32_t CP15GetDFSR(void); extern uint32_t CP15GetIFSR(void); From c8b3c523c03dd85fcac9bda1c98f176188a0db0d Mon Sep 17 00:00:00 2001 From: R Date: Sat, 2 Aug 2025 02:01:47 +0100 Subject: [PATCH 2/3] pbio/include/pbdrv/cache.h: Add header for cache ops --- lib/pbio/include/pbdrv/cache.h | 29 +++++++++++++++++++++ lib/pbio/platform/ev3/platform.c | 17 ++++++++++++ lib/tiam1808/system_config/armv5/gcc/cp15.c | 10 +++++++ lib/tiam1808/tiam1808/armv5/cp15.h | 1 + 4 files changed, 57 insertions(+) create mode 100644 lib/pbio/include/pbdrv/cache.h diff --git a/lib/pbio/include/pbdrv/cache.h b/lib/pbio/include/pbdrv/cache.h new file mode 100644 index 000000000..3824da292 --- /dev/null +++ b/lib/pbio/include/pbdrv/cache.h @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2025 The Pybricks Authors + +#ifndef _PBDRV_CACHE_H_ + +#include + +// Gets data ready so that the data that we (the CPU) have written +// can be read by DMA peripherals. This cleans the relevant cache lines +// and flushes the write buffer. +void pbdrv_cache_prepare_before_dma(const void *buf, size_t sz); + +// Makes sure that we (the CPU) are able to read data written +// by DMA peripherals. This invalidates the relevant cache lines. +void pbdrv_cache_prepare_after_dma(const void *buf, size_t sz); + +// Accesses a variable via the uncached memory alias +#define PBDRV_UNCACHED(x) (*(volatile __typeof__(x) *)((uint32_t)(&(x)) + 0x10000000)) + +// Gets the address of the uncached memory alias of a variable +#define PBDRV_UNCACHED_ADDR(x) ((__typeof__(x) *)((uint32_t)(&(x)) + 0x10000000)) + +// Cache line size +#define PBDRV_CACHE_LINE_SZ 32 + +// Align data to a cache line, which is needed for clean RX DMA +#define PBDRV_CACHE_LINE_ALIGNED __attribute__((aligned(PBDRV_CACHE_LINE_SZ))) + +#endif diff --git a/lib/pbio/platform/ev3/platform.c b/lib/pbio/platform/ev3/platform.c index c89c888b7..1131aea79 100644 --- a/lib/pbio/platform/ev3/platform.c +++ b/lib/pbio/platform/ev3/platform.c @@ -55,6 +55,7 @@ #include +#include #include #include #include @@ -396,6 +397,22 @@ unsigned int EDMAVersionGet(void) { return 1; } +void pbdrv_cache_prepare_before_dma(const void *buf, size_t sz) { + // Make sure all data is written by the compiler... + pbdrv_compiler_memory_barrier(); + // and then make sure it's written out of the cache... + CP15DCacheCleanBuff((uint32_t)buf, sz); + // and also the write buffer, in case the cache missed. + CP15DrainWriteBuffer(); +} + +void pbdrv_cache_prepare_after_dma(const void *buf, size_t sz) { + // Invalidate stale data in the cache... + CP15DCacheFlushBuff((uint32_t)buf, sz); + // and then make sure _subsequent_ reads cannot be reordered earlier. + pbdrv_compiler_memory_barrier(); +} + static void panic_puts(const char *c) { while (*c) { UARTCharPut(SOC_UART_1_REGS, *(c++)); diff --git a/lib/tiam1808/system_config/armv5/gcc/cp15.c b/lib/tiam1808/system_config/armv5/gcc/cp15.c index 079c94d08..6345af1b2 100644 --- a/lib/tiam1808/system_config/armv5/gcc/cp15.c +++ b/lib/tiam1808/system_config/armv5/gcc/cp15.c @@ -208,6 +208,16 @@ void CP15DCacheCleanBuff(unsigned int bufPtr, unsigned int size) } } +/** + * \brief This function drains the CPU write buffer and acts as a data memory barrier. + */ +void CP15DrainWriteBuffer(void) +{ + __asm(" mov r0, #0\n\t" + " mcr p15, #0, r0, c7, c10, #4" + ::: "r0"); +} + /** * \brief This API Configures translation table base register with * with page table starting address. diff --git a/lib/tiam1808/tiam1808/armv5/cp15.h b/lib/tiam1808/tiam1808/armv5/cp15.h index 368d4b839..0171197fc 100644 --- a/lib/tiam1808/tiam1808/armv5/cp15.h +++ b/lib/tiam1808/tiam1808/armv5/cp15.h @@ -62,6 +62,7 @@ extern void CP15MMUDisable(void); extern void CP15MMUEnable(void); extern void CP15DCacheFlushBuff(unsigned int ptr, unsigned int size); extern void CP15DCacheCleanBuff(unsigned int bufPtr, unsigned int size); +extern void CP15DrainWriteBuffer(void); extern uint32_t CP15GetDFSR(void); extern uint32_t CP15GetIFSR(void); extern uint32_t CP15GetFAR(void); From 2000de2c862eda6451fb8a6fb426039e344de1b3 Mon Sep 17 00:00:00 2001 From: R Date: Sat, 2 Aug 2025 02:41:57 +0100 Subject: [PATCH 3/3] pbio/drv: Handle cache ops for EV3 drivers This allows the main memory to be write-back cached. Special note regarding DMA buffers and the uncached alias mapping: DMA buffers and uncached data cannot safely share cache lines with unrelated data! We use a special macro to align these buffers so that issues do not occur. If DMA RX buffers share cache lines with other data, it can lead to race conditions which corrupt data. If we flush and invalidate the cache before performing the DMA, reading the unrelated data can cause the cache line to be loaded back into cache, cancelling out the invalidation. If we wait until after performing the DMA and perform a flush and invalidate, the flush can corrupt part of the data which was written by the DMA. If we only perform an invalidate, writes to the unrelated data can be lost. A similar race condition can happen if an uncached alias is used to read/write to data which sits next to unrelated data. This currently applies to the ADC RX buffer. The USB driver's CPPI descriptors are safe as they happen to already be cache line aligned. The solution we use here is to place each buffer in its own cache-line-aligned block of memory and only perform an invalidate upon DMA completion. This makes sure that no unrelated data can get caught up in the same cache lines. This issue does not affect DMA TX buffers written using the normal cached mapping. Those are handled by flushing the cache. --- lib/pbio/drv/block_device/block_device_ev3.c | 83 +++++++++++++------- lib/pbio/drv/display/display_ev3.c | 3 + lib/pbio/drv/uart/uart_ev3.c | 3 + lib/pbio/drv/usb/usb_ev3.c | 14 +++- lib/pbio/include/pbdrv/cache.h | 2 +- lib/pbio/platform/ev3/platform.c | 4 +- lib/pbio/platform/ev3/platform.ld | 6 +- 7 files changed, 79 insertions(+), 36 deletions(-) diff --git a/lib/pbio/drv/block_device/block_device_ev3.c b/lib/pbio/drv/block_device/block_device_ev3.c index 9198a64c8..3ba1d4b17 100644 --- a/lib/pbio/drv/block_device/block_device_ev3.c +++ b/lib/pbio/drv/block_device/block_device_ev3.c @@ -34,6 +34,7 @@ #include "block_device_ev3.h" #include +#include #include #include #include @@ -93,17 +94,27 @@ enum { static struct { /** HAL Transfer status */ volatile spi_status_t status; - // This is used when SPI only needs to receive. It should always stay as 0. - uint8_t tx_dummy_byte; - // This is used when received data is to be discarded. Its value should be ignored. - uint8_t rx_dummy_byte; + // This stores the RX buffer address so that we clean the cache when DMA is complete + uint32_t rx_user_buf_addr; + // This stores the RX buffer size; + uint32_t rx_user_buf_sz; +} spi_dev; + +/** + * SPI small DMA buffers + */ +static struct { // This is used when transmitting so that the last byte clears CSHOLD. uint32_t tx_last_word; // This is used to hold the initial command to the SPI peripheral. uint8_t spi_cmd_buf_tx[SPI_CMD_BUF_SZ]; // This is used to hold the replies to commands to the SPI peripheral. uint8_t spi_cmd_buf_rx[SPI_CMD_BUF_SZ]; -} spi_dev; + // This is used when SPI only needs to receive. It should always stay as 0. + uint8_t tx_dummy_byte; + // This is used when received data is to be discarded. Its value should be ignored. + uint8_t rx_dummy_byte; +} spi_dev_bufs PBDRV_DMA_BUF; static uint32_t last_spi_dma_complete_time; @@ -114,6 +125,10 @@ static void spi_dma_complete(void) { } SPIIntDisable(SOC_SPI_0_REGS, SPI_DMA_REQUEST_ENA_INT); pbio_os_request_poll(); + pbdrv_cache_prepare_after_dma(&spi_dev_bufs, sizeof(spi_dev_bufs)); + if (spi_dev.rx_user_buf_addr && spi_dev.rx_user_buf_sz) { + pbdrv_cache_prepare_after_dma((void *)spi_dev.rx_user_buf_addr, spi_dev.rx_user_buf_sz); + } last_spi_dma_complete_time = pbdrv_clock_get_ms(); } @@ -153,7 +168,7 @@ static void spi0_isr(void) { continue; } - spi_dev.spi_cmd_buf_rx[0] = HWREG(SOC_SPI_0_REGS + SPI_SPIBUF); + spi_dev_bufs.spi_cmd_buf_rx[0] = HWREG(SOC_SPI_0_REGS + SPI_SPIBUF); spi_dev.status &= ~SPI_STATUS_WAIT_RX; SPIIntDisable(SOC_SPI_0_REGS, SPI_RECV_INT); pbio_os_request_poll(); @@ -351,19 +366,24 @@ static pbio_error_t spi_begin_for_flash( spi_dev.status = SPI_STATUS_WAIT_RX; + // Prevent write to spi_dev.status from being reordered + pbdrv_compiler_memory_barrier(); + uint32_t tx = spi0_last_dat1_for_flash(cmd[0]); SPIIntEnable(SOC_SPI_0_REGS, SPI_RECV_INT); HWREG(SOC_SPI_0_REGS + SPI_SPIDAT1) = tx; } else { - memcpy(&spi_dev.spi_cmd_buf_tx, cmd, cmd_len); + memcpy(spi_dev_bufs.spi_cmd_buf_tx, cmd, cmd_len); + spi_dev.rx_user_buf_addr = (uint32_t)user_data_rx; + spi_dev.rx_user_buf_sz = user_data_len; if (user_data_len == 0) { // Only a command, no user data - spi_dev.tx_last_word = spi0_last_dat1_for_flash(cmd[cmd_len - 1]); + spi_dev_bufs.tx_last_word = spi0_last_dat1_for_flash(cmd[cmd_len - 1]); // TX everything except last byte - ps.p.srcAddr = (unsigned int)(&spi_dev.spi_cmd_buf_tx); + ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_tx); ps.p.destAddr = SOC_SPI_0_REGS + SPI_SPIDAT1; ps.p.aCnt = 1; ps.p.bCnt = cmd_len - 1; @@ -378,7 +398,7 @@ static pbio_error_t spi_begin_for_flash( edma3_set_param(EDMA3_CHA_SPI0_TX, &ps); // TX last byte, clearing CSHOLD - ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word); + ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word); ps.p.aCnt = 4; ps.p.bCnt = 1; ps.p.linkAddr = 0xffff; @@ -387,7 +407,7 @@ static pbio_error_t spi_begin_for_flash( // RX all bytes ps.p.srcAddr = SOC_SPI_0_REGS + SPI_SPIBUF; - ps.p.destAddr = (unsigned int)(&spi_dev.spi_cmd_buf_rx); + ps.p.destAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_rx); ps.p.aCnt = 1; ps.p.bCnt = cmd_len; ps.p.cCnt = 1; @@ -403,7 +423,7 @@ static pbio_error_t spi_begin_for_flash( // Command *and* user data // TX the command - ps.p.srcAddr = (unsigned int)(&spi_dev.spi_cmd_buf_tx); + ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_tx); ps.p.destAddr = SOC_SPI_0_REGS + SPI_SPIDAT1; ps.p.aCnt = 1; ps.p.bCnt = cmd_len; @@ -418,7 +438,8 @@ static pbio_error_t spi_begin_for_flash( edma3_set_param(EDMA3_CHA_SPI0_TX, &ps); if (user_data_tx) { - spi_dev.tx_last_word = spi0_last_dat1_for_flash(user_data_tx[user_data_len - 1]); + pbdrv_cache_prepare_before_dma(user_data_tx, user_data_len); + spi_dev_bufs.tx_last_word = spi0_last_dat1_for_flash(user_data_tx[user_data_len - 1]); // TX all but the last byte ps.p.srcAddr = (unsigned int)(user_data_tx); @@ -427,24 +448,24 @@ static pbio_error_t spi_begin_for_flash( edma3_set_param(126, &ps); // TX the last byte, clearing CSHOLD - ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word); + ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word); ps.p.aCnt = 4; ps.p.bCnt = 1; ps.p.linkAddr = 0xffff; ps.p.opt = EDMA3CC_OPT_TCINTEN | (EDMA3_CHA_SPI0_TX << EDMA3CC_OPT_TCC_SHIFT); edma3_set_param(127, &ps); } else { - spi_dev.tx_last_word = spi0_last_dat1_for_flash(0); + spi_dev_bufs.tx_last_word = spi0_last_dat1_for_flash(0); // TX all but the last byte - ps.p.srcAddr = (unsigned int)(&spi_dev.tx_dummy_byte); + ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_dummy_byte); ps.p.bCnt = user_data_len - 1; ps.p.srcBIdx = 0; ps.p.linkAddr = 127 * 32; edma3_set_param(126, &ps); // TX the last byte, clearing CSHOLD - ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word); + ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word); ps.p.aCnt = 4; ps.p.bCnt = 1; ps.p.linkAddr = 0xffff; @@ -454,7 +475,7 @@ static pbio_error_t spi_begin_for_flash( // RX the command ps.p.srcAddr = SOC_SPI_0_REGS + SPI_SPIBUF; - ps.p.destAddr = (unsigned int)(&spi_dev.spi_cmd_buf_rx); + ps.p.destAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_rx); ps.p.aCnt = 1; ps.p.bCnt = cmd_len; ps.p.cCnt = 1; @@ -476,7 +497,7 @@ static pbio_error_t spi_begin_for_flash( edma3_set_param(125, &ps); } else { // RX dummy - ps.p.destAddr = (unsigned int)(&spi_dev.rx_dummy_byte); + ps.p.destAddr = (unsigned int)(&spi_dev_bufs.rx_dummy_byte); ps.p.bCnt = user_data_len; ps.p.destBIdx = 0; ps.p.linkAddr = 0xffff; @@ -487,8 +508,9 @@ static pbio_error_t spi_begin_for_flash( spi_dev.status = SPI_STATUS_WAIT_TX | SPI_STATUS_WAIT_RX; - // TODO: eventually needs DMA cache management - pbdrv_compiler_memory_barrier(); + // Make sure writes to tx buffer leave cache + // (we already flush the user buffer earlier) + pbdrv_cache_prepare_before_dma(&spi_dev_bufs, sizeof(spi_dev_bufs)); EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, EDMA3_CHA_SPI0_TX, EDMA3_TRIG_MODE_EVENT); EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, EDMA3_CHA_SPI0_RX, EDMA3_TRIG_MODE_EVENT); @@ -600,7 +622,7 @@ static pbio_error_t flash_wait_write(pbio_os_state_t *state) { } PBIO_OS_AWAIT_WHILE(state, spi_dev.status & SPI_STATUS_WAIT_ANY); - status = spi_dev.spi_cmd_buf_rx[1]; + status = spi_dev_bufs.spi_cmd_buf_rx[1]; } while (status & FLASH_STATUS_BUSY); PBIO_OS_ASYNC_END(PBIO_SUCCESS); @@ -722,7 +744,7 @@ static const uint32_t channel_cmd[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_ MANUAL_ADC_CHANNEL(15), MANUAL_ADC_CHANNEL(15), }; -static volatile uint16_t channel_data[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]; +static volatile uint16_t channel_data[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES] PBDRV_DMA_BUF; pbio_error_t pbdrv_adc_get_ch(uint8_t ch, uint16_t *value) { if (ch >= PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS) { @@ -733,8 +755,8 @@ pbio_error_t pbdrv_adc_get_ch(uint8_t ch, uint16_t *value) { uint16_t a, b; do { // Values for the requested channel are received several samples later. - a = channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]; - b = channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]; + a = PBDRV_UNCACHED(channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]); + b = PBDRV_UNCACHED(channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]); } while (a != b); // Mask the data to 10 bits @@ -804,9 +826,14 @@ static pbio_error_t pbdrv_block_device_ev3_spi_begin_for_adc(const uint32_t *cmd ps.p.opt = EDMA3CC_OPT_TCINTEN | (EDMA3_CHA_SPI0_RX << EDMA3CC_OPT_TCC_SHIFT); edma3_set_param(EDMA3_CHA_SPI0_RX, &ps); + // We play dangerously and don't flush the cache for the ADC. + // The commands are const, and the values are read through an uncached mapping. + spi_dev.rx_user_buf_addr = 0; + spi_dev.rx_user_buf_sz = 0; + spi_dev.status = SPI_STATUS_WAIT_TX | SPI_STATUS_WAIT_RX; - // TODO: eventually needs DMA cache management + // Prevent write to spi_dev.status from being reordered pbdrv_compiler_memory_barrier(); EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, EDMA3_CHA_SPI0_TX, EDMA3_TRIG_MODE_EVENT); @@ -836,7 +863,7 @@ static struct { pbsys_storage_data_map_t data_map; uint8_t data[PBDRV_CONFIG_BLOCK_DEVICE_RAM_SIZE]; }; -} ramdisk __attribute__((section(".noinit"), used)); +} ramdisk __attribute__((aligned(PBDRV_CACHE_LINE_SZ), section(".noinit"), used)); uint32_t pbdrv_block_device_get_writable_size(void) { return PBDRV_CONFIG_BLOCK_DEVICE_EV3_SIZE - sizeof(ramdisk.saved_size); @@ -869,7 +896,7 @@ pbio_error_t ev3_spi_process_thread(pbio_os_state_t *state, void *context) { return err; } PBIO_OS_AWAIT_WHILE(state, spi_dev.status & SPI_STATUS_WAIT_ANY); - if (memcmp(device_id, &spi_dev.spi_cmd_buf_rx[1], sizeof(device_id))) { + if (memcmp(device_id, &spi_dev_bufs.spi_cmd_buf_rx[1], sizeof(device_id))) { return PBIO_ERROR_FAILED; } diff --git a/lib/pbio/drv/display/display_ev3.c b/lib/pbio/drv/display/display_ev3.c index abcc32fef..ce3eecde2 100644 --- a/lib/pbio/drv/display/display_ev3.c +++ b/lib/pbio/drv/display/display_ev3.c @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -363,6 +364,8 @@ void pbdrv_display_st7586s_write_data_begin(uint8_t *data, uint32_t size) { spi_status = SPI_STATUS_WAIT; pbdrv_gpio_out_low(&pin_lcd_cs); + pbdrv_cache_prepare_before_dma(data, size); + // Parameter object must be volatile since it is copied byte-by-byte in the // TI API, causing it to be optimized out. volatile EDMA3CCPaRAMEntry paramSet = { diff --git a/lib/pbio/drv/uart/uart_ev3.c b/lib/pbio/drv/uart/uart_ev3.c index b53ed9c90..a2201a550 100644 --- a/lib/pbio/drv/uart/uart_ev3.c +++ b/lib/pbio/drv/uart/uart_ev3.c @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -220,6 +221,8 @@ pbio_error_t pbdrv_uart_write_hw(pbio_os_state_t *state, pbdrv_uart_dev_t *uart, .opt = EDMA3CC_OPT_DAM | ((pdata->sys_int_uart_tx_int_id << EDMA3CC_OPT_TCC_SHIFT) & EDMA3CC_OPT_TCC) | (1 << EDMA3CC_OPT_TCINTEN_SHIFT), }; + pbdrv_cache_prepare_before_dma(uart->write_buf, length); + // Save configuration and start transfer. EDMA3SetPaRAM(SOC_EDMA30CC_0_REGS, pdata->sys_int_uart_tx_int_id, (EDMA3CCPaRAMEntry *)¶mSet); EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, pdata->sys_int_uart_tx_int_id, EDMA3_TRIG_MODE_EVENT); diff --git a/lib/pbio/drv/usb/usb_ev3.c b/lib/pbio/drv/usb/usb_ev3.c index 122551498..9fcb0897b 100644 --- a/lib/pbio/drv/usb/usb_ev3.c +++ b/lib/pbio/drv/usb/usb_ev3.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -220,7 +221,7 @@ static bool pbdrv_usb_is_usb_hs; static bool pbdrv_usb_is_events_subscribed; // Buffers, used for different logical flows on the data endpoint -static uint8_t ep1_rx_buf[PYBRICKS_EP_PKT_SZ_HS]; +static uint8_t ep1_rx_buf[PYBRICKS_EP_PKT_SZ_HS] PBDRV_DMA_BUF; static uint8_t ep1_tx_response_buf[PYBRICKS_EP_PKT_SZ_HS]; static uint8_t ep1_tx_status_buf[PYBRICKS_EP_PKT_SZ_HS]; static uint8_t ep1_tx_stdout_buf[PYBRICKS_EP_PKT_SZ_HS]; @@ -283,7 +284,7 @@ static uint32_t cppi_linking_ram[CPPI_DESC_COUNT]; // Fill in the CPPI DMA descriptor to receive a packet static void usb_setup_rx_dma_desc(void) { - cppi_descriptors[CPPI_DESC_RX] = (usb_cppi_hpd_t) { + PBDRV_UNCACHED(cppi_descriptors[CPPI_DESC_RX]) = (usb_cppi_hpd_t) { .word0 = { .hostPktType = CPPI_HOST_PACKET_DESCRIPTOR_TYPE, }, @@ -307,7 +308,7 @@ static void usb_setup_rx_dma_desc(void) { // Fill in the CPPI DMA descriptor to send a packet static void usb_setup_tx_dma_desc(int tx_type, void *buf, uint32_t buf_len) { - cppi_descriptors[tx_type] = (usb_cppi_hpd_t) { + PBDRV_UNCACHED(cppi_descriptors[tx_type]) = (usb_cppi_hpd_t) { .word0 = { .hostPktType = CPPI_HOST_PACKET_DESCRIPTOR_TYPE, .pktLength = buf_len, @@ -904,11 +905,13 @@ static pbio_error_t pbdrv_usb_ev3_process_thread(pbio_os_state_t *state, void *c // (which is technically allowed by the as-if rule). pbdrv_compiler_memory_barrier(); - uint32_t usb_rx_sz = cppi_descriptors[CPPI_DESC_RX].word0.pktLength; + uint32_t usb_rx_sz = PBDRV_UNCACHED(cppi_descriptors[CPPI_DESC_RX].word0).pktLength; pbio_pybricks_error_t result; bool usb_send_response = false; // Skip empty commands, or commands sent when previous response is still busy if (usb_rx_sz && !usb_tx_response_is_not_ready) { + pbdrv_cache_prepare_after_dma(ep1_rx_buf, sizeof(ep1_rx_buf)); + switch (ep1_rx_buf[0]) { case PBIO_PYBRICKS_OUT_EP_MSG_SUBSCRIBE: pbio_os_timer_set(&keepalive_timer, 1000); @@ -928,6 +931,7 @@ static pbio_error_t pbdrv_usb_ev3_process_thread(pbio_os_state_t *state, void *c if (usb_send_response) { usb_tx_response_is_not_ready = true; + pbdrv_cache_prepare_before_dma(ep1_tx_response_buf, sizeof(ep1_tx_response_buf)); usb_setup_tx_dma_desc(CPPI_DESC_TX_RESPONSE, ep1_tx_response_buf, PBIO_PYBRICKS_USB_MESSAGE_SIZE(sizeof(uint32_t))); } @@ -944,6 +948,7 @@ static pbio_error_t pbdrv_usb_ev3_process_thread(pbio_os_state_t *state, void *c uint32_t usb_status_sz = PBIO_PYBRICKS_USB_MESSAGE_SIZE(pbsys_status_get_status_report(&ep1_tx_status_buf[1])); usb_tx_status_is_not_ready = true; + pbdrv_cache_prepare_before_dma(ep1_tx_status_buf, sizeof(ep1_tx_status_buf)); usb_setup_tx_dma_desc(CPPI_DESC_TX_STATUS, ep1_tx_status_buf, usb_status_sz); prev_status_flags = new_status_flags; @@ -1124,6 +1129,7 @@ pbio_error_t pbdrv_usb_stdout_tx(const uint8_t *data, uint32_t *size) { memcpy(ptr, data, *size); usb_tx_stdout_is_not_ready = true; + pbdrv_cache_prepare_before_dma(ep1_tx_stdout_buf, sizeof(ep1_tx_stdout_buf)); usb_setup_tx_dma_desc(CPPI_DESC_TX_STDOUT, ep1_tx_stdout_buf, 2 + *size); return PBIO_SUCCESS; diff --git a/lib/pbio/include/pbdrv/cache.h b/lib/pbio/include/pbdrv/cache.h index 3824da292..11621edcd 100644 --- a/lib/pbio/include/pbdrv/cache.h +++ b/lib/pbio/include/pbdrv/cache.h @@ -24,6 +24,6 @@ void pbdrv_cache_prepare_after_dma(const void *buf, size_t sz); #define PBDRV_CACHE_LINE_SZ 32 // Align data to a cache line, which is needed for clean RX DMA -#define PBDRV_CACHE_LINE_ALIGNED __attribute__((aligned(PBDRV_CACHE_LINE_SZ))) +#define PBDRV_DMA_BUF __attribute__((aligned(PBDRV_CACHE_LINE_SZ), section(".dma"))) #endif diff --git a/lib/pbio/platform/ev3/platform.c b/lib/pbio/platform/ev3/platform.c index 1131aea79..0bc63ce1c 100644 --- a/lib/pbio/platform/ev3/platform.c +++ b/lib/pbio/platform/ev3/platform.c @@ -721,8 +721,8 @@ static void mmu_init(void) { // Off-chip main DDR RAM for (unsigned int i = 0; i < SYSTEM_RAM_SZ_MB; i++) { uint32_t addr = 0xC0000000 + i * MMU_SECTION_SZ; - // TODO: Enable caching once DMA code is upgraded to handle cache - l1_page_table[addr >> MMU_SECTION_SHIFT] = MMU_L1_SECTION(addr, 0, 1, 0, 0); + // Enable write-back caching + l1_page_table[addr >> MMU_SECTION_SHIFT] = MMU_L1_SECTION(addr, 0, 1, 1, 1); } // Off-chip main DDR RAM, uncacheable mirror @ 0xD0000000 for (unsigned int i = 0; i < SYSTEM_RAM_SZ_MB; i++) { diff --git a/lib/pbio/platform/ev3/platform.ld b/lib/pbio/platform/ev3/platform.ld index 4eb875f8e..ab28e82e4 100644 --- a/lib/pbio/platform/ev3/platform.ld +++ b/lib/pbio/platform/ev3/platform.ld @@ -66,7 +66,11 @@ SECTIONS _bss_start = .; *(.bss) *(.bss.*) - . = ALIGN(4); + /* Put DMA RX buffers here so that they never share cache lines with + unrelated data. This makes sure that they can be invalidated properly. */ + . = ALIGN(32); + *(.dma) + . = ALIGN(32); _bss_end = .; } > DDR