diff --git a/lib/pbio/drv/block_device/block_device_ev3.c b/lib/pbio/drv/block_device/block_device_ev3.c index 9198a64c8..3ba1d4b17 100644 --- a/lib/pbio/drv/block_device/block_device_ev3.c +++ b/lib/pbio/drv/block_device/block_device_ev3.c @@ -34,6 +34,7 @@ #include "block_device_ev3.h" #include +#include #include #include #include @@ -93,17 +94,27 @@ enum { static struct { /** HAL Transfer status */ volatile spi_status_t status; - // This is used when SPI only needs to receive. It should always stay as 0. - uint8_t tx_dummy_byte; - // This is used when received data is to be discarded. Its value should be ignored. - uint8_t rx_dummy_byte; + // This stores the RX buffer address so that we clean the cache when DMA is complete + uint32_t rx_user_buf_addr; + // This stores the RX buffer size; + uint32_t rx_user_buf_sz; +} spi_dev; + +/** + * SPI small DMA buffers + */ +static struct { // This is used when transmitting so that the last byte clears CSHOLD. uint32_t tx_last_word; // This is used to hold the initial command to the SPI peripheral. uint8_t spi_cmd_buf_tx[SPI_CMD_BUF_SZ]; // This is used to hold the replies to commands to the SPI peripheral. uint8_t spi_cmd_buf_rx[SPI_CMD_BUF_SZ]; -} spi_dev; + // This is used when SPI only needs to receive. It should always stay as 0. + uint8_t tx_dummy_byte; + // This is used when received data is to be discarded. Its value should be ignored. + uint8_t rx_dummy_byte; +} spi_dev_bufs PBDRV_DMA_BUF; static uint32_t last_spi_dma_complete_time; @@ -114,6 +125,10 @@ static void spi_dma_complete(void) { } SPIIntDisable(SOC_SPI_0_REGS, SPI_DMA_REQUEST_ENA_INT); pbio_os_request_poll(); + pbdrv_cache_prepare_after_dma(&spi_dev_bufs, sizeof(spi_dev_bufs)); + if (spi_dev.rx_user_buf_addr && spi_dev.rx_user_buf_sz) { + pbdrv_cache_prepare_after_dma((void *)spi_dev.rx_user_buf_addr, spi_dev.rx_user_buf_sz); + } last_spi_dma_complete_time = pbdrv_clock_get_ms(); } @@ -153,7 +168,7 @@ static void spi0_isr(void) { continue; } - spi_dev.spi_cmd_buf_rx[0] = HWREG(SOC_SPI_0_REGS + SPI_SPIBUF); + spi_dev_bufs.spi_cmd_buf_rx[0] = HWREG(SOC_SPI_0_REGS + SPI_SPIBUF); spi_dev.status &= ~SPI_STATUS_WAIT_RX; SPIIntDisable(SOC_SPI_0_REGS, SPI_RECV_INT); pbio_os_request_poll(); @@ -351,19 +366,24 @@ static pbio_error_t spi_begin_for_flash( spi_dev.status = SPI_STATUS_WAIT_RX; + // Prevent write to spi_dev.status from being reordered + pbdrv_compiler_memory_barrier(); + uint32_t tx = spi0_last_dat1_for_flash(cmd[0]); SPIIntEnable(SOC_SPI_0_REGS, SPI_RECV_INT); HWREG(SOC_SPI_0_REGS + SPI_SPIDAT1) = tx; } else { - memcpy(&spi_dev.spi_cmd_buf_tx, cmd, cmd_len); + memcpy(spi_dev_bufs.spi_cmd_buf_tx, cmd, cmd_len); + spi_dev.rx_user_buf_addr = (uint32_t)user_data_rx; + spi_dev.rx_user_buf_sz = user_data_len; if (user_data_len == 0) { // Only a command, no user data - spi_dev.tx_last_word = spi0_last_dat1_for_flash(cmd[cmd_len - 1]); + spi_dev_bufs.tx_last_word = spi0_last_dat1_for_flash(cmd[cmd_len - 1]); // TX everything except last byte - ps.p.srcAddr = (unsigned int)(&spi_dev.spi_cmd_buf_tx); + ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_tx); ps.p.destAddr = SOC_SPI_0_REGS + SPI_SPIDAT1; ps.p.aCnt = 1; ps.p.bCnt = cmd_len - 1; @@ -378,7 +398,7 @@ static pbio_error_t spi_begin_for_flash( edma3_set_param(EDMA3_CHA_SPI0_TX, &ps); // TX last byte, clearing CSHOLD - ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word); + ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word); ps.p.aCnt = 4; ps.p.bCnt = 1; ps.p.linkAddr = 0xffff; @@ -387,7 +407,7 @@ static pbio_error_t spi_begin_for_flash( // RX all bytes ps.p.srcAddr = SOC_SPI_0_REGS + SPI_SPIBUF; - ps.p.destAddr = (unsigned int)(&spi_dev.spi_cmd_buf_rx); + ps.p.destAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_rx); ps.p.aCnt = 1; ps.p.bCnt = cmd_len; ps.p.cCnt = 1; @@ -403,7 +423,7 @@ static pbio_error_t spi_begin_for_flash( // Command *and* user data // TX the command - ps.p.srcAddr = (unsigned int)(&spi_dev.spi_cmd_buf_tx); + ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_tx); ps.p.destAddr = SOC_SPI_0_REGS + SPI_SPIDAT1; ps.p.aCnt = 1; ps.p.bCnt = cmd_len; @@ -418,7 +438,8 @@ static pbio_error_t spi_begin_for_flash( edma3_set_param(EDMA3_CHA_SPI0_TX, &ps); if (user_data_tx) { - spi_dev.tx_last_word = spi0_last_dat1_for_flash(user_data_tx[user_data_len - 1]); + pbdrv_cache_prepare_before_dma(user_data_tx, user_data_len); + spi_dev_bufs.tx_last_word = spi0_last_dat1_for_flash(user_data_tx[user_data_len - 1]); // TX all but the last byte ps.p.srcAddr = (unsigned int)(user_data_tx); @@ -427,24 +448,24 @@ static pbio_error_t spi_begin_for_flash( edma3_set_param(126, &ps); // TX the last byte, clearing CSHOLD - ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word); + ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word); ps.p.aCnt = 4; ps.p.bCnt = 1; ps.p.linkAddr = 0xffff; ps.p.opt = EDMA3CC_OPT_TCINTEN | (EDMA3_CHA_SPI0_TX << EDMA3CC_OPT_TCC_SHIFT); edma3_set_param(127, &ps); } else { - spi_dev.tx_last_word = spi0_last_dat1_for_flash(0); + spi_dev_bufs.tx_last_word = spi0_last_dat1_for_flash(0); // TX all but the last byte - ps.p.srcAddr = (unsigned int)(&spi_dev.tx_dummy_byte); + ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_dummy_byte); ps.p.bCnt = user_data_len - 1; ps.p.srcBIdx = 0; ps.p.linkAddr = 127 * 32; edma3_set_param(126, &ps); // TX the last byte, clearing CSHOLD - ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word); + ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word); ps.p.aCnt = 4; ps.p.bCnt = 1; ps.p.linkAddr = 0xffff; @@ -454,7 +475,7 @@ static pbio_error_t spi_begin_for_flash( // RX the command ps.p.srcAddr = SOC_SPI_0_REGS + SPI_SPIBUF; - ps.p.destAddr = (unsigned int)(&spi_dev.spi_cmd_buf_rx); + ps.p.destAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_rx); ps.p.aCnt = 1; ps.p.bCnt = cmd_len; ps.p.cCnt = 1; @@ -476,7 +497,7 @@ static pbio_error_t spi_begin_for_flash( edma3_set_param(125, &ps); } else { // RX dummy - ps.p.destAddr = (unsigned int)(&spi_dev.rx_dummy_byte); + ps.p.destAddr = (unsigned int)(&spi_dev_bufs.rx_dummy_byte); ps.p.bCnt = user_data_len; ps.p.destBIdx = 0; ps.p.linkAddr = 0xffff; @@ -487,8 +508,9 @@ static pbio_error_t spi_begin_for_flash( spi_dev.status = SPI_STATUS_WAIT_TX | SPI_STATUS_WAIT_RX; - // TODO: eventually needs DMA cache management - pbdrv_compiler_memory_barrier(); + // Make sure writes to tx buffer leave cache + // (we already flush the user buffer earlier) + pbdrv_cache_prepare_before_dma(&spi_dev_bufs, sizeof(spi_dev_bufs)); EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, EDMA3_CHA_SPI0_TX, EDMA3_TRIG_MODE_EVENT); EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, EDMA3_CHA_SPI0_RX, EDMA3_TRIG_MODE_EVENT); @@ -600,7 +622,7 @@ static pbio_error_t flash_wait_write(pbio_os_state_t *state) { } PBIO_OS_AWAIT_WHILE(state, spi_dev.status & SPI_STATUS_WAIT_ANY); - status = spi_dev.spi_cmd_buf_rx[1]; + status = spi_dev_bufs.spi_cmd_buf_rx[1]; } while (status & FLASH_STATUS_BUSY); PBIO_OS_ASYNC_END(PBIO_SUCCESS); @@ -722,7 +744,7 @@ static const uint32_t channel_cmd[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_ MANUAL_ADC_CHANNEL(15), MANUAL_ADC_CHANNEL(15), }; -static volatile uint16_t channel_data[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]; +static volatile uint16_t channel_data[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES] PBDRV_DMA_BUF; pbio_error_t pbdrv_adc_get_ch(uint8_t ch, uint16_t *value) { if (ch >= PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS) { @@ -733,8 +755,8 @@ pbio_error_t pbdrv_adc_get_ch(uint8_t ch, uint16_t *value) { uint16_t a, b; do { // Values for the requested channel are received several samples later. - a = channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]; - b = channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]; + a = PBDRV_UNCACHED(channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]); + b = PBDRV_UNCACHED(channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]); } while (a != b); // Mask the data to 10 bits @@ -804,9 +826,14 @@ static pbio_error_t pbdrv_block_device_ev3_spi_begin_for_adc(const uint32_t *cmd ps.p.opt = EDMA3CC_OPT_TCINTEN | (EDMA3_CHA_SPI0_RX << EDMA3CC_OPT_TCC_SHIFT); edma3_set_param(EDMA3_CHA_SPI0_RX, &ps); + // We play dangerously and don't flush the cache for the ADC. + // The commands are const, and the values are read through an uncached mapping. + spi_dev.rx_user_buf_addr = 0; + spi_dev.rx_user_buf_sz = 0; + spi_dev.status = SPI_STATUS_WAIT_TX | SPI_STATUS_WAIT_RX; - // TODO: eventually needs DMA cache management + // Prevent write to spi_dev.status from being reordered pbdrv_compiler_memory_barrier(); EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, EDMA3_CHA_SPI0_TX, EDMA3_TRIG_MODE_EVENT); @@ -836,7 +863,7 @@ static struct { pbsys_storage_data_map_t data_map; uint8_t data[PBDRV_CONFIG_BLOCK_DEVICE_RAM_SIZE]; }; -} ramdisk __attribute__((section(".noinit"), used)); +} ramdisk __attribute__((aligned(PBDRV_CACHE_LINE_SZ), section(".noinit"), used)); uint32_t pbdrv_block_device_get_writable_size(void) { return PBDRV_CONFIG_BLOCK_DEVICE_EV3_SIZE - sizeof(ramdisk.saved_size); @@ -869,7 +896,7 @@ pbio_error_t ev3_spi_process_thread(pbio_os_state_t *state, void *context) { return err; } PBIO_OS_AWAIT_WHILE(state, spi_dev.status & SPI_STATUS_WAIT_ANY); - if (memcmp(device_id, &spi_dev.spi_cmd_buf_rx[1], sizeof(device_id))) { + if (memcmp(device_id, &spi_dev_bufs.spi_cmd_buf_rx[1], sizeof(device_id))) { return PBIO_ERROR_FAILED; } diff --git a/lib/pbio/drv/display/display_ev3.c b/lib/pbio/drv/display/display_ev3.c index abcc32fef..ce3eecde2 100644 --- a/lib/pbio/drv/display/display_ev3.c +++ b/lib/pbio/drv/display/display_ev3.c @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -363,6 +364,8 @@ void pbdrv_display_st7586s_write_data_begin(uint8_t *data, uint32_t size) { spi_status = SPI_STATUS_WAIT; pbdrv_gpio_out_low(&pin_lcd_cs); + pbdrv_cache_prepare_before_dma(data, size); + // Parameter object must be volatile since it is copied byte-by-byte in the // TI API, causing it to be optimized out. volatile EDMA3CCPaRAMEntry paramSet = { diff --git a/lib/pbio/drv/uart/uart_ev3.c b/lib/pbio/drv/uart/uart_ev3.c index b53ed9c90..a2201a550 100644 --- a/lib/pbio/drv/uart/uart_ev3.c +++ b/lib/pbio/drv/uart/uart_ev3.c @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -220,6 +221,8 @@ pbio_error_t pbdrv_uart_write_hw(pbio_os_state_t *state, pbdrv_uart_dev_t *uart, .opt = EDMA3CC_OPT_DAM | ((pdata->sys_int_uart_tx_int_id << EDMA3CC_OPT_TCC_SHIFT) & EDMA3CC_OPT_TCC) | (1 << EDMA3CC_OPT_TCINTEN_SHIFT), }; + pbdrv_cache_prepare_before_dma(uart->write_buf, length); + // Save configuration and start transfer. EDMA3SetPaRAM(SOC_EDMA30CC_0_REGS, pdata->sys_int_uart_tx_int_id, (EDMA3CCPaRAMEntry *)¶mSet); EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, pdata->sys_int_uart_tx_int_id, EDMA3_TRIG_MODE_EVENT); diff --git a/lib/pbio/drv/usb/usb_ev3.c b/lib/pbio/drv/usb/usb_ev3.c index 122551498..9fcb0897b 100644 --- a/lib/pbio/drv/usb/usb_ev3.c +++ b/lib/pbio/drv/usb/usb_ev3.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -220,7 +221,7 @@ static bool pbdrv_usb_is_usb_hs; static bool pbdrv_usb_is_events_subscribed; // Buffers, used for different logical flows on the data endpoint -static uint8_t ep1_rx_buf[PYBRICKS_EP_PKT_SZ_HS]; +static uint8_t ep1_rx_buf[PYBRICKS_EP_PKT_SZ_HS] PBDRV_DMA_BUF; static uint8_t ep1_tx_response_buf[PYBRICKS_EP_PKT_SZ_HS]; static uint8_t ep1_tx_status_buf[PYBRICKS_EP_PKT_SZ_HS]; static uint8_t ep1_tx_stdout_buf[PYBRICKS_EP_PKT_SZ_HS]; @@ -283,7 +284,7 @@ static uint32_t cppi_linking_ram[CPPI_DESC_COUNT]; // Fill in the CPPI DMA descriptor to receive a packet static void usb_setup_rx_dma_desc(void) { - cppi_descriptors[CPPI_DESC_RX] = (usb_cppi_hpd_t) { + PBDRV_UNCACHED(cppi_descriptors[CPPI_DESC_RX]) = (usb_cppi_hpd_t) { .word0 = { .hostPktType = CPPI_HOST_PACKET_DESCRIPTOR_TYPE, }, @@ -307,7 +308,7 @@ static void usb_setup_rx_dma_desc(void) { // Fill in the CPPI DMA descriptor to send a packet static void usb_setup_tx_dma_desc(int tx_type, void *buf, uint32_t buf_len) { - cppi_descriptors[tx_type] = (usb_cppi_hpd_t) { + PBDRV_UNCACHED(cppi_descriptors[tx_type]) = (usb_cppi_hpd_t) { .word0 = { .hostPktType = CPPI_HOST_PACKET_DESCRIPTOR_TYPE, .pktLength = buf_len, @@ -904,11 +905,13 @@ static pbio_error_t pbdrv_usb_ev3_process_thread(pbio_os_state_t *state, void *c // (which is technically allowed by the as-if rule). pbdrv_compiler_memory_barrier(); - uint32_t usb_rx_sz = cppi_descriptors[CPPI_DESC_RX].word0.pktLength; + uint32_t usb_rx_sz = PBDRV_UNCACHED(cppi_descriptors[CPPI_DESC_RX].word0).pktLength; pbio_pybricks_error_t result; bool usb_send_response = false; // Skip empty commands, or commands sent when previous response is still busy if (usb_rx_sz && !usb_tx_response_is_not_ready) { + pbdrv_cache_prepare_after_dma(ep1_rx_buf, sizeof(ep1_rx_buf)); + switch (ep1_rx_buf[0]) { case PBIO_PYBRICKS_OUT_EP_MSG_SUBSCRIBE: pbio_os_timer_set(&keepalive_timer, 1000); @@ -928,6 +931,7 @@ static pbio_error_t pbdrv_usb_ev3_process_thread(pbio_os_state_t *state, void *c if (usb_send_response) { usb_tx_response_is_not_ready = true; + pbdrv_cache_prepare_before_dma(ep1_tx_response_buf, sizeof(ep1_tx_response_buf)); usb_setup_tx_dma_desc(CPPI_DESC_TX_RESPONSE, ep1_tx_response_buf, PBIO_PYBRICKS_USB_MESSAGE_SIZE(sizeof(uint32_t))); } @@ -944,6 +948,7 @@ static pbio_error_t pbdrv_usb_ev3_process_thread(pbio_os_state_t *state, void *c uint32_t usb_status_sz = PBIO_PYBRICKS_USB_MESSAGE_SIZE(pbsys_status_get_status_report(&ep1_tx_status_buf[1])); usb_tx_status_is_not_ready = true; + pbdrv_cache_prepare_before_dma(ep1_tx_status_buf, sizeof(ep1_tx_status_buf)); usb_setup_tx_dma_desc(CPPI_DESC_TX_STATUS, ep1_tx_status_buf, usb_status_sz); prev_status_flags = new_status_flags; @@ -1124,6 +1129,7 @@ pbio_error_t pbdrv_usb_stdout_tx(const uint8_t *data, uint32_t *size) { memcpy(ptr, data, *size); usb_tx_stdout_is_not_ready = true; + pbdrv_cache_prepare_before_dma(ep1_tx_stdout_buf, sizeof(ep1_tx_stdout_buf)); usb_setup_tx_dma_desc(CPPI_DESC_TX_STDOUT, ep1_tx_stdout_buf, 2 + *size); return PBIO_SUCCESS; diff --git a/lib/pbio/include/pbdrv/cache.h b/lib/pbio/include/pbdrv/cache.h new file mode 100644 index 000000000..11621edcd --- /dev/null +++ b/lib/pbio/include/pbdrv/cache.h @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2025 The Pybricks Authors + +#ifndef _PBDRV_CACHE_H_ + +#include + +// Gets data ready so that the data that we (the CPU) have written +// can be read by DMA peripherals. This cleans the relevant cache lines +// and flushes the write buffer. +void pbdrv_cache_prepare_before_dma(const void *buf, size_t sz); + +// Makes sure that we (the CPU) are able to read data written +// by DMA peripherals. This invalidates the relevant cache lines. +void pbdrv_cache_prepare_after_dma(const void *buf, size_t sz); + +// Accesses a variable via the uncached memory alias +#define PBDRV_UNCACHED(x) (*(volatile __typeof__(x) *)((uint32_t)(&(x)) + 0x10000000)) + +// Gets the address of the uncached memory alias of a variable +#define PBDRV_UNCACHED_ADDR(x) ((__typeof__(x) *)((uint32_t)(&(x)) + 0x10000000)) + +// Cache line size +#define PBDRV_CACHE_LINE_SZ 32 + +// Align data to a cache line, which is needed for clean RX DMA +#define PBDRV_DMA_BUF __attribute__((aligned(PBDRV_CACHE_LINE_SZ), section(".dma"))) + +#endif diff --git a/lib/pbio/platform/ev3/platform.c b/lib/pbio/platform/ev3/platform.c index c89c888b7..0bc63ce1c 100644 --- a/lib/pbio/platform/ev3/platform.c +++ b/lib/pbio/platform/ev3/platform.c @@ -55,6 +55,7 @@ #include +#include #include #include #include @@ -396,6 +397,22 @@ unsigned int EDMAVersionGet(void) { return 1; } +void pbdrv_cache_prepare_before_dma(const void *buf, size_t sz) { + // Make sure all data is written by the compiler... + pbdrv_compiler_memory_barrier(); + // and then make sure it's written out of the cache... + CP15DCacheCleanBuff((uint32_t)buf, sz); + // and also the write buffer, in case the cache missed. + CP15DrainWriteBuffer(); +} + +void pbdrv_cache_prepare_after_dma(const void *buf, size_t sz) { + // Invalidate stale data in the cache... + CP15DCacheFlushBuff((uint32_t)buf, sz); + // and then make sure _subsequent_ reads cannot be reordered earlier. + pbdrv_compiler_memory_barrier(); +} + static void panic_puts(const char *c) { while (*c) { UARTCharPut(SOC_UART_1_REGS, *(c++)); @@ -704,8 +721,8 @@ static void mmu_init(void) { // Off-chip main DDR RAM for (unsigned int i = 0; i < SYSTEM_RAM_SZ_MB; i++) { uint32_t addr = 0xC0000000 + i * MMU_SECTION_SZ; - // TODO: Enable caching once DMA code is upgraded to handle cache - l1_page_table[addr >> MMU_SECTION_SHIFT] = MMU_L1_SECTION(addr, 0, 1, 0, 0); + // Enable write-back caching + l1_page_table[addr >> MMU_SECTION_SHIFT] = MMU_L1_SECTION(addr, 0, 1, 1, 1); } // Off-chip main DDR RAM, uncacheable mirror @ 0xD0000000 for (unsigned int i = 0; i < SYSTEM_RAM_SZ_MB; i++) { diff --git a/lib/pbio/platform/ev3/platform.ld b/lib/pbio/platform/ev3/platform.ld index 4eb875f8e..ab28e82e4 100644 --- a/lib/pbio/platform/ev3/platform.ld +++ b/lib/pbio/platform/ev3/platform.ld @@ -66,7 +66,11 @@ SECTIONS _bss_start = .; *(.bss) *(.bss.*) - . = ALIGN(4); + /* Put DMA RX buffers here so that they never share cache lines with + unrelated data. This makes sure that they can be invalidated properly. */ + . = ALIGN(32); + *(.dma) + . = ALIGN(32); _bss_end = .; } > DDR diff --git a/lib/tiam1808/drivers/cppi41dma.c b/lib/tiam1808/drivers/cppi41dma.c index 2dd3eff1f..c42d13887 100644 --- a/lib/tiam1808/drivers/cppi41dma.c +++ b/lib/tiam1808/drivers/cppi41dma.c @@ -1132,7 +1132,7 @@ unsigned int dmaTxCompletion(unsigned short usbDevInst, unsigned int ulEndpoint if(state == DMA_TX_COMPLETED) while ((HWREGH(usbInstance->usbBaseAddress + ulRegister) & 0x2) == 0x02); - CP15ICacheFlushBuff((unsigned int)completed_bd->buffAdd, sizeof(completed_bd->buffAdd)); + CP15DCacheFlushBuff((unsigned int)completed_bd->buffAdd, sizeof(completed_bd->buffAdd)); cppiDmaFreenBuffer((unsigned int *)completed_bd->buffAdd); @@ -1207,13 +1207,13 @@ unsigned int dmaRxCompletion(unsigned short usbDevInst, unsigned int ulEndpoint ->rxEndPoint[ulEndpoint].complettionq); /*Fush the cache to update the BD */ - CP15ICacheFlushBuff((unsigned int)rx_bd, sizeof(hostPacketDesc)); + CP15DCacheFlushBuff((unsigned int)rx_bd, sizeof(hostPacketDesc)); bufferAdd = rx_bd->buffAdd; length = rx_bd->buffLength; /*Flush the cache to update the buffer */ - CP15ICacheFlushBuff(bufferAdd, length); + CP15DCacheFlushBuff(bufferAdd, length); putFreeBd(rx_bd); diff --git a/lib/tiam1808/system_config/armv5/gcc/cp15.c b/lib/tiam1808/system_config/armv5/gcc/cp15.c index 39d5f28dc..6345af1b2 100644 --- a/lib/tiam1808/system_config/armv5/gcc/cp15.c +++ b/lib/tiam1808/system_config/armv5/gcc/cp15.c @@ -170,7 +170,7 @@ void CP15ICacheFlush(void) * \return None. * **/ -void CP15ICacheFlushBuff(unsigned int bufPtr, unsigned int size) +void CP15DCacheFlushBuff(unsigned int bufPtr, unsigned int size) { unsigned int ptr; @@ -208,6 +208,16 @@ void CP15DCacheCleanBuff(unsigned int bufPtr, unsigned int size) } } +/** + * \brief This function drains the CPU write buffer and acts as a data memory barrier. + */ +void CP15DrainWriteBuffer(void) +{ + __asm(" mov r0, #0\n\t" + " mcr p15, #0, r0, c7, c10, #4" + ::: "r0"); +} + /** * \brief This API Configures translation table base register with * with page table starting address. diff --git a/lib/tiam1808/tiam1808/armv5/cp15.h b/lib/tiam1808/tiam1808/armv5/cp15.h index 847e0cf69..0171197fc 100644 --- a/lib/tiam1808/tiam1808/armv5/cp15.h +++ b/lib/tiam1808/tiam1808/armv5/cp15.h @@ -60,8 +60,9 @@ extern void CP15ICacheFlush(void); extern void CP15TtbSet(unsigned int ttb); extern void CP15MMUDisable(void); extern void CP15MMUEnable(void); -extern void CP15ICacheFlushBuff(unsigned int ptr, unsigned int size); +extern void CP15DCacheFlushBuff(unsigned int ptr, unsigned int size); extern void CP15DCacheCleanBuff(unsigned int bufPtr, unsigned int size); +extern void CP15DrainWriteBuffer(void); extern uint32_t CP15GetDFSR(void); extern uint32_t CP15GetIFSR(void); extern uint32_t CP15GetFAR(void);