Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 55 additions & 28 deletions lib/pbio/drv/block_device/block_device_ev3.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "block_device_ev3.h"

#include <pbdrv/block_device.h>
#include <pbdrv/cache.h>
#include <pbdrv/clock.h>
#include <pbdrv/compiler.h>
#include <pbdrv/gpio.h>
Expand Down Expand Up @@ -93,17 +94,27 @@ enum {
static struct {
/** HAL Transfer status */
volatile spi_status_t status;
// This is used when SPI only needs to receive. It should always stay as 0.
uint8_t tx_dummy_byte;
// This is used when received data is to be discarded. Its value should be ignored.
uint8_t rx_dummy_byte;
// This stores the RX buffer address so that we clean the cache when DMA is complete
uint32_t rx_user_buf_addr;
// This stores the RX buffer size;
uint32_t rx_user_buf_sz;
} spi_dev;

/**
* SPI small DMA buffers
*/
static struct {
// This is used when transmitting so that the last byte clears CSHOLD.
uint32_t tx_last_word;
// This is used to hold the initial command to the SPI peripheral.
uint8_t spi_cmd_buf_tx[SPI_CMD_BUF_SZ];
// This is used to hold the replies to commands to the SPI peripheral.
uint8_t spi_cmd_buf_rx[SPI_CMD_BUF_SZ];
} spi_dev;
// This is used when SPI only needs to receive. It should always stay as 0.
uint8_t tx_dummy_byte;
// This is used when received data is to be discarded. Its value should be ignored.
uint8_t rx_dummy_byte;
} spi_dev_bufs PBDRV_DMA_BUF;

static uint32_t last_spi_dma_complete_time;

Expand All @@ -114,6 +125,10 @@ static void spi_dma_complete(void) {
}
SPIIntDisable(SOC_SPI_0_REGS, SPI_DMA_REQUEST_ENA_INT);
pbio_os_request_poll();
pbdrv_cache_prepare_after_dma(&spi_dev_bufs, sizeof(spi_dev_bufs));
if (spi_dev.rx_user_buf_addr && spi_dev.rx_user_buf_sz) {
pbdrv_cache_prepare_after_dma((void *)spi_dev.rx_user_buf_addr, spi_dev.rx_user_buf_sz);
}
last_spi_dma_complete_time = pbdrv_clock_get_ms();
}

Expand Down Expand Up @@ -153,7 +168,7 @@ static void spi0_isr(void) {
continue;
}

spi_dev.spi_cmd_buf_rx[0] = HWREG(SOC_SPI_0_REGS + SPI_SPIBUF);
spi_dev_bufs.spi_cmd_buf_rx[0] = HWREG(SOC_SPI_0_REGS + SPI_SPIBUF);
spi_dev.status &= ~SPI_STATUS_WAIT_RX;
SPIIntDisable(SOC_SPI_0_REGS, SPI_RECV_INT);
pbio_os_request_poll();
Expand Down Expand Up @@ -351,19 +366,24 @@ static pbio_error_t spi_begin_for_flash(

spi_dev.status = SPI_STATUS_WAIT_RX;

// Prevent write to spi_dev.status from being reordered
pbdrv_compiler_memory_barrier();

uint32_t tx = spi0_last_dat1_for_flash(cmd[0]);
SPIIntEnable(SOC_SPI_0_REGS, SPI_RECV_INT);
HWREG(SOC_SPI_0_REGS + SPI_SPIDAT1) = tx;
} else {
memcpy(&spi_dev.spi_cmd_buf_tx, cmd, cmd_len);
memcpy(spi_dev_bufs.spi_cmd_buf_tx, cmd, cmd_len);
spi_dev.rx_user_buf_addr = (uint32_t)user_data_rx;
spi_dev.rx_user_buf_sz = user_data_len;

if (user_data_len == 0) {
// Only a command, no user data

spi_dev.tx_last_word = spi0_last_dat1_for_flash(cmd[cmd_len - 1]);
spi_dev_bufs.tx_last_word = spi0_last_dat1_for_flash(cmd[cmd_len - 1]);

// TX everything except last byte
ps.p.srcAddr = (unsigned int)(&spi_dev.spi_cmd_buf_tx);
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_tx);
ps.p.destAddr = SOC_SPI_0_REGS + SPI_SPIDAT1;
ps.p.aCnt = 1;
ps.p.bCnt = cmd_len - 1;
Expand All @@ -378,7 +398,7 @@ static pbio_error_t spi_begin_for_flash(
edma3_set_param(EDMA3_CHA_SPI0_TX, &ps);

// TX last byte, clearing CSHOLD
ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word);
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word);
ps.p.aCnt = 4;
ps.p.bCnt = 1;
ps.p.linkAddr = 0xffff;
Expand All @@ -387,7 +407,7 @@ static pbio_error_t spi_begin_for_flash(

// RX all bytes
ps.p.srcAddr = SOC_SPI_0_REGS + SPI_SPIBUF;
ps.p.destAddr = (unsigned int)(&spi_dev.spi_cmd_buf_rx);
ps.p.destAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_rx);
ps.p.aCnt = 1;
ps.p.bCnt = cmd_len;
ps.p.cCnt = 1;
Expand All @@ -403,7 +423,7 @@ static pbio_error_t spi_begin_for_flash(
// Command *and* user data

// TX the command
ps.p.srcAddr = (unsigned int)(&spi_dev.spi_cmd_buf_tx);
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_tx);
ps.p.destAddr = SOC_SPI_0_REGS + SPI_SPIDAT1;
ps.p.aCnt = 1;
ps.p.bCnt = cmd_len;
Expand All @@ -418,7 +438,8 @@ static pbio_error_t spi_begin_for_flash(
edma3_set_param(EDMA3_CHA_SPI0_TX, &ps);

if (user_data_tx) {
spi_dev.tx_last_word = spi0_last_dat1_for_flash(user_data_tx[user_data_len - 1]);
pbdrv_cache_prepare_before_dma(user_data_tx, user_data_len);
spi_dev_bufs.tx_last_word = spi0_last_dat1_for_flash(user_data_tx[user_data_len - 1]);

// TX all but the last byte
ps.p.srcAddr = (unsigned int)(user_data_tx);
Expand All @@ -427,24 +448,24 @@ static pbio_error_t spi_begin_for_flash(
edma3_set_param(126, &ps);

// TX the last byte, clearing CSHOLD
ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word);
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word);
ps.p.aCnt = 4;
ps.p.bCnt = 1;
ps.p.linkAddr = 0xffff;
ps.p.opt = EDMA3CC_OPT_TCINTEN | (EDMA3_CHA_SPI0_TX << EDMA3CC_OPT_TCC_SHIFT);
edma3_set_param(127, &ps);
} else {
spi_dev.tx_last_word = spi0_last_dat1_for_flash(0);
spi_dev_bufs.tx_last_word = spi0_last_dat1_for_flash(0);

// TX all but the last byte
ps.p.srcAddr = (unsigned int)(&spi_dev.tx_dummy_byte);
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_dummy_byte);
ps.p.bCnt = user_data_len - 1;
ps.p.srcBIdx = 0;
ps.p.linkAddr = 127 * 32;
edma3_set_param(126, &ps);

// TX the last byte, clearing CSHOLD
ps.p.srcAddr = (unsigned int)(&spi_dev.tx_last_word);
ps.p.srcAddr = (unsigned int)(&spi_dev_bufs.tx_last_word);
ps.p.aCnt = 4;
ps.p.bCnt = 1;
ps.p.linkAddr = 0xffff;
Expand All @@ -454,7 +475,7 @@ static pbio_error_t spi_begin_for_flash(

// RX the command
ps.p.srcAddr = SOC_SPI_0_REGS + SPI_SPIBUF;
ps.p.destAddr = (unsigned int)(&spi_dev.spi_cmd_buf_rx);
ps.p.destAddr = (unsigned int)(&spi_dev_bufs.spi_cmd_buf_rx);
ps.p.aCnt = 1;
ps.p.bCnt = cmd_len;
ps.p.cCnt = 1;
Expand All @@ -476,7 +497,7 @@ static pbio_error_t spi_begin_for_flash(
edma3_set_param(125, &ps);
} else {
// RX dummy
ps.p.destAddr = (unsigned int)(&spi_dev.rx_dummy_byte);
ps.p.destAddr = (unsigned int)(&spi_dev_bufs.rx_dummy_byte);
ps.p.bCnt = user_data_len;
ps.p.destBIdx = 0;
ps.p.linkAddr = 0xffff;
Expand All @@ -487,8 +508,9 @@ static pbio_error_t spi_begin_for_flash(

spi_dev.status = SPI_STATUS_WAIT_TX | SPI_STATUS_WAIT_RX;

// TODO: eventually needs DMA cache management
pbdrv_compiler_memory_barrier();
// Make sure writes to tx buffer leave cache
// (we already flush the user buffer earlier)
pbdrv_cache_prepare_before_dma(&spi_dev_bufs, sizeof(spi_dev_bufs));

EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, EDMA3_CHA_SPI0_TX, EDMA3_TRIG_MODE_EVENT);
EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, EDMA3_CHA_SPI0_RX, EDMA3_TRIG_MODE_EVENT);
Expand Down Expand Up @@ -600,7 +622,7 @@ static pbio_error_t flash_wait_write(pbio_os_state_t *state) {
}
PBIO_OS_AWAIT_WHILE(state, spi_dev.status & SPI_STATUS_WAIT_ANY);

status = spi_dev.spi_cmd_buf_rx[1];
status = spi_dev_bufs.spi_cmd_buf_rx[1];
} while (status & FLASH_STATUS_BUSY);

PBIO_OS_ASYNC_END(PBIO_SUCCESS);
Expand Down Expand Up @@ -722,7 +744,7 @@ static const uint32_t channel_cmd[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_
MANUAL_ADC_CHANNEL(15),
MANUAL_ADC_CHANNEL(15),
};
static volatile uint16_t channel_data[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES];
static volatile uint16_t channel_data[PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES] PBDRV_DMA_BUF;

pbio_error_t pbdrv_adc_get_ch(uint8_t ch, uint16_t *value) {
if (ch >= PBDRV_CONFIG_ADC_EV3_ADC_NUM_CHANNELS) {
Expand All @@ -733,8 +755,8 @@ pbio_error_t pbdrv_adc_get_ch(uint8_t ch, uint16_t *value) {
uint16_t a, b;
do {
// Values for the requested channel are received several samples later.
a = channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES];
b = channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES];
a = PBDRV_UNCACHED(channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]);
b = PBDRV_UNCACHED(channel_data[ch + PBDRV_ADC_EV3_NUM_DELAY_SAMPLES]);
} while (a != b);

// Mask the data to 10 bits
Expand Down Expand Up @@ -804,9 +826,14 @@ static pbio_error_t pbdrv_block_device_ev3_spi_begin_for_adc(const uint32_t *cmd
ps.p.opt = EDMA3CC_OPT_TCINTEN | (EDMA3_CHA_SPI0_RX << EDMA3CC_OPT_TCC_SHIFT);
edma3_set_param(EDMA3_CHA_SPI0_RX, &ps);

// We play dangerously and don't flush the cache for the ADC.
// The commands are const, and the values are read through an uncached mapping.
spi_dev.rx_user_buf_addr = 0;
spi_dev.rx_user_buf_sz = 0;

spi_dev.status = SPI_STATUS_WAIT_TX | SPI_STATUS_WAIT_RX;

// TODO: eventually needs DMA cache management
// Prevent write to spi_dev.status from being reordered
pbdrv_compiler_memory_barrier();

EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, EDMA3_CHA_SPI0_TX, EDMA3_TRIG_MODE_EVENT);
Expand Down Expand Up @@ -836,7 +863,7 @@ static struct {
pbsys_storage_data_map_t data_map;
uint8_t data[PBDRV_CONFIG_BLOCK_DEVICE_RAM_SIZE];
};
} ramdisk __attribute__((section(".noinit"), used));
} ramdisk __attribute__((aligned(PBDRV_CACHE_LINE_SZ), section(".noinit"), used));

uint32_t pbdrv_block_device_get_writable_size(void) {
return PBDRV_CONFIG_BLOCK_DEVICE_EV3_SIZE - sizeof(ramdisk.saved_size);
Expand Down Expand Up @@ -869,7 +896,7 @@ pbio_error_t ev3_spi_process_thread(pbio_os_state_t *state, void *context) {
return err;
}
PBIO_OS_AWAIT_WHILE(state, spi_dev.status & SPI_STATUS_WAIT_ANY);
if (memcmp(device_id, &spi_dev.spi_cmd_buf_rx[1], sizeof(device_id))) {
if (memcmp(device_id, &spi_dev_bufs.spi_cmd_buf_rx[1], sizeof(device_id))) {
return PBIO_ERROR_FAILED;
}

Expand Down
3 changes: 3 additions & 0 deletions lib/pbio/drv/display/display_ev3.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <stdio.h>
#include <string.h>

#include <pbdrv/cache.h>
#include <pbdrv/display.h>
#include <pbdrv/gpio.h>

Expand Down Expand Up @@ -363,6 +364,8 @@ void pbdrv_display_st7586s_write_data_begin(uint8_t *data, uint32_t size) {
spi_status = SPI_STATUS_WAIT;
pbdrv_gpio_out_low(&pin_lcd_cs);

pbdrv_cache_prepare_before_dma(data, size);

// Parameter object must be volatile since it is copied byte-by-byte in the
// TI API, causing it to be optimized out.
volatile EDMA3CCPaRAMEntry paramSet = {
Expand Down
3 changes: 3 additions & 0 deletions lib/pbio/drv/uart/uart_ev3.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include <contiki.h>
#include <contiki-lib.h>

#include <pbdrv/cache.h>
#include <pbdrv/uart.h>

#include <pbio/busy_count.h>
Expand Down Expand Up @@ -220,6 +221,8 @@ pbio_error_t pbdrv_uart_write_hw(pbio_os_state_t *state, pbdrv_uart_dev_t *uart,
.opt = EDMA3CC_OPT_DAM | ((pdata->sys_int_uart_tx_int_id << EDMA3CC_OPT_TCC_SHIFT) & EDMA3CC_OPT_TCC) | (1 << EDMA3CC_OPT_TCINTEN_SHIFT),
};

pbdrv_cache_prepare_before_dma(uart->write_buf, length);

// Save configuration and start transfer.
EDMA3SetPaRAM(SOC_EDMA30CC_0_REGS, pdata->sys_int_uart_tx_int_id, (EDMA3CCPaRAMEntry *)&paramSet);
EDMA3EnableTransfer(SOC_EDMA30CC_0_REGS, pdata->sys_int_uart_tx_int_id, EDMA3_TRIG_MODE_EVENT);
Expand Down
14 changes: 10 additions & 4 deletions lib/pbio/drv/usb/usb_ev3.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <string.h>

#include <pbdrv/bluetooth.h>
#include <pbdrv/cache.h>
#include <pbdrv/compiler.h>
#include <pbdrv/usb.h>
#include <pbio/os.h>
Expand Down Expand Up @@ -220,7 +221,7 @@ static bool pbdrv_usb_is_usb_hs;
static bool pbdrv_usb_is_events_subscribed;

// Buffers, used for different logical flows on the data endpoint
static uint8_t ep1_rx_buf[PYBRICKS_EP_PKT_SZ_HS];
static uint8_t ep1_rx_buf[PYBRICKS_EP_PKT_SZ_HS] PBDRV_DMA_BUF;
static uint8_t ep1_tx_response_buf[PYBRICKS_EP_PKT_SZ_HS];
static uint8_t ep1_tx_status_buf[PYBRICKS_EP_PKT_SZ_HS];
static uint8_t ep1_tx_stdout_buf[PYBRICKS_EP_PKT_SZ_HS];
Expand Down Expand Up @@ -283,7 +284,7 @@ static uint32_t cppi_linking_ram[CPPI_DESC_COUNT];

// Fill in the CPPI DMA descriptor to receive a packet
static void usb_setup_rx_dma_desc(void) {
cppi_descriptors[CPPI_DESC_RX] = (usb_cppi_hpd_t) {
PBDRV_UNCACHED(cppi_descriptors[CPPI_DESC_RX]) = (usb_cppi_hpd_t) {
.word0 = {
.hostPktType = CPPI_HOST_PACKET_DESCRIPTOR_TYPE,
},
Expand All @@ -307,7 +308,7 @@ static void usb_setup_rx_dma_desc(void) {

// Fill in the CPPI DMA descriptor to send a packet
static void usb_setup_tx_dma_desc(int tx_type, void *buf, uint32_t buf_len) {
cppi_descriptors[tx_type] = (usb_cppi_hpd_t) {
PBDRV_UNCACHED(cppi_descriptors[tx_type]) = (usb_cppi_hpd_t) {
.word0 = {
.hostPktType = CPPI_HOST_PACKET_DESCRIPTOR_TYPE,
.pktLength = buf_len,
Expand Down Expand Up @@ -904,11 +905,13 @@ static pbio_error_t pbdrv_usb_ev3_process_thread(pbio_os_state_t *state, void *c
// (which is technically allowed by the as-if rule).
pbdrv_compiler_memory_barrier();

uint32_t usb_rx_sz = cppi_descriptors[CPPI_DESC_RX].word0.pktLength;
uint32_t usb_rx_sz = PBDRV_UNCACHED(cppi_descriptors[CPPI_DESC_RX].word0).pktLength;
pbio_pybricks_error_t result;
bool usb_send_response = false;
// Skip empty commands, or commands sent when previous response is still busy
if (usb_rx_sz && !usb_tx_response_is_not_ready) {
pbdrv_cache_prepare_after_dma(ep1_rx_buf, sizeof(ep1_rx_buf));

switch (ep1_rx_buf[0]) {
case PBIO_PYBRICKS_OUT_EP_MSG_SUBSCRIBE:
pbio_os_timer_set(&keepalive_timer, 1000);
Expand All @@ -928,6 +931,7 @@ static pbio_error_t pbdrv_usb_ev3_process_thread(pbio_os_state_t *state, void *c

if (usb_send_response) {
usb_tx_response_is_not_ready = true;
pbdrv_cache_prepare_before_dma(ep1_tx_response_buf, sizeof(ep1_tx_response_buf));
usb_setup_tx_dma_desc(CPPI_DESC_TX_RESPONSE, ep1_tx_response_buf, PBIO_PYBRICKS_USB_MESSAGE_SIZE(sizeof(uint32_t)));
}

Expand All @@ -944,6 +948,7 @@ static pbio_error_t pbdrv_usb_ev3_process_thread(pbio_os_state_t *state, void *c
uint32_t usb_status_sz = PBIO_PYBRICKS_USB_MESSAGE_SIZE(pbsys_status_get_status_report(&ep1_tx_status_buf[1]));

usb_tx_status_is_not_ready = true;
pbdrv_cache_prepare_before_dma(ep1_tx_status_buf, sizeof(ep1_tx_status_buf));
usb_setup_tx_dma_desc(CPPI_DESC_TX_STATUS, ep1_tx_status_buf, usb_status_sz);

prev_status_flags = new_status_flags;
Expand Down Expand Up @@ -1124,6 +1129,7 @@ pbio_error_t pbdrv_usb_stdout_tx(const uint8_t *data, uint32_t *size) {
memcpy(ptr, data, *size);

usb_tx_stdout_is_not_ready = true;
pbdrv_cache_prepare_before_dma(ep1_tx_stdout_buf, sizeof(ep1_tx_stdout_buf));
usb_setup_tx_dma_desc(CPPI_DESC_TX_STDOUT, ep1_tx_stdout_buf, 2 + *size);

return PBIO_SUCCESS;
Expand Down
29 changes: 29 additions & 0 deletions lib/pbio/include/pbdrv/cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2025 The Pybricks Authors

#ifndef _PBDRV_CACHE_H_

#include <stddef.h>

// Gets data ready so that the data that we (the CPU) have written
// can be read by DMA peripherals. This cleans the relevant cache lines
// and flushes the write buffer.
void pbdrv_cache_prepare_before_dma(const void *buf, size_t sz);

// Makes sure that we (the CPU) are able to read data written
// by DMA peripherals. This invalidates the relevant cache lines.
void pbdrv_cache_prepare_after_dma(const void *buf, size_t sz);

// Accesses a variable via the uncached memory alias
#define PBDRV_UNCACHED(x) (*(volatile __typeof__(x) *)((uint32_t)(&(x)) + 0x10000000))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems very specific to EV3, so we should rename things accordingly. Or just put this in the tiam1808 library.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely not in tiam1808, since it's specific to the way we've specifically programmed the MMU. PBDRV_EV3_UNCACHED?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah OK. Then I would expect a macro for the 0x10000000 value then that is shared between this and where we program the MMU.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also wondering about the uint32_t. Would it make more sense to use uintptr_t?


// Gets the address of the uncached memory alias of a variable
#define PBDRV_UNCACHED_ADDR(x) ((__typeof__(x) *)((uint32_t)(&(x)) + 0x10000000))

// Cache line size
#define PBDRV_CACHE_LINE_SZ 32

// Align data to a cache line, which is needed for clean RX DMA
#define PBDRV_DMA_BUF __attribute__((aligned(PBDRV_CACHE_LINE_SZ), section(".dma")))

#endif
Loading