diff --git a/fel-to-spl-thunk.S b/fel-to-spl-thunk.S
new file mode 100644
index 0000000000..93350b3759
--- /dev/null
+++ b/fel-to-spl-thunk.S
@@ -0,0 +1,174 @@
+/*
+ * Copyright © 2015 Siarhei Siamashka
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*************************************************************************/
+/* Usage instructions: "ruby -x fel-to-spl-thunk.S > fel-to-spl-thunk.h" */
+/*************************************************************************/
+
+#if 0
+#!/usr/bin/env ruby
+
+def tool_exists(tool_name)
+	`which #{tool_name} > /dev/null 2>&1`
+	return $?.to_i == 0
+end
+
+toolchains = [
+	"arm-none-eabi-",
+	"arm-linux-gnueabihf-",
+	"arm-none-linux-gnueabi-",
+	"armv7a-hardfloat-linux-gnueabi-",
+]
+
+toolchain = toolchains.find { |toolchain| tool_exists("#{toolchain}gcc") }
+abort "Can't find any ARM crosscompiler\n" unless toolchain
+
+system("#{toolchain}gcc -o #{$PROGRAM_NAME}.o -c #{$PROGRAM_NAME}")
+exit($?.to_i) if $?.to_i != 0
+
+`#{toolchain}objdump -d #{$PROGRAM_NAME}.o`.each_line {|l|
+	next unless l =~ /(\h+)\:\s+(\h+)\s+(\S+)\s+([^;]*)/
+	printf("\t0x%s, /* %8s: %-10s %-28s */\n", $2, $1, $3, $4.strip)
+}
+
+__END__
+#endif
+
+/*************************************************************************/
+
+BUF1	.req	r0
+BUF2	.req	r1
+TMP1	.req	r2
+TMP2	.req	r3
+SWAPTBL	.req	r4
+FULLSIZE .req	r5
+BUFSIZE	.req	r6
+CHECKSUM .req	r7
+
+entry_point:
+	b	setup_stack
+
+stack_begin:
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+	nop
+stack_end:
+	nop
+
+	/* A function, which walks the table and swaps all buffers */
+swap_all_buffers:
+	adr	SWAPTBL, swaptbl_start
+swap_next_buffer:
+	ldr	BUF1, [SWAPTBL], #4
+	ldr	BUF2, [SWAPTBL], #4
+	ldr	BUFSIZE, [SWAPTBL], #4
+	cmp	BUFSIZE, #0
+	bxeq	lr
+swap_next_word:
+	ldr	TMP1, [BUF1]
+	ldr	TMP2, [BUF2]
+	subs	BUFSIZE, BUFSIZE, #4
+	str	TMP1, [BUF2], #4
+	str	TMP2, [BUF1], #4
+	bne	swap_next_word
+	b	swap_next_buffer
+
+setup_stack: /* Save the original SP, LR and CPSR to stack */
+	adr	BUF1, stack_end
+	str	sp, [BUF1, #-4]!
+	mov	sp, BUF1
+	mrs	TMP1, cpsr
+	push	{TMP1, lr}
+
+	/* Disable IRQ and FIQ */
+	orr	TMP1, #0xc0
+	msr	cpsr_c, TMP1
+
+	/* Check if the instructions or data cache is enabled */
+	mrc	p15, 0, TMP1, c1, c0, 0
+	movw	TMP2, #((1 << 12) | (1 << 2))
+	tst	TMP1, TMP2
+	bne	cache_is_unsupported
+
+	bl	swap_all_buffers
+
+verify_checksum:
+	movw	CHECKSUM, #0x6c39
+	movt	CHECKSUM, #0x5f0a
+	mov	BUF1, #0
+	ldr	FULLSIZE, [BUF1, #16]
+check_next_word:
+	ldr	TMP1, [BUF1], #4
+	subs	FULLSIZE, FULLSIZE, #4
+	add	CHECKSUM, CHECKSUM, TMP1
+	bne	check_next_word
+
+	mov	BUF1, #0
+	ldr	TMP1, [BUF1, #12]
+	subs	CHECKSUM, CHECKSUM, TMP1, lsl #1
+	bne	checksum_is_bad
+
+	/* Change 'eGON.BT0' -> 'eGON.FEL' */
+	mov	BUF1, #0
+	movw	TMP1, (('F' << 8) + '.')
+	movt	TMP1, (('L' << 8) + 'E')
+	str	TMP1, [BUF1, #8]
+
+	/* Call the SPL code */
+	dsb
+	isb
+	blx	BUF1
+
+	/* Return back to FEL */
+	b	return_to_fel
+
+cache_is_unsupported:
+	/* Bail out if cache is enabled and change 'eGON.BT0' -> 'eGON.???' */
+	mov	BUF1, #0
+	movw	TMP1, (('?' << 8) + '.')
+	movt	TMP1, (('?' << 8) + '?')
+	str	TMP1, [BUF1, #8]
+	/* The buffers have not been swapped yet, so don't swap them back */
+	b	return_to_fel_noswap
+
+checksum_is_bad:
+	/* The checksum test failed, so change 'eGON.BT0' -> 'eGON.BAD' */
+	mov	BUF1, #0
+	movw	TMP1, (('B' << 8) + '.')
+	movt	TMP1, (('D' << 8) + 'A')
+	str	TMP1, [BUF1, #8]
+
+return_to_fel:
+	bl	swap_all_buffers
+return_to_fel_noswap:
+	pop	{TMP1, lr}
+	msr	cpsr_c, TMP1 /* Restore the original CPSR */
+	ldr	sp, [sp]
+	bx	lr
+
+swaptbl_start:
diff --git a/fel-to-spl-thunk.h b/fel-to-spl-thunk.h
new file mode 100644
index 0000000000..08e90e73c7
--- /dev/null
+++ b/fel-to-spl-thunk.h
@@ -0,0 +1,69 @@
+	0xea000015, /*        0: b          5c <setup_stack>             */
+	0xe1a00000, /*        4: nop                                     */
+	0xe1a00000, /*        8: nop                                     */
+	0xe1a00000, /*        c: nop                                     */
+	0xe1a00000, /*       10: nop                                     */
+	0xe1a00000, /*       14: nop                                     */
+	0xe1a00000, /*       18: nop                                     */
+	0xe1a00000, /*       1c: nop                                     */
+	0xe1a00000, /*       20: nop                                     */
+	0xe1a00000, /*       24: nop                                     */
+	0xe28f40e4, /*       28: add        r4, pc, #228                 */
+	0xe4940004, /*       2c: ldr        r0, [r4], #4                 */
+	0xe4941004, /*       30: ldr        r1, [r4], #4                 */
+	0xe4946004, /*       34: ldr        r6, [r4], #4                 */
+	0xe3560000, /*       38: cmp        r6, #0                       */
+	0x012fff1e, /*       3c: bxeq       lr                           */
+	0xe5902000, /*       40: ldr        r2, [r0]                     */
+	0xe5913000, /*       44: ldr        r3, [r1]                     */
+	0xe2566004, /*       48: subs       r6, r6, #4                   */
+	0xe4812004, /*       4c: str        r2, [r1], #4                 */
+	0xe4803004, /*       50: str        r3, [r0], #4                 */
+	0x1afffff9, /*       54: bne        40 <swap_next_word>          */
+	0xeafffff3, /*       58: b          2c <swap_next_buffer>        */
+	0xe24f0040, /*       5c: sub        r0, pc, #64                  */
+	0xe520d004, /*       60: str        sp, [r0, #-4]!               */
+	0xe1a0d000, /*       64: mov        sp, r0                       */
+	0xe10f2000, /*       68: mrs        r2, CPSR                     */
+	0xe92d4004, /*       6c: push       {r2, lr}                     */
+	0xe38220c0, /*       70: orr        r2, r2, #192                 */
+	0xe121f002, /*       74: msr        CPSR_c, r2                   */
+	0xee112f10, /*       78: mrc        15, 0, r2, cr1, cr0, {0}     */
+	0xe3013004, /*       7c: movw       r3, #4100                    */
+	0xe1120003, /*       80: tst        r2, r3                       */
+	0x1a000014, /*       84: bne        dc <cache_is_unsupported>    */
+	0xebffffe6, /*       88: bl         28 <swap_all_buffers>        */
+	0xe3067c39, /*       8c: movw       r7, #27705                   */
+	0xe3457f0a, /*       90: movt       r7, #24330                   */
+	0xe3a00000, /*       94: mov        r0, #0                       */
+	0xe5905010, /*       98: ldr        r5, [r0, #16]                */
+	0xe4902004, /*       9c: ldr        r2, [r0], #4                 */
+	0xe2555004, /*       a0: subs       r5, r5, #4                   */
+	0xe0877002, /*       a4: add        r7, r7, r2                   */
+	0x1afffffb, /*       a8: bne        9c <check_next_word>         */
+	0xe3a00000, /*       ac: mov        r0, #0                       */
+	0xe590200c, /*       b0: ldr        r2, [r0, #12]                */
+	0xe0577082, /*       b4: subs       r7, r7, r2, lsl #1           */
+	0x1a00000c, /*       b8: bne        f0 <checksum_is_bad>         */
+	0xe3a00000, /*       bc: mov        r0, #0                       */
+	0xe304262e, /*       c0: movw       r2, #17966                   */
+	0xe3442c45, /*       c4: movt       r2, #19525                   */
+	0xe5802008, /*       c8: str        r2, [r0, #8]                 */
+	0xf57ff04f, /*       cc: dsb        sy                           */
+	0xf57ff06f, /*       d0: isb        sy                           */
+	0xe12fff30, /*       d4: blx        r0                           */
+	0xea000008, /*       d8: b          100 <return_to_fel>          */
+	0xe3a00000, /*       dc: mov        r0, #0                       */
+	0xe3032f2e, /*       e0: movw       r2, #16174                   */
+	0xe3432f3f, /*       e4: movt       r2, #16191                   */
+	0xe5802008, /*       e8: str        r2, [r0, #8]                 */
+	0xea000004, /*       ec: b          104 <return_to_fel_noswap>   */
+	0xe3a00000, /*       f0: mov        r0, #0                       */
+	0xe304222e, /*       f4: movw       r2, #16942                   */
+	0xe3442441, /*       f8: movt       r2, #17473                   */
+	0xe5802008, /*       fc: str        r2, [r0, #8]                 */
+	0xebffffc8, /*      100: bl         28 <swap_all_buffers>        */
+	0xe8bd4004, /*      104: pop        {r2, lr}                     */
+	0xe121f002, /*      108: msr        CPSR_c, r2                   */
+	0xe59dd000, /*      10c: ldr        sp, [sp]                     */
+	0xe12fff1e, /*      110: bx         lr                           */
diff --git a/fel.c b/fel.c
index 542bf4b2d4..d18a1d8083 100644
--- a/fel.c
+++ b/fel.c
@@ -29,6 +29,7 @@
 #include
 #include
 #include
+#include <unistd.h>
 
 #include "endian_compat.h"
 
@@ -294,6 +295,263 @@ void aw_fel_fill(libusb_device_handle *usb, uint32_t offset, size_t size, unsign
 	aw_fel_write(usb, buf, offset, size);
 }
 
+/*
+ * The 'sram_swap_buffers' structure is used to describe information about
+ * two buffers in SRAM, the content of which needs to be exchanged before
+ * calling the U-Boot SPL code and then exchanged again before returning
+ * control back to the FEL code from the BROM.
+ */
+
+typedef struct {
+	uint32_t buf1; /* BROM buffer */
+	uint32_t buf2; /* backup storage location */
+	uint32_t size; /* buffer size */
+} sram_swap_buffers;
+
+/*
+ * Each SoC variant may have its own list of memory buffers to be exchanged
+ * and the information about the placement of the thunk code, which handles
+ * the transition of execution from the BROM FEL code to the U-Boot SPL and
+ * back.
+ *
+ * Note: the entries in the 'swap_buffers' tables need to be sorted by 'buf1'
+ * addresses. And the 'buf1' addresses are the BROM data buffers, while 'buf2'
+ * addresses are the intended backup locations.
+ */
+typedef struct {
+	uint32_t           soc_id;       /* ID of the SoC */
+	uint32_t           thunk_addr;   /* Address of the thunk code */
+	uint32_t           thunk_size;   /* Maximal size of the thunk code */
+	sram_swap_buffers *swap_buffers;
+} soc_sram_info;
+
+/*
+ * The FEL code from BROM in A10/A13/A20 sets up two stacks for itself. One
+ * at 0x2000 (and growing down) for the IRQ handler. And another one at 0x7000
+ * (and also growing down) for the regular code. In order to use the whole
+ * 32 KiB in the A1/A2 sections of SRAM, we need to temporarily move these
+ * stacks elsewhere. And the addresses above 0x7000 are also a bit suspicious,
+ * so it might be safer to backup the 0x7000-0x8000 area too. On A10/A13/A20
+ * we can use the SRAM section A3 (0x8000) for this purpose.
+ */
+sram_swap_buffers a10_a13_a20_sram_swap_buffers[] = {
+	{ .buf1 = 0x01800, .buf2 = 0x8000, .size = 0x800 },
+	{ .buf1 = 0x05C00, .buf2 = 0x8800, .size = 0x8000 - 0x5C00 },
+	{ 0 }  /* End of the table */
+};
+
+/*
+ * A31 is very similar to A10/A13/A20, except that it has no SRAM at 0x8000.
+ * So we use the SRAM section at 0x44000 instead. This is the memory, which
+ * is normally shared with the OpenRISC core (should we do an extra check to
+ * ensure that this core is powered off and can't interfere?).
+ */
+sram_swap_buffers a31_sram_swap_buffers[] = {
+	{ .buf1 = 0x01800, .buf2 = 0x44000, .size = 0x800 },
+	{ .buf1 = 0x05C00, .buf2 = 0x44800, .size = 0x8000 - 0x5C00 },
+	{ 0 }  /* End of the table */
+};
+
+soc_sram_info soc_sram_info_table[] = {
+	{
+		.soc_id       = 0x1623, /* Allwinner A10 */
+		.thunk_addr   = 0xAE00, .thunk_size = 0x200,
+		.swap_buffers = a10_a13_a20_sram_swap_buffers,
+	},
+	{
+		.soc_id       = 0x1625, /* Allwinner A13 */
+		.thunk_addr   = 0xAE00, .thunk_size = 0x200,
+		.swap_buffers = a10_a13_a20_sram_swap_buffers,
+	},
+	{
+		.soc_id       = 0x1651, /* Allwinner A20 */
+		.thunk_addr   = 0xAE00, .thunk_size = 0x200,
+		.swap_buffers = a10_a13_a20_sram_swap_buffers,
+	},
+	{
+		.soc_id       = 0x1633, /* Allwinner A31 */
+		.thunk_addr   = 0x46E00, .thunk_size = 0x200,
+		.swap_buffers = a31_sram_swap_buffers,
+	},
+	{ 0 } /* End of the table */
+};
+
+/*
+ * This generic record assumes BROM with similar properties to A10/A13/A20/A31,
+ * but no extra SRAM sections beyond 0x8000. It also assumes that the IRQ
+ * handler stack usage never exceeds 0x400 bytes.
+ *
+ * The users may or may not hope that the 0x7000-0x8000 area is also unused
+ * by the BROM and re-purpose it for the SPL stack.
+ *
+ * The size limit for the ".text + .data" sections is ~21 KiB.
+ */
+sram_swap_buffers generic_sram_swap_buffers[] = {
+	{ .buf1 = 0x01C00, .buf2 = 0x5800, .size = 0x400 },
+	{ 0 }  /* End of the table */
+};
+
+soc_sram_info generic_sram_info = {
+	.thunk_addr   = 0x5680, .thunk_size = 0x180,
+	.swap_buffers = generic_sram_swap_buffers,
+};
+
+/*
+ * Query the SoC ID via aw_fel_get_version() and return the matching entry
+ * from 'soc_sram_info_table'. If the SoC ID is not listed there, a warning
+ * is printed and 'generic_sram_info' is returned instead, so the result is
+ * never NULL.
+ */
+soc_sram_info *aw_fel_get_sram_info(libusb_device_handle *usb)
+{
+	int i;
+	struct aw_fel_version buf;
+
+	aw_fel_get_version(usb, &buf);
+
+	for (i = 0; soc_sram_info_table[i].swap_buffers; i++)
+		if (soc_sram_info_table[i].soc_id == buf.soc_id)
+			return &soc_sram_info_table[i];
+
+	printf("Warning: no 'soc_sram_info' data for your SoC (id=%04X)\n",
+	       buf.soc_id);
+	return &generic_sram_info;
+}
+
+static uint32_t fel_to_spl_thunk[] = {
+	#include "fel-to-spl-thunk.h"
+};
+
+/*
+ * Upload an eGON-format SPL image ('buf', 'len' bytes) to SRAM and run it.
+ *
+ * The eGON header (signature, length, checksum) is validated on the host
+ * first. The image is then written to SRAM starting at address 0, except for
+ * the parts overlapping the 'buf1' areas from the swap table, which are
+ * written to the corresponding 'buf2' backup areas instead. Finally the thunk
+ * code with the swap table appended to it is uploaded and executed, and the
+ * header signature is read back to find out how it went.
+ *
+ * Any failure detected here is reported on stderr, followed by exit(1).
+ */
+void aw_fel_write_and_execute_spl(libusb_device_handle *usb,
+				  uint8_t *buf, size_t len)
+{
+	soc_sram_info *sram_info = aw_fel_get_sram_info(usb);
+	sram_swap_buffers *swap_buffers;
+	char header_signature[9] = { 0 };
+	size_t i, thunk_size;
+	uint32_t *thunk_buf;
+	uint32_t spl_checksum, spl_len, spl_len_limit = 0x8000;
+	uint32_t *buf32 = (uint32_t *)buf;
+	uint32_t written = 0;
+
+	if (!sram_info || !sram_info->swap_buffers) {
+		fprintf(stderr, "SPL: Unsupported SoC type\n");
+		exit(1);
+	}
+
+	if (len < 32 || memcmp(buf + 4, "eGON.BT0", 8) != 0) {
+		fprintf(stderr, "SPL: eGON header is not found\n");
+		exit(1);
+	}
+
+	spl_checksum = 2 * le32toh(buf32[3]) - 0x5F0A6C39;
+	spl_len = le32toh(buf32[4]);
+
+	if (spl_len > len || (spl_len % 4) != 0) {
+		fprintf(stderr, "SPL: bad length in the eGON header\n");
+		exit(1);
+	}
+
+	len = spl_len;
+	for (i = 0; i < len / 4; i++)
+		spl_checksum -= le32toh(buf32[i]);
+
+	if (spl_checksum != 0) {
+		fprintf(stderr, "SPL: checksum check failed\n");
+		exit(1);
+	}
+
+	swap_buffers = sram_info->swap_buffers;
+	for (i = 0; swap_buffers[i].size; i++) {
+		if (swap_buffers[i].buf2 < spl_len_limit)
+			spl_len_limit = swap_buffers[i].buf2;
+		if (len > 0 && written < swap_buffers[i].buf1) {
+			uint32_t tmp = swap_buffers[i].buf1 - written;
+			if (tmp > len)
+				tmp = len;
+			aw_fel_write(usb, buf, written, tmp);
+			written += tmp;
+			buf += tmp;
+			len -= tmp;
+		}
+		if (len > 0 && written == swap_buffers[i].buf1) {
+			uint32_t tmp = swap_buffers[i].size;
+			if (tmp > len)
+				tmp = len;
+			aw_fel_write(usb, buf, swap_buffers[i].buf2, tmp);
+			written += tmp;
+			buf += tmp;
+			len -= tmp;
+		}
+	}
+
+	/* Clarify the SPL size limitations, and bail out if they are not met */
+	if (sram_info->thunk_addr < spl_len_limit)
+		spl_len_limit = sram_info->thunk_addr;
+
+	if (spl_len > spl_len_limit) {
+		fprintf(stderr, "SPL: too large (need %d, have %d)\n",
+			(int)spl_len, (int)spl_len_limit);
+		exit(1);
+	}
+
+	/* Write the remaining part of the SPL */
+	if (len > 0)
+		aw_fel_write(usb, buf, written, len);
+
+	/*
+	 * The thunk expects the swap table (including its terminating zero
+	 * entry, hence 'i + 1') to be placed right after its own code.
+	 */
+	thunk_size = sizeof(fel_to_spl_thunk) + (i + 1) * sizeof(*swap_buffers);
+
+	if (thunk_size > sram_info->thunk_size) {
+		fprintf(stderr, "SPL: bad thunk size (need %d, have %d)\n",
+			(int)thunk_size, (int)sram_info->thunk_size);
+		exit(1);
+	}
+
+	thunk_buf = malloc(thunk_size);
+	if (!thunk_buf) {
+		fprintf(stderr, "SPL: failed to allocate the thunk buffer\n");
+		exit(1);
+	}
+	memcpy(thunk_buf, fel_to_spl_thunk, sizeof(fel_to_spl_thunk));
+	memcpy(thunk_buf + sizeof(fel_to_spl_thunk) / sizeof(uint32_t),
+	       swap_buffers, (i + 1) * sizeof(*swap_buffers));
+
+	for (i = 0; i < thunk_size / sizeof(uint32_t); i++)
+		thunk_buf[i] = htole32(thunk_buf[i]);
+
+	aw_fel_write(usb, thunk_buf, sram_info->thunk_addr, thunk_size);
+	aw_fel_execute(usb, sram_info->thunk_addr);
+
+	free(thunk_buf);
+
+	/* TODO: Try to find and fix the bug, which needs this workaround */
+	usleep(250000);
+
+	/* Read back the result and check if everything was fine */
+	aw_fel_read(usb, 4, header_signature, 8);
+	if (strcmp(header_signature, "eGON.FEL") != 0) {
+		fprintf(stderr, "SPL: failure code '%s'\n",
+			header_signature);
+		exit(1);
+	}
+}
+
 static int aw_fel_get_endpoint(libusb_device_handle *usb)
 {
 	struct libusb_device *dev = libusb_get_device(usb);
@@ -352,6 +610,7 @@ int main(int argc, char **argv)
 			" ver[sion] Show BROM version\n"
 			" clear address length Clear memory\n"
 			" fill address length value Fill memory\n"
+			" spl file Load and execute U-Boot SPL\n"
 			, argv[0]
 		);
 	}
@@ -417,6 +676,11 @@ int main(int argc, char **argv)
 		} else if (strcmp(argv[1], "fill") == 0 && argc > 3) {
 			aw_fel_fill(handle, strtoul(argv[2], NULL, 0), strtoul(argv[3], NULL, 0), (unsigned char)strtoul(argv[4], NULL, 0));
 			skip=4;
+		} else if (strcmp(argv[1], "spl") == 0 && argc > 2) {
+			size_t size;
+			uint8_t *buf = load_file(argv[2], &size);
+			aw_fel_write_and_execute_spl(handle, buf, size);
+			skip=2;
 		} else {
 			fprintf(stderr,"Invalid command %s\n", argv[1]);
 			exit(1);