/
matrix.p
348 lines (298 loc) · 9.79 KB
/
matrix.p
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
// \file
/* PRU based 16x32 LED Matrix driver.
*
* Drives up to sixteen 16x32 matrices using the PRU hardware.
*
* Uses sixteen data pins in GPIO0 (one for each data line on each
* matrix) and six control pins in GPIO1 shared between all the matrices.
*
* The ARM writes a 24-bit color 512x16 frame into the shared RAM, sets the
* frame buffer pointer in the command structure and the PRU clocks it out
* to the sixteen matrices. Since there is no PWM in the matrix hardware,
* the PRU will cycle through various brightness levels. After each PWM
* cycle it rechecks the frame buffer pointer, allowing a glitch-free
* transition to a new frame.
*
* To pause the redraw loop, write a NULL to the buffer pointer.
* To shut down the PRU, write 0xFF to the buffer pointer (the exit check
* compares the pointer against 0xFF, so a value of -1 would not match).
*
* HOW WE THINK THIS WORKS IS WRONG: it is not serial on all three rows,
* but instead each R, G and B has its own input, and the two rows that
* are simultaneously being scanned have their own. So there are only 32
* clocks for the input.
*
* This means that we need 6 IOs/panel for data, which is far more than
* we thought.
* With 16 GPIO0 IOs that is only 2 panels, with 4 IOs left over.
*/
// Pins available in GPIO0
#define gpio0_row1_r 2
#define gpio0_row1_g 3
#define gpio0_row1_b 7
#define gpio0_row2_r 5
#define gpio0_row2_b 4
#define gpio0_row2_g 12
#define gpio0_row3_r 13
#define gpio0_row3_g 14
#define gpio0_row3_b 15
#define gpio0_row4_r 20
#define gpio0_row4_g 22
#define gpio0_row4_b 23
#define gpio0_bit12 26
#define gpio0_bit13 27
#define gpio0_bit14 30
#define gpio0_bit15 31
// could move clock and sel into gpio0
// Pins available in GPIO1
#define gpio1_sel0 12
#define gpio1_sel1 13
#define gpio1_sel2 14
#define gpio1_latch 28
#define gpio1_oe 29
#define gpio1_clock 19
/** Generate a bitmask of which pins in GPIO0-3 are used.
*
* \todo wtf "parameter too long": only 128 chars allowed?
*/
#define GPIO0_LED_MASK (0\
|(1<<gpio0_row1_r)\
|(1<<gpio0_row1_g)\
|(1<<gpio0_row1_b)\
|(1<<gpio0_row2_r)\
|(1<<gpio0_row2_b)\
|(1<<gpio0_row2_g)\
|(1<<gpio0_row3_r)\
|(1<<gpio0_row3_g)\
|(1<<gpio0_row3_b)\
|(1<<gpio0_row4_r)\
|(1<<gpio0_row4_g)\
|(1<<gpio0_row4_b)\
)
#define GPIO1_SEL_MASK (0\
|(1<<gpio1_sel0)\
|(1<<gpio1_sel1)\
|(1<<gpio1_sel2)\
)
.origin 0
.entrypoint START
#include "ws281x.hp"
/** Mappings of the GPIO devices */
#define GPIO0 0x44E07000
#define GPIO1 0x4804c000
#define GPIO2 0x481AC000
#define GPIO3 0x481AE000
/** Offsets for the clear and set registers in the devices.
* These are adjacent; can one sbbo instruction be used?
*/
#define GPIO_CLRDATAOUT 0x190
#define GPIO_SETDATAOUT 0x194
/** Register map */
#define data_addr r0
#define gpio0_set r20
#define gpio0_clr r21
#define gpio1_set r2
#define gpio1_clr r3
#define row r4
#define offset r5
#define scan r6
#define pix_ptr r7
#define pixel r8
#define out0_set r9
#define out1_set r10
#define p2 r12
#define bright r13
#define gpio0_led_mask r14
#define gpio1_sel_mask r15
#define pix r16
#define clock_pin r17
#define latch_pin r18
/** Sleep a given number of nanoseconds with 10 ns resolution.
*
* This busy waits for a given number of cycles. Not for use
* with things that must happen on a tight schedule.
*/
.macro SLEEPNS
.mparam ns,inst,lab
MOV p2, (ns/10)-1-inst
lab:
SUB p2, p2, 1
QBNE lab, p2, 0
.endm
#define CLOCK(pin) \
SLEEPNS 50, 1, pin##_on ; \
SBBO clock_pin, gpio1_set, 0, 4; \
SLEEPNS 50, 1, pin##_off ; \
SBBO clock_pin, gpio1_clr, 0, 4; \
#define LATCH \
SLEEPNS 50, 1, latch_on ; \
SBBO latch_pin, gpio1_set, 0, 4; \
SLEEPNS 50, 1, latch_off ; \
SBBO latch_pin, gpio1_clr, 0, 4; \
START:
// Enable OCP master port
// clear the STANDBY_INIT bit in the SYSCFG register,
// otherwise the PRU will not be able to write outside the
// PRU memory space and to the BeagleBon's pins.
LBCO r0, C4, 4, 4
CLR r0, r0, 4
SBCO r0, C4, 4, 4
// Configure the programmable pointer register for PRU0 by setting
// c28_pointer[15:0] field to 0x0120. This will make C28 point to
// 0x00012000 (PRU shared RAM).
MOV r0, 0x00000120
MOV r1, CTPPR_0
ST32 r0, r1
// Configure the programmable pointer register for PRU0 by setting
// c31_pointer[15:0] field to 0x0010. This will make C31 point to
// 0x80001000 (DDR memory).
MOV r0, 0x00100000
MOV r1, CTPPR_1
ST32 r0, r1
// Write a 0x1 into the response field so that they know we have started
MOV r2, #0x1
SBCO r2, CONST_PRUDRAM, 12, 4
// Wait for the start condition from the main program to indicate
// that we have a rendered frame ready to clock out. This also
// handles the exit case if an invalid value is written to the start
// start position.
#define DISPLAY_WIDTH 32
#define DISPLAYS 16 /* Maximum! */
#define ROW_WIDTH (DISPLAYS * DISPLAY_WIDTH)
#define MAX_BRIGHT 16
/*
for bright in 0..MAX_BRIGHT:
for row in 0..7:
set_address row
offset = row * ROW_WIDTH
for pixel in 0..32:
for scan in 0..1:
r_clr = r_set = 0
g_clr = g_set = 0
b_clr = b_set = 0
for display in 0..DISPLAYS:
read rgb from 4*(pixel + display*DISPLAY_WIDTH + offset)
pin = display_pin(display)
if r < bright:
r_clr |= pin
if b < bright:
b_clr |= pin
if g < bright:
g_clr |= pin
# All bitmasks have been built, clock them out
clr_bits r_clr
set_bits r_set
clock
clr_bits g_clr
set_bits g_set
clock
clr_bits b_clr
set_bits b_set
clock
# read the paired row for the next pass
# on the second scan
offset += 8 * DISPLAY_WIDTH
# latch after both scan rows have been output
latch
# Check for new frame buffer now; this is a safe time to change it
*/
MOV bright, #0
MOV gpio0_set, GPIO0 | GPIO_SETDATAOUT
MOV gpio0_clr, GPIO0 | GPIO_CLRDATAOUT
MOV gpio0_led_mask, GPIO0_LED_MASK
MOV gpio1_set, GPIO1 | GPIO_SETDATAOUT
MOV gpio1_clr, GPIO1 | GPIO_CLRDATAOUT
MOV gpio1_sel_mask, GPIO1_SEL_MASK
MOV clock_pin, 0
MOV latch_pin, 0
SET clock_pin, gpio1_clock
SET latch_pin, gpio1_latch
PWM_LOOP:
//CLOCK
//QBA PWM_LOOP
// Load the pointer to the buffer from PRU DRAM into r0 and the
// length (in bytes-bit words) into r1.
// start command into r2
LBCO data_addr, CONST_PRUDRAM, 0, 4
// Wait for a non-zero command
QBEQ PWM_LOOP, data_addr, #0
// Command of 0xFF is the signal to exit
QBEQ EXIT, data_addr, #0xFF
MOV offset, 0
MOV row, 0
ROW_LOOP:
// set address; pins in gpio1
MOV out1_set, 0
QBBC sel0, row, 0
SET out1_set, gpio1_sel0
sel0:
QBBC sel1, row, 1
SET out1_set, gpio1_sel1
sel1:
QBBC sel2, row, 2
SET out1_set, gpio1_sel2
sel2:
// write bits to output
SBBO out1_set, gpio1_set, 0, 4
XOR out1_set, out1_set, gpio1_sel_mask
SBBO out1_set, gpio1_clr, 0, 4
MOV scan, 0
SCAN_LOOP:
ADD pix_ptr, data_addr, offset
ADD pix_ptr, pix_ptr, pixel
MOV pixel, 0
PIXEL_LOOP:
MOV out0_set, 0
// This should be unrolled for every display
// read a pixel worth of data
LBBO pix, pix_ptr, 0*DISPLAY_WIDTH, 4
QBGE disp0_r, pix.b0, bright
SET out0_set, gpio0_row1_r
disp0_r:
QBGE disp0_g, pix.b1, bright
SET out0_set, gpio0_row1_g
disp0_g:
QBGE disp0_b, pix.b2, bright
SET out0_set, gpio0_row1_b
disp0_b:
// All bits are configured;
// the non-set ones will be cleared
SBBO out0_set, gpio0_set, 0, 4
XOR out0_set, out0_set, gpio0_led_mask
SBBO out0_set, gpio0_clr, 0, 4
CLOCK(PIX)
ADD pix_ptr, pix_ptr, 4
ADD pixel, pixel, 1
QBNE PIXEL_LOOP, pixel, DISPLAY_WIDTH
#if 0
// WRONG WRONG WRONG -- the second half is scanned simultaneousl
// The panel scans rows 0 and 8 in the
// same pass. Adjust the pixel pointer
// to the 8th row; pix_ptr is now pointing to
// the end of the row that was scanned
MOV p2, (8-1) * ROW_WIDTH
ADD pix_ptr, pix_ptr, p2
ADD scan, scan, 1
QBNE SCAN_LOOP, scan, 2
#endif
// We have clocked out all of the pixels for
// this row and the one eigth rows later.
// Latch the data
LATCH
ADD row, row, 1
MOV p2, ROW_WIDTH * 4
ADD offset, offset, p2
QBNE ROW_LOOP, row, 8
// We have clocked out all of the panels.
// Celebrate and go back to the PWM loop
// Limit brightness to 0..MAX_BRIGHT
ADD bright, bright, 1
AND bright, bright, (MAX_BRIGHT-1)
QBA PWM_LOOP
EXIT:
#ifdef AM33XX
// Send notification to Host for program completion
MOV R31.b0, PRU0_ARM_INTERRUPT+16
#else
MOV R31.b0, PRU0_ARM_INTERRUPT
#endif
HALT