@@ -12,6 +12,19 @@
* Also need to add code to deal with cards endians that are different than
* the native cpu endians. I also need to deal with MSB position in the word.
*
* Modified by Harm Hanemaaijer (fgenfb@yahoo.com 2013):
*
* The previous implementation performed reads from the uncached framebuffer
* even for simple fills; on most platforms this is ill-advised for
* performance reasons unless strictly required by the raster operation.
* In these circumstances, doing 64-bit access on 64-bit systems does not serve
* much purpose except for probing for corner-case bugs and race conditions in
* the hardware's framebuffer bus implementation. For 16bpp, it is better to
* take advantage of write-combining features of the framebuffer and write a
* half-word if required for the left and right edges.
*
* Additionally, on most platforms, integer divides are relatively slow so are
* best avoided, especially in inner loops.
*/
#include <linux/module.h>
#include <linux/string.h>
@@ -275,6 +288,130 @@ bitfill_unaligned_rev(struct fb_info *p, unsigned long __iomem *dst,
}
}

static void
fast_fill16(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
	    unsigned long pat, u32 width_in_bits, u32 height)
{
	/*
	 * Write-only 16bpp span fill: never reads the (uncached)
	 * framebuffer.  Unaligned left/right edges are written as
	 * half-words (and a 32-bit word where possible on 64-bit),
	 * the aligned body as full longs.
	 *
	 * dst_idx is the bit offset of the span start within *dst;
	 * dst_idx and width_in_bits are multiples of 16 (16bpp).
	 * pat holds the fill colour replicated across the long.
	 */
	for (; height--; dst_idx += p->fix.line_length * 8) {
		u32 n;
		unsigned long __iomem *dstp;
		u32 last_bits;

		/* Renormalise the (word pointer, bit offset) pair. */
		dst += dst_idx >> (ffs(BITS_PER_LONG) - 1);
		dst_idx &= (BITS_PER_LONG - 1);
		n = width_in_bits;
		dstp = dst;
#if BITS_PER_LONG == 32
		if (dst_idx) {
			/* dst_idx == 16: head is the upper half-word. */
			fb_writew(pat, (u16 __iomem *)dstp + 1);
			dstp++;
			n -= 16;
			if (n == 0)
				continue;
		} else if (n == 16) {
			/* Single-pixel row, aligned. */
			fb_writew(pat, (u16 __iomem *)dstp);
			continue;
		}
#else /* BITS_PER_LONG == 64 */
		if (dst_idx) {
			/*
			 * Fill up to the next 64-bit boundary — or only to
			 * the end of the span when it ends inside this
			 * word, in which case the row is complete.
			 */
			if (dst_idx == 16) {
				fb_writew(pat, (u16 __iomem *)dstp + 1);
				if (n == 16)
					continue;
				if (n == 32) {
					fb_writew(pat, (u16 __iomem *)dstp + 2);
					continue;
				}
				fb_writel(pat, (u32 __iomem *)dstp + 1);
			} else if (dst_idx == 32) {
				if (n == 16) {
					fb_writew(pat, (u16 __iomem *)dstp + 2);
					continue;
				}
				fb_writel(pat, (u32 __iomem *)dstp + 1);
			} else { /* dst_idx == 48 */
				fb_writew(pat, (u16 __iomem *)dstp + 3);
			}
			/* Account for the head bits actually written. */
			dstp++;
			n -= 64 - dst_idx;
			if (n == 0)
				continue;
		}
#endif
		/* Aligned body, unrolled eight longs per iteration. */
		n /= BITS_PER_LONG;
		while (n >= 8) {
			FB_WRITEL(pat, dstp++);
			FB_WRITEL(pat, dstp++);
			FB_WRITEL(pat, dstp++);
			FB_WRITEL(pat, dstp++);
			FB_WRITEL(pat, dstp++);
			FB_WRITEL(pat, dstp++);
			FB_WRITEL(pat, dstp++);
			FB_WRITEL(pat, dstp++);
			n -= 8;
		}
		while (n--)
			FB_WRITEL(pat, dstp++);
		/* Unaligned tail inside the final long, if any. */
		last_bits = (dst_idx + width_in_bits) % BITS_PER_LONG;
#if BITS_PER_LONG == 32
		if (last_bits)
			fb_writew(pat, (u16 __iomem *)dstp);
#else /* BITS_PER_LONG == 64 */
		if (last_bits & 32) {
			fb_writel(pat, (u32 __iomem *)dstp);
			if (last_bits & 16)
				fb_writew(pat, (u16 __iomem *)dstp + 2);
		} else if (last_bits & 16)
			fb_writew(pat, (u16 __iomem *)dstp);
#endif
	}
}

static void
fast_fill32(struct fb_info *p, unsigned long __iomem *dst, int dst_idx,
	    unsigned long pat, u32 width_in_bits, u32 height)
{
	/*
	 * Write-only 32bpp span fill: never reads the (uncached)
	 * framebuffer.  dst_idx is the bit offset of the span start
	 * within *dst and advances by one scanline per row; both it
	 * and width_in_bits are multiples of 32 (32bpp).
	 */
	for (; height--; dst_idx += p->fix.line_length * 8) {
		unsigned long __iomem *q;
		u32 count;

		/* Renormalise the (word pointer, bit offset) pair. */
		dst += dst_idx >> (ffs(BITS_PER_LONG) - 1);
		dst_idx &= (BITS_PER_LONG - 1);
		q = dst;
		count = width_in_bits;
#if BITS_PER_LONG == 64
		if (dst_idx) {
			/* Head: span starts in the upper 32-bit half. */
			fb_writel(pat, (u32 __iomem *)q + 1);
			q++;
			count -= 32;
			if (count == 0)
				continue;
		} else if (count == 32) {
			/* Single aligned pixel on this row. */
			fb_writel(pat, q);
			continue;
		}
#endif
		/* Aligned body, eight longs per unrolled iteration. */
		count /= BITS_PER_LONG;
		for (; count >= 8; count -= 8) {
			FB_WRITEL(pat, q++);
			FB_WRITEL(pat, q++);
			FB_WRITEL(pat, q++);
			FB_WRITEL(pat, q++);
			FB_WRITEL(pat, q++);
			FB_WRITEL(pat, q++);
			FB_WRITEL(pat, q++);
			FB_WRITEL(pat, q++);
		}
		while (count--)
			FB_WRITEL(pat, q++);
#if BITS_PER_LONG == 64
		/* Tail: span ends in the lower half of the last word. */
		if ((dst_idx + width_in_bits) % 64)
			fb_writel(pat, (u32 __iomem *)q);
#endif
	}
}

void cfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
{
unsigned long pat, pat2, fg;
@@ -299,9 +436,25 @@ void cfb_fillrect(struct fb_info *p, const struct fb_fillrect *rect)
dst_idx = ((unsigned long)p->screen_base & (bytes - 1))*8;
dst_idx += rect->dy*p->fix.line_length*8+rect->dx*bpp;
/* FIXME For now we support 1-32 bpp only */
left = bits % bpp;
if (p->fbops->fb_sync)
p->fbops->fb_sync(p);
/*
* Note: fb_be_math(p) could be used to check fb endianness, but
* __LITTLE_ENDIAN is used later in the function, so also use it here.
*/
#if !defined(CONFIG_FB_CFB_REV_PIXELS_IN_BYTE) && defined(__LITTLE_ENDIAN)
if (rect->rop == ROP_COPY) {
if (bpp == 16) {
fast_fill16(p, dst, dst_idx, pat, width * 16, height);
return;
}
else if (bpp == 32) {
fast_fill32(p, dst, dst_idx, pat, width * 32, height);
return;
}
}
#endif
left = bits % bpp;
if (!left) {
u32 bswapmask = fb_compute_bswapmask(p);
void (*fill_op32)(struct fb_info *p,
@@ -28,6 +28,11 @@
*
* Also need to add code to deal with cards endians that are different than
* the native cpu endians. I also need to deal with MSB position in the word.
* Modified by Harm Hanemaaijer (fgenfb@yahoo.com) 2013:
* - Provide optimized versions of fast_imageblit for 16 and 32bpp that are
* significantly faster than the previous implementation.
* - Simplify the fast/slow_imageblit selection code, avoiding integer
* divides.
*/
#include <linux/module.h>
#include <linux/string.h>
@@ -262,6 +267,133 @@ static inline void fast_imageblit(const struct fb_image *image, struct fb_info *
}
}

/*
 * Optimized fast_imageblit for bpp == 16: the generic version's
 * ppw = 2 and bit_mask = 3 are folded into the code, and the main
 * loop consumes one full source byte (four pixel pairs) at a time.
 */

static inline void fast_imageblit16(const struct fb_image *image,
				    struct fb_info *p, u8 __iomem * dst1,
				    u32 fgcolor, u32 bgcolor)
{
	/* Two 16bpp pixels are emitted per 32-bit framebuffer write. */
	const u32 *tab = fb_be_math(p) ? cfb_tab16_be : cfb_tab16_le;
	u32 spitch = (image->width + 7) / 8;	/* source bytes per row */
	const char *s = image->data;
	u32 fgx, bgx, eorx;
	int row, pairs;

	/* Replicate the colours into both half-words. */
	fgx = (fgcolor << 16) | fgcolor;
	bgx = (bgcolor << 16) | bgcolor;
	eorx = fgx ^ bgx;

	pairs = image->width / 2;	/* pixel pairs per row */

	for (row = image->height; row--;) {
		u32 __iomem *out = (u32 __iomem *)dst1;
		const char *in = s;
		int remaining = pairs;

		/* Whole source bytes: four pairs each. */
		while (remaining >= 4) {
			u8 bits = *in++;
			int shift;

			for (shift = 6; shift >= 0; shift -= 2)
				FB_WRITEL((tab[(bits >> shift) & 3] & eorx) ^ bgx,
					  out++);
			remaining -= 4;
		}
		/* Partial final byte: one to three pairs left. */
		if (remaining) {
			u8 bits = *in;
			int shift = 6;

			while (remaining--) {
				FB_WRITEL((tab[(bits >> shift) & 3] & eorx) ^ bgx,
					  out++);
				shift -= 2;
			}
		}
		dst1 += p->fix.line_length;
		s += spitch;
	}
}

/*
 * Optimized fast_imageblit for bpp == 32: the generic version's
 * ppw = 1 and bit_mask = 1 are folded into the code, and the main
 * loop consumes one full source byte (eight pixels) at a time.
 */

static inline void fast_imageblit32(const struct fb_image *image,
				    struct fb_info *p, u8 __iomem * dst1,
				    u32 fgcolor, u32 bgcolor)
{
	/* One 32bpp pixel is emitted per framebuffer write. */
	const u32 *tab = cfb_tab32;
	u32 spitch = (image->width + 7) / 8;	/* source bytes per row */
	const char *s = image->data;
	u32 eorx = fgcolor ^ bgcolor;
	u32 bgx = bgcolor;
	int row;

	for (row = image->height; row--;) {
		u32 __iomem *out = (u32 __iomem *)dst1;
		const char *in = s;
		int remaining = image->width;

		/* Whole source bytes: eight pixels each, MSB first. */
		while (remaining >= 8) {
			u8 bits = *in++;
			int shift;

			for (shift = 7; shift >= 0; shift--)
				FB_WRITEL((tab[(bits >> shift) & 1] & eorx) ^ bgx,
					  out++);
			remaining -= 8;
		}
		/* Partial final byte: one to seven pixels left. */
		if (remaining) {
			u8 bits = *in;
			int shift = 7;

			while (remaining--) {
				FB_WRITEL((tab[(bits >> shift) & 1] & eorx) ^ bgx,
					  out++);
				shift--;
			}
		}
		dst1 += p->fix.line_length;
		s += spitch;
	}
}

void cfb_imageblit(struct fb_info *p, const struct fb_image *image)
{
u32 fgcolor, bgcolor, start_index, bitstart, pitch_index = 0;
@@ -294,11 +426,21 @@ void cfb_imageblit(struct fb_info *p, const struct fb_image *image)
bgcolor = image->bg_color;
}

if (32 % bpp == 0 && !start_index && !pitch_index &&
((width & (32/bpp-1)) == 0) &&
bpp >= 8 && bpp <= 32)
fast_imageblit(image, p, dst1, fgcolor, bgcolor);
else
if (!start_index && !pitch_index) {
if (bpp == 32)
fast_imageblit32(image, p, dst1, fgcolor,
bgcolor);
else if (bpp == 16 && (width & 1) == 0)
fast_imageblit16(image, p, dst1, fgcolor,
bgcolor);
else if (bpp == 8 && (width & 3) == 0)
fast_imageblit(image, p, dst1, fgcolor,
bgcolor);
else
slow_imageblit(image, p, dst1, fgcolor,
bgcolor,
start_index, pitch_index);
} else
slow_imageblit(image, p, dst1, fgcolor, bgcolor,
start_index, pitch_index);
} else

This file was deleted.

@@ -1100,6 +1100,7 @@ struct sched_class {

#ifdef CONFIG_SMP
int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
void (*migrate_task_rq)(struct task_struct *p, int next_cpu);

void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
void (*post_schedule) (struct rq *this_rq);
@@ -1233,6 +1234,7 @@ struct sched_entity {
struct cfs_rq *my_q;
#endif
#ifdef CONFIG_SMP
/* Per-entity load-tracking */
struct sched_avg avg;
#endif
};

This file was deleted.

This file was deleted.

@@ -1190,6 +1190,8 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
trace_sched_migrate_task(p, new_cpu);

if (task_cpu(p) != new_cpu) {
if (p->sched_class->migrate_task_rq)
p->sched_class->migrate_task_rq(p, new_cpu);
p->se.nr_migrations++;
perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
}

This file was deleted.

@@ -218,18 +218,18 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_FAIR_GROUP_SCHED
#ifdef CONFIG_SMP
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_avg",
SPLIT_NS(cfs_rq->load_avg));
SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "load_period",
SPLIT_NS(cfs_rq->load_period));
SEQ_printf(m, " .%-30s: %ld\n", "load_contrib",
cfs_rq->load_contribution);
SEQ_printf(m, " .%-30s: %d\n", "load_tg",
atomic_read(&cfs_rq->tg->load_weight));
SEQ_printf(m, " .%-30s: %lld\n", "runnable_load_avg",
cfs_rq->runnable_load_avg);
SEQ_printf(m, " .%-30s: %lld\n", "blocked_load_avg",
cfs_rq->blocked_load_avg);
SEQ_printf(m, " .%-30s: %ld\n", "tg_load_avg",
atomic64_read(&cfs_rq->tg->load_avg));
SEQ_printf(m, " .%-30s: %lld\n", "tg_load_contrib",
cfs_rq->tg_load_contrib);
SEQ_printf(m, " .%-30s: %d\n", "tg_runnable_contrib",
cfs_rq->tg_runnable_contrib);
SEQ_printf(m, " .%-30s: %d\n", "tg->runnable_avg",
atomic_read(&cfs_rq->tg->runnable_avg));
#endif
#ifdef CONFIG_CFS_BANDWIDTH
SEQ_printf(m, " .%-30s: %d\n", "tg->cfs_bandwidth.timer_active",

This file was deleted.

Large diffs are not rendered by default.

This file was deleted.

This file was deleted.

This file was deleted.

@@ -114,6 +114,8 @@ struct task_group {
unsigned long shares;

atomic_t load_weight;
atomic64_t load_avg;
atomic_t runnable_avg;
#endif

#ifdef CONFIG_RT_GROUP_SCHED
@@ -232,9 +234,23 @@ struct cfs_rq {
* the FAIR_GROUP_SCHED case).
*/
u64 runnable_load_avg, blocked_load_avg;
atomic64_t decay_counter;
atomic64_t decay_counter, removed_load;
u64 last_decay;
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
u32 tg_runnable_contrib;
u64 tg_load_contrib;
#endif /* CONFIG_FAIR_GROUP_SCHED */

/*
* h_load = weight * f(tg)
*
* Where f(tg) is the recursive weight fraction assigned to
* this group.
*/
unsigned long h_load;
#endif /* CONFIG_SMP */

#ifdef CONFIG_FAIR_GROUP_SCHED
struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */

@@ -250,34 +266,13 @@ struct cfs_rq {
struct list_head leaf_cfs_rq_list;
struct task_group *tg; /* group that "owns" this runqueue */

#ifdef CONFIG_SMP
/*
* h_load = weight * f(tg)
*
* Where f(tg) is the recursive weight fraction assigned to
* this group.
*/
unsigned long h_load;

/*
* Maintaining per-cpu shares distribution for group scheduling
*
* load_stamp is the last time we updated the load average
* load_last is the last time we updated the load average and saw load
* load_unacc_exec_time is currently unaccounted execution time
*/
u64 load_avg;
u64 load_period;
u64 load_stamp, load_last, load_unacc_exec_time;

unsigned long load_contribution;
#endif /* CONFIG_SMP */
#ifdef CONFIG_CFS_BANDWIDTH
int runtime_enabled;
u64 runtime_expires;
s64 runtime_remaining;

u64 throttled_timestamp;
u64 throttled_clock, throttled_clock_task;
u64 throttled_clock_task_time;
int throttled, throttle_count;
struct list_head throttled_list;
#endif /* CONFIG_CFS_BANDWIDTH */

This file was deleted.

This file was deleted.

This file was deleted.