Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Documentation/vm/00-INDEX
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ hwpoison.txt
- explains what hwpoison is
ksm.txt
- how to use the Kernel Samepage Merging feature.
uksm.txt
- Introduction to Ultra KSM
locking
- info on how locking and synchronization is done in the Linux vm code.
map_hugetlb.c
Expand Down
57 changes: 57 additions & 0 deletions Documentation/vm/uksm.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
The Ultra Kernel Samepage Merging feature
----------------------------------------------
/*
* Ultra KSM. Copyright (C) 2011-2012 Nai Xia
*
* This is an improvement upon KSM. Some basic data structures and routines
* are borrowed from ksm.c .
*
* Its new features:
* 1. Full system scan:
* It automatically scans all user processes' anonymous VMAs. Kernel-user
* interaction to submit a memory area to KSM is no longer needed.
*
* 2. Rich area detection:
* It automatically detects rich areas containing abundant duplicated
* pages. Rich areas are given a full scan speed. Poor areas are
* sampled at a reasonable speed with very low CPU consumption.
*
* 3. Ultra Per-page scan speed improvement:
* A new hash algorithm is proposed. As a result, on a machine with
* Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHz DDR2 main memory, it
* can scan memory areas that do not contain duplicated pages at a speed of
* 627MB/sec ~ 2445MB/sec and can merge duplicated areas at a speed of
* 477MB/sec ~ 923MB/sec.
*
* 4. Thrashing area avoidance:
* Thrashing areas (VMAs that have frequent KSM page break-outs) can be
* filtered out. My benchmark shows it's more efficient than KSM's per-page
* hash value based volatile page detection.
*
*
* 5. Misc changes upon KSM:
* * It has a fully x86-optimized memcmp dedicated for 4-byte-aligned page
* comparison. It's much faster than the default C version on x86.
* * rmap_item now has a struct *page member to loosely cache an
* address-->page mapping, which avoids many time-costly
* follow_page() calls.
* * The VMA creation/exit procedures are hooked to let the Ultra KSM know.
* * try_to_merge_two_pages() can now revert a pte if it fails. No
* break_ksm is needed for this case.
*
* 6. Full Zero Page consideration (contributed by Figo Zhang)
* Now uksmd considers full zero pages as special pages and merges them into a
* special unswappable uksm zero page.
*/

ChangeLog:

2012-05-05 The creation of this Doc
2012-05-08 UKSM 0.1.1.1 libc crash bug fix, api clean up, doc clean up.
2012-05-28 UKSM 0.1.1.2 bug fix release
2012-06-26 UKSM 0.1.2-beta1 first beta release for 0.1.2
2012-07-02 UKSM 0.1.2-beta2
2012-07-10 UKSM 0.1.2-beta3
2012-07-26 UKSM 0.1.2 Fine grained speed control, more scan optimization.
2012-10-13 UKSM 0.1.2.1 Bug fixes.
2012-12-31 UKSM 0.1.2.2 Minor bug fixes
31 changes: 23 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -243,10 +243,15 @@ CONFIG_SHELL := $(shell if [ -x "$$BASH" ]; then echo $$BASH; \
else if [ -x /bin/bash ]; then echo /bin/bash; \
else echo sh; fi ; fi)

HOSTCC = gcc
HOSTCXX = g++
HOSTCFLAGS = -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer
HOSTCXXFLAGS = -O2
HOSTCC = $(CCACHE) gcc
HOSTCXX = $(CCACHE) g++
# Select the optimisation level for host tools: -O3 when
# CONFIG_CC_OPTIMIZE_O3 is set, -O2 otherwise. The two branches differ only
# in the -O level; all warning and codegen flags are identical.
# The guard was previously misspelled "CCONFIG_CC_OPTIMIZE_O3", a name that
# is never defined, so the -O3 branch could not be selected.
# NOTE(review): the test only takes effect if the kernel configuration has
# already been read at this point in the Makefile -- confirm include order.
ifdef CONFIG_CC_OPTIMIZE_O3
HOSTCFLAGS = -Wall -W -Wmissing-prototypes -Wno-sign-compare -Wstrict-prototypes -Wno-unused-parameter -Wno-missing-field-initializers -O3 -fno-delete-null-pointer-checks
HOSTCXXFLAGS = -O3 -Wall -W -fno-delete-null-pointer-checks
else
HOSTCFLAGS = -Wall -W -Wmissing-prototypes -Wno-sign-compare -Wstrict-prototypes -Wno-unused-parameter -Wno-missing-field-initializers -O2 -fno-delete-null-pointer-checks
HOSTCXXFLAGS = -O2 -Wall -W -fno-delete-null-pointer-checks
endif

# Decide whether to build built-in, modular, or both.
# Normally, just do built-in.
Expand Down Expand Up @@ -373,7 +378,13 @@ KBUILD_CFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
-funswitch-loops -fpredictive-commoning -fgcse-after-reload \
-fno-delete-null-pointer-checks
KBUILD_AFLAGS_KERNEL :=
KBUILD_CFLAGS_KERNEL :=

# Extra flags for built-in kernel objects: -O3 when CONFIG_CC_OPTIMIZE_O3 is
# set, -O2 otherwise. Both branches pass the same Cortex-A9 / ARMv7-A / NEON
# tuning and -ftree-vectorize; only the -O level differs.
# The guard was previously misspelled "CCONFIG_CC_OPTIMIZE_O3", a name that
# is never defined, so the -O3 branch could not be selected.
# NOTE(review): the test only takes effect if the kernel configuration has
# already been read at this point in the Makefile -- confirm include order.
ifdef CONFIG_CC_OPTIMIZE_O3
KBUILD_CFLAGS_KERNEL := -O3 -mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize
else
KBUILD_CFLAGS_KERNEL := -O2 -mtune=cortex-a9 -march=armv7-a -mfpu=neon -ftree-vectorize
endif

KBUILD_AFLAGS := -D__ASSEMBLY__
KBUILD_AFLAGS_MODULE := -DMODULE
KBUILD_CFLAGS_MODULE := -DMODULE
Expand Down Expand Up @@ -562,9 +573,13 @@ endif # $(dot-config)
all: vmlinux

ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
KBUILD_CFLAGS += -Os
else
KBUILD_CFLAGS += -O2
KBUILD_CFLAGS += -Os
endif
ifdef CONFIG_CC_OPTIMIZE_DEFAULT
KBUILD_CFLAGS += -O2
endif
ifdef CONFIG_CC_OPTIMIZE_O3
KBUILD_CFLAGS += -O3
endif

include $(srctree)/arch/$(SRCARCH)/Makefile
Expand Down
27 changes: 27 additions & 0 deletions arch/arm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ config ARM
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZO
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_XZ
select HAVE_IRQ_WORK
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
Expand Down Expand Up @@ -1380,6 +1381,31 @@ config SMP_ON_UP

If you don't know what to do here, say Y.

config ARM_CPU_TOPOLOGY
bool "Support cpu topology definition"
depends on SMP && CPU_V7
default y
help
Support ARM cpu topology definition. The MPIDR register defines
affinity between processors which is then used to describe the cpu
topology of an ARM System.

config SCHED_MC
bool "Multi-core scheduler support"
depends on ARM_CPU_TOPOLOGY
help
Multi-core scheduler support improves the CPU scheduler's decision
making when dealing with multi-core CPU chips at a cost of slightly
increased overhead in some places. If unsure say N here.

config SCHED_SMT
bool "SMT scheduler support"
depends on ARM_CPU_TOPOLOGY
help
Improves the CPU scheduler's decision making when dealing with
MultiThreading at a cost of slightly increased overhead in some
places. If unsure say N here.

config HAVE_ARM_SCU
bool
depends on SMP
Expand Down Expand Up @@ -2086,3 +2112,4 @@ source "security/Kconfig"
source "crypto/Kconfig"

source "lib/Kconfig"

34 changes: 24 additions & 10 deletions arch/arm/boot/compressed/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@
# create a compressed vmlinuz image from the original vmlinux
#

plus_sec := $(call as-instr,.arch_extension sec,+sec)

OBJS =
plus_sec := $(call as-instr,.arch_extension sec,+sec)

# Ensure that mmcif loader code appears early in the image
# to minimise the number of blocks that have to be read in
Expand All @@ -17,9 +16,16 @@ OBJS += mmcif-sh7372.o
endif
endif

AFLAGS_head.o += -DTEXT_OFFSET=$(TEXT_OFFSET)
HEAD = head.o
OBJS += misc.o decompress.o
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
AFLAGS_head.o += -Wa,-march=armv7-a$(plus_sec)
HEAD = head.o

AFLAGS_misc.o +=-Wa,-march=armv7-a$(plus_sec)
MISC = misc.o

AFLAGS_decompress.o += -Wa,-march=armv7-a$(plus_sec)
DECOMPRESS = decompress.o

FONTC = $(srctree)/drivers/video/console/font_acorn_8x8.c

#
Expand Down Expand Up @@ -84,21 +90,21 @@ SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
suffix_$(CONFIG_KERNEL_GZIP) = gzip
suffix_$(CONFIG_KERNEL_LZO) = lzo
suffix_$(CONFIG_KERNEL_LZMA) = lzma
suffix_$(CONFIG_KERNEL_XZ) = xzkern

targets := vmlinux vmlinux.lds \
piggy.$(suffix_y) piggy.$(suffix_y).o \
font.o font.c head.o misc.o $(OBJS)

# Make sure files are removed during clean
extra-y += piggy.gzip piggy.lzo piggy.lzma lib1funcs.S

extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern lib1funcs.S ashldi3.S
ifeq ($(CONFIG_FUNCTION_TRACER),y)
ORIG_CFLAGS := $(KBUILD_CFLAGS)
KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS))
endif

ccflags-y := -fpic -fno-builtin
asflags-y := -Wa,-march=armv7-a$(plus_sec)
asflags-y := -Wa,-march=armv7-a

# Supply ZRELADDR to the decompressor via a linker symbol.
ifneq ($(CONFIG_AUTO_ZRELADDR),y)
Expand All @@ -117,11 +123,18 @@ LDFLAGS_vmlinux += -X
LDFLAGS_vmlinux += -T

# For __aeabi_uidivmod
AFLAGS_lib1funcs.o +=-Wa,-march=armv7-a$(plus_sec)
lib1funcs = $(obj)/lib1funcs.o

$(obj)/lib1funcs.S: $(srctree)/arch/$(SRCARCH)/lib/lib1funcs.S FORCE
$(call cmd,shipped)

# For __aeabi_llsl
ashldi3 = $(obj)/ashldi3.o

$(obj)/ashldi3.S: $(srctree)/arch/$(SRCARCH)/lib/ashldi3.S FORCE
$(call cmd,shipped)

# We need to prevent any GOTOFF relocs being used with references
# to symbols in the .bss section since we cannot relocate them
# independently from the rest at run time. This can be achieved by
Expand All @@ -135,14 +148,15 @@ bad_syms=$$($(CROSS_COMPILE)nm $@ | sed -n 's/^.\{8\} [bc] \(.*\)/\1/p') && \
( echo "following symbols must have non local/private scope:" >&2; \
echo "$$bad_syms" >&2; rm -f $@; false )

$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/piggy.$(suffix_y).o \
$(addprefix $(obj)/, $(OBJS)) $(lib1funcs) FORCE
$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/$(HEAD) $(obj)/$(MISC) $(obj)/$(DECOMPRESS) $(obj)/piggy.$(suffix_y).o \
$(addprefix $(obj)/, $(OBJS)) $(lib1funcs) $(ashldi3) FORCE
$(call if_changed,ld)
@$(check_for_bad_syms)

$(obj)/piggy.$(suffix_y): $(obj)/../Image FORCE
$(call if_changed,$(suffix_y))

AFLAGS_piggy.$(suffix_y).o += -Wa,-march=armv7-a$(plus_sec)
$(obj)/piggy.$(suffix_y).o: $(obj)/piggy.$(suffix_y) FORCE

CFLAGS_font.o := -Dstatic=
Expand Down
53 changes: 53 additions & 0 deletions arch/arm/boot/compressed/ashldi3.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/* Copyright 1995, 1996, 1998, 1999, 2000, 2003, 2004, 2005
Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file. (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING. If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */


#include <linux/linkage.h>

#ifdef __ARMEB__
#define al r1
#define ah r0
#else
#define al r0
#define ah r1
#endif

/*
 * __ashldi3 / __aeabi_llsl: 64-bit logical shift left.
 *
 * In:    ah:al = 64-bit value (al is the low word, ah the high word; they
 *                map to r0/r1 or r1/r0 depending on __ARMEB__, see the
 *                #defines above)
 *        r2    = shift count
 *                (NOTE(review): presumably 0..63 -- confirm against callers)
 * Out:   ah:al = value shifted left by r2 bits
 * Clobb: r3, ip, condition flags
 *
 * Both entry names label the same code.
 */
ENTRY(__ashldi3)
ENTRY(__aeabi_llsl)

/* r3 = count - 32; flags: MI when count < 32, PL when count >= 32 */
subs r3, r2, #32
/* ip = 32 - count: right-shift amount for the bits that cross from al to ah */
rsb ip, r2, #32
/* count < 32: shift the high word in place */
movmi ah, ah, lsl r2
/* count >= 32: high word comes entirely from the low word, shifted by count - 32 */
movpl ah, al, lsl r3
/* count < 32: OR in the top bits of al that spill over into ah */
ARM( orrmi ah, ah, al, lsr ip )
/*
 * THUMB variant does the same OR in two steps, using r3 as scratch
 * (presumably because the register-shifted operand form is not available
 * in Thumb-2 -- TODO confirm).
 */
THUMB( lsrmi r3, al, ip )
THUMB( orrmi ah, ah, r3 )
/*
 * Shift the low word. NOTE(review): relies on a register-specified LSL by
 * 32 or more producing 0, so al is cleared when count >= 32 -- confirm
 * against the architecture manual.
 */
mov al, al, lsl r2
/* return to caller */
mov pc, lr

ENDPROC(__ashldi3)
ENDPROC(__aeabi_llsl)
4 changes: 4 additions & 0 deletions arch/arm/boot/compressed/decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ extern void error(char *);
#include "../../../../lib/decompress_unlzma.c"
#endif

#ifdef CONFIG_KERNEL_XZ
#include "../../../../lib/decompress_unxz.c"
#endif

int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x))
{
return decompress(input, len, NULL, NULL, output, NULL, error);
Expand Down
7 changes: 7 additions & 0 deletions arch/arm/boot/compressed/piggy.xzkern.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
 * Embeds the file arch/arm/boot/compressed/piggy.xzkern as raw bytes in
 * section .piggydata and exports two symbols that bracket it:
 *   input_data     - first byte of the embedded file
 *   input_data_end - address just past its last byte
 * NOTE(review): "#alloc" presumably marks the section as allocatable
 * (loaded into memory) -- confirm against the assembler documentation.
 */
.section .piggydata,#alloc
.globl input_data
input_data:
/* path is given relative to the directory the assembler is invoked from */
.incbin "arch/arm/boot/compressed/piggy.xzkern"
.globl input_data_end
input_data_end:

Loading