Skip to content

Commit b53ede6

Browse files
pwithnall authored and poettering committed
nspawn: Add support for sysroot pivoting (#5258)
Add a new --pivot-root argument to systemd-nspawn, which specifies a directory to pivot to / inside the container; while the original / is pivoted to another specified directory (if provided). This adds support for booting container images which may contain several bootable sysroots, as is common with OSTree disk images. When these disk images are booted on real hardware, ostree-prepare-root is run in conjunction with sysroot.mount in the initramfs to achieve the same results.
1 parent 6a909d4 commit b53ede6

File tree

8 files changed

+211
-1
lines changed

8 files changed

+211
-1
lines changed

man/systemd-nspawn.xml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,21 @@
335335
an absolute path in the container's file system namespace.</para></listitem>
336336
</varlistentry>
337337

338+
<varlistentry>
339+
<term><option>--pivot-root=</option></term>
340+
341+
<listitem><para>Pivot the specified directory to <filename>/</filename> inside the container, and either unmount the
342+
container's old root, or pivot it to another specified directory. Takes one of: a path argument — in which case the
343+
specified path will be pivoted to <filename>/</filename> and the old root will be unmounted; or a colon-separated pair
344+
of new root path and pivot destination for the old root. The new root path will be pivoted to <filename>/</filename>,
345+
and the old <filename>/</filename> will be pivoted to the other directory. Both paths must be absolute, and are resolved
346+
in the container's file system namespace.</para>
347+
348+
<para>This is for containers which have several bootable directories in them; for example, several
349+
<ulink url="https://ostree.readthedocs.io/en/latest/">OSTree</ulink> deployments. It emulates the behavior of the boot
350+
loader and initial RAM disk which normally select which directory to mount as root and start the container's PID 1 in.</para></listitem>
351+
</varlistentry>
352+
338353
<varlistentry>
339354
<term><option>-u</option></term>
340355
<term><option>--user=</option></term>
@@ -1082,6 +1097,12 @@
10821097
<programlisting># chcon system_u:object_r:svirt_sandbox_file_t:s0:c0,c1 -R /srv/container
10831098
# systemd-nspawn -L system_u:object_r:svirt_sandbox_file_t:s0:c0,c1 -Z system_u:system_r:svirt_lxc_net_t:s0:c0,c1 -D /srv/container /bin/sh</programlisting>
10841099
</example>
1100+
1101+
<example>
1102+
<title>Run a container with an OSTree deployment</title>
1103+
1104+
<programlisting># systemd-nspawn -b -i ~/image.raw --pivot-root=/ostree/deploy/$OS/deploy/$CHECKSUM:/sysroot --bind=+/sysroot/ostree/deploy/$OS/var:/var</programlisting>
1105+
</example>
10851106
</refsect1>
10861107

10871108
<refsect1>

man/systemd.nspawn.xml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,15 @@
201201
switch.</para></listitem>
202202
</varlistentry>
203203

204+
<varlistentry>
205+
<term><varname>PivotRoot=</varname></term>
206+
207+
<listitem><para>Selects a directory to pivot to <filename>/</filename> inside the container when starting up.
208+
Takes a single path, or a pair of two paths separated by a colon. Both paths must be absolute, and are resolved
209+
in the container's file system namespace. This corresponds to the <option>--pivot-root=</option> command line
210+
switch.</para></listitem>
211+
</varlistentry>
212+
204213
<varlistentry>
205214
<term><varname>Capability=</varname></term>
206215
<term><varname>DropCapability=</varname></term>

src/nspawn/nspawn-gperf.gperf

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ Exec.KillSignal, config_parse_signal, 0, offsetof(Settings,
2626
Exec.Personality, config_parse_personality, 0, offsetof(Settings, personality)
2727
Exec.MachineID, config_parse_id128, 0, offsetof(Settings, machine_id)
2828
Exec.WorkingDirectory, config_parse_path, 0, offsetof(Settings, working_directory)
29+
Exec.PivotRoot, config_parse_pivot_root, 0, 0
2930
Exec.PrivateUsers, config_parse_private_users, 0, 0
3031
Exec.NotifyReady, config_parse_bool, 0, offsetof(Settings, notify_ready)
3132
Files.ReadOnly, config_parse_tristate, 0, offsetof(Settings, read_only)

src/nspawn/nspawn-mount.c

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1349,3 +1349,116 @@ int setup_volatile(
13491349
(void) rmdir(template);
13501350
return r;
13511351
}
1352+
1353+
/* Expects *pivot_root_new and *pivot_root_old to be initialised to allocated memory or NULL. */
1354+
int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s) {
1355+
_cleanup_free_ char *root_new = NULL, *root_old = NULL;
1356+
const char *p = s;
1357+
int r;
1358+
1359+
assert(pivot_root_new);
1360+
assert(pivot_root_old);
1361+
1362+
r = extract_first_word(&p, &root_new, ":", EXTRACT_DONT_COALESCE_SEPARATORS);
1363+
if (r < 0)
1364+
return r;
1365+
if (r == 0)
1366+
return -EINVAL;
1367+
1368+
if (isempty(p))
1369+
root_old = NULL;
1370+
else {
1371+
root_old = strdup(p);
1372+
if (!root_old)
1373+
return -ENOMEM;
1374+
}
1375+
1376+
if (!path_is_absolute(root_new))
1377+
return -EINVAL;
1378+
if (root_old && !path_is_absolute(root_old))
1379+
return -EINVAL;
1380+
1381+
free_and_replace(*pivot_root_new, root_new);
1382+
free_and_replace(*pivot_root_old, root_old);
1383+
1384+
return 0;
1385+
}
1386+
1387+
int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old) {
1388+
_cleanup_free_ char *directory_pivot_root_new = NULL;
1389+
_cleanup_free_ char *pivot_tmp_pivot_root_old = NULL;
1390+
char pivot_tmp[] = "/tmp/nspawn-pivot-XXXXXX";
1391+
bool remove_pivot_tmp = false;
1392+
int r;
1393+
1394+
assert(directory);
1395+
1396+
if (!pivot_root_new)
1397+
return 0;
1398+
1399+
/* Pivot pivot_root_new to / and the existing / to pivot_root_old.
1400+
* If pivot_root_old is NULL, the existing / disappears.
1401+
* This requires a temporary directory, pivot_tmp, which is
1402+
* not a child of either.
1403+
*
1404+
* This is typically used for OSTree-style containers, where
1405+
* the root partition contains several sysroots which could be
1406+
* run. Normally, one would be chosen by the bootloader and
1407+
* pivoted to / by initramfs.
1408+
*
1409+
* For example, for an OSTree deployment, pivot_root_new
1410+
* would be: /ostree/deploy/$os/deploy/$checksum. Note that this
1411+
* code doesn’t do the /var mount which OSTree expects: use
1412+
* --bind +/sysroot/ostree/deploy/$os/var:/var for that.
1413+
*
1414+
* So in the OSTree case, we’ll end up with something like:
1415+
* - directory = /tmp/nspawn-root-123456
1416+
* - pivot_root_new = /ostree/deploy/os/deploy/123abc
1417+
* - pivot_root_old = /sysroot
1418+
* - directory_pivot_root_new =
1419+
* /tmp/nspawn-root-123456/ostree/deploy/os/deploy/123abc
1420+
* - pivot_tmp = /tmp/nspawn-pivot-123456
1421+
* - pivot_tmp_pivot_root_old = /tmp/nspawn-pivot-123456/sysroot
1422+
*
1423+
* Requires all file systems at directory and below to be mounted
1424+
* MS_PRIVATE or MS_SLAVE so they can be moved.
1425+
*/
1426+
directory_pivot_root_new = prefix_root(directory, pivot_root_new);
1427+
1428+
/* Remount directory_pivot_root_new to make it movable. */
1429+
r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory_pivot_root_new, NULL, MS_BIND, NULL);
1430+
if (r < 0)
1431+
goto done;
1432+
1433+
if (pivot_root_old) {
1434+
if (!mkdtemp(pivot_tmp)) {
1435+
r = log_error_errno(errno, "Failed to create temporary directory: %m");
1436+
goto done;
1437+
}
1438+
1439+
remove_pivot_tmp = true;
1440+
pivot_tmp_pivot_root_old = prefix_root(pivot_tmp, pivot_root_old);
1441+
1442+
r = mount_verbose(LOG_ERR, directory_pivot_root_new, pivot_tmp, NULL, MS_MOVE, NULL);
1443+
if (r < 0)
1444+
goto done;
1445+
1446+
r = mount_verbose(LOG_ERR, directory, pivot_tmp_pivot_root_old, NULL, MS_MOVE, NULL);
1447+
if (r < 0)
1448+
goto done;
1449+
1450+
r = mount_verbose(LOG_ERR, pivot_tmp, directory, NULL, MS_MOVE, NULL);
1451+
if (r < 0)
1452+
goto done;
1453+
} else {
1454+
r = mount_verbose(LOG_ERR, directory_pivot_root_new, directory, NULL, MS_MOVE, NULL);
1455+
if (r < 0)
1456+
goto done;
1457+
}
1458+
1459+
done:
1460+
if (remove_pivot_tmp)
1461+
(void) rmdir(pivot_tmp);
1462+
1463+
return r;
1464+
}

src/nspawn/nspawn-mount.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,6 @@ int mount_custom(const char *dest, CustomMount *mounts, unsigned n, bool userns,
7070

7171
int setup_volatile(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
7272
int setup_volatile_state(const char *directory, VolatileMode mode, bool userns, uid_t uid_shift, uid_t uid_range, const char *selinux_apifs_context);
73+
74+
int pivot_root_parse(char **pivot_root_new, char **pivot_root_old, const char *s);
75+
int setup_pivot_root(const char *directory, const char *pivot_root_new, const char *pivot_root_old);

src/nspawn/nspawn-settings.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ Settings* settings_free(Settings *s) {
9090
strv_free(s->parameters);
9191
strv_free(s->environment);
9292
free(s->user);
93+
free(s->pivot_root_new);
94+
free(s->pivot_root_old);
9395
free(s->working_directory);
9496

9597
strv_free(s->network_interfaces);
@@ -237,6 +239,34 @@ int config_parse_id128(
237239
return 0;
238240
}
239241

242+
int config_parse_pivot_root(
243+
const char *unit,
244+
const char *filename,
245+
unsigned line,
246+
const char *section,
247+
unsigned section_line,
248+
const char *lvalue,
249+
int ltype,
250+
const char *rvalue,
251+
void *data,
252+
void *userdata) {
253+
254+
Settings *settings = data;
255+
int r;
256+
257+
assert(filename);
258+
assert(lvalue);
259+
assert(rvalue);
260+
261+
r = pivot_root_parse(&settings->pivot_root_new, &settings->pivot_root_old, rvalue);
262+
if (r < 0) {
263+
log_syntax(unit, LOG_ERR, filename, line, r, "Invalid pivot root mount specification %s: %m", rvalue);
264+
return 0;
265+
}
266+
267+
return 0;
268+
}
269+
240270
int config_parse_bind(
241271
const char *unit,
242272
const char *filename,

src/nspawn/nspawn-settings.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ typedef enum SettingsMask {
5757
SETTING_WORKING_DIRECTORY = 1 << 12,
5858
SETTING_USERNS = 1 << 13,
5959
SETTING_NOTIFY_READY = 1 << 14,
60-
_SETTINGS_MASK_ALL = (1 << 15) -1
60+
SETTING_PIVOT_ROOT = 1 << 15,
61+
_SETTINGS_MASK_ALL = (1 << 16) -1
6162
} SettingsMask;
6263

6364
typedef struct Settings {
@@ -72,6 +73,8 @@ typedef struct Settings {
7273
unsigned long personality;
7374
sd_id128_t machine_id;
7475
char *working_directory;
76+
char *pivot_root_new;
77+
char *pivot_root_old;
7578
UserNamespaceMode userns_mode;
7679
uid_t uid_shift, uid_range;
7780
bool notify_ready;
@@ -109,6 +112,7 @@ int config_parse_capability(const char *unit, const char *filename, unsigned lin
109112
int config_parse_id128(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
110113
int config_parse_expose_port(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
111114
int config_parse_volatile_mode(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
115+
int config_parse_pivot_root(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
112116
int config_parse_bind(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
113117
int config_parse_tmpfs(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);
114118
int config_parse_overlay(const char *unit, const char *filename, unsigned line, const char *section, unsigned section_line, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata);

src/nspawn/nspawn.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,8 @@ typedef enum LinkJournal {
132132
static char *arg_directory = NULL;
133133
static char *arg_template = NULL;
134134
static char *arg_chdir = NULL;
135+
static char *arg_pivot_root_new = NULL;
136+
static char *arg_pivot_root_old = NULL;
135137
static char *arg_user = NULL;
136138
static sd_id128_t arg_uuid = {};
137139
static char *arg_machine = NULL;
@@ -221,6 +223,8 @@ static void help(void) {
221223
" -a --as-pid2 Maintain a stub init as PID1, invoke binary as PID2\n"
222224
" -b --boot Boot up full system (i.e. invoke init)\n"
223225
" --chdir=PATH Set working directory in the container\n"
226+
" --pivot-root=PATH[:PATH]\n"
227+
" Pivot root to given directory in the container\n"
224228
" -u --user=USER Run the command under specified user or uid\n"
225229
" -M --machine=NAME Set the machine name for the container\n"
226230
" --uuid=UUID Set a specific machine UUID for the container\n"
@@ -427,6 +431,7 @@ static int parse_argv(int argc, char *argv[]) {
427431
ARG_KILL_SIGNAL,
428432
ARG_SETTINGS,
429433
ARG_CHDIR,
434+
ARG_PIVOT_ROOT,
430435
ARG_PRIVATE_USERS_CHOWN,
431436
ARG_NOTIFY_READY,
432437
ARG_ROOT_HASH,
@@ -478,6 +483,7 @@ static int parse_argv(int argc, char *argv[]) {
478483
{ "kill-signal", required_argument, NULL, ARG_KILL_SIGNAL },
479484
{ "settings", required_argument, NULL, ARG_SETTINGS },
480485
{ "chdir", required_argument, NULL, ARG_CHDIR },
486+
{ "pivot-root", required_argument, NULL, ARG_PIVOT_ROOT },
481487
{ "notify-ready", required_argument, NULL, ARG_NOTIFY_READY },
482488
{ "root-hash", required_argument, NULL, ARG_ROOT_HASH },
483489
{}
@@ -1012,6 +1018,14 @@ static int parse_argv(int argc, char *argv[]) {
10121018
arg_settings_mask |= SETTING_WORKING_DIRECTORY;
10131019
break;
10141020

1021+
case ARG_PIVOT_ROOT:
1022+
r = pivot_root_parse(&arg_pivot_root_new, &arg_pivot_root_old, optarg);
1023+
if (r < 0)
1024+
return log_error_errno(r, "Failed to parse --pivot-root= argument %s: %m", optarg);
1025+
1026+
arg_settings_mask |= SETTING_PIVOT_ROOT;
1027+
break;
1028+
10151029
case ARG_NOTIFY_READY:
10161030
r = parse_boolean(optarg);
10171031
if (r < 0) {
@@ -2493,6 +2507,13 @@ static int outer_child(
24932507
if (r < 0)
24942508
return r;
24952509

2510+
r = setup_pivot_root(
2511+
directory,
2512+
arg_pivot_root_new,
2513+
arg_pivot_root_old);
2514+
if (r < 0)
2515+
return r;
2516+
24962517
r = setup_volatile(
24972518
directory,
24982519
arg_volatile_mode,
@@ -2915,6 +2936,12 @@ static int load_settings(void) {
29152936
settings->parameters = NULL;
29162937
}
29172938

2939+
if ((arg_settings_mask & SETTING_PIVOT_ROOT) == 0 &&
2940+
settings->pivot_root_new) {
2941+
free_and_replace(arg_pivot_root_new, settings->pivot_root_new);
2942+
free_and_replace(arg_pivot_root_old, settings->pivot_root_old);
2943+
}
2944+
29182945
if ((arg_settings_mask & SETTING_WORKING_DIRECTORY) == 0 &&
29192946
settings->working_directory) {
29202947
free(arg_chdir);
@@ -3915,6 +3942,8 @@ int main(int argc, char *argv[]) {
39153942
free(arg_image);
39163943
free(arg_machine);
39173944
free(arg_user);
3945+
free(arg_pivot_root_new);
3946+
free(arg_pivot_root_old);
39183947
free(arg_chdir);
39193948
strv_free(arg_setenv);
39203949
free(arg_network_bridge);

0 commit comments

Comments
 (0)