Skip to content
This repository has been archived by the owner on Aug 29, 2018. It is now read-only.

Commit

Permalink
Bug 974268 - Narrow the window where user and quota data can get out …
Browse files Browse the repository at this point in the history
…of sync and set the start time prior to any other collection. Deal with a race condition with the lock files in unix_user.
  • Loading branch information
rmillner committed Jun 17, 2013
1 parent 898487e commit 3b2d095
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 26 deletions.
47 changes: 27 additions & 20 deletions node-util/bin/oo-accept-node
Expand Up @@ -74,6 +74,12 @@ rescue LoadError => e
end

$TC_CHECK=false

# Narrow the window where USERS, TC_DATA and ALL_QUOTAS can reflect
# the system in different states.
$START_TIME=Time.now
$USERS=[]
Etc.passwd { |u| $USERS << u if u.gecos == ENV['GEAR_GECOS'] }
$TC_DATA = %x[/sbin/tc clas show dev eth0].split("\n").grep(/parent/)
$ALL_QUOTAS = %x[/usr/sbin/repquota -a].split("\n")

Expand Down Expand Up @@ -151,7 +157,16 @@ end
# transition during the run of this script.
#
def user_fail(uuid, msg)
user_ents = users.select { |u| (u.name == uuid) or (u.uid == uuid) }

# Did a create/destroy hook get modified for the user?
begin
if File.stat("/var/lock/oo-create.#{uuid}").mtime >= $START_TIME
return
end
rescue Errno::ENOENT
end

user_ents = $USERS.select { |u| (u.name == uuid) or (u.uid == uuid) }

pw_ent = nil
begin
Expand Down Expand Up @@ -179,7 +194,7 @@ def user_fail(uuid, msg)

# Was the home directory modified during the script run?
begin
if File.stat(pw_ent.dir).mtime > $START_TIME
if File.stat(pw_ent.dir).mtime >= $START_TIME
return
end
rescue Errno::ENOENT
Expand Down Expand Up @@ -211,14 +226,6 @@ end

###############################

def users
if not $USERS
$USERS=[]
Etc.passwd { |u| $USERS << u if u.gecos == ENV['GEAR_GECOS'] }
end
$USERS
end

def load_node_conf
verbose("loading node configuration file #{$NODE_CONF_FILE}")
do_fail("FAIL: No configuration file: #{$NODE_CONF_FILE}") unless File.exists? $NODE_CONF_FILE
Expand Down Expand Up @@ -311,11 +318,12 @@ def check_selinux()
# This will likely only detect the effects of a full relabel but it's
# better than nothing. My unscientific tests shows that checking 100 gears
# only added about 1.5 seconds execution time.
Dir.glob(File.join($GEAR_BASE_DIR, "*/app-root"))[0,99].each do |dir|
out = %x[/bin/ls --scontext #{dir}]
Dir.glob(File.join($GEAR_BASE_DIR, "*")).select { |d| File.directory?(d) and not (File.symlink?(d) or (File.stat(d).uid == 0)) }[0,99].each do |dir|
out = %x[/bin/ls -d --scontext #{dir}/app-root 2>/dev/null]
mcs = /.+:.+:.+:.+:c\d+,c\d+? /.match(out)
if mcs.nil?
do_fail("invalid MCS labels on #{dir}. run oo-restorecon to restore OpenShift SELinux categories")
user = File.basename(dir)
user_fail(user, "invalid MCS labels on #{dir}/app-root. run oo-restorecon to restore OpenShift SELinux categories")
break
end
end
Expand Down Expand Up @@ -400,7 +408,7 @@ def check_cgroup_procs
uid,pid = line.split[0,2]

if (min_uid..max_uid).include?(uid)
passwd_lines = users.select { |u| u.uid == uid }
passwd_lines = $USERS.select { |u| u.uid == uid }

if paswd_lines.empty?
user_fail(uid, "Process #{pid} is owned by a gear that's no longer on the system, uid: #{uid}")
Expand Down Expand Up @@ -490,9 +498,9 @@ def check_quotas
end

def check_users
verbose("checking #{users.length} user accounts")
verbose("checking #{$USERS.length} user accounts")
conf_dir_templ = "#{$LIMITS_CONF_DIR}/84-%s.conf"
users.each do |user|
$USERS.each do |user|
#Test home dir
user_fail(user.name, "user #{user.name} does not have a home directory #{user.dir}") unless File.exists? user.dir
user_fail(user.name, "user #{user.name} does not have a PAM limits file") unless File.exists?(conf_dir_templ % user.name)
Expand Down Expand Up @@ -575,7 +583,7 @@ def check_system_httpd_configs

httpconfs['nodes.txt'].delete_if { |k,v| k.split('/')[0] == '__default__' }
mangled_gears=[]
users.each do |u|
$USERS.each do |u|
dnsfile = File.join($GEAR_BASE_DIR, u.name, '.env', 'OPENSHIFT_GEAR_DNS')
if not File.exists?(dnsfile)
user_fail(u.name, "Gear does not have an OPENSHIFT_GEAR_DNS variable: '#{u.name}'")
Expand Down Expand Up @@ -687,15 +695,15 @@ def check_system_httpd_configs
Dir.foreach(ENV['OPENSHIFT_HTTP_CONF_DIR']) { |cfile|
next if cfile[0,1] == "."
next if cfile[-5,5] != ".conf"
if users.select { |u| u.name == cfile[0..cfile.index('_')-1]}.empty?
if $USERS.select { |u| u.name == cfile[0..cfile.index('_')-1]}.empty?
do_fail("httpd config file #{cfile} doesn't have an associated user")
end
}
end

def check_app_dirs
verbose("checking application dirs")
copy_all_users = users.dup
copy_all_users = $USERS.dup

Dir.foreach($GEAR_BASE_DIR) do |entry|
next if entry[0,1] == "."
Expand Down Expand Up @@ -754,7 +762,6 @@ end

if __FILE__ == $0
$STATUS=0
$START_TIME=Time.now
load_node_conf
check_node_public_resolution
check_selinux
Expand Down
25 changes: 25 additions & 0 deletions node/jobs/openshift-origin-stale-lockfiles
@@ -0,0 +1,25 @@
#!/bin/bash

#
# Cleanup stale uuid lock files from the unix_user module
#
#
# There is an extremely narrow but unavoidable race condition where
# this script could break the synchronization between two instances of
# unix_user.rb waiting for the same lock.
#

# Find lock files from unix_user.rb that are over a week old. The
# number of files could be well beyond the command line length limit.
find /var/lock -mindepth 1 -maxdepth 1 -type f -mtime +7 \
\( -name 'oo-create.*' -o -name 'oo-modify-ssh-keys.*' \) 2>/dev/null | {
while read lockfile
do
(
flock 200
rm -f "$lockfile"
flock -u 200
) 200>"$lockfile"
done
}

10 changes: 4 additions & 6 deletions node/lib/openshift-origin-node/model/unix_user.rb
Expand Up @@ -101,14 +101,14 @@ def create

# lock to prevent race condition between create and delete of gear
uuid_lock_file = "/var/lock/oo-create.#{@uuid}"
File.open(uuid_lock_file, File::RDWR|File::CREAT, 0o0600) do | uuid_lock |
File.open(uuid_lock_file, File::RDWR|File::CREAT|File::TRUNC, 0o0600) do | uuid_lock |
uuid_lock.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
uuid_lock.flock(File::LOCK_EX)

# Lock to prevent race condition on obtaining a UNIX user uid.
# When running without districts, there is a simple search on the
# passwd file for the next available uid.
File.open("/var/lock/oo-create", File::RDWR|File::CREAT, 0o0600) do | uid_lock |
File.open("/var/lock/oo-create", File::RDWR|File::CREAT|File::TRUNC, 0o0600) do | uid_lock |
uid_lock.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
uid_lock.flock(File::LOCK_EX)

Expand Down Expand Up @@ -150,7 +150,6 @@ def create
initialize_openshift_port_proxy

uuid_lock.flock(File::LOCK_UN)
File.unlink(uuid_lock_file)
end
end

Expand Down Expand Up @@ -180,7 +179,7 @@ def destroy

# Don't try to delete a gear that is being scaled-up|created|deleted
uuid_lock_file = "/var/lock/oo-create.#{@uuid}"
File.open(uuid_lock_file, File::RDWR|File::CREAT, 0o0600) do | lock |
File.open(uuid_lock_file, File::RDWR|File::CREAT|File::TRUNC, 0o0600) do | lock |
lock.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
lock.flock(File::LOCK_EX)

Expand Down Expand Up @@ -240,7 +239,6 @@ def destroy
end

lock.flock(File::LOCK_UN)
File.unlink(uuid_lock_file)
end
end

Expand Down Expand Up @@ -686,7 +684,7 @@ def modify_ssh_keys
keys = Hash.new

$OpenShift_UnixUser_SSH_KEY_MUTEX.synchronize do
File.open("/var/lock/oo-modify-ssh-keys", File::RDWR|File::CREAT, 0o0600) do | lock |
File.open("/var/lock/oo-modify-ssh-keys.#{@uuid}", File::RDWR|File::CREAT|File::TRUNC, 0o0600) do | lock |
lock.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC)
lock.flock(File::LOCK_EX)
begin
Expand Down
3 changes: 3 additions & 0 deletions node/rubygem-openshift-origin-node.spec
Expand Up @@ -198,6 +198,7 @@ ln -s /usr/lib/openshift/node/jobs/openshift-origin-cron-hourly %{buildroot}/etc
ln -s /usr/lib/openshift/node/jobs/openshift-origin-cron-daily %{buildroot}/etc/cron.daily/
ln -s /usr/lib/openshift/node/jobs/openshift-origin-cron-weekly %{buildroot}/etc/cron.weekly/
ln -s /usr/lib/openshift/node/jobs/openshift-origin-cron-monthly %{buildroot}/etc/cron.monthly/
ln -s /usr/lib/openshift/node/jobs/openshift-origin-stale-lockfiles %{buildroot}/etc/cron.daily/
%post
/bin/rm -f /etc/openshift/env/*.rpmnew
Expand Down Expand Up @@ -269,13 +270,15 @@ echo "/usr/bin/oo-trap-user" >> /etc/shells
%attr(0755,-,-) /usr/lib/openshift/node/jobs/openshift-origin-cron-daily
%attr(0755,-,-) /usr/lib/openshift/node/jobs/openshift-origin-cron-weekly
%attr(0755,-,-) /usr/lib/openshift/node/jobs/openshift-origin-cron-monthly
%attr(0755,-,-) /usr/lib/openshift/node/jobs/openshift-origin-stale-lockfiles
%dir /etc/cron.minutely
%config(noreplace) %attr(0644,-,-) /etc/cron.d/1minutely
%attr(0755,-,-) /etc/cron.minutely/openshift-origin-cron-minutely
%attr(0755,-,-) /etc/cron.hourly/openshift-origin-cron-hourly
%attr(0755,-,-) /etc/cron.daily/openshift-origin-cron-daily
%attr(0755,-,-) /etc/cron.weekly/openshift-origin-cron-weekly
%attr(0755,-,-) /etc/cron.monthly/openshift-origin-cron-monthly
%attr(0755,-,-) /etc/cron.daily/openshift-origin-stale-lockfiles
%changelog
* Thu May 30 2013 Adam Miller <admiller@redhat.com> 1.10.1-1
Expand Down

0 comments on commit 3b2d095

Please sign in to comment.