Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100755 142 lines (122 sloc) 4.192 kB
4c6e0d7 @alk MB-5307: support slow network simulation for cluster_run
alk authored
1 #!/usr/bin/env ruby
2
# Echo the raw arguments this script was started with.
puts "my argv:", ARGV.join(' ')
5
6 require 'socket'
7
# Echoes a command line and then runs it through Kernel#system.
#
# args - passed unchanged to Kernel#system.
#
# Returns nil when the command succeeds.
# Raises RuntimeError naming the command when system reports failure
# (non-zero exit status, or the command could not be run at all).
def sh(*args)
  command = args.join(' ')
  print "# #{command}\n"
  # Flush first so the echo stays ahead of the child's own output when our
  # stdout is a pipe or file rather than a terminal.
  $stdout.flush
  raise "failed: #{command}" unless system(*args)
end
14
# Repeatedly evaluates the given block until it returns a truthy value.
#
# timeout - seconds to keep retrying (default 10.0).
# delay   - seconds to sleep between attempts (default 0.1).
#
# The block is tried once immediately and at least once more after the first
# sleep, even when timeout is zero.
#
# Returns nil as soon as the block yields a truthy value.
# Raises RuntimeError ("timed out") once the deadline has passed without one.
def poll_for_condition(timeout=10.0, delay=0.1, &block)
  # Use the monotonic clock so that wall-clock adjustments cannot stretch or
  # cut short the waiting period.
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout
  return if yield
  loop do
    sleep delay
    return if yield
    break unless Process.clock_gettime(Process::CLOCK_MONOTONIC) < deadline
  end
  raise "timed out"
end
24
# Host preparation, selected by running with exactly "--setup": start the
# host's vde switch, bring up its tap interface on 172.25.0.1/24, then exit.
if ARGV == %w[--setup]
  puts "will set things up for host (assuming 172.25.0/24 subnetwork for 'cluster')",
       "spawning host's vde switch"
  [
    "mkdir -p /tmp/vdesock",
    "vde_switch -s /tmp/vdesock/c_main -t tapMain -d"
  ].each { |command| sh command }

  puts "setting up host's tap interface"
  sh "ifconfig tapMain 172.25.0.1/24 up"

  puts "done."
  exit
end
35
3ee126d @alk MB-5307: made run_in_ns.rb slightly less broken
alk authored
# Print a usage example and stop when run with exactly "--help".
if ARGV == %w[--help]
  puts "Example:",
       "# NS_NODE_NAME='n_11@lh' ./cluster_run -n1 --start-index=11 --dont-rename -i --prepend-extras ./scripts/run_in_ns.rb"
  exit
end
43
4c6e0d7 @alk MB-5307: support slow network simulation for cluster_run
alk authored
# vde may (or may not) be swallowing SIGHUP, and ruby normally interferes
# with SIGHUP as well, so make the signal do some useful work for us: on HUP
# we send TERM to our process group and quit. Sending that SIGTERM is the
# crucial part.
Signal.trap("HUP") do
  # puts "broadcasting TERM"
  # Our own TERM broadcast reaches us too; just exit when it does.
  Signal.trap("TERM") { exit }
  Process.kill("TERM", 0)
  exit
end
53
# Keep one stopped child inside our process group.
#
# The child inherits the SIGHUP handler installed above and is normally the
# process that ends up killing the rest of the group.
#
# Having a stopped member means the whole group is sent SIGHUP once the
# group becomes orphaned, which happens when our main process or its parent
# dies.
sleeper = fork do
  loop { sleep 3600 }
end
Process.kill("STOP", sleeper)
69
# Find the first "-name <value>" pair on the command line; node_name stays
# nil when there is none.
_name_flag, node_name = ARGV.each_cons(2).find { |flag, _value| flag == "-name" }
71
3ee126d @alk MB-5307: made run_in_ns.rb slightly less broken
alk authored
# Fall back to the NS_NODE_NAME environment variable when no -name argument
# supplied a node name.
node_name ||= ENV['NS_NODE_NAME']

# The name has to start with n_<digits>@ or ns_<digits>@. The match is also
# kept for its side effect: the statements that follow read the node number
# and the host part from it.
raise "don't have node -name" unless node_name =~ /\Ans?_([0-9]+)@/
4c6e0d7 @alk MB-5307: support slow network simulation for cluster_run
alk authored
75
# Pull the pieces out of the most recent regexp match: the captured digits
# become the node number, and everything after the matched prefix is the host.
name_match = Regexp.last_match
node_number = Regexp.last_match(1).to_i
node_host = name_match && name_match.post_match
78
3ee126d @alk MB-5307: made run_in_ns.rb slightly less broken
alk authored
# Report what was parsed from the node name.
puts "node_number: #{node_number}", "node_host: #{node_host}"

# Control socket path of this node's own vde switch.
vde_sock = File.join("/tmp/vdesock", "cs_node_#{node_number}")
4c6e0d7 @alk MB-5307: support slow network simulation for cluster_run
alk authored
# Name of the tap interface for this node's vde switch; it is later moved
# into the node's network namespace.
tap_if = format("tapCNode%s", node_number)
3ee126d @alk MB-5307: made run_in_ns.rb slightly less broken
alk authored
# Name of the network namespace this node runs in.
netns = format("cs_node_%s", node_number)
4c6e0d7 @alk MB-5307: support slow network simulation for cluster_run
alk authored
# Address of this node inside the 172.25.0.0/24 network (node N gets .N+2).
ifaddr = "172.25.0.#{node_number + 2}"

puts "Creating vde switch"
sh("vde_switch -s #{vde_sock} -t #{tap_if} &")

puts "Wiring it to host side"
rd, wr = IO.pipe
# wirefilter a) seems to disturb terminal settings, so the real stdout is
# shielded by piping its output through cat, and b) seems to expect "console"
# commands on stdin, which we fake with the read end of a pipe nobody writes
# to. In the spawned shell the read end becomes fd 0 and the write end is
# closed.
wirefilter_cmd = "wirefilter -v #{vde_sock}:/tmp/vdesock/c_main -d 1 -s 30M " \
                 "0<&#{rd.fileno}- #{wr.fileno}<&- 2>&1 | cat &"
sh(wirefilter_cmd)
rd.close
# The write end is kept open on purpose, for extra safety. It could have been
# left open in the child instead, but it is closed there and kept here (and
# so ends up open in erlang and all of its children). That way wirefilter has
# less chance of 'escaping' once we are 'done'.
puts "Creating netns"
3ee126d @alk MB-5307: made run_in_ns.rb slightly less broken
alk authored
# Best effort: take loopback down in the namespace; "|| true" keeps sh from
# raising when the command fails.
sh("ip netns exec #{netns} ifconfig lo down || true")
4c6e0d7 @alk MB-5307: support slow network simulation for cluster_run
alk authored
# Recreate the namespace from scratch and bring up loopback inside it.
in_netns = "ip netns exec #{netns}"
sh("ip netns del #{netns}; ip netns add #{netns}")
sh("#{in_netns} ifconfig lo 127.0.0.1/8 up")

puts "Passing tap interface into netns"
sh("ip link set #{tap_if} netns #{netns}")

puts "Setting up tap interface"
sh("#{in_netns} ifconfig #{tap_if} #{ifaddr}/24 up")

puts "cleaning up arp entry for child ifaddr (because of different mac address of new tap interface)"
# Best effort: "|| true" keeps sh from raising when arp -d fails.
sh("arp -d #{ifaddr} || true")
109
# The original host in the node name is not part of the new netns, so every
# argument equal to the node name is swapped for a name built on the node's
# address inside the namespace. All other arguments pass through untouched.
new_args = ARGV.map { |arg| arg == node_name ? "n_#{node_number}@#{ifaddr}" : arg }
119
# Normally erlang would spawn epmd for us, but that epmd daemonizes itself,
# which we do not want: epmd should receive our HUP & TERM signals so that
# nobody is left running in our netns after we're done.
puts "spawning epmd in our pgroup"
# sh "ip netns exec #{netns} erl -noshell -setcookie nocookie -sname init -run init stop 2>&1 > /dev/null"
epmd_cmd = "ip netns exec #{netns} epmd </dev/null >/dev/null 2>&1 &"
sh(epmd_cmd)
126
# Wait until epmd accepts TCP connections on port 4369 of the node's address.
# A refused connection counts as "not up yet"; any other error propagates.
poll_for_condition do
  puts "checking epmd alivedness"
  begin
    probe = TCPSocket.new(ifaddr, 4369)
    probe.close
    true
  rescue Errno::ECONNREFUSED
    false
  end
end
136
puts "exec-ing erlang #{new_args.join(' ')}"
# Push out anything still buffered before this process image is replaced.
[STDOUT, STDERR].each { |stream| stream.flush }
# Replace ourselves with the rewritten command line, run inside the netns.
exec("ip", "netns", "exec", netns, *new_args)
# exec does not return on success.
raise "cannot happen"
Something went wrong with that request. Please try again.