forked from hashicorp/consul
/
serf.go
174 lines (156 loc) · 4.2 KB
/
serf.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
package consul
import (
"github.com/hashicorp/serf/serf"
"net"
"strings"
)
const (
// StatusReap is used to update the status of a node if we
// are handling a EventMemberReap
StatusReap = serf.MemberStatus(-1)
)
// lanEventHandler is used to handle events from the lan Serf cluster
func (s *Server) lanEventHandler() {
for {
select {
case e := <-s.eventChLAN:
switch e.EventType() {
case serf.EventMemberJoin:
fallthrough
case serf.EventMemberLeave:
fallthrough
case serf.EventMemberFailed:
fallthrough
case serf.EventMemberReap:
s.localMemberEvent(e.(serf.MemberEvent))
case serf.EventUser:
s.localEvent(e.(serf.UserEvent))
case serf.EventMemberUpdate: // Ignore
case serf.EventQuery: // Ignore
default:
s.logger.Printf("[WARN] consul: unhandled LAN Serf Event: %#v", e)
}
case <-s.shutdownCh:
return
}
}
}
// wanEventHandler is used to handle events from the wan Serf cluster
func (s *Server) wanEventHandler() {
for {
select {
case e := <-s.eventChWAN:
switch e.EventType() {
case serf.EventMemberJoin:
s.remoteJoin(e.(serf.MemberEvent))
case serf.EventMemberLeave:
fallthrough
case serf.EventMemberFailed:
s.remoteFailed(e.(serf.MemberEvent))
case serf.EventMemberUpdate: // Ignore
case serf.EventMemberReap: // Ignore
case serf.EventUser:
case serf.EventQuery: // Ignore
default:
s.logger.Printf("[WARN] consul: unhandled WAN Serf Event: %#v", e)
}
case <-s.shutdownCh:
return
}
}
}
// localMemberEvent is used to reconcile Serf events with the strongly
// consistent store if we are the current leader
func (s *Server) localMemberEvent(me serf.MemberEvent) {
// Do nothing if we are not the leader
if !s.IsLeader() {
return
}
// Check if this is a reap event
isReap := me.EventType() == serf.EventMemberReap
// Queue the members for reconciliation
for _, m := range me.Members {
// Change the status if this is a reap event
if isReap {
m.Status = StatusReap
}
select {
case s.reconcileCh <- m:
default:
}
}
}
// localEvent is called when we receive an event on the local Serf
func (s *Server) localEvent(event serf.UserEvent) {
// Handle only consul events
if !strings.HasPrefix(event.Name, "consul:") {
return
}
switch event.Name {
case newLeaderEvent:
s.logger.Printf("[INFO] consul: New leader elected: %s", event.Payload)
// Trigger the callback
if s.config.ServerUp != nil {
s.config.ServerUp()
}
default:
s.logger.Printf("[WARN] consul: Unhandled local event: %v", event)
}
}
// remoteJoin is used to handle join events on the wan serf cluster
func (s *Server) remoteJoin(me serf.MemberEvent) {
for _, m := range me.Members {
ok, parts := isConsulServer(m)
if !ok {
s.logger.Printf("[WARN] consul: non-server in WAN pool: %s %s", m.Name)
continue
}
var addr net.Addr = &net.TCPAddr{IP: m.Addr, Port: parts.Port}
s.logger.Printf("[INFO] consul: adding server for datacenter: %s, addr: %s", parts.Datacenter, addr)
// Check if this server is known
found := false
s.remoteLock.Lock()
existing := s.remoteConsuls[parts.Datacenter]
for _, e := range existing {
if e.String() == addr.String() {
found = true
break
}
}
// Add ot the list if not known
if !found {
s.remoteConsuls[parts.Datacenter] = append(existing, addr)
}
s.remoteLock.Unlock()
}
}
// remoteFailed is used to handle fail events on the wan serf cluster
func (s *Server) remoteFailed(me serf.MemberEvent) {
for _, m := range me.Members {
ok, parts := isConsulServer(m)
if !ok {
continue
}
var addr net.Addr = &net.TCPAddr{IP: m.Addr, Port: parts.Port}
s.logger.Printf("[INFO] consul: removing server for datacenter: %s, addr: %s", parts.Datacenter, addr)
// Remove the server if known
s.remoteLock.Lock()
existing := s.remoteConsuls[parts.Datacenter]
n := len(existing)
for i := 0; i < n; i++ {
if existing[i].String() == addr.String() {
existing[i], existing[n-1] = existing[n-1], nil
existing = existing[:n-1]
n--
break
}
}
// Trim the list if all known consuls are dead
if n == 0 {
delete(s.remoteConsuls, parts.Datacenter)
} else {
s.remoteConsuls[parts.Datacenter] = existing
}
s.remoteLock.Unlock()
}
}