-
Notifications
You must be signed in to change notification settings - Fork 0
/
upgrader.go
247 lines (225 loc) · 8.69 KB
/
upgrader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
package dcosupgrade
import (
"encoding/json"
"fmt"
"strings"
"github.com/Azure/aks-engine/pkg/operations"
"github.com/pkg/errors"
)
// nodeHealthReport is a single element of the "nodes" array in the JSON that
// runUpgrade fetches from http://localhost:1050/system/health/v1/nodes on the
// master and decodes with encoding/json.
type nodeHealthReport struct {
// IP is decoded from the "host_ip" field; upgradeAgentNodes uses it as the
// ssh/scp target for the node.
IP string `json:"host_ip"`
// Role is decoded from the "role" field; upgradeAgentNodes skips entries
// whose role is "master".
Role string `json:"role"`
// Health is decoded from the "health" field. It is not read anywhere in this
// file.
Health int `json:"health"`
}
// healthReport is the top-level shape of the node health JSON document that
// runUpgrade decodes and then hands to upgradeAgentNodes.
type healthReport struct {
// Nodes is decoded from the "nodes" array, one entry per reported node.
Nodes []nodeHealthReport `json:"nodes"`
}
// bootstrapUpgradeScript is the bash script template run on the bootstrap node.
//
// runUpgrade substitutes three placeholders before use: CURR_VERSION,
// NEW_VERSION and NEW_DASHED_VERSION (the new version with "." replaced by "-").
//
// Every shell "$" is written as "\$" on purpose: upgradeBootstrapNode delivers
// the script through an unquoted here-document ("cat << END > ..."), where the
// shell would otherwise expand variables and command substitutions while
// writing the file instead of when the script runs.
//
// What the script does:
//   - If /opt/azure/dcos/upgrade/NEW_VERSION/upgrade_url does not exist, it
//     recreates the upgrade directory, copies config.yaml and ip-detect into
//     genconf, downloads dcos_generate_config.sh, runs it with
//     --generate-node-upgrade-script, kills any running container whose
//     ancestor is nginx, starts a new nginx container serving genconf/serve on
//     host port 8086, and stores the "Node upgrade script URL" taken from the
//     generator log in upgrade_url.
//   - It then reads upgrade_url. If the value is empty it deletes the file and
//     exits with status 1; otherwise it prints the line
//     "Setting up bootstrap node completed. Node upgrade script URL <url>",
//     which upgradeBootstrapNode searches for in the output.
var bootstrapUpgradeScript = `#!/bin/bash
echo "Starting upgrade configuration"
if [ ! -e /opt/azure/dcos/upgrade/NEW_VERSION/upgrade_url ]; then
echo "Setting up bootstrap node"
rm -rf /opt/azure/dcos/upgrade/NEW_VERSION
mkdir -p /opt/azure/dcos/upgrade/NEW_VERSION/genconf
cp /opt/azure/dcos/genconf/config.yaml /opt/azure/dcos/genconf/ip-detect /opt/azure/dcos/upgrade/NEW_VERSION/genconf/
cd /opt/azure/dcos/upgrade/NEW_VERSION/
curl -s -O https://dcos-mirror.azureedge.net/dcos-NEW_DASHED_VERSION/dcos_generate_config.sh
bash dcos_generate_config.sh --generate-node-upgrade-script CURR_VERSION | tee /opt/azure/dcos/upgrade/NEW_VERSION/log
process=\$(docker ps -f ancestor=nginx -q)
if [ ! -z "\$process" ]; then
echo "Stopping nginx service \$process"
docker kill \$process
fi
echo "Starting nginx service \$process"
docker run -d -p 8086:80 -v \$PWD/genconf/serve:/usr/share/nginx/html:ro nginx
docker ps
grep 'Node upgrade script URL' /opt/azure/dcos/upgrade/NEW_VERSION/log | awk -F ': ' '{print \$2}' | cat > /opt/azure/dcos/upgrade/NEW_VERSION/upgrade_url
fi
upgrade_url=\$(cat /opt/azure/dcos/upgrade/NEW_VERSION/upgrade_url)
if [ -z \${upgrade_url} ]; then
rm -f /opt/azure/dcos/upgrade/NEW_VERSION/upgrade_url
echo "Failed to set up bootstrap node. Please try again"
exit 1
else
echo "Setting up bootstrap node completed. Node upgrade script URL \${upgrade_url}"
fi
`
// nodeUpgradeScript is the bash script template run on each master and agent
// node. runUpgrade substitutes NEW_VERSION and UPGRADE_SCRIPT_URL before the
// script is written to node_upgrade.sh.
//
// The script creates /opt/azure/dcos/upgrade/NEW_VERSION, changes into it,
// downloads UPGRADE_SCRIPT_URL with "curl -O" and runs ./dcos_node_upgrade.sh.
// NOTE(review): this assumes the last path segment of the URL is
// dcos_node_upgrade.sh, since "curl -O" names the file after the URL; confirm
// against the URL emitted by the bootstrap node.
var nodeUpgradeScript = `#!/bin/bash
echo "Starting node upgrade"
mkdir -p /opt/azure/dcos/upgrade/NEW_VERSION
cd /opt/azure/dcos/upgrade/NEW_VERSION
curl -O UPGRADE_SCRIPT_URL
bash ./dcos_node_upgrade.sh
`
// runUpgrade upgrades the DC/OS cluster described by uc.ClusterTopology to the
// orchestrator version recorded in its data model.
//
// Steps, in order:
//  1. Write uc.SSHKey to .ssh/id_rsa_cluster on the master reached through
//     port 2200 and restrict it to mode 600, so that master can ssh/scp to
//     the other nodes.
//  2. Render bootstrapUpgradeScript and run it on the bootstrap node
//     (upgradeBootstrapNode), which yields the node upgrade script URL.
//  3. Render nodeUpgradeScript with that URL and upgrade the masters
//     (upgradeMasterNodes).
//  4. Fetch the node health report from the master and upgrade every
//     non-master node it lists (upgradeAgentNodes).
//
// It returns an error if the bootstrap profile is not configured, if any
// remote command fails, or if the health report is not valid JSON. The first
// failure aborts the upgrade.
func (uc *UpgradeCluster) runUpgrade() error {
	if uc.ClusterTopology.DataModel.Properties.OrchestratorProfile.DcosConfig == nil ||
		uc.ClusterTopology.DataModel.Properties.OrchestratorProfile.DcosConfig.BootstrapProfile == nil {
		return errors.New("BootstrapProfile is not set")
	}

	cs := uc.ClusterTopology.DataModel
	newVersion := cs.Properties.OrchestratorProfile.OrchestratorVersion
	dashedVersion := strings.Replace(newVersion, ".", "-", -1)
	masterDNS := cs.GetAzureProdFQDN()
	masterCount := cs.Properties.MasterProfile.Count
	bootstrapIP := cs.Properties.OrchestratorProfile.DcosConfig.BootstrapProfile.StaticIP
	uc.Logger.Infof("masterDNS:%s masterCount:%d bootstrapIP:%s", masterDNS, masterCount, bootstrapIP)

	// runOnMaster executes cmd over SSH on port 2200 of masterDNS and logs the
	// remote output when the command fails.
	runOnMaster := func(cmd string) (string, error) {
		out, err := operations.RemoteRun("azureuser", masterDNS, 2200, uc.SSHKey, cmd)
		if err != nil {
			// The output is arbitrary remote text: log it as an argument, never
			// as the format string, so a stray '%' cannot garble the message.
			uc.Logger.Errorf("%s", out)
		}
		return out, err
	}

	// copy SSH key to master
	uc.Logger.Infof("Copy SSH key to master")
	// NOTE(review): the key file is created before its mode is tightened and
	// is not removed by any code visible in this file.
	if _, err := runOnMaster(fmt.Sprintf("cat << END > .ssh/id_rsa_cluster\n%s\nEND\n", string(uc.SSHKey))); err != nil {
		return err
	}
	// set SSH key permissions
	if _, err := runOnMaster("chmod 600 .ssh/id_rsa_cluster"); err != nil {
		return err
	}

	// upgrade bootstrap node
	bootstrapScript := strings.Replace(bootstrapUpgradeScript, "CURR_VERSION", uc.CurrentDcosVersion, -1)
	bootstrapScript = strings.Replace(bootstrapScript, "NEW_VERSION", newVersion, -1)
	bootstrapScript = strings.Replace(bootstrapScript, "NEW_DASHED_VERSION", dashedVersion, -1)

	upgradeScriptURL, err := uc.upgradeBootstrapNode(masterDNS, bootstrapIP, bootstrapScript)
	if err != nil {
		return err
	}

	nodeScript := strings.Replace(nodeUpgradeScript, "NEW_VERSION", newVersion, -1)
	nodeScript = strings.Replace(nodeScript, "UPGRADE_SCRIPT_URL", upgradeScriptURL, -1)

	// upgrade master nodes
	if err = uc.upgradeMasterNodes(masterDNS, masterCount, nodeScript); err != nil {
		return err
	}

	// get the node list
	out, err := runOnMaster("curl -s http://localhost:1050/system/health/v1/nodes")
	if err != nil {
		return err
	}
	uc.Logger.Info(out)

	nodes := &healthReport{}
	if err = json.Unmarshal([]byte(out), nodes); err != nil {
		return err
	}

	// upgrade agent nodes
	return uc.upgradeAgentNodes(masterDNS, nodes)
}
// upgradeBootstrapNode runs bootstrapScript on the bootstrap node and returns
// the node upgrade script URL that the script reports.
//
// All commands are executed on the master reachable at masterDNS port 2200.
// The script is written there as bootstrap_upgrade.sh through an unquoted
// here-document (so any "$" the script needs must already be escaped), made
// executable, copied to bootstrapIP with scp and run there with sudo over ssh,
// both using the .ssh/id_rsa_cluster key on the master.
//
// The URL is taken from the first output line that starts with
// "Setting up bootstrap node completed. Node upgrade script URL". An error is
// returned if any remote command fails or if no such line carries a URL.
func (uc *UpgradeCluster) upgradeBootstrapNode(masterDNS, bootstrapIP, bootstrapScript string) (string, error) {
	// runOnMaster executes cmd on the master and logs the remote output when
	// the command fails.
	runOnMaster := func(cmd string) (string, error) {
		out, err := operations.RemoteRun("azureuser", masterDNS, 2200, uc.SSHKey, cmd)
		if err != nil {
			// Log the output as an argument, not as the format string, so a
			// '%' in the remote text is printed verbatim.
			uc.Logger.Errorf("%s", out)
		}
		return out, err
	}

	// copy bootstrap script to master
	uc.Logger.Infof("Copy bootstrap script to master")
	if _, err := runOnMaster(fmt.Sprintf("cat << END > bootstrap_upgrade.sh\n%s\nEND\n", bootstrapScript)); err != nil {
		return "", err
	}
	// set script permissions
	if _, err := runOnMaster("chmod 755 ./bootstrap_upgrade.sh"); err != nil {
		return "", err
	}

	// copy bootstrap script to the bootstrap node
	uc.Logger.Infof("Copy bootstrap script to the bootstrap node")
	if _, err := runOnMaster(fmt.Sprintf("scp -i .ssh/id_rsa_cluster -o ConnectTimeout=30 -o StrictHostKeyChecking=no bootstrap_upgrade.sh %s:", bootstrapIP)); err != nil {
		return "", err
	}

	// run bootstrap script
	uc.Logger.Infof("Run bootstrap upgrade script")
	out, err := runOnMaster(fmt.Sprintf("ssh -i .ssh/id_rsa_cluster -o ConnectTimeout=30 -o StrictHostKeyChecking=no %s sudo ./bootstrap_upgrade.sh", bootstrapIP))
	if err != nil {
		return "", err
	}
	uc.Logger.Info(out)

	// retrieve upgrade script URL: only the first line carrying the completion
	// marker is considered.
	const prefix = "Setting up bootstrap node completed. Node upgrade script URL"
	var url string
	for _, line := range strings.Split(out, "\n") {
		if strings.HasPrefix(line, prefix) {
			url = strings.TrimSpace(strings.TrimPrefix(line, prefix))
			break
		}
	}
	if len(url) == 0 {
		return "", errors.New("Undefined upgrade script URL")
	}
	return url, nil
}
// upgradeMasterNodes upgrades masterCount master nodes one after another.
//
// Master i (zero-based) is reached over SSH at masterDNS on port 2200+i. For
// each master the function reads the version from
// /opt/mesosphere/etc/dcos-version.json and skips the node if it already
// matches the target orchestrator version. Otherwise it writes nodeScript to
// node_upgrade.sh through an unquoted here-document, makes it executable and
// runs it with sudo.
//
// The first failing remote command aborts the loop and its error is returned;
// masters after it are left untouched.
func (uc *UpgradeCluster) upgradeMasterNodes(masterDNS string, masterCount int, nodeScript string) error {
	// run executes cmd on the master behind the given port and logs the remote
	// output when the command fails.
	run := func(port int, cmd string) (string, error) {
		out, err := operations.RemoteRun("azureuser", masterDNS, port, uc.SSHKey, cmd)
		if err != nil {
			// Log the output as an argument, not as the format string, so a
			// '%' in the remote text is printed verbatim.
			uc.Logger.Errorf("%s", out)
		}
		return out, err
	}

	// The command that writes the script is the same for every master.
	catCmd := fmt.Sprintf("cat << END > node_upgrade.sh\n%s\nEND\n", nodeScript)

	for i := 0; i < masterCount; i++ {
		uc.Logger.Infof("Upgrading master node #%d", i+1)
		port := 2200 + i

		// check current version
		out, err := run(port, "grep version /opt/mesosphere/etc/dcos-version.json | cut -d '\"' -f 4")
		if err != nil {
			return err
		}
		if strings.TrimSpace(out) == uc.ClusterTopology.DataModel.Properties.OrchestratorProfile.OrchestratorVersion {
			uc.Logger.Infof("Master node is up-to-date. Skipping upgrade")
			continue
		}

		// copy script to the node
		if _, err = run(port, catCmd); err != nil {
			return err
		}
		// set script permissions
		if _, err = run(port, "chmod 755 ./node_upgrade.sh"); err != nil {
			return err
		}
		// run the script
		if out, err = run(port, "sudo ./node_upgrade.sh"); err != nil {
			return err
		}
		uc.Logger.Info(out)
	}
	return nil
}
// upgradeAgentNodes upgrades every node in nodes whose role is not "master".
//
// All commands are issued on the master at masterDNS port 2200, which in turn
// reaches each node by ssh/scp using the .ssh/id_rsa_cluster key and the
// node_upgrade.sh file in the login directory on that master. Neither is
// created here; both must already exist.
//
// For each node the function reads the version from
// /opt/mesosphere/etc/dcos-version.json and skips the node if it already
// matches the target orchestrator version. Otherwise it copies node_upgrade.sh
// to the node and runs it with sudo.
//
// The first failing remote command aborts the loop and its error is returned;
// nodes after it are left untouched.
func (uc *UpgradeCluster) upgradeAgentNodes(masterDNS string, nodes *healthReport) error {
	// runOnMaster executes cmd on the master and logs the remote output when
	// the command fails.
	runOnMaster := func(cmd string) (string, error) {
		out, err := operations.RemoteRun("azureuser", masterDNS, 2200, uc.SSHKey, cmd)
		if err != nil {
			// Log the output as an argument, not as the format string, so a
			// '%' in the remote text is printed verbatim.
			uc.Logger.Errorf("%s", out)
		}
		return out, err
	}

	for _, node := range nodes.Nodes {
		if node.Role == "master" {
			continue
		}
		uc.Logger.Infof("Upgrading %s %s", node.Role, node.IP)

		// check current version
		// NOTE(review): the "| cut" part is outside the ssh command, so it is
		// presumably evaluated by the shell on the master rather than on the
		// node; the result is the same either way.
		out, err := runOnMaster(fmt.Sprintf("ssh -i .ssh/id_rsa_cluster -o ConnectTimeout=30 -o StrictHostKeyChecking=no %s grep version /opt/mesosphere/etc/dcos-version.json | cut -d '\"' -f 4", node.IP))
		if err != nil {
			return err
		}
		if strings.TrimSpace(out) == uc.ClusterTopology.DataModel.Properties.OrchestratorProfile.OrchestratorVersion {
			uc.Logger.Infof("Agent node is up-to-date. Skipping upgrade")
			continue
		}

		// copy script to the node
		if _, err = runOnMaster(fmt.Sprintf("scp -i .ssh/id_rsa_cluster -o ConnectTimeout=30 -o StrictHostKeyChecking=no node_upgrade.sh %s:", node.IP)); err != nil {
			return err
		}
		// run the script
		if out, err = runOnMaster(fmt.Sprintf("ssh -i .ssh/id_rsa_cluster -o ConnectTimeout=30 -o StrictHostKeyChecking=no %s sudo ./node_upgrade.sh", node.IP)); err != nil {
			return err
		}
		uc.Logger.Info(out)
	}
	return nil
}