/
cloudspaceservice.go
368 lines (328 loc) · 11.2 KB
/
cloudspaceservice.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
package service
import (
"context"
"github.com/mangohow/cloud-ide-k8s-controller/pb"
"github.com/mangohow/cloud-ide-k8s-controller/tools/statussync"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog/v2"
"sigs.k8s.io/controller-runtime/pkg/client"
"time"
)
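/*
Kubernetes cannot stop a running Pod, so the only way to stop a Pod is to delete it.
If the Pod needs to run again later it has to be recreated, which raises the following problem:
the Pod's runtime state (the data produced inside the Pod) is not preserved. If the Pod runs Code-Server,
not only is the user's code lost, but the extensions and software the user installed are destroyed together with the Pod.
Therefore, to keep the user's data, a storage volume (PV, PVC; nfs) has to be used.
On the first start after a workspace is created, Code-Server's extensions and configuration data need to be copied to the storage volume,
and the location where Code-Server stores its extensions has to be changed (the default is under /root/.local).
Later starts do not need to copy again (this preserves user data and Code-Server extensions; programs installed by the user still disappear after the workspace restarts).
*/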
// Mode is the run mode of the service. fillPod attaches CPU/memory requests
// and limits to the workspace container only when Mode equals ModeRelease.
var Mode string
// ModeRelease is the Mode value that enables resource requests and limits.
const ModeRelease = "release"
// Canned responses returned by the delete/stop handlers. They are shared
// package-level pointers, so mutating one changes every later response.
var (
// ResponseSuccess reports a successful operation (Status 200).
ResponseSuccess = &pb.Response{Status: 200, Message: "success"}
// ResponseFailed reports a failed operation (Status 400); the handlers in
// this file return it together with a non-nil error.
ResponseFailed = &pb.Response{Status: 400, Message: "failed"}
)
// Zero-value messages used as placeholder return values. In this file
// EmptyWorkspaceRunningInfo and EmptyWorkspaceStatus are returned alongside a
// non-nil error; EmptyResponse and EmptyWorkspaceInfo are not referenced in
// the code shown here.
var (
EmptyWorkspaceRunningInfo = &pb.WorkspaceRunningInfo{}
EmptyResponse = &pb.Response{}
EmptyWorkspaceInfo = &pb.WorkspaceInfo{}
EmptyWorkspaceStatus = &pb.WorkspaceStatus{}
)
// Values placed in pb.WorkspaceStatus.Status by GetPodSpaceStatus.
const (
// PodNotExist (0) is reported when the Pod lookup failed.
PodNotExist int32 = iota
// PodExist (1) is reported when the Pod was fetched successfully.
PodExist
)
// Compile-time check that *CloudSpaceService satisfies pb.CloudIdeServiceServer.
var _ pb.CloudIdeServiceServer = &CloudSpaceService{}
// CloudSpaceService implements the cloud IDE workspace RPCs by creating,
// querying and deleting Pods and PersistentVolumeClaims through a Kubernetes
// client.
type CloudSpaceService struct {
// client performs every Kubernetes API call made by the service.
client client.Client
// statusInformer hands out per-Pod channels; createPod waits on one until
// the Pod is reported ready.
statusInformer *statussync.StatusInformer
}
// NewCloudSpaceService returns a CloudSpaceService that talks to Kubernetes
// through client and learns about Pod readiness through manager.
func NewCloudSpaceService(client client.Client, manager *statussync.StatusInformer) *CloudSpaceService {
	svc := new(CloudSpaceService)
	svc.client = client
	svc.statusInformer = manager
	return svc
}
// CreateSpace creates a cloud IDE workspace for the first time. It creates the
// PersistentVolumeClaim that backs the workspace (named after the Pod) and
// then creates the Pod and waits for it via createPod.
//
// A PVC that already exists is not treated as an error. The method returns
// codes.InvalidArgument when the request carries no resource limit and
// codes.Unknown when the PVC cannot be built or created; Pod failures are
// reported by createPod.
func (s *CloudSpaceService) CreateSpace(ctx context.Context, info *pb.WorkspaceInfo) (*pb.WorkspaceRunningInfo, error) {
	// The storage size is read from the ResourceLimit sub-message; reject a
	// request without it instead of panicking on a nil pointer.
	if info.ResourceLimit == nil {
		klog.Errorf("create space without resource limit, info:%v", info)
		return EmptyWorkspaceRunningInfo, status.Error(codes.InvalidArgument, "resource limit is required")
	}

	// 1. Build the PVC; it shares its name with the Pod.
	pvcName := info.Name
	pvc, err := s.constructPVC(pvcName, info.Namespace, info.ResourceLimit.Storage)
	if err != nil {
		klog.Errorf("construct pvc error:%v, info:%v", err, info)
		return EmptyWorkspaceRunningInfo, status.Error(codes.Unknown, ErrConstructPVC.Error())
	}
	klog.Infof("[CreateSpace] 1.construct pvc")

	// Bound the API call to 30s while still honouring the caller's
	// cancellation and deadline.
	createCtx, cancel := context.WithTimeout(ctx, time.Second*30)
	defer cancel()
	if err = s.client.Create(createCtx, pvc); err != nil {
		if !errors.IsAlreadyExists(err) {
			klog.Errorf("create pvc error:%v", err)
			return EmptyWorkspaceRunningInfo, status.Error(codes.Unknown, ErrConstructPVC.Error())
		}
		// An existing PVC is reused as-is.
		klog.Infof("create pvc while pvc is already exist, pvc:%s", pvcName)
	}
	klog.Info("[CreateSpace] 2.create pvc success")

	// 2. Create the Pod.
	return s.createPod(ctx, info)
}
// createPod creates the workspace Pod described by info and blocks until the
// status informer signals the Pod or c is done.
//
//   - If a Pod with the same name already exists and is Running, its running
//     info is returned.
//   - If it exists but is not Running, it is deleted and an error is returned.
//   - If c is done before the Pod is signalled, the Pod is deleted and an
//     error is returned.
//
// Failures are reported as gRPC status errors: codes.InvalidArgument for
// missing or malformed resource limits in release mode, codes.Unknown
// otherwise.
func (s *CloudSpaceService) createPod(c context.Context, info *pb.WorkspaceInfo) (*pb.WorkspaceRunningInfo, error) {
	// In release mode fillPod parses the CPU and memory limits with
	// resource.MustParse, which panics on malformed input. Validate them here
	// so a bad request yields an error instead of a panic in the handler.
	if Mode == ModeRelease {
		if info.ResourceLimit == nil {
			klog.Errorf("create pod without resource limit, pod:%s", info.Name)
			return EmptyWorkspaceRunningInfo, status.Error(codes.InvalidArgument, "resource limit is required")
		}
		for _, q := range []string{info.ResourceLimit.Cpu, info.ResourceLimit.Memory} {
			if _, err := resource.ParseQuantity(q); err != nil {
				klog.Errorf("invalid resource limit %q:%v", q, err)
				return EmptyWorkspaceRunningInfo, status.Error(codes.InvalidArgument, "invalid resource limit")
			}
		}
	}

	pod := podTpl.DeepCopy()
	s.fillPod(info, pod, info.Name)

	// Bound the API calls to 30s while still honouring the caller's
	// cancellation and deadline.
	apiCtx, cancel := context.WithTimeout(c, time.Second*30)
	defer cancel()
	if err := s.client.Create(apiCtx, pod); err != nil {
		if !errors.IsAlreadyExists(err) {
			klog.Errorf("create pod err:%v", err)
			return EmptyWorkspaceRunningInfo, status.Error(codes.Unknown, ErrCreatePod.Error())
		}

		// The Pod already exists: reuse it only if it is Running.
		klog.Infof("create pod while pod is already exist, pod:%s", info.Name)
		existPod := v1.Pod{}
		if err = s.client.Get(apiCtx, client.ObjectKeyFromObject(pod), &existPod); err != nil {
			klog.Errorf("get existing pod error:%v", err)
			return EmptyWorkspaceRunningInfo, status.Error(codes.Unknown, ErrCreatePod.Error())
		}
		if existPod.Status.Phase != v1.PodRunning {
			// Best effort: deletePod logs its own failure, and the caller gets
			// an error either way.
			s.deletePod(&existPod)
			return EmptyWorkspaceRunningInfo, status.Error(codes.Unknown, ErrCreatePod.Error())
		}
		// A Pod with this name that was not built by fillPod may have no
		// container or port; do not index blindly.
		if len(existPod.Spec.Containers) == 0 || len(existPod.Spec.Containers[0].Ports) == 0 {
			klog.Errorf("existing pod %s exposes no container port", existPod.Name)
			return EmptyWorkspaceRunningInfo, status.Error(codes.Unknown, ErrCreatePod.Error())
		}
		return &pb.WorkspaceRunningInfo{
			NodeName: existPod.Spec.NodeName,
			Ip:       existPod.Status.PodIP,
			Port:     existPod.Spec.Containers[0].Ports[0].ContainerPort,
		}, nil
	}
	klog.Info("[createPod] create pod success")

	// Register a channel with the informer; it is signalled when the Pod is
	// ready. The registration is removed again when this function returns.
	// NOTE(review): the channel is registered after the Pod is created; if the
	// informer does not replay a state reached before Add, a very fast Pod
	// could be missed. Confirm against statussync.
	ch := s.statusInformer.Add(pod.Name)
	defer s.statusInformer.Delete(pod.Name)

	select {
	case <-ch:
		// The Pod is Running; fetch its node, IP and port.
		return s.GetPodSpaceInfo(context.Background(), &pb.QueryOption{Name: info.Name, Namespace: info.Namespace})
	case <-c.Done():
		// Timed out or cancelled: the Pod did not start (possibly because of
		// insufficient resources), so remove it.
		klog.Error("pod start failed, maybe resources is not enough")
		s.deletePod(pod)
		return EmptyWorkspaceRunningInfo, status.Error(codes.Unknown, ErrCreatePod.Error())
	}
}
/*
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: pvc1
namespace: cloud-ide
spec:
accessModes: # access modes
- ReadWriteMany
resources: # requested storage
requests:
storage: 5Gi
*/
// constructPVC builds, without creating it, a PersistentVolumeClaim called
// name in namespace. The claim uses the ReadWriteMany access mode and sets
// both its storage request and limit to storage.
//
// It returns the parse error unchanged when storage is not a valid quantity.
func (s *CloudSpaceService) constructPVC(name, namespace, storage string) (*v1.PersistentVolumeClaim, error) {
	size, parseErr := resource.ParseQuantity(storage)
	if parseErr != nil {
		return nil, parseErr
	}

	claim := new(v1.PersistentVolumeClaim)
	claim.APIVersion = "v1"
	claim.Kind = "PersistentVolumeClaim"
	claim.Name = name
	claim.Namespace = namespace
	claim.Spec.AccessModes = []v1.PersistentVolumeAccessMode{v1.ReadWriteMany}
	// Request and limit get separate maps holding the same quantity.
	claim.Spec.Resources.Limits = v1.ResourceList{v1.ResourceStorage: size}
	claim.Spec.Resources.Requests = v1.ResourceList{v1.ResourceStorage: size}
	return claim, nil
}
/*
apiVersion: v1
kind: Pod
metadata:
name: code-server-volum
namespace: cloud-ide
labels:
kind: code-server
spec:
containers:
- name: code-server
image: mangohow/code-server-go1.19:v0.1
volumeMounts:
- name: volume
mountPath: /root/workspace
volumes:
- name: volume
persistentVolumeClaim:
claimName: pvc3
readOnly: false
*/
// fillPod fills pod in place from info: it sets the Pod name and namespace,
// attaches the PersistentVolumeClaim named pvc as the single volume, and
// defines one container that runs info.Image, exposes info.Port and mounts
// the volume at /user_data/.
//
// When Mode is ModeRelease the container also gets resource requests (2 CPU,
// 1Gi memory) and limits taken from info.ResourceLimit. The limits are parsed
// with resource.MustParse, so malformed values panic.
func (s *CloudSpaceService) fillPod(info *pb.WorkspaceInfo, pod *v1.Pod, pvc string) {
	const workspaceVolume = "volume-user-workspace"

	pod.Name = info.Name
	pod.Namespace = info.Namespace

	// Persistent storage backed by the workspace PVC.
	userVolume := v1.Volume{Name: workspaceVolume}
	userVolume.PersistentVolumeClaim = &v1.PersistentVolumeClaimVolumeSource{
		ClaimName: pvc,
		ReadOnly:  false,
	}
	pod.Spec.Volumes = []v1.Volume{userVolume}

	// The single workspace container, with the volume mounted read-write.
	ide := v1.Container{
		Name:            info.Name,
		Image:           info.Image,
		ImagePullPolicy: v1.PullIfNotPresent,
		Ports:           []v1.ContainerPort{{ContainerPort: int32(info.Port)}},
		VolumeMounts: []v1.VolumeMount{
			{Name: workspaceVolume, ReadOnly: false, MountPath: "/user_data/"},
		},
	}
	pod.Spec.Containers = []v1.Container{ide}

	if Mode != ModeRelease {
		return
	}

	// Minimum request: 2 CPU cores and 1Gi (1 * 2^10 Mi) of memory.
	requests := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("2"),
		v1.ResourceMemory: resource.MustParse("1Gi"),
	}
	limits := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse(info.ResourceLimit.Cpu),
		v1.ResourceMemory: resource.MustParse(info.ResourceLimit.Memory),
	}
	pod.Spec.Containers[0].Resources = v1.ResourceRequirements{
		Requests: requests,
		Limits:   limits,
	}
}
// StartSpace starts a workspace that was created before by creating its Pod
// again. No storage volume is created here; the Pod mounts the existing claim
// that shares its name.
func (s *CloudSpaceService) StartSpace(ctx context.Context, info *pb.WorkspaceInfo) (*pb.WorkspaceRunningInfo, error) {
	runningInfo, err := s.createPod(ctx, info)
	return runningInfo, err
}
// DeleteSpace deletes a cloud IDE workspace by deleting its
// PersistentVolumeClaim, identified by option.Name and option.Namespace.
//
// A claim that does not exist counts as success, because the goal is for it
// to be gone. Any other failure returns ResponseFailed together with a
// codes.Unknown status error.
func (s *CloudSpaceService) DeleteSpace(ctx context.Context, option *pb.QueryOption) (*pb.Response, error) {
	pvc := &v1.PersistentVolumeClaim{
		ObjectMeta: metav1.ObjectMeta{
			Name:      option.Name,
			Namespace: option.Namespace,
		},
	}

	// Bound the API call to 30s while still honouring the caller's
	// cancellation and deadline.
	deleteCtx, cancel := context.WithTimeout(ctx, time.Second*30)
	defer cancel()

	err := s.client.Delete(deleteCtx, pvc)
	switch {
	case err == nil:
		klog.Info("[DeleteSpace] delete pvc success")
		return ResponseSuccess, nil
	case errors.IsNotFound(err):
		klog.Infof("pvc not found,err:%v", err)
		return ResponseSuccess, nil
	default:
		klog.Errorf("delete pvc error:%v", err)
		return ResponseFailed, status.Error(codes.Unknown, ErrDeletePVC.Error())
	}
}
// deletePod deletes pod from the cluster. A Pod that no longer exists counts
// as success; any other failure returns ResponseFailed together with a
// codes.Unknown status error.
func (s *CloudSpaceService) deletePod(pod *v1.Pod) (*pb.Response, error) {
	// Kubernetes' default maximum grace period is 30s, so allow 32s here.
	deleteCtx, stop := context.WithTimeout(context.Background(), time.Second*32)
	defer stop()

	switch delErr := s.client.Delete(deleteCtx, pod); {
	case delErr == nil:
		klog.Info("[deletePod] delete pod success")
		return ResponseSuccess, nil
	case errors.IsNotFound(delErr):
		klog.Infof("delete pod while pod not exist, pod:%s", pod.Name)
		return ResponseSuccess, nil
	default:
		klog.Errorf("delete pod error:%v", delErr)
		return ResponseFailed, status.Error(codes.Unknown, ErrDeletePod.Error())
	}
}
// StopSpace stops a workspace by deleting its Pod, identified by option.Name
// and option.Namespace. The storage volume is left in place.
func (s *CloudSpaceService) StopSpace(ctx context.Context, option *pb.QueryOption) (*pb.Response, error) {
	target := new(v1.Pod)
	target.Name = option.Name
	target.Namespace = option.Namespace
	return s.deletePod(target)
}
// GetPodSpaceStatus reports the phase of the Pod identified by option.Name
// and option.Namespace.
//
// On success it returns Status PodExist with the Pod phase as Message. A
// missing Pod yields a codes.NotFound error; any other lookup failure yields
// a codes.Unknown error carrying the underlying message.
// NOTE(review): PodNotExist is only reported on the non-NotFound error path,
// alongside an error; confirm that this is the intended contract.
func (s *CloudSpaceService) GetPodSpaceStatus(ctx context.Context, option *pb.QueryOption) (*pb.WorkspaceStatus, error) {
	var found v1.Pod
	key := client.ObjectKey{Namespace: option.Namespace, Name: option.Name}

	switch getErr := s.client.Get(ctx, key, &found); {
	case getErr == nil:
		return &pb.WorkspaceStatus{Status: PodExist, Message: string(found.Status.Phase)}, nil
	case errors.IsNotFound(getErr):
		return EmptyWorkspaceStatus, status.Error(codes.NotFound, "pod not found")
	default:
		klog.Errorf("get pod space status error:%v", getErr)
		return &pb.WorkspaceStatus{Status: PodNotExist, Message: "NotExist"}, status.Error(codes.Unknown, getErr.Error())
	}
}
// GetPodSpaceInfo returns the node name, Pod IP and first container port of
// the workspace Pod identified by option.Name and option.Namespace.
//
// A missing Pod yields a codes.NotFound error. Any other lookup failure, or a
// Pod that declares no container port, yields a codes.Unknown error.
func (s *CloudSpaceService) GetPodSpaceInfo(ctx context.Context, option *pb.QueryOption) (*pb.WorkspaceRunningInfo, error) {
	pod := v1.Pod{}
	err := s.client.Get(ctx, client.ObjectKey{Name: option.Name, Namespace: option.Namespace}, &pod)
	if err != nil {
		if errors.IsNotFound(err) {
			return EmptyWorkspaceRunningInfo, status.Error(codes.NotFound, "pod not found")
		}
		klog.Errorf("get pod space info error:%v", err)
		return EmptyWorkspaceRunningInfo, status.Error(codes.Unknown, err.Error())
	}

	// A Pod with this name that was not built by fillPod may have no
	// container or port; report that instead of panicking on the index.
	if len(pod.Spec.Containers) == 0 || len(pod.Spec.Containers[0].Ports) == 0 {
		klog.Errorf("pod %s/%s exposes no container port", option.Namespace, option.Name)
		return EmptyWorkspaceRunningInfo, status.Error(codes.Unknown, "pod has no container port")
	}

	return &pb.WorkspaceRunningInfo{
		NodeName: pod.Spec.NodeName,
		Ip:       pod.Status.PodIP,
		Port:     pod.Spec.Containers[0].Ports[0].ContainerPort,
	}, nil
}
// podTpl is the base Pod that createPod deep-copies before filling in the
// per-workspace fields. It only fixes the type metadata and the
// "kind": "cloud-ide" label.
var podTpl = &v1.Pod{
TypeMeta: metav1.TypeMeta{
Kind: "Pod",
APIVersion: "v1",
},
ObjectMeta: metav1.ObjectMeta{
// Label carried by every workspace Pod created from this template.
Labels: map[string]string{
"kind": "cloud-ide",
},
},
}