Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 183 additions & 0 deletions docs/service_manager/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# Service Manager 服务管理模块

Service Manager 是 ZeroOps 平台的核心服务管理模块,负责微服务的生命周期管理、部署协调和状态监控。

## 架构设计

### 分层架构

```
┌─────────────────┐
│ HTTP API │ ← REST API 接口层
├─────────────────┤
│ Service │ ← 业务逻辑层
├─────────────────┤
│ Database │ ← 数据访问层
├─────────────────┤
│ PostgreSQL │ ← 数据存储层
└─────────────────┘
```

- **API层** (`api/`): 处理HTTP请求和响应,参数验证
- **Service层** (`service/`): 核心业务逻辑,事务管理
- **Database层** (`database/`): 数据库操作,SQL查询
- **Model层** (`model/`): 数据模型和类型定义

## 核心功能

### 1. 服务信息管理

- **服务注册**: 创建和注册新的微服务
- **依赖管理**: 维护服务间的依赖关系图
- **版本管理**: 跟踪服务的多个版本
- **健康监控**: 实时监控服务健康状态

### 2. 部署管理

- **部署协调**: 管理服务的部署任务
- **灰度发布**: 支持渐进式部署策略
- **状态控制**: 暂停、继续、回滚部署
- **实例管理**: 跟踪服务实例的分布

### 3. 监控集成

- **指标收集**: 集成时序数据库(Prometheus格式)
- **状态报告**: 服务运行状态实时上报
- **告警处理**: 异常状态检测和告警

## API接口

### 服务管理接口

| 方法 | 路径 | 描述 |
|------|------|------|
| GET | `/v1/services` | 获取所有服务列表 |
| POST | `/v1/services` | 创建新服务 |
| PUT | `/v1/services/:service` | 更新服务信息 |
| DELETE | `/v1/services/:service` | 删除服务 |
| GET | `/v1/services/:service/activeVersions` | 获取服务当前活跃版本 |
| GET | `/v1/services/:service/availableVersions` | 获取可用服务版本 |
| GET | `/v1/metrics/:service/:name` | 获取服务监控指标 |

### 部署管理接口

| 方法 | 路径 | 描述 |
|------|------|------|
| POST | `/v1/deployments` | 创建部署任务 |
| GET | `/v1/deployments` | 获取部署任务列表 |
| GET | `/v1/deployments/:deployID` | 获取部署任务详情 |
| POST | `/v1/deployments/:deployID` | 更新部署任务 |
| DELETE | `/v1/deployments/:deployID` | 删除部署任务 |
| POST | `/v1/deployments/:deployID/pause` | 暂停部署 |
| POST | `/v1/deployments/:deployID/continue` | 继续部署 |
| POST | `/v1/deployments/:deployID/rollback` | 回滚部署 |

## 数据模型

### 核心实体

#### Service (服务)
```go
type Service struct {
Name string `json:"name"` // 服务名称(主键)
Deps []string `json:"deps"` // 依赖关系列表
}
```

#### ServiceInstance (服务实例)
```go
type ServiceInstance struct {
ID string `json:"id"` // 实例ID(主键)
Service string `json:"service"` // 关联服务名
Version string `json:"version"` // 服务版本
}
```

#### ServiceState (服务状态)
- 健康状态等级
- 状态报告时间
- 异常信息

#### DeployTask (部署任务)
- 部署ID
- 目标服务和版本
- 部署状态
- 创建和更新时间

### 数据库设计

使用 PostgreSQL 作为主数据库:

- **services**: 服务基础信息表
- **service_instances**: 服务实例表
- **service_versions**: 服务版本表
- **service_states**: 服务状态表
- **deploy_tasks**: 部署任务表

## 使用示例

### 创建服务

```bash
curl -X POST http://localhost:8080/v1/services \
-H "Content-Type: application/json" \
-d '{
"name": "user-service",
"deps": ["database-service", "cache-service"]
}'
```

### 创建部署任务

```bash
curl -X POST http://localhost:8080/v1/deployments \
-H "Content-Type: application/json" \
-d '{
"service": "user-service",
"version": "v1.2.0",
"strategy": "rolling"
}'
```

### 获取服务列表

```bash
curl http://localhost:8080/v1/services
```

响应示例:
```json
{
"items": [
{
"name": "user-service",
"deployState": "deployed",
"health": "normal",
"deps": ["database-service"]
}
],
"relation": {
"user-service": ["database-service"]
}
}
```

## 配置说明

### 数据库配置
```yaml
database:
host: localhost
port: 5432
user: admin
password: password
dbname: zeroops
sslmode: disable
```

### 服务配置
```yaml
service_manager:
port: 8080
log_level: info
```
93 changes: 93 additions & 0 deletions docs/service_manager/model/schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
-- ZeroOps Service Manager Database Schema

-- Drop any existing tables. Tables that reference services(name) are dropped
-- before services itself, so no foreign key blocks a drop.
-- NOTE(review): running this script discards all existing rows, and it makes
-- the IF NOT EXISTS / ON CONFLICT guards further down no-ops. Confirm this
-- file is only meant for dev/test bootstrap.
DROP TABLE IF EXISTS deploy_tasks;
DROP TABLE IF EXISTS service_states;
DROP TABLE IF EXISTS service_instances;
DROP TABLE IF EXISTS service_versions;
DROP TABLE IF EXISTS services;

-- services: one row per service, keyed by its name.
-- deps is a JSONB array that defaults to empty; the seed data in this script
-- stores the names of the services this one depends on.
CREATE TABLE IF NOT EXISTS services (
name VARCHAR(255) PRIMARY KEY,
deps JSONB DEFAULT '[]'::jsonb
);

-- service_versions: the versions known for each service.
-- The composite primary key (version, service) allows the same version string
-- to exist for several services. create_time defaults to the insertion time.
-- Rows are removed automatically when the parent service is deleted.
CREATE TABLE IF NOT EXISTS service_versions (
version VARCHAR(255),
service VARCHAR(255),
create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (version, service),
FOREIGN KEY (service) REFERENCES services(name) ON DELETE CASCADE
);

-- service_instances: one row per service instance, keyed by instance id.
-- service references services(name) and cascades on delete.
-- NOTE(review): version is not constrained against service_versions, so an
-- instance can record a version that table does not contain — confirm intended.
CREATE TABLE IF NOT EXISTS service_instances (
id VARCHAR(255) PRIMARY KEY,
service VARCHAR(255),
version VARCHAR(255),
FOREIGN KEY (service) REFERENCES services(name) ON DELETE CASCADE
);

-- service_states: state record for a service version.
-- The primary key (service, version) allows at most one row per service
-- version. Rows are removed when the parent service is deleted.
-- NOTE(review): the allowed values of level / health_status and the meaning of
-- correlation_id are not defined in this script — confirm against the model
-- package.
CREATE TABLE IF NOT EXISTS service_states (
service VARCHAR(255),
version VARCHAR(255),
level VARCHAR(50),
detail TEXT,
report_at TIMESTAMP,
resolved_at TIMESTAMP,
health_status VARCHAR(50),
correlation_id VARCHAR(255),
PRIMARY KEY (service, version),
FOREIGN KEY (service) REFERENCES services(name) ON DELETE CASCADE
);

-- deploy_tasks: one row per deployment task, keyed by a task id of at most
-- 32 characters. instances is a JSONB array that defaults to empty.
-- This table has no foreign key to services, so deleting a service does not
-- remove its deployment tasks.
-- NOTE(review): target_ratio presumably is the rollout fraction for gradual
-- deployment — confirm the expected range with the service layer.
CREATE TABLE IF NOT EXISTS deploy_tasks (
id VARCHAR(32) PRIMARY KEY,
start_time TIMESTAMP,
end_time TIMESTAMP,
target_ratio DOUBLE PRECISION,
instances JSONB DEFAULT '[]'::jsonb,
deploy_state VARCHAR(50)
);

-- Secondary indexes for common lookups.
-- There is deliberately no standalone index on service_states(service): the
-- primary key (service, version) and idx_service_states_report_at both lead
-- with service, so a B-tree on that column alone would serve no query the
-- others cannot and would only add write and storage cost.
CREATE INDEX IF NOT EXISTS idx_service_states_report_at ON service_states(service, report_at DESC);
CREATE INDEX IF NOT EXISTS idx_deploy_tasks_state ON deploy_tasks(deploy_state);
CREATE INDEX IF NOT EXISTS idx_service_instances_service ON service_instances(service);

-- Seed the services of the Mock S3 project together with their dependencies
-- (modelled on the actual business flow).
INSERT INTO services (name, deps) VALUES
('storage', '[]'::jsonb), -- storage service: base service, no dependencies
('metadata', '["storage"]'::jsonb), -- metadata service: depends on storage
('queue', '["storage"]'::jsonb), -- queue service: depends on storage
('third-party', '[]'::jsonb), -- third-party service: standalone
('mock-error', '[]'::jsonb) -- error-simulation service: standalone
ON CONFLICT (name) DO NOTHING;

-- Seed versions: metadata, storage, queue and third-party get three versions
-- each; mock-error gets a single version. create_time is back-dated relative
-- to the moment the script runs.
INSERT INTO service_versions (version, service, create_time) VALUES
-- metadata service versions
('v1.0.0', 'metadata', CURRENT_TIMESTAMP - INTERVAL '60 days'),
('v1.1.0', 'metadata', CURRENT_TIMESTAMP - INTERVAL '30 days'),
('v1.2.0', 'metadata', CURRENT_TIMESTAMP - INTERVAL '7 days'),
-- storage service versions
('v1.0.0', 'storage', CURRENT_TIMESTAMP - INTERVAL '55 days'),
('v1.1.0', 'storage', CURRENT_TIMESTAMP - INTERVAL '25 days'),
('v1.2.0', 'storage', CURRENT_TIMESTAMP - INTERVAL '5 days'),
-- queue service versions
('v1.0.0', 'queue', CURRENT_TIMESTAMP - INTERVAL '50 days'),
('v1.1.0', 'queue', CURRENT_TIMESTAMP - INTERVAL '20 days'),
('v1.2.0', 'queue', CURRENT_TIMESTAMP - INTERVAL '3 days'),
-- third-party service versions
('v1.0.0', 'third-party', CURRENT_TIMESTAMP - INTERVAL '45 days'),
('v1.1.0', 'third-party', CURRENT_TIMESTAMP - INTERVAL '15 days'),
('v1.2.0', 'third-party', CURRENT_TIMESTAMP - INTERVAL '1 day'),
-- mock-error service version
('v1.0.0', 'mock-error', CURRENT_TIMESTAMP - INTERVAL '40 days')
ON CONFLICT (version, service) DO NOTHING;
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.24

require (
github.com/fox-gonic/fox v0.0.6
github.com/lib/pq v1.10.9
github.com/rs/zerolog v1.34.0
)

Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI=
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
Expand Down
1 change: 0 additions & 1 deletion internal/client/README.md

This file was deleted.

4 changes: 2 additions & 2 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ func Load() (*Config, error) {
Database: DatabaseConfig{
Host: getEnv("DB_HOST", "localhost"),
Port: getEnvInt("DB_PORT", 5432),
User: getEnv("DB_USER", "postgres"),
Password: getEnv("DB_PASSWORD", ""),
User: getEnv("DB_USER", "admin"),
Password: getEnv("DB_PASSWORD", "password"),
DBName: getEnv("DB_NAME", "zeroops"),
SSLMode: getEnv("DB_SSLMODE", "disable"),
},
Expand Down
1 change: 0 additions & 1 deletion internal/middleware/README.md

This file was deleted.

1 change: 0 additions & 1 deletion internal/service_manager/api/README.md

This file was deleted.

1 change: 1 addition & 0 deletions internal/service_manager/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ func NewApi(db *database.Database, service *service.Service, router *fox.Engine)
service: service,
router: router,
}

api.setupRouters(router)
return api, nil
}
Expand Down
2 changes: 1 addition & 1 deletion internal/service_manager/api/deploy_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ import (
func (api *Api) setupDeployRouters(router *fox.Engine) {
// 部署任务基本操作
router.POST("/v1/deployments", api.CreateDeployment)
router.GET("/v1/deployments", api.GetDeployments)
router.GET("/v1/deployments/:deployID", api.GetDeploymentByID)
router.POST("/v1/deployments/:deployID", api.UpdateDeployment)
router.GET("/v1/deployments", api.GetDeployments)
router.DELETE("/v1/deployments/:deployID", api.DeleteDeployment)

// 部署任务控制操作
Expand Down
34 changes: 0 additions & 34 deletions internal/service_manager/api/info_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ import (
func (api *Api) setupInfoRouters(router *fox.Engine) {
// 服务列表和信息查询
router.GET("/v1/services", api.GetServices)
router.GET("/v1/services/:service", api.GetServiceByName)
router.GET("/v1/services/:service/activeVersions", api.GetServiceActiveVersions)
router.GET("/v1/services/:service/availableVersions", api.GetServiceAvailableVersions)
router.GET("/v1/metrics/:service/:name", api.GetServiceMetricTimeSeries)
Expand Down Expand Up @@ -185,39 +184,6 @@ func (api *Api) CreateService(c *fox.Context) {
})
}

// GetServiceByName handles GET /v1/services/:service and returns a single service.
//
// It replies 400 when the :service path parameter is empty, 404 when the
// service layer returns service.ErrServiceNotFound, 500 (after logging) on any
// other error, and 200 with the service as the JSON body on success.
func (api *Api) GetServiceByName(c *fox.Context) {
	reqCtx := c.Request.Context()
	name := c.Param("service")
	if name == "" {
		c.JSON(http.StatusBadRequest, map[string]any{
			"error":   "bad request",
			"message": "service name is required",
		})
		return
	}

	found, err := api.service.GetServiceByName(reqCtx, name)
	switch {
	case err == nil:
		c.JSON(http.StatusOK, found)
	case err == service.ErrServiceNotFound:
		c.JSON(http.StatusNotFound, map[string]any{
			"error":   "not found",
			"message": "service not found",
		})
	default:
		// Unexpected failure: log the cause, return only a generic message.
		log.Error().Err(err).Str("service", name).Msg("failed to get service")
		c.JSON(http.StatusInternalServerError, map[string]any{
			"error":   "internal server error",
			"message": "failed to get service",
		})
	}
}

// UpdateService 更新服务信息(PUT /v1/services/:service)
func (api *Api) UpdateService(c *fox.Context) {
ctx := c.Request.Context()
Expand Down
3 changes: 0 additions & 3 deletions internal/service_manager/database/README.md

This file was deleted.

Loading