Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integration tests for windows service support #3733

Merged
merged 11 commits into from
Jan 19, 2023
49 changes: 46 additions & 3 deletions pkg/common/entrypoint/entrypoint_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@ package entrypoint

import (
"context"
"errors"
"fmt"
"os"
"strings"
"unsafe"

"golang.org/x/sys/windows"

"golang.org/x/sys/windows/svc"
)
Expand All @@ -20,7 +25,10 @@ type systemCall struct {
}

func (s *systemCall) IsWindowsService() (bool, error) {
return svc.IsWindowsService()
// We are using a custom function because the svc.IsWindowsService() one still has an open issue in which it states
// that it is not working properly in Windows containers: https://github.com/golang/go/issues/56335. Soon as we have
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth submitting a PR? That issue has a help wanted tag and it seems like you have a fix below.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A PR is already opened for this, in fact, the custom function with the workaround is based on the opened solution proposal: golang/sys#141

// a fix for that, we can use the original function.
return isWindowsService()
}

func (s *systemCall) Run(name string, handler svc.Handler) error {
Expand Down Expand Up @@ -51,11 +59,11 @@ func (e *EntryPoint) Main() int {
// Determining if SPIRE is running as a Windows service is done
// with a best-effort approach. If there is an error, just fallback
// to the behavior of not running as a Windows service.
isWindowsService, err := e.sc.IsWindowsService()
isWindowsSvc, err := e.sc.IsWindowsService()
if err != nil {
fmt.Fprintf(os.Stderr, "Could not determine if running as a Windows service: %v", err)
}
if isWindowsService {
if isWindowsSvc {
errChan := make(chan error)
go func() {
// Since the service runs in its own process, the service name is ignored.
Expand All @@ -71,3 +79,38 @@ func (e *EntryPoint) Main() int {

return e.runCmdFn(context.Background(), os.Args[1:])
}

// isWindowsService is a copy of the svc.IsWindowsService() function, but without the parentProcess.SessionID == 0 check
// that is causing the issue in Windows containers, this logic is exactly the same from .NET runtime (>= 6.0.10).
func isWindowsService() (bool, error) {
// The below technique looks a bit hairy, but it's actually
// exactly what the .NET runtime (>= 6.0.10) does for the similarly named function:
// https://github.com/dotnet/runtime/blob/36bf84fc4a89209f4fdbc1fc201e81afd8be49b0/src/libraries/Microsoft.Extensions.Hosting.WindowsServices/src/WindowsServiceHelpers.cs#L20-L33
// Specifically, it looks up whether the parent process is called "services".

var currentProcess windows.PROCESS_BASIC_INFORMATION
infoSize := uint32(unsafe.Sizeof(currentProcess))
err := windows.NtQueryInformationProcess(windows.CurrentProcess(), windows.ProcessBasicInformation, unsafe.Pointer(&currentProcess), infoSize, &infoSize)
if err != nil {
return false, err
}
var parentProcess *windows.SYSTEM_PROCESS_INFORMATION
for infoSize = uint32((unsafe.Sizeof(*parentProcess) + unsafe.Sizeof(uintptr(0))) * 1024); ; {
parentProcess = (*windows.SYSTEM_PROCESS_INFORMATION)(unsafe.Pointer(&make([]byte, infoSize)[0]))
err = windows.NtQuerySystemInformation(windows.SystemProcessInformation, unsafe.Pointer(parentProcess), infoSize, &infoSize)
if err == nil {
break
} else if !errors.Is(err, windows.STATUS_INFO_LENGTH_MISMATCH) {
return false, err
}
}
for ; ; parentProcess = (*windows.SYSTEM_PROCESS_INFORMATION)(unsafe.Pointer(uintptr(unsafe.Pointer(parentProcess)) + uintptr(parentProcess.NextEntryOffset))) {
if parentProcess.UniqueProcessID == currentProcess.InheritedFromUniqueProcessId {
return strings.EqualFold("services.exe", parentProcess.ImageName.String()), nil
}
if parentProcess.NextEntryOffset == 0 {
break
}
}
return false, nil
}
1 change: 1 addition & 0 deletions test/integration/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,4 @@ The following environment variables are available to the teardown script:
* [Self Test](suites/self-test/README.md)
* [SPIRE Server CLI](suites/spire-server-cli/README.md)
* [Upgrade](suites/upgrade/README.md)
* [Windows Service](suites-windows/windows-service/README.md)
3 changes: 3 additions & 0 deletions test/integration/suites-windows/windows-service/00-setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

"${ROOTDIR}/setup/x509pop/setup.sh" conf/server conf/agent
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
source ./common

docker-up spire-server

create-service spire-server C:/spire/bin/spire-server.exe
start-service spire-server run -config C:/spire/conf/server/server.conf
assert-service-status spire-server RUNNING
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash

log-debug "bootstrapping agent..."
docker-compose exec -T spire-server \
c:/spire/bin/spire-server bundle show > conf/agent/bootstrap.crt || fail-now "failed to bootstrap agent"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
source ./common

docker-up spire-agent

create-service spire-agent C:/spire/bin/spire-agent.exe
start-service spire-agent run -config C:/spire/conf/agent/agent.conf
assert-service-status spire-agent RUNNING
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
source ./common

log-debug "creating regular registration entry..."
docker-compose exec -T spire-server \
c:/spire/bin/spire-server entry create \
-parentID "spiffe://domain.test/spire/agent/x509pop/$(fingerprint conf/agent/agent.crt.pem)" \
-spiffeID "spiffe://domain.test/workload" \
-selector "windows:user_name:User Manager\ContainerUser" \
-ttl 0

assert-synced-entry "spiffe://domain.test/workload"
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash

log-debug "test fetch x509 SVID..."
docker-compose exec -T -u ContainerUser spire-agent \
c:/spire/bin/spire-agent api fetch x509 || fail-now "failed to fetch x509"

log-debug "test fetch JWT SVID..."
docker-compose exec -T -u ContainerUser spire-agent \
c:/spire/bin/spire-agent api fetch jwt -audience mydb || fail-now "failed to fetch JWT"
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
source ./common

stop-service spire-agent
assert-service-status spire-agent STOPPED
assert-graceful-shutdown agent

stop-service spire-server
assert-service-status spire-server STOPPED
assert-graceful-shutdown server
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
source ./common

start-service spire-server run -config invalid-config-path
assert-service-status spire-server STOPPED

start-service spire-agent run -config invalid-config-path
assert-service-status spire-agent STOPPED
19 changes: 19 additions & 0 deletions test/integration/suites-windows/windows-service/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# SPIRE Server CLI Suite

## Description

This suite validates that we can run both spire agent and spire server natively on Windows OS, asserting that spire components
can run as a [windows service application](https://learn.microsoft.com/en-us/dotnet/framework/windows-services/introduction-to-windows-service-applications#service-applications-vs-other-visual-studio-applications),
and perform [service state transitions](https://learn.microsoft.com/en-us/windows/win32/services/service-status-transitions).

The suite steps are structured as follows:

1. Spire server and agent are installed as Windows services.
2. Spire server and agent services starts, their respective status is asserted as **_RUNNING_**, and the node attestation
is performed with x509pop.
3. Workload registration entries are created.
4. The feature of fetching SVIDs (x509 and JWT) is asserted with the running spire agent service.
5. Spire server and agent services are stopped, their respective status is asserted as **_STOPPED_**, and graceful
shutdown is verified via application logs.
6. Spire server and agent services are started again, but this time with an invalid config; their respective status is
asserted as **_STOPPED_**.
66 changes: 66 additions & 0 deletions test/integration/suites-windows/windows-service/common
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#!/bin/bash

assert-synced-entry() {
# Check at most 30 times (with one second in between) that the agent has
# successfully synced down the workload entry.
MAXCHECKS=30
CHECKINTERVAL=1
for ((i=1;i<=MAXCHECKS;i++)); do
log-info "checking for synced entry ($i of $MAXCHECKS max)..."
if grep -wq "$1" conf/agent/logs.txt; then
return 0
fi
sleep "${CHECKINTERVAL}"
done

fail-now "timed out waiting for agent to sync down entry"
}

assert-service-status() {
MAXCHECKS=10
CHECKINTERVAL=1
for ((i=1;i<=MAXCHECKS;i++)); do
log-info "checking for $1 service $2 ($i of $MAXCHECKS max)..."
scCommand=$([ "$2" == "STOPPED" ] && echo "query" || echo "interrogate")
if docker-compose exec -T -u ContainerAdministrator "$1" sc "$scCommand" "$1" | grep -wq "$2"; then
log-info "$1 is in $2 state"
return 0
fi
sleep "${CHECKINTERVAL}"
done

fail-now "$1 service failed to reach $2 state"
}

assert-graceful-shutdown() {
MAXCHECKS=10
CHECKINTERVAL=1
for ((i=1;i<=MAXCHECKS;i++)); do
log-info "checking for graceful shutdown ($i of $MAXCHECKS max)..."
if grep -wq "stopped gracefully" conf/"$1"/logs.txt; then
log-info "$1 stopped gracefully"
return 0
fi
sleep "${CHECKINTERVAL}"
done

fail-now "timed out waiting for $1 graceful shutdown"
}

create-service() {
log-info "creating $1 service..."
docker-compose exec -T -u ContainerAdministrator "$1" \
sc create "$1" binPath="$2" || grep "STOPPED" fail-now "failed to create $1 service"
}

stop-service() {
log-info "stopping $1 service..."
docker-compose exec -T -u ContainerAdministrator "$1" \
sc stop "$1" || fail-now "failed to stop $1 service"
}

start-service(){
log-info "starting $1 service..."
docker-compose exec -T -u ContainerAdministrator "$1" \
sc start "$@" | grep -wq "START_PENDING" || fail-now "failed to start $2 service"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
agent {
data_dir = "c:/spire/data/agent"
log_level = "DEBUG"
server_address = "spire-server"
log_file ="c:/spire/conf/agent/logs.txt"
server_port = "8081"
trust_bundle_path = "c:/spire/conf/agent/bootstrap.crt"
trust_domain = "domain.test"
}

plugins {
NodeAttestor "x509pop" {
plugin_data {
private_key_path = "c:/spire/conf/agent/agent.key.pem"
certificate_path = "c:/spire/conf/agent/agent.crt.pem"
}
}
KeyManager "disk" {
plugin_data {
directory = "c:/spire/data/agent"
}
}
WorkloadAttestor "windows" {
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
server {
bind_address = "0.0.0.0"
bind_port = "8081"
trust_domain = "domain.test"
log_file ="c:/spire/conf/server/logs.txt"
data_dir = "c:/spire/data/server"
log_level = "DEBUG"
}

plugins {
DataStore "sql" {
plugin_data {
database_type = "sqlite3"
connection_string = "c:/spire/data/server/datastore.sqlite3"
}
}
NodeAttestor "x509pop" {
plugin_data {
ca_bundle_path = "c:/spire/conf/server/agent-cacert.pem"
}
}
KeyManager "memory" {
plugin_data = {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
version: '3'

services:
spire-server:
image: spire-server-windows:latest-local
hostname: spire-server
volumes:
- ./conf/server:c:/spire/conf/server
user: ContainerAdministrator
entrypoint:
- cmd
command:
- cmd /c ping -t localhost > NUL
spire-agent:
image: spire-agent-windows:latest-local
hostname: spire-agent
depends_on: ["spire-server"]
volumes:
- ./conf/agent:c:/spire/conf/agent
user: ContainerAdministrator
entrypoint:
- cmd
command:
- cmd /c ping -t localhost > NUL
6 changes: 6 additions & 0 deletions test/integration/suites-windows/windows-service/teardown
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash

if [ -z "$SUCCESS" ]; then
docker-compose logs
fi
docker-down