Skip to content

Commit

Permalink
Merge branch 'master' into andrewd-kubevirt-longhorn-part4
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewd-zededa authored Apr 25, 2024
2 parents 28a776d + 751af32 commit aa54809
Show file tree
Hide file tree
Showing 24 changed files with 1,161 additions and 57 deletions.
25 changes: 24 additions & 1 deletion pkg/debug/scripts/collect-info.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

# Script version, don't forget to bump up once something is changed

VERSION=20
VERSION=22
# Add required packages here, it will be passed to "apk add".
# Once something added here don't forget to add the same package
# to the Dockerfile ('ENV PKGS' line) of the debug container,
Expand Down Expand Up @@ -81,6 +81,10 @@ while getopts "vhsa:dj" o; do
esac
done

# Report whether the current process runs inside the EVE debug service.
# Returns: 0 when some line of /proc/self/cgroup contains
#          '/eve/services/debug', non-zero otherwise.
is_in_debug_service()
{
    # The needle has no regex metacharacters, so a fixed-string search
    # matches exactly the same lines as a regular-expression search.
    grep -q -F -e '/eve/services/debug' < /proc/self/cgroup
}

sort_cat_jq()
{
# Sort and extract a filename
Expand Down Expand Up @@ -143,6 +147,13 @@ if [ -d "$SCRIPT_DIR/persist-newlog" ]; then
exit
fi

# From here on the script works in collect-info mode on an EVE node.
# That mode is only allowed from inside the debug container, so bail
# out with a hint when we are started from anywhere else.
is_in_debug_service || {
    echo "$0 has to be started from debug container; use 'eve enter debug' to enter debug container"
    exit 1
}

# Create temporary dir
echo "- basic setup"
TMP_DIR=$(mktemp -d)
Expand Down Expand Up @@ -234,6 +245,17 @@ collect_network_info()
echo "- done network info"
}

# Save the pprof heap dump served on 127.1:6543 into
# "$DIR/pillar-memory-backtraces".
# Globals: DIR (read) - directory that receives the collected file
# Outputs: progress lines on stdout
collect_pillar_memory_backtraces()
{
    printf '%s\n' "- pillar memory backtraces"

    # Ask for the debug HTTP endpoint to be started; its chatter is dropped
    eve http-debug > /dev/null 2>&1

    # Retry a few times (3 retries, 3 seconds apart, 5 seconds time limit),
    # in case the endpoint is not answering yet
    curl --retry-all-errors --retry 3 --retry-delay 3 -m 5 -s \
        "http://127.1:6543/debug/pprof/heap?debug=1" \
        > "$DIR/pillar-memory-backtraces"

    # Shut the debug HTTP endpoint down again
    eve http-debug stop > /dev/null 2>&1

    printf '%s\n' "- done pillar memory backtraces"
}

collect_pillar_backtraces()
{
echo "- pillar backtraces"
Expand Down Expand Up @@ -425,6 +447,7 @@ collect_network_info

# Pillar part
collect_pillar_backtraces
collect_pillar_memory_backtraces

# ZFS part
collect_zfs_info
Expand Down
4 changes: 2 additions & 2 deletions pkg/dom0-ztools/rootfs/bin/eve
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ http_debug_request() {
fi

if [ "$running" = "0" ]; then
pkill -USR2 /opt/zededa/bin/zedbox
pkill -USR2 zedbox
fi

printf "POST %s HTTP/1.0\r\n\r\n" "$URL" | nc 127.1 6543
Expand Down Expand Up @@ -228,7 +228,7 @@ __EOT__
fi
;;
http-debug) if [ -z "$2" ] || [ "$2" = "start" ]; then
pkill -USR2 /opt/zededa/bin/zedbox
pkill -USR2 zedbox
echo "Listening on :6543 -- use 'eve http-debug stop' to stop"
elif [ "$2" = "stop" ]; then
printf "POST /stop HTTP/1.0\r\n\r\n" | nc 127.1 6543
Expand Down
34 changes: 21 additions & 13 deletions pkg/edgeview/src/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -492,20 +492,28 @@ func getAllAppIPs() []appIPvnc {
status := strings.TrimSuffix(string(retbytes1), "\n")
appIPs, appUUID := getAppIPs(status)
var oneAppIPs []appIPvnc
if len(appIPs) > 0 {
retbytes1, err := os.ReadFile("/run/zedagent/AppInstanceConfig/" + appUUID.String() + ".json")
if err != nil {
log.Errorf("getAllAppIPs: run appinstcfg %v", err)
continue
}
var appInstCfg types.AppInstanceConfig
err = json.Unmarshal(retbytes1, &appInstCfg)
if err != nil {
log.Errorf("getAllAppIPs: unmarshal %v", err)
continue
}
retbytes1, err = os.ReadFile("/run/zedagent/AppInstanceConfig/" + appUUID.String() + ".json")
if err != nil {
log.Errorf("getAllAppIPs: run appinstcfg %v", err)
continue
}
var appInstCfg types.AppInstanceConfig
err = json.Unmarshal(retbytes1, &appInstCfg)
if err != nil {
log.Errorf("getAllAppIPs: unmarshal %v", err)
continue
}

enableVNC := appInstCfg.FixedResources.EnableVnc
enableVNC := appInstCfg.FixedResources.EnableVnc
// Even if the app has no IP address, we still want to allow VNC if it is enabled
if len(appIPs) == 0 {
ipVNC := appIPvnc{
vncEnable: enableVNC,
appName: appInstCfg.DisplayName,
vncPort: int(appInstCfg.FixedResources.VncDisplay),
}
oneAppIPs = append(oneAppIPs, ipVNC)
} else {
for _, ipaddr := range appIPs {
ipVNC := appIPvnc{
ipAddr: ipaddr,
Expand Down
10 changes: 10 additions & 0 deletions pkg/grub/rootfs.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,12 @@ function set_getty {
set_global dom0_extra_args "$dom0_extra_args getty"
}

# Append the CPU isolation kernel arguments
# "isolcpus=inverse,0 nohz_full=inverse,0" to dom0_extra_args
# (stored through set_global, keeping whatever was already in there).
function set_isolcpus {
# the 'inverse' is our own flag, see details here:
# https://github.com/lf-edge/eve-kernel/pull/90
set_global dom0_extra_args "$dom0_extra_args isolcpus=inverse,0 nohz_full=inverse,0"
}

set arch=${grub_cpu}
if [ "$arch" = "i386" ]; then
# grub CPU i386 means we are running in legacy BIOS mode
Expand Down Expand Up @@ -463,6 +469,10 @@ submenu 'Set Boot Options' {
set_getty
}

# Boot option: selecting this entry calls set_isolcpus;
# per the entry title it is only meant for PREEMPT_RT kernels
menuentry 'isolate CPU0 (only for PREEMPT_RT)' {
set_isolcpus
}

menuentry 'unset dom0_extra_args' {
set_global dom0_extra_args " "
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/kube/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ RUN GO111MODULE=on CGO_ENABLED=0 go build -v -ldflags "-s -w" -mod=vendor -o /ou
FROM scratch
COPY --from=build /out/ /
COPY cluster-init.sh /usr/bin/
COPY nsmounter /usr/bin/
COPY longhorn-generate-support-bundle.sh /usr/bin/
COPY k3s-pod-logs.sh /usr/bin/
COPY iscsid.conf /etc/iscsi/
COPY cgconfig.conf /etc
# kubevirt yaml files are patched files and will be removed later, look at cluster-init.sh
COPY multus-daemonset.yaml /etc
Expand Down
105 changes: 94 additions & 11 deletions pkg/kube/cluster-init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@

K3S_VERSION=v1.26.3+k3s1
KUBEVIRT_VERSION=v0.59.0
LONGHORN_VERSION=v1.4.2
CDI_VERSION=v1.56.0
LONGHORN_VERSION=v1.6.0
CDI_VERSION=v1.57.0
NODE_IP=""
MAX_K3S_RESTARTS=10
RESTART_COUNT=0

INSTALL_LOG=/var/lib/install.log
CTRD_LOG=/var/lib/containerd.log
CTRD_LOG=/var/lib/containerd-user.log
LOG_SIZE=$((5*1024*1024))

logmsg() {
Expand Down Expand Up @@ -164,6 +164,28 @@ check_start_k3s() {
return 0
}

# Tag a kubernetes node with the longhorn default-disk settings:
#  - label      node.longhorn.io/create-default-disk=config
#  - annotation node.longhorn.io/default-disks-config, a JSON list with one
#    schedulable disk at /persist/vault/volumes
# Arguments: $1 - name of the node to label and annotate
# Globals:   node (written)
apply_longhorn_disk_config() {
    node="$1"
    kubectl label node "$node" \
        'node.longhorn.io/create-default-disk=config'
    kubectl annotate node "$node" \
        'node.longhorn.io/default-disks-config=[ { "path":"/persist/vault/volumes", "allowScheduling":true }]'
}

# Overwrite /usr/local/sbin/nsmounter in every running longhorn-csi-plugin pod
# with the copy shipped at /usr/bin/nsmounter.
# A pod is patched only once: after a successful copy the marker file
# /usr/local/sbin/nsmounter.updated is created in the pod, and pods that
# already have the marker are left alone.
# Globals: longhornCsiPluginPods, csiPod (written)
# Outputs: one logmsg line per pod that was updated
check_overwrite_nsmounter() {
    ### REMOVE ME+
    # When https://github.com/longhorn/longhorn/issues/6857 is resolved, remove this 'REMOVE ME' section
    # In addition to pkg/kube/nsmounter and the copy of it in pkg/kube/Dockerfile
    longhornCsiPluginPods=$(kubectl -n longhorn-system get pod -o json | jq -r '.items[] | select(.metadata.labels.app=="longhorn-csi-plugin" and .status.phase=="Running") | .metadata.name')
    for csiPod in $longhornCsiPluginPods; do
        # 'ls' fails when the marker is missing, i.e. the pod is not patched yet.
        # Both stdout and stderr of the probe are discarded ('2>&1'; the former
        # '2>@1' wrote stderr into a file literally named '@1').
        if ! kubectl -n longhorn-system exec "pod/${csiPod}" --container=longhorn-csi-plugin -- ls /usr/local/sbin/nsmounter.updated > /dev/null 2>&1; then
            # Stream our nsmounter over stdin into the pod
            if kubectl -n longhorn-system exec -i "pod/${csiPod}" --container=longhorn-csi-plugin -- tee /usr/local/sbin/nsmounter < /usr/bin/nsmounter; then
                logmsg "Updated nsmounter in longhorn pod ${csiPod}"
                kubectl -n longhorn-system exec "pod/${csiPod}" --container=longhorn-csi-plugin -- touch /usr/local/sbin/nsmounter.updated
            fi
        fi
    done
    ### REMOVE ME-
}

check_start_containerd() {
# Needed to get the pods to start
if [ ! -L /usr/bin/runc ]; then
Expand All @@ -173,12 +195,12 @@ check_start_containerd() {
ln -s /var/lib/rancher/k3s/data/current/bin/containerd-shim-runc-v2 /usr/bin/containerd-shim-runc-v2
fi

if pgrep -f "containerd --config" >> $INSTALL_LOG 2>&1; then
logmsg "k3s-containerd is alive"
else
logmsg "Starting k3s-containerd"
pgrep -f "/var/lib/rancher/k3s/data/current/bin/containerd" > /dev/null 2>&1
if [ $? -eq 1 ]; then
mkdir -p /run/containerd-user
nohup /var/lib/rancher/k3s/data/current/bin/containerd --config /etc/containerd/config-k3s.toml > $CTRD_LOG 2>&1 &
containerd_pid=$!
logmsg "Started k3s-containerd at pid:$containerd_pid"
fi
if [ -f /etc/external-boot-image.tar ]; then
# NOTE: https://kubevirt.io/user-guide/virtual_machines/boot_from_external_source/
Expand All @@ -187,9 +209,7 @@ check_start_containerd() {
# This is very similar to what we do on kvm based eve to start container as a VM.
logmsg "Trying to install new external-boot-image"
# This import happens once per reboot
ctr -a /run/containerd-user/containerd.sock image import /etc/external-boot-image.tar docker.io/lfedge/eve-external-boot-image:latest
res=$?
if [ $res -eq 0 ]; then
if ctr -a /run/containerd-user/containerd.sock image import /etc/external-boot-image.tar docker.io/lfedge/eve-external-boot-image:latest; then
logmsg "Successfully installed external-boot-image"
rm -f /etc/external-boot-image.tar
fi
Expand Down Expand Up @@ -235,6 +255,54 @@ are_all_pods_ready() {
#Make sure all prereqs are set after /var/lib is mounted to get logging info
setup_prereqs

# File that requests a VNC proxy; it carries "VMINAME:<name>" and
# "VNCPORT:<port>" entries, one per line
VMICONFIG_FILENAME="/run/zedkube/vmiVNC.run"
# Whether this script believes it has a 'virtctl vnc' proxy running
VNC_RUNNING=false

# Start or stop the 'virtctl vnc' proxy according to $VMICONFIG_FILENAME.
#  - config file present and the proxy was never started by us, or no
#    'virtctl vnc' process is found: parse the file and start the proxy
#  - config file absent: kill the proxy we started (if any) and reset state
# Globals: VMICONFIG_FILENAME (read), VNC_RUNNING (read/written),
#          pid, vmiName, vmiPort, line, vncPid (written)
# Returns: 1 when the config file lacks a VMINAME or VNCPORT value
check_and_run_vnc() {
    pid=$(pgrep -f "/usr/bin/virtctl vnc")
    # if remote-console config file exists, and the proxy either has not been started or needs a restart
    if [ -f "$VMICONFIG_FILENAME" ] && { [ "$VNC_RUNNING" = false ] || [ -z "$pid" ]; }; then
        vmiName=""
        vmiPort=""

        # Read the file and extract values; the '|| [ -n "$line" ]' part keeps
        # a last line that is not terminated by a newline
        while IFS= read -r line || [ -n "$line" ]; do
            case "$line" in
                *"VMINAME:"*)
                    vmiName="${line#*VMINAME:}"                        # Keep the part after "VMINAME:"
                    vmiName="${vmiName#"${vmiName%%[![:space:]]*}"}"   # Drop leading whitespace
                    vmiName="${vmiName%%[[:space:]]*}"                 # Cut at the first whitespace that follows
                    ;;
                *"VNCPORT:"*)
                    vmiPort="${line#*VNCPORT:}"                        # Keep the part after "VNCPORT:"
                    vmiPort="${vmiPort#"${vmiPort%%[![:space:]]*}"}"   # Drop leading whitespace
                    vmiPort="${vmiPort%%[[:space:]]*}"                 # Cut at the first whitespace that follows
                    ;;
            esac
        done < "$VMICONFIG_FILENAME"

        # Check if the 'vmiName' and 'vmiPort' values are empty, if so, log an error and return
        if [ -z "$vmiName" ] || [ -z "$vmiPort" ]; then
            logmsg "Error: VMINAME or VNCPORT is empty in $VMICONFIG_FILENAME"
            return 1
        fi

        logmsg "virtctl vnc on vmiName: $vmiName, port $vmiPort"
        nohup /usr/bin/virtctl vnc "$vmiName" -n eve-kube-app --port "$vmiPort" --proxy-only &
        VNC_RUNNING=true
    elif [ ! -f "$VMICONFIG_FILENAME" ]; then
        if [ "$VNC_RUNNING" = true ]; then
            if [ -n "$pid" ]; then
                # pgrep prints one PID per line; signal each of them separately
                for vncPid in $pid; do
                    logmsg "Killing process with PID $vncPid"
                    kill -9 "$vncPid"
                done
            else
                logmsg "Error: Process not found"
            fi
        fi
        VNC_RUNNING=false
    fi
}

date >> $INSTALL_LOG

#Forever loop every 15 secs
Expand Down Expand Up @@ -300,7 +368,15 @@ if [ ! -f /var/lib/all_components_initialized ]; then

if [ ! -f /var/lib/longhorn_initialized ]; then
logmsg "Installing longhorn version ${LONGHORN_VERSION}"
kubectl apply -f https://raw.githubusercontent.com/longhorn/longhorn/${LONGHORN_VERSION}/deploy/longhorn.yaml
apply_longhorn_disk_config "$(/bin/hostname)"
lhCfgPath=/var/lib/lh-cfg-${LONGHORN_VERSION}.yaml
if [ ! -e $lhCfgPath ]; then
curl -k https://raw.githubusercontent.com/longhorn/longhorn/${LONGHORN_VERSION}/deploy/longhorn.yaml > "$lhCfgPath"
fi
if ! grep -q 'create-default-disk-labeled-nodes: true' "$lhCfgPath"; then
sed -i '/ default-setting.yaml: |-/a\ create-default-disk-labeled-nodes: true' "$lhCfgPath"
fi
kubectl apply -f "$lhCfgPath"
touch /var/lib/longhorn_initialized
fi

Expand All @@ -326,12 +402,19 @@ else
# launch CNI dhcp service
/opt/cni/bin/dhcp daemon &
fi
else
if [ -e /var/lib/longhorn_initialized ]; then
check_overwrite_nsmounter
fi
fi
fi
currentSize=$(wc -c <"$CTRD_LOG")
if [ "$currentSize" -gt "$LOG_SIZE" ]; then
cp "$CTRD_LOG" "${CTRD_LOG}.1"
truncate -s 0 "$CTRD_LOG"
fi

# Check and run vnc
check_and_run_vnc
sleep 15
done
9 changes: 3 additions & 6 deletions pkg/kube/config-k3s.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@

# File generated by k3s. DO NOT EDIT. Use config.toml.tmpl instead.
version = 2

state = "/run/containerd-user"
root = "/persist/vault/containerd"

[plugins."io.containerd.internal.v1.opt"]
path = "/var/lib/rancher/k3s/agent/containerd/"
path = "/persist/vault/containerd"
[plugins."io.containerd.grpc.v1.cri"]
stream_server_address = "127.0.0.1"
stream_server_port = "10010"
Expand All @@ -22,14 +22,11 @@ state = "/run/containerd-user"
address = "/run/containerd-user/containerd.sock"

[plugins."io.containerd.grpc.v1.cri".cni]
bin_dir = "/var/lib/rancher/k3s/data/c26e7571d760c5f199d18efd197114f1ca4ab1e6ffe494f96feb65c87fcb8cf0/bin"
bin_dir = "/var/lib/rancher/k3s/data/current/bin"
conf_dir = "/var/lib/rancher/k3s/agent/etc/cni/net.d"

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"

[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
SystemdCgroup = false



4 changes: 2 additions & 2 deletions pkg/kube/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
write-kubeconfig-mode: "0644"
cluster-init: true
log: "/var/lib/rancher/k3s/k3s.log"
# Remove debug flag before release to avoid overlogging
debug: true
# Use longhorn storage
disable: local-storage
etcd-expose-metrics: true
container-runtime-endpoint: "/run/containerd-user/containerd.sock"
etcd-arg:
Expand Down
Loading

0 comments on commit aa54809

Please sign in to comment.