Skip to content

Commit

Permalink
Migrate long-running test to new cluster to enable monitoring (#7357)
Browse files Browse the repository at this point in the history
  • Loading branch information
Young Bu Park committed Mar 22, 2024
1 parent 2a681be commit 1a06994
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 72 deletions.
28 changes: 16 additions & 12 deletions .github/scripts/cleanup-cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,26 @@ set -e
echo "cleaning up cluster"

# Delete all test resources in queuemessages.ucp.dev.
echo "delete all resources in queuemessages.ucp.dev"
kubectl delete queuemessages.ucp.dev -n radius-system --all
if kubectl get crd queuemessages.ucp.dev > /dev/null 2>&1; then
echo "delete all resources in queuemessages.ucp.dev"
kubectl delete queuemessages.ucp.dev -n radius-system --all
fi

# Testing deletion of deployment.apps.

# Delete all test resources in resources.ucp.dev, excluding proxy resources.
echo "delete all resources in resources.ucp.dev"
resources=$(kubectl get resources.ucp.dev -n radius-system --no-headers -o custom-columns=":metadata.name")
for r in $resources; do
if [[ $r == scope.local.* || $r == scope.aws.* || -z "$r" ]]; then
echo "skip deletion: $r"
else
echo "delete resource: $r"
kubectl delete resources.ucp.dev $r -n radius-system --ignore-not-found=true
fi
done
if kubectl get crd resources.ucp.dev > /dev/null 2>&1; then
echo "delete all resources in resources.ucp.dev"
resources=$(kubectl get resources.ucp.dev -n radius-system --no-headers -o custom-columns=":metadata.name")
for r in $resources; do
if [[ $r == scope.local.* || $r == scope.aws.* || -z "$r" ]]; then
echo "skip deletion: $r"
else
echo "delete resource: $r"
kubectl delete resources.ucp.dev $r -n radius-system --ignore-not-found=true
fi
done
fi

# Delete all test namespaces.
echo "delete all test namespaces"
Expand Down
12 changes: 7 additions & 5 deletions .github/workflows/long-running-azure.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
# the previous build is still valid. If valid, the workflow skips the build steps
# and uses the cached 'rad cli' for testing.
#
# Grafana dashboard URL: https://radiuse2e00-dashboard-audycmffgberbghy.wus3.grafana.azure.com/
# Grafana dashboard URL: https://radlrtest00-dashboard-e4ffc0cwggchdhba.wus3.grafana.azure.com

name: Long-running test on Azure

Expand All @@ -44,6 +44,8 @@ permissions:
packages: write # Required for uploading the package

on:
# Enable manual trigger to deploy the latest changes from main.
workflow_dispatch:
schedule:
# Run every 2 hours
- cron: "0 */2 * * *"
Expand Down Expand Up @@ -82,9 +84,9 @@ env:
VALID_RADIUS_BUILD_WINDOW: 86400

# The AKS cluster name
AKS_CLUSTER_NAME: "radiuse2e00-aks"
AKS_CLUSTER_NAME: "radlrtest00-aks"
# The resource group for AKS_CLUSTER_NAME resource.
AKS_RESOURCE_GROUP: "radiuse2e00"
AKS_RESOURCE_GROUP: "radlrtest00"

# Server where terraform test modules are deployed
TF_RECIPE_MODULE_SERVER_URL: "http://tf-module-server.radius-test-tf-module-server.svc.cluster.local"
Expand Down Expand Up @@ -115,7 +117,7 @@ jobs:
path: ./dist/cache
key: radius-test-latest-
- name: Skip build if build is still valid
if: github.event_name != 'pull_request'
if: github.event_name != 'pull_request' && github.event_name != 'workflow_dispatch'
id: skip-build
run: |
# Check the last build time to see if we need to build again
Expand Down Expand Up @@ -499,7 +501,7 @@ jobs:
name: Report test failure
needs: [build, tests]
runs-on: ubuntu-latest
if: failure() && github.repository == 'radius-project/radius'
if: failure() && github.repository == 'radius-project/radius' && github.event_name == 'schedule'
steps:
- name: Create failure issue for failing long running test run
uses: actions/github-script@v6
Expand Down
8 changes: 4 additions & 4 deletions test/infra/azure/bicepconfig.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"experimentalFeaturesEnabled": {
"extensibility": true
}
}
"experimentalFeaturesEnabled": {
"extensibility": true
}
}
2 changes: 1 addition & 1 deletion test/infra/azure/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ module aksCluster './modules/akscluster.bicep' = {
params: {
name: aksClusterName
location: location
kubernetesVersion: '1.28.3'
kubernetesVersion: '1.28.5'
logAnalyticsWorkspaceId: logAnalyticsWorkspace.outputs.id
systemAgentPoolName: 'agentpool'
systemAgentPoolVmSize: 'Standard_D4as_v5'
Expand Down
52 changes: 2 additions & 50 deletions test/infra/azure/modules/akscluster.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -307,9 +307,6 @@ param autoScalerProfileMaxGracefulTerminationSec string = '600'
@description('Specifies the resource id of the Log Analytics workspace.')
param logAnalyticsWorkspaceId string

@description('Specifies the workspace data retention in days.')
param retentionInDays int = 30

@description('Specifies the location.')
param location string = resourceGroup().location

Expand Down Expand Up @@ -340,42 +337,8 @@ param imageCleanerIntervalHours int = 24
@description('Specifies whether to enable Workload Identity. The default value is false.')
param workloadIdentityEnabled bool = false

// Variables
var diagnosticSettingsName = 'diagnosticSettings'
var logCategories = [
'kube-apiserver'
'kube-audit'
'kube-audit-admin'
'kube-controller-manager'
'kube-scheduler'
'cluster-autoscaler'
'cloud-controller-manager'
'guard'
'csi-azuredisk-controller'
'csi-azurefile-controller'
'csi-snapshot-controller'
]
var metricCategories = [
'AllMetrics'
]
var logs = [for category in logCategories: {
category: category
enabled: true
retentionPolicy: {
enabled: true
days: retentionInDays
}
}]
var metrics = [for category in metricCategories: {
category: category
enabled: true
retentionPolicy: {
enabled: true
days: retentionInDays
}
}]

resource aksCluster 'Microsoft.ContainerService/managedClusters@2023-05-01' = {
resource aksCluster 'Microsoft.ContainerService/managedClusters@2023-10-01' = {
name: name
location: location
tags: tags
Expand Down Expand Up @@ -515,7 +478,7 @@ resource aksCluster 'Microsoft.ContainerService/managedClusters@2023-05-01' = {
}

// Dapr Extension
resource daprExtension 'Microsoft.KubernetesConfiguration/extensions@2022-04-02-preview' = if (daprEnabled) {
resource daprExtension 'Microsoft.KubernetesConfiguration/extensions@2022-07-01' = if (daprEnabled) {
name: 'dapr'
scope: aksCluster
properties: {
Expand All @@ -534,17 +497,6 @@ resource daprExtension 'Microsoft.KubernetesConfiguration/extensions@2022-04-02-
}
}

// Diagnostic Settings
resource diagnosticSettings 'Microsoft.Insights/diagnosticSettings@2021-05-01-preview' = {
name: diagnosticSettingsName
scope: aksCluster
properties: {
workspaceId: logAnalyticsWorkspaceId
logs: logs
metrics: metrics
}
}

// Output
output id string = aksCluster.id
output name string = aksCluster.name
Expand Down

0 comments on commit 1a06994

Please sign in to comment.