Skip to content

Commit

Permalink
Merge pull request #1793 from tgstation/TheHuntForRedOctober [TGSDeploy]
Browse files Browse the repository at this point in the history
v6.3.2: Fix a deployment directory leak
  • Loading branch information
Cyberboss committed Mar 3, 2024
2 parents cde1ea6 + 660b549 commit d36b852
Show file tree
Hide file tree
Showing 8 changed files with 145 additions and 39 deletions.
35 changes: 24 additions & 11 deletions .github/workflows/ci-pipeline.yml
Expand Up @@ -1396,19 +1396,32 @@ jobs:
name: CI Completion Gate
needs: [ pages-build, docker-build, build-deb, build-msi, validate-openapi-spec, upload-code-coverage, check-winget-pr-template, code-scanning ]
runs-on: ubuntu-latest
permissions:
checks: write
contents: read
if: (!(cancelled() || failure()) && needs.pages-build.result == 'success' && needs.docker-build.result == 'success' && needs.build-deb.result == 'success' && needs.build-msi.result == 'success' && needs.validate-openapi-spec.result == 'success' && needs.upload-code-coverage.result == 'success' && needs.check-winget-pr-template.result == 'success' && needs.code-scanning.result == 'success')
steps:
- name: Create Completion Check
uses: LouisBrunner/checks-action@6b626ffbad7cc56fd58627f774b9067e6118af23
with:
token: ${{ secrets.GITHUB_TOKEN }}
name: CI Completion
conclusion: success
output: |
{"summary":"The CI Pipeline completed successfully"}
- name: Setup dotnet
uses: actions/setup-dotnet@v4
with:
dotnet-version: '${{ env.TGS_DOTNET_VERSION }}.0.x'
dotnet-quality: ${{ env.TGS_DOTNET_QUALITY }}

- name: Checkout (Branch)
uses: actions/checkout@v4
if: github.event_name == 'push' || github.event_name == 'schedule'

- name: Checkout (PR Merge)
uses: actions/checkout@v4
if: github.event_name != 'push' && github.event_name != 'schedule'
with:
ref: "refs/pull/${{ github.event.number }}/merge"

- name: Restore
run: dotnet restore

- name: Build ReleaseNotes
run: dotnet build -c Release -p:TGS_HOST_NO_WEBPANEL=true tools/Tgstation.Server.ReleaseNotes/Tgstation.Server.ReleaseNotes.csproj

- name: Run ReleaseNotes Create CI Completion Check
run: dotnet run -c Release --no-build --project tools/Tgstation.Server.ReleaseNotes --ci-completion-check ${{ github.sha }} ${{ secrets.TGS_CI_GITHUB_APP_TOKEN_BASE64 }}

deployment-gate:
name: Deployment Start Gate
Expand Down
6 changes: 3 additions & 3 deletions build/TestCommon.props
Expand Up @@ -3,7 +3,7 @@

<ItemGroup>
<!-- Usage: Code coverage collection -->
<PackageReference Include="coverlet.collector" Version="6.0.0">
<PackageReference Include="coverlet.collector" Version="6.0.1">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
Expand All @@ -18,9 +18,9 @@
<!-- Pinned: Be VERY careful about updating https://github.com/moq/moq/issues/1372 -->
<PackageReference Include="Moq" Version="4.20.70" />
<!-- Usage: MSTest execution -->
<PackageReference Include="MSTest.TestAdapter" Version="3.2.1" />
<PackageReference Include="MSTest.TestAdapter" Version="3.2.2" />
<!-- Usage: MSTest asserts etc... -->
<PackageReference Include="MSTest.TestFramework" Version="3.2.1" />
<PackageReference Include="MSTest.TestFramework" Version="3.2.2" />
</ItemGroup>

</Project>
2 changes: 1 addition & 1 deletion build/Version.props
Expand Up @@ -3,7 +3,7 @@
<!-- Integration tests will ensure they match across the board -->
<Import Project="WebpanelVersion.props" />
<PropertyGroup>
<TgsCoreVersion>6.3.1</TgsCoreVersion>
<TgsCoreVersion>6.3.2</TgsCoreVersion>
<TgsConfigVersion>5.1.0</TgsConfigVersion>
<TgsApiVersion>10.2.0</TgsApiVersion>
<TgsCommonLibraryVersion>7.0.0</TgsCommonLibraryVersion>
Expand Down
55 changes: 42 additions & 13 deletions src/Tgstation.Server.Host/Components/Deployment/DmbFactory.cs
Expand Up @@ -3,6 +3,7 @@
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

Expand Down Expand Up @@ -178,7 +179,8 @@ public IDmbProvider LockNextDmb(int lockCount)
{
var jobId = nextDmbProvider.CompileJob.Require(x => x.Id);
var incremented = jobLockCounts[jobId] += lockCount;
logger.LogTrace("Compile job {jobId} lock count now: {lockCount}", jobId, incremented);
logger.LogTrace("Compile job {jobId} lock increased by: {increment}", jobId, lockCount);
LogLockCounts();
return nextDmbProvider;
}
}
Expand Down Expand Up @@ -325,14 +327,18 @@ void CleanupAction()
if (!jobLockCounts.TryGetValue(compileJobId, out int value))
{
value = 1;
logger.LogTrace("Initializing lock count for compile job {id}", compileJobId);
jobLockCounts.Add(compileJobId, 1);
}
else
{
logger.LogTrace("FromCompileJob already had a jobLockCounts entry for {id}. Incrementing lock count to {value}.", compileJobId, value);
jobLockCounts[compileJobId] = ++value;
}

providerSubmitted = true;

logger.LogTrace("Compile job {id} lock count now: {lockCount}", compileJobId, value);
LogLockCounts();
return newProvider;
}
}
Expand Down Expand Up @@ -385,7 +391,7 @@ public async ValueTask CleanUnusedCompileJobs(CancellationToken cancellationToke
await ioManager.CreateDirectory(gameDirectory, cancellationToken);
var directories = await ioManager.GetDirectories(gameDirectory, cancellationToken);
int deleting = 0;
var tasks = directories.Select(async x =>
var tasks = directories.Select<string, ValueTask>(async x =>
{
var nameOnly = ioManager.GetFileName(x);
if (jobUidsToNotErase.Contains(nameOnly))
Expand All @@ -396,17 +402,13 @@ public async ValueTask CleanUnusedCompileJobs(CancellationToken cancellationToke
++deleting;
await DeleteCompileJobContent(x, cancellationToken);
}
catch (OperationCanceledException)
{
throw;
}
catch (Exception e)
catch (Exception e) when (e is not OperationCanceledException)
{
logger.LogWarning(e, "Error deleting directory {dirName}!", x);
}
}).ToList();
if (deleting > 0)
await Task.WhenAll(tasks);
await ValueTaskExtensions.WhenAll(tasks);
}
#pragma warning restore CA1506

Expand Down Expand Up @@ -435,14 +437,14 @@ async Task WrapThrowableTasks()
// First kill the GitHub deployment
var remoteDeploymentManager = remoteDeploymentManagerFactory.CreateRemoteDeploymentManager(metadata, job);

// DCT: None available
var deploymentJob = remoteDeploymentManager.MarkInactive(job, CancellationToken.None);
var cancellationToken = cleanupCts.Token;
var deploymentJob = remoteDeploymentManager.MarkInactive(job, cancellationToken);

var deleteTask = DeleteCompileJobContent(job.DirectoryName!.Value.ToString(), cleanupCts.Token);
var deleteTask = DeleteCompileJobContent(job.DirectoryName!.Value.ToString(), cancellationToken);

await ValueTaskExtensions.WhenAll(deleteTask, deploymentJob);
}
catch (Exception ex)
catch (Exception ex) when (ex is not OperationCanceledException)
{
logger.LogWarning(ex, "Error cleaning up compile job {jobGuid}!", job.DirectoryName);
}
Expand All @@ -468,6 +470,8 @@ async Task WrapThrowableTasks()
}
else
logger.LogError("Extra Dispose of DmbProvider for CompileJob {compileJobId}!", jobId);

LogLockCounts();
}
}

Expand All @@ -483,5 +487,30 @@ async ValueTask DeleteCompileJobContent(string directory, CancellationToken canc
await eventConsumer.HandleEvent(EventType.DeploymentCleanup, new List<string> { ioManager.ResolvePath(directory) }, true, cancellationToken);
await ioManager.DeleteDirectory(directory, cancellationToken);
}

/// <summary>
/// Writes every compile job ID in <see cref="jobLockCounts"/> along with its current lock count to the log at Trace level.
/// </summary>
/// <remarks>
/// The caller must already hold the lock on <see cref="jobLockCounts"/> when invoking this function.
/// When no compile jobs are tracked, a single warning is logged instead of the listing.
/// </remarks>
void LogLockCounts()
{
	if (jobLockCounts.Count > 0)
	{
		// One "\n\t- <jobId>: <count>" entry per tracked compile job, appended after the message header.
		var details = new StringBuilder();
		foreach (var entry in jobLockCounts)
			details
				.AppendLine()
				.Append("\t- ")
				.Append(entry.Key)
				.Append(": ")
				.Append(entry.Value);

		logger.LogTrace("Compile Job Lock Counts:{details}", details.ToString());
		return;
	}

	logger.LogWarning("No compile jobs registered!");
}
}
}
Expand Up @@ -87,7 +87,7 @@ public ValueTask MakeActive(CancellationToken cancellationToken)
}

/// <summary>
/// Should be <see langword="await"/>. before calling <see cref="MakeActive(CancellationToken)"/> to ensure the <see cref="SwappableDmbProvider"/> is ready to instantly swap. Can be called multiple times.
/// Should be <see langword="await"/>ed before calling <see cref="MakeActive(CancellationToken)"/> to ensure the <see cref="SwappableDmbProvider"/> is ready to instantly swap. Can be called multiple times.
/// </summary>
/// <param name="cancellationToken">The <see cref="CancellationToken"/> for the operation.</param>
/// <returns>A <see cref="Task"/> representing the preparation process.</returns>
Expand Down
Expand Up @@ -127,8 +127,12 @@ protected sealed override async ValueTask DisposeAndNullControllersImpl()

// If we reach this point, we can guarantee PrepServerForLaunch will be called before starting again.
ActiveSwappable = null;
await (pendingSwappable?.DisposeAsync() ?? ValueTask.CompletedTask);
pendingSwappable = null;

if (pendingSwappable != null)
{
await pendingSwappable.DisposeAsync();
pendingSwappable = null;
}

await DrainDeploymentCleanupTasks(true);
}
Expand All @@ -138,8 +142,6 @@ protected sealed override async ValueTask<MonitorAction> HandleNormalReboot(Canc
{
if (pendingSwappable != null)
{
ValueTask RunPrequel() => BeforeApplyDmb(pendingSwappable.CompileJob, cancellationToken);

var needToSwap = !pendingSwappable.Swapped;
var controller = Server!;
if (needToSwap)
Expand All @@ -151,7 +153,6 @@ protected sealed override async ValueTask<MonitorAction> HandleNormalReboot(Canc
// integration test logging will catch this
Logger.LogError(
"The reboot bridge request completed before the watchdog could suspend the server! This can lead to buggy DreamDaemon behaviour and should be reported! To ensure stability, we will need to hard reboot the server");
await RunPrequel();
return MonitorAction.Restart;
}

Expand All @@ -165,7 +166,7 @@ protected sealed override async ValueTask<MonitorAction> HandleNormalReboot(Canc
}
}

var updateTask = RunPrequel();
var updateTask = BeforeApplyDmb(pendingSwappable.CompileJob, cancellationToken);
if (needToSwap)
await PerformDmbSwap(pendingSwappable, cancellationToken);

Expand All @@ -183,7 +184,8 @@ async Task CleanupLingeringDeployment()
currentCompileJobId,
lingeringDeploymentExpirySeconds);

var timeout = AsyncDelayer.Delay(TimeSpan.FromSeconds(lingeringDeploymentExpirySeconds), cancellationToken);
// DCT: A cancel firing here can result in us leaving a dmbprovider undisposed, localDeploymentCleanupGate will always fire in that case
var timeout = AsyncDelayer.Delay(TimeSpan.FromSeconds(lingeringDeploymentExpirySeconds), CancellationToken.None);

var completedTask = await Task.WhenAny(
localDeploymentCleanupGate.Task,
Expand Down
4 changes: 2 additions & 2 deletions src/Tgstation.Server.Host/Tgstation.Server.Host.csproj
Expand Up @@ -104,7 +104,7 @@
<!-- Usage: GitHub.com interop -->
<PackageReference Include="Octokit" Version="10.0.0" />
<!-- Usage: MYSQL/MariaDB ORM plugin -->
<PackageReference Include="Pomelo.EntityFrameworkCore.MySql" Version="8.0.0" />
<PackageReference Include="Pomelo.EntityFrameworkCore.MySql" Version="8.0.1" />
<!-- Usage: Discord interop -->
<PackageReference Include="Remora.Discord" Version="2024.1.0" />
<!-- Usage: Rich logger builder -->
Expand All @@ -128,7 +128,7 @@
<!-- Usage: Temporary resolution to compatibility issues with EFCore 7 and .NET 8 -->
<PackageReference Include="System.Security.Permissions" Version="8.0.0" />
<!-- Usage: .DeleteAsync() support for IQueryable<T>s -->
<PackageReference Include="Z.EntityFramework.Plus.EFCore" Version="8.102.1" />
<PackageReference Include="Z.EntityFramework.Plus.EFCore" Version="8.102.1.1" />
</ItemGroup>

<ItemGroup>
Expand Down
64 changes: 63 additions & 1 deletion tools/Tgstation.Server.ReleaseNotes/Program.cs
@@ -1,20 +1,25 @@
// This program is minimal effort and should be sent to remedial school

using System;
using System.Buffers.Text;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.IdentityModel.Tokens.Jwt;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.Sockets;
using System.Security;
using System.Security.Cryptography;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using System.Xml.Linq;

using Microsoft.IdentityModel.Tokens;

using Newtonsoft.Json;

using Octokit;
Expand All @@ -32,8 +37,11 @@ namespace Tgstation.Server.ReleaseNotes
static class Program
{
const string OutputPath = "release_notes.md";

// some stuff that should be abstracted for different repos
const string RepoOwner = "tgstation";
const string RepoName = "tgstation-server";
const int AppId = 847638;

/// <summary>
/// The entrypoint for the <see cref="Program"/>
Expand All @@ -52,13 +60,15 @@ static async Task<int> Main(string[] args)
var shaCheck = versionString.Equals("--winget-template-check", StringComparison.OrdinalIgnoreCase);
var fullNotes = versionString.Equals("--generate-full-notes", StringComparison.OrdinalIgnoreCase);
var nuget = versionString.Equals("--nuget", StringComparison.OrdinalIgnoreCase);
var ciCompletionCheck = versionString.Equals("--ci-completion-check", StringComparison.OrdinalIgnoreCase);

if ((!Version.TryParse(versionString, out var version) || version.Revision != -1)
&& !ensureRelease
&& !linkWinget
&& !shaCheck
&& !fullNotes
&& !nuget)
&& !nuget
&& !ciCompletionCheck)
{
Console.WriteLine("Invalid version: " + versionString);
return 2;
Expand Down Expand Up @@ -129,6 +139,17 @@ static async Task<int> Main(string[] args)
return await Winget(client, actionsUrl, null);
}

if (ciCompletionCheck)
{
if (args.Length < 3)
{
Console.WriteLine("Missing SHA or PEM Base64 for creating check run!");
return 4543;
}

return await CICompletionCheck(client, args[1], args[2]);
}

if (shaCheck)
{
if(args.Length < 2)
Expand Down Expand Up @@ -1583,6 +1604,47 @@ static async Task<int> GenDebianChangelog(IGitHubClient client, Version version,
return 0;
}

/// <summary>
/// Creates a completed, successful "CI Completion" check run on a commit while authenticated as the GitHub App's installation on the repository.
/// </summary>
/// <param name="gitHubClient">The <see cref="GitHubClient"/> to use. Its credentials are overwritten, first with the app JWT and then with the installation token.</param>
/// <param name="currentSha">The SHA of the commit the check run is attached to.</param>
/// <param name="pemBase64">The Base64 encoding of the UTF-8 PEM text of the GitHub App's RSA private key.</param>
/// <returns>A <see cref="ValueTask{TResult}"/> resulting in the program exit code, which is 0 whenever no exception is thrown.</returns>
static async ValueTask<int> CICompletionCheck(GitHubClient gitHubClient, string currentSha, string pemBase64)
{
	var pemBytes = Convert.FromBase64String(pemBase64);
	var pem = Encoding.UTF8.GetString(pemBytes);

	// The private key is only needed to sign the JWT, release it when we leave the method
	using var rsa = RSA.Create();
	rsa.ImportFromPem(pem);

	var securityKey = new RsaSecurityKey(rsa)
	{
		// Signature providers are cached by default, which would keep a reference to the RSA instance after it is disposed
		CryptoProviderFactory = new CryptoProviderFactory
		{
			CacheSignatureProviders = false,
		},
	};

	var signingCredentials = new SigningCredentials(securityKey, SecurityAlgorithms.RsaSha256);
	var jwtSecurityTokenHandler = new JwtSecurityTokenHandler { SetDefaultTimesOnTokenCreation = false };

	var now = DateTime.UtcNow;

	var jwt = jwtSecurityTokenHandler.CreateToken(new SecurityTokenDescriptor
	{
		Issuer = AppId.ToString(System.Globalization.CultureInfo.InvariantCulture),

		// GitHub rejects JWTs that expire more than 10 minutes in the future, leave a margin for clock drift
		Expires = now.AddMinutes(9),

		// GitHub recommends back-dating the issue time by 60 seconds to tolerate clock drift
		IssuedAt = now.AddSeconds(-60),
		SigningCredentials = signingCredentials,
	});

	var jwtStr = jwtSecurityTokenHandler.WriteToken(jwt);

	// Authenticate as the app itself to look up its installation on the repository
	gitHubClient.Credentials = new Credentials(jwtStr, AuthenticationType.Bearer);

	var installation = await gitHubClient.GitHubApps.GetRepositoryInstallationForCurrent(RepoOwner, RepoName);
	var installToken = await gitHubClient.GitHubApps.CreateInstallationToken(installation.Id);

	// Switch to the installation token for the actual check run creation
	gitHubClient.Credentials = new Credentials(installToken.Token);

	await gitHubClient.Check.Run.Create(RepoOwner, RepoName, new NewCheckRun("CI Completion", currentSha)
	{
		CompletedAt = now,
		Conclusion = CheckConclusion.Success,
		Output = new NewCheckRunOutput("CI Completion", "The CI Pipeline completed successfully"),
		Status = CheckStatus.Completed,
	});

	return 0;
}

static void DebugAssert(bool condition, string message = null)
{
// This exists because one of the fucking asserts evaluates an enumerable or something and it was getting optimized out in release
Expand Down

0 comments on commit d36b852

Please sign in to comment.