-
Notifications
You must be signed in to change notification settings - Fork 113
Migrate IPyPiClient cache to LRU MemoryCache #80
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
1f32ecc
59704e7
a158a35
eca743f
6e939ba
9b5e28a
3f1aa13
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| using System.Net; | ||
|
|
||
| namespace Microsoft.ComponentDetection.Common.Telemetry.Records | ||
| { | ||
| /// <summary> | ||
| /// Telemetry record holding usage statistics for the PyPi response cache. | ||
| /// </summary> | ||
| public class PypiCacheTelemetryRecord : BaseDetectionTelemetryRecord | ||
| { | ||
| /// <summary> | ||
| /// Gets the record name, "PyPiCache". | ||
| /// </summary> | ||
| public override string RecordName | ||
| { | ||
| get { return "PyPiCache"; } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Gets or sets the count of PyPi requests that were answered from the cache rather than by the PyPi APIs. | ||
| /// </summary> | ||
| public int NumCacheHits { get; set; } | ||
|
|
||
| /// <summary> | ||
| /// Gets or sets the number of entries in the PyPi cache at class destruction. | ||
| /// </summary> | ||
| public int FinalCacheSize { get; set; } | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,4 @@ | ||
| using System; | ||
| using System.Collections.Concurrent; | ||
| using System.Collections.Generic; | ||
| using System.Composition; | ||
| using System.IO; | ||
|
|
@@ -11,6 +10,7 @@ | |
| using System.Threading.Tasks; | ||
| using Microsoft.ComponentDetection.Common.Telemetry.Records; | ||
| using Microsoft.ComponentDetection.Contracts; | ||
| using Microsoft.Extensions.Caching.Memory; | ||
| using Newtonsoft.Json; | ||
| using Polly; | ||
|
|
||
|
|
@@ -31,10 +31,18 @@ public class PyPiClient : IPyPiClient | |
| [Import] | ||
| public ILogger Logger { get; set; } | ||
|
|
||
| [Import] | ||
| public IEnvironmentVariableService EnvironmentVariableService { get; set; } | ||
|
|
||
| private static HttpClientHandler httpClientHandler = new HttpClientHandler() { CheckCertificateRevocationList = true }; | ||
|
|
||
| internal static HttpClient HttpClient = new HttpClient(httpClientHandler); | ||
|
|
||
| // Values used for cache creation | ||
| private const long CACHEINTERVALSECONDS = 60; | ||
| private const long DEFAULTCACHEENTRIES = 128; | ||
|
Comment on lines
+42
to
+43
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Were these values just estimates?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, they are currently estimates, but I introduced a new telemetry class to track cache usage statistics so we can adjust them in the future. |
||
| private bool checkedMaxEntriesVariable = false; | ||
|
|
||
| // time to wait before retrying a failed call to pypi.org | ||
| private static readonly TimeSpan RETRYDELAY = TimeSpan.FromSeconds(1); | ||
|
|
||
|
|
@@ -45,52 +53,83 @@ public class PyPiClient : IPyPiClient | |
| private long retries = 0; | ||
|
|
||
| /// <summary> | ||
| /// This cache is used mostly for consistency, to create a unified view of Pypi response. | ||
| /// A thread safe cache implementation which contains a mapping of URI -> HttpResponseMessage | ||
| /// and has a limited number of entries which will expire after the cache fills or a specified interval. | ||
| /// </summary> | ||
| private readonly ConcurrentDictionary<string, Task<HttpResponseMessage>> cachedResponses = new ConcurrentDictionary<string, Task<HttpResponseMessage>>(); | ||
| private MemoryCache cachedResponses = new MemoryCache(new MemoryCacheOptions { SizeLimit = DEFAULTCACHEENTRIES }); | ||
|
|
||
| // Keep telemetry on how the cache is being used for future refinements | ||
| private PypiCacheTelemetryRecord cacheTelemetry; | ||
|
|
||
| /// <summary> | ||
| /// Initializes a new instance of the <see cref="PyPiClient"/> class with a zeroed cache telemetry record. | ||
| /// </summary> | ||
| public PyPiClient() | ||
| { | ||
| // Start both counters at zero; they are updated as the cache is used. | ||
| var telemetry = new PypiCacheTelemetryRecord(); | ||
| telemetry.NumCacheHits = 0; | ||
| telemetry.FinalCacheSize = 0; | ||
| cacheTelemetry = telemetry; | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Finalizer: stores the number of entries left in the cache on the telemetry record and disposes the record. | ||
| /// Telemetry is best-effort here, so failures are swallowed instead of being allowed to escape. | ||
| /// </summary> | ||
| ~PyPiClient() | ||
| { | ||
| try | ||
| { | ||
| // Fields can be null if construction failed part-way, and finalization order is not deterministic. | ||
| if (cacheTelemetry != null) | ||
| { | ||
| cacheTelemetry.FinalCacheSize = cachedResponses?.Count ?? 0; | ||
| cacheTelemetry.Dispose(); | ||
| } | ||
| } | ||
| catch (Exception) | ||
| { | ||
| // An exception escaping a finalizer terminates the process; losing one telemetry record is preferable. | ||
| } | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Returns a cached response if it exists, otherwise returns the response from Pypi REST call. | ||
| /// The response from Pypi is not automatically added to the cache, to allow caller to make that decision. | ||
| /// Returns a cached response if it exists, otherwise returns the response from PyPi REST call. | ||
| /// The response from PyPi is automatically added to the cache. | ||
| /// </summary> | ||
| /// <param name="uri">The REST Uri to call.</param> | ||
| /// <returns>The cached response or a new result from Pypi.</returns> | ||
| private async Task<HttpResponseMessage> GetPypiResponse(string uri) | ||
| /// <returns>The cached response or a new result from PyPi.</returns> | ||
| private async Task<HttpResponseMessage> GetAndCachePyPiResponse(string uri) | ||
| { | ||
| if (cachedResponses.TryGetValue(uri, out var value)) | ||
| if (!checkedMaxEntriesVariable) | ||
| { | ||
| InitializeNonDefaultMemoryCache(); | ||
| } | ||
|
|
||
| if (cachedResponses.TryGetValue(uri, out HttpResponseMessage result)) | ||
| { | ||
| return await value; | ||
| cacheTelemetry.NumCacheHits++; | ||
| Logger.LogVerbose("Retrieved cached Python data from " + uri); | ||
| return result; | ||
| } | ||
|
|
||
| Logger.LogInfo("Getting Python data from " + uri); | ||
| return await HttpClient.GetAsync(uri); | ||
| var response = await HttpClient.GetAsync(uri); | ||
|
|
||
| // The `first - wins` response accepted into the cache. This might be different from the input if another caller wins the race. | ||
| return await cachedResponses.GetOrCreateAsync(uri, cacheEntry => | ||
| { | ||
| cacheEntry.SlidingExpiration = TimeSpan.FromSeconds(CACHEINTERVALSECONDS); // This entry will expire after CACHEINTERVALSECONDS seconds from last use | ||
| cacheEntry.Size = 1; // Specify a size of 1 so a set number of entries can always be in the cache | ||
| return Task.FromResult(response); | ||
| }); | ||
| } | ||
|
|
||
| /// <summary> | ||
| /// Used to update the consistency cache, decision has to be made by the caller to allow for retries!. | ||
| /// On the initial caching attempt, see if the user specified an override for | ||
| /// PyPiMaxCacheEntries and recreate the cache if needed. | ||
| /// </summary> | ||
| /// <param name="uri">The REST Uri to call.</param> | ||
| /// <param name="message">The proposed response by the caller to store for this Uri.</param> | ||
| /// <returns>The `first-wins` response accepted into the cache. | ||
| /// This might be different from the input if another caller wins the race!.</returns> | ||
| private async Task<HttpResponseMessage> CachePypiResponse(string uri, HttpResponseMessage message) | ||
| private void InitializeNonDefaultMemoryCache() | ||
| { | ||
| if (!cachedResponses.TryAdd(uri, Task.FromResult(message))) | ||
| var maxEntriesVariable = EnvironmentVariableService.GetEnvironmentVariable("PyPiMaxCacheEntries"); | ||
| if (!string.IsNullOrEmpty(maxEntriesVariable) && long.TryParse(maxEntriesVariable, out var maxEntries)) | ||
| { | ||
| return await cachedResponses[uri]; | ||
| Logger.LogInfo($"Setting IPyPiClient max cache entries to {maxEntries}"); | ||
| cachedResponses = new MemoryCache(new MemoryCacheOptions { SizeLimit = maxEntries }); | ||
| } | ||
|
|
||
| return message; | ||
| checkedMaxEntriesVariable = true; | ||
| } | ||
|
|
||
| public async Task<IList<PipDependencySpecification>> FetchPackageDependencies(string name, string version, PythonProjectRelease release) | ||
| { | ||
| var dependencies = new List<PipDependencySpecification>(); | ||
|
|
||
| var uri = release.Url.ToString(); | ||
| var response = await GetPypiResponse(uri); | ||
|
|
||
| response = await CachePypiResponse(uri, response); | ||
| var response = await GetAndCachePyPiResponse(uri); | ||
|
|
||
| if (!response.IsSuccessStatusCode) | ||
| { | ||
|
|
@@ -169,11 +208,9 @@ public async Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetRele | |
| return Task.FromResult<HttpResponseMessage>(null); | ||
| } | ||
|
|
||
| return GetPypiResponse(requestUri); | ||
| return GetAndCachePyPiResponse(requestUri); | ||
| }); | ||
|
|
||
| request = await CachePypiResponse(requestUri, request); | ||
|
|
||
| if (request == null) | ||
| { | ||
| using var r = new PypiMaxRetriesReachedTelemetryRecord { Name = spec.Name, DependencySpecifiers = spec.DependencySpecifiers?.ToArray() }; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.