diff --git a/Directory.Packages.props b/Directory.Packages.props
index 8c9899cfd..db5101ffc 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -14,6 +14,7 @@
+
diff --git a/src/Microsoft.ComponentDetection.Common/EnvironmentVariableService.cs b/src/Microsoft.ComponentDetection.Common/EnvironmentVariableService.cs
index b3aeb89d8..b4dc7733a 100644
--- a/src/Microsoft.ComponentDetection.Common/EnvironmentVariableService.cs
+++ b/src/Microsoft.ComponentDetection.Common/EnvironmentVariableService.cs
@@ -9,13 +9,20 @@ namespace Microsoft.ComponentDetection.Common
public class EnvironmentVariableService : IEnvironmentVariableService
{
public bool DoesEnvironmentVariableExist(string name)
+ {
+ return GetEnvironmentVariable(name) != null;
+ }
+
+ public string GetEnvironmentVariable(string name)
{
// Environment variables are case-insensitive on Windows, and case-sensitive on
// Linux and MacOS.
// https://docs.microsoft.com/en-us/dotnet/api/system.environment.getenvironmentvariable
- return Environment.GetEnvironmentVariables().Keys
+ var caseInsensitiveName = Environment.GetEnvironmentVariables().Keys
.OfType()
- .FirstOrDefault(x => string.Compare(x, name, true) == 0) != null;
+ .FirstOrDefault(x => string.Compare(x, name, true) == 0);
+
+ return caseInsensitiveName != null ? Environment.GetEnvironmentVariable(caseInsensitiveName) : null;
}
}
}
diff --git a/src/Microsoft.ComponentDetection.Common/Telemetry/Records/PyPiCacheTelemetryRecord.cs b/src/Microsoft.ComponentDetection.Common/Telemetry/Records/PyPiCacheTelemetryRecord.cs
new file mode 100644
index 000000000..39ba28e89
--- /dev/null
+++ b/src/Microsoft.ComponentDetection.Common/Telemetry/Records/PyPiCacheTelemetryRecord.cs
@@ -0,0 +1,19 @@
+using System.Net;
+
+namespace Microsoft.ComponentDetection.Common.Telemetry.Records
+{
+ public class PypiCacheTelemetryRecord : BaseDetectionTelemetryRecord
+ {
+ public override string RecordName => "PyPiCache";
+
+ ///
+ /// Gets or sets total number of PyPi requests that hit the cache instead of PyPi APIs.
+ ///
+ public int NumCacheHits { get; set; }
+
+ ///
+ /// Gets or sets the size of the PyPi cache at class destruction.
+ ///
+ public int FinalCacheSize { get; set; }
+ }
+}
diff --git a/src/Microsoft.ComponentDetection.Contracts/IEnvironmentVariableService.cs b/src/Microsoft.ComponentDetection.Contracts/IEnvironmentVariableService.cs
index 436713125..0026aa6fe 100644
--- a/src/Microsoft.ComponentDetection.Contracts/IEnvironmentVariableService.cs
+++ b/src/Microsoft.ComponentDetection.Contracts/IEnvironmentVariableService.cs
@@ -3,5 +3,7 @@ namespace Microsoft.ComponentDetection.Contracts
public interface IEnvironmentVariableService
{
bool DoesEnvironmentVariableExist(string name);
+
+ string GetEnvironmentVariable(string name);
}
}
diff --git a/src/Microsoft.ComponentDetection.Detectors/Microsoft.ComponentDetection.Detectors.csproj b/src/Microsoft.ComponentDetection.Detectors/Microsoft.ComponentDetection.Detectors.csproj
index 1a27aefcf..c71619ee8 100644
--- a/src/Microsoft.ComponentDetection.Detectors/Microsoft.ComponentDetection.Detectors.csproj
+++ b/src/Microsoft.ComponentDetection.Detectors/Microsoft.ComponentDetection.Detectors.csproj
@@ -9,6 +9,7 @@
+
diff --git a/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs b/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs
index 86077e896..938ee8b5a 100644
--- a/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs
+++ b/src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs
@@ -1,5 +1,4 @@
using System;
-using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Composition;
using System.IO;
@@ -11,6 +10,7 @@
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Common.Telemetry.Records;
using Microsoft.ComponentDetection.Contracts;
+using Microsoft.Extensions.Caching.Memory;
using Newtonsoft.Json;
using Polly;
@@ -31,10 +31,18 @@ public class PyPiClient : IPyPiClient
[Import]
public ILogger Logger { get; set; }
+ [Import]
+ public IEnvironmentVariableService EnvironmentVariableService { get; set; }
+
private static HttpClientHandler httpClientHandler = new HttpClientHandler() { CheckCertificateRevocationList = true };
internal static HttpClient HttpClient = new HttpClient(httpClientHandler);
+ // Values used for cache creation
+ private const long CACHEINTERVALSECONDS = 60;
+ private const long DEFAULTCACHEENTRIES = 128;
+ private bool checkedMaxEntriesVariable = false;
+
// time to wait before retrying a failed call to pypi.org
private static readonly TimeSpan RETRYDELAY = TimeSpan.FromSeconds(1);
@@ -45,42 +53,75 @@ public class PyPiClient : IPyPiClient
private long retries = 0;
///
- /// This cache is used mostly for consistency, to create a unified view of Pypi response.
+ /// A thread safe cache implementation which contains a mapping of URI -> HttpResponseMessage
+ /// and has a limited number of entries which will expire after the cache fills or a specified interval.
///
- private readonly ConcurrentDictionary> cachedResponses = new ConcurrentDictionary>();
+ private MemoryCache cachedResponses = new MemoryCache(new MemoryCacheOptions { SizeLimit = DEFAULTCACHEENTRIES });
+
+ // Keep telemetry on how the cache is being used for future refinements
+ private PypiCacheTelemetryRecord cacheTelemetry;
+
+ public PyPiClient()
+ {
+ cacheTelemetry = new PypiCacheTelemetryRecord()
+ {
+ NumCacheHits = 0,
+ FinalCacheSize = 0,
+ };
+ }
+
+ ~PyPiClient()
+ {
+ cacheTelemetry.FinalCacheSize = cachedResponses.Count;
+ cacheTelemetry.Dispose();
+ }
///
- /// Returns a cached response if it exists, otherwise returns the response from Pypi REST call.
- /// The response from Pypi is not automatically added to the cache, to allow caller to make that decision.
+ /// Returns a cached response if it exists, otherwise returns the response from PyPi REST call.
+ /// The response from PyPi is automatically added to the cache.
///
/// The REST Uri to call.
- /// The cached response or a new result from Pypi.
- private async Task GetPypiResponse(string uri)
+ /// The cached response or a new result from PyPi.
+ private async Task GetAndCachePyPiResponse(string uri)
{
- if (cachedResponses.TryGetValue(uri, out var value))
+ if (!checkedMaxEntriesVariable)
+ {
+ InitializeNonDefaultMemoryCache();
+ }
+
+ if (cachedResponses.TryGetValue(uri, out HttpResponseMessage result))
{
- return await value;
+ cacheTelemetry.NumCacheHits++;
+ Logger.LogVerbose("Retrieved cached Python data from " + uri);
+ return result;
}
Logger.LogInfo("Getting Python data from " + uri);
- return await HttpClient.GetAsync(uri);
+ var response = await HttpClient.GetAsync(uri);
+
+ // The `first - wins` response accepted into the cache. This might be different from the input if another caller wins the race.
+ return await cachedResponses.GetOrCreateAsync(uri, cacheEntry =>
+ {
+ cacheEntry.SlidingExpiration = TimeSpan.FromSeconds(CACHEINTERVALSECONDS); // This entry will expire after CACHEINTERVALSECONDS seconds from last use
+ cacheEntry.Size = 1; // Specify a size of 1 so a set number of entries can always be in the cache
+ return Task.FromResult(response);
+ });
}
///
- /// Used to update the consistency cache, decision has to be made by the caller to allow for retries!.
+ /// On the initial caching attempt, see if the user specified an override for
+ /// PyPiMaxCacheEntries and recreate the cache if needed.
///
- /// The REST Uri to call.
- /// The proposed response by the caller to store for this Uri.
- /// The `first-wins` response accepted into the cache.
- /// This might be different from the input if another caller wins the race!.
- private async Task CachePypiResponse(string uri, HttpResponseMessage message)
+ private void InitializeNonDefaultMemoryCache()
{
- if (!cachedResponses.TryAdd(uri, Task.FromResult(message)))
+ var maxEntriesVariable = EnvironmentVariableService.GetEnvironmentVariable("PyPiMaxCacheEntries");
+ if (!string.IsNullOrEmpty(maxEntriesVariable) && long.TryParse(maxEntriesVariable, out var maxEntries))
{
- return await cachedResponses[uri];
+ Logger.LogInfo($"Setting IPyPiClient max cache entries to {maxEntries}");
+ cachedResponses = new MemoryCache(new MemoryCacheOptions { SizeLimit = maxEntries });
}
- return message;
+ checkedMaxEntriesVariable = true;
}
public async Task> FetchPackageDependencies(string name, string version, PythonProjectRelease release)
@@ -88,9 +129,7 @@ public async Task> FetchPackageDependencies(st
var dependencies = new List();
var uri = release.Url.ToString();
- var response = await GetPypiResponse(uri);
-
- response = await CachePypiResponse(uri, response);
+ var response = await GetAndCachePyPiResponse(uri);
if (!response.IsSuccessStatusCode)
{
@@ -169,11 +208,9 @@ public async Task>> GetRele
return Task.FromResult(null);
}
- return GetPypiResponse(requestUri);
+ return GetAndCachePyPiResponse(requestUri);
});
- request = await CachePypiResponse(requestUri, request);
-
if (request == null)
{
using var r = new PypiMaxRetriesReachedTelemetryRecord { Name = spec.Name, DependencySpecifiers = spec.DependencySpecifiers?.ToArray() };
diff --git a/test/Microsoft.ComponentDetection.Common.Tests/BaseDetectionTelemetryRecordTests.cs b/test/Microsoft.ComponentDetection.Common.Tests/BaseDetectionTelemetryRecordTests.cs
index 203a25df2..f3d0d3a22 100644
--- a/test/Microsoft.ComponentDetection.Common.Tests/BaseDetectionTelemetryRecordTests.cs
+++ b/test/Microsoft.ComponentDetection.Common.Tests/BaseDetectionTelemetryRecordTests.cs
@@ -59,6 +59,7 @@ public void SerializableProperties()
typeof(string),
typeof(string[]),
typeof(bool),
+ typeof(int),
typeof(int?),
typeof(TimeSpan?),
typeof(HttpStatusCode),
diff --git a/test/Microsoft.ComponentDetection.Detectors.Tests/PyPiClientTests.cs b/test/Microsoft.ComponentDetection.Detectors.Tests/PyPiClientTests.cs
index ae18825c2..f64df081c 100644
--- a/test/Microsoft.ComponentDetection.Detectors.Tests/PyPiClientTests.cs
+++ b/test/Microsoft.ComponentDetection.Detectors.Tests/PyPiClientTests.cs
@@ -1,10 +1,12 @@
using System;
using System.Collections.Generic;
+using System.IO;
using System.Net;
using System.Net.Http;
using System.Threading;
using System.Threading.Tasks;
using FluentAssertions;
+using Microsoft.ComponentDetection.Common;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.ComponentDetection.Detectors.Pip;
using Microsoft.VisualStudio.TestTools.UnitTesting;
@@ -24,6 +26,7 @@ public void Initialize()
{
pypiClient = new PyPiClient()
{
+ EnvironmentVariableService = new EnvironmentVariableService(),
Logger = new Mock().Object,
};
}
@@ -41,14 +44,148 @@ public async Task GetReleases_InvalidSpecVersion_NotThrow()
},
};
- PyPiClient.HttpClient = new HttpClient(MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject)));
+ var mockHandler = MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject));
+ PyPiClient.HttpClient = new HttpClient(mockHandler.Object);
Func action = async () => await pypiClient.GetReleases(pythonSpecs);
await action.Should().NotThrowAsync();
}
- private HttpMessageHandler MockHttpMessageHandler(string content)
+ [TestMethod]
+ public async Task GetReleases_DuplicateEntries_CallsGetAsync_Once()
+ {
+ var pythonSpecs = new PipDependencySpecification { DependencySpecifiers = new List { "==1.0.0" } };
+ var pythonProject = new PythonProject
+ {
+ Releases = new Dictionary>
+ {
+ { "1.0.0", new List { new PythonProjectRelease() } },
+ },
+ };
+
+ var mockHandler = MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject));
+ PyPiClient.HttpClient = new HttpClient(mockHandler.Object);
+
+ Func action = async () => await pypiClient.GetReleases(pythonSpecs);
+
+ await action.Should().NotThrowAsync();
+ await action.Should().NotThrowAsync();
+
+ // Verify the API call was performed only once
+ mockHandler.Protected().Verify(
+ "SendAsync",
+ Times.Once(),
+ ItExpr.IsAny(),
+ ItExpr.IsAny());
+ }
+
+ [TestMethod]
+ public async Task GetReleases_DifferentEntries_CallsGetAsync_Once()
+ {
+ var pythonSpecs = new PipDependencySpecification { DependencySpecifiers = new List { "==1.0.0" } };
+ var pythonProject = new PythonProject
+ {
+ Releases = new Dictionary>
+ {
+ { "1.0.0", new List { new PythonProjectRelease() } },
+ },
+ };
+
+ var mockHandler = MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject));
+ PyPiClient.HttpClient = new HttpClient(mockHandler.Object);
+
+ Func action = async () =>
+ {
+ pythonSpecs.Name = Guid.NewGuid().ToString();
+ await pypiClient.GetReleases(pythonSpecs);
+ };
+
+ await action.Should().NotThrowAsync();
+ await action.Should().NotThrowAsync();
+
+ // Verify the API call was performed only once
+ mockHandler.Protected().Verify(
+ "SendAsync",
+ Times.Exactly(2),
+ ItExpr.IsAny(),
+ ItExpr.IsAny());
+ }
+
+ [TestMethod]
+ public async Task FetchPackageDependencies_DuplicateEntries_CallsGetAsync_Once()
+ {
+ var mockHandler = MockHttpMessageHandler("invalid ZIP");
+ PyPiClient.HttpClient = new HttpClient(mockHandler.Object);
+
+ Func action = async () => await pypiClient.FetchPackageDependencies("a", "1.0.0", new PythonProjectRelease { PackageType = "bdist_wheel", PythonVersion = "3.5.2", Size = 1000, Url = new Uri($"https://testurl") });
+
+ await action.Should().ThrowAsync();
+ await action.Should().ThrowAsync();
+
+ // Verify the API call was performed only once
+ mockHandler.Protected().Verify(
+ "SendAsync",
+ Times.Once(),
+ ItExpr.IsAny(),
+ ItExpr.IsAny());
+ }
+
+ [TestMethod]
+ public async Task FetchPackageDependencies_DifferentEntries_CallsGetAsync_Once()
+ {
+ var mockHandler = MockHttpMessageHandler("invalid ZIP");
+ PyPiClient.HttpClient = new HttpClient(mockHandler.Object);
+
+ Func action = async () => await pypiClient.FetchPackageDependencies("a", "1.0.0", new PythonProjectRelease { PackageType = "bdist_wheel", PythonVersion = "3.5.2", Size = 1000, Url = new Uri($"https://{Guid.NewGuid()}") });
+
+ await action.Should().ThrowAsync();
+ await action.Should().ThrowAsync();
+
+ // Verify the API call was performed only once
+ mockHandler.Protected().Verify(
+ "SendAsync",
+ Times.Exactly(2),
+ ItExpr.IsAny(),
+ ItExpr.IsAny());
+ }
+
+ [TestMethod]
+ public async Task GetReleases_MaxEntriesVariable_CreatesNewCache()
+ {
+ var pythonSpecs = new PipDependencySpecification { DependencySpecifiers = new List { "==1.0.0" } };
+ var pythonProject = new PythonProject
+ {
+ Releases = new Dictionary>
+ {
+ { "1.0.0", new List { new PythonProjectRelease() } },
+ },
+ };
+
+ var mockHandler = MockHttpMessageHandler(JsonConvert.SerializeObject(pythonProject));
+ PyPiClient.HttpClient = new HttpClient(mockHandler.Object);
+
+ var mockLogger = new Mock();
+ var mockEvs = new Mock();
+ mockEvs.Setup(x => x.GetEnvironmentVariable(It.Is(s => s.Equals("PyPiMaxCacheEntries")))).Returns("32");
+
+ var mockedPyPi = new PyPiClient()
+ {
+ EnvironmentVariableService = mockEvs.Object,
+ Logger = mockLogger.Object,
+ };
+
+ Func action = async () => await mockedPyPi.GetReleases(pythonSpecs);
+
+ await action.Should().NotThrowAsync();
+ await action.Should().NotThrowAsync();
+
+ // Verify the cache setup call was performed only once
+ mockEvs.Verify(x => x.GetEnvironmentVariable(It.IsAny()), Times.Once());
+ mockLogger.Verify(x => x.LogInfo(It.Is(s => s.Equals("Setting IPyPiClient max cache entries to 32"))), Times.Once());
+ }
+
+ private Mock MockHttpMessageHandler(string content)
{
var handlerMock = new Mock();
handlerMock.Protected()
@@ -62,7 +199,7 @@ private HttpMessageHandler MockHttpMessageHandler(string content)
Content = new StringContent(content),
});
- return handlerMock.Object;
+ return handlerMock;
}
}
}