Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
<PackageVersion Include="FluentAssertions" Version="6.1.0"/>
<PackageVersion Include="Microsoft.AspNet.WebApi.Client" Version="5.2.7"/>
<PackageVersion Include="Microsoft.CodeAnalysis.FxCopAnalyzers" Version="3.3.0"/>
<PackageVersion Include="Microsoft.Extensions.Caching.Memory" Version="3.1.23" />
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="16.9.4"/>
<PackageVersion Include="Microsoft.SourceLink.GitHub" Version="1.0.0"/>
<PackageVersion Include="DotNet.Glob" Version="2.1.1"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,20 @@ namespace Microsoft.ComponentDetection.Common
/// <summary>
/// Reads environment variables of the current process, matching variable names
/// case-insensitively regardless of the operating system.
/// </summary>
public class EnvironmentVariableService : IEnvironmentVariableService
{
    /// <summary>
    /// Checks whether an environment variable with the given name is defined.
    /// </summary>
    /// <param name="name">Name of the variable; letter case is ignored.</param>
    /// <returns>True when <see cref="GetEnvironmentVariable"/> yields a non-null value.</returns>
    public bool DoesEnvironmentVariableExist(string name)
    {
        return GetEnvironmentVariable(name) != null;
    }

    /// <summary>
    /// Gets the value of the first environment variable whose name equals
    /// <paramref name="name"/> when letter case is ignored.
    /// </summary>
    /// <param name="name">Name of the variable; letter case is ignored.</param>
    /// <returns>The variable's value, or null when no variable name matches.</returns>
    public string GetEnvironmentVariable(string name)
    {
        // Environment variables are case-insensitive on Windows, and case-sensitive on
        // Linux and MacOS.
        // https://docs.microsoft.com/en-us/dotnet/api/system.environment.getenvironmentvariable
        //
        // Variable names are identifiers, not linguistic text, so they are compared ordinally:
        // a culture-sensitive comparison can give different answers under different current
        // cultures (for example the Turkish dotted/dotless 'i').
        var caseInsensitiveName = Environment.GetEnvironmentVariables().Keys
            .OfType<string>()
            .FirstOrDefault(x => string.Equals(x, name, StringComparison.OrdinalIgnoreCase));

        // Look the value up under the name as it is actually spelled in the environment block.
        return caseInsensitiveName != null ? Environment.GetEnvironmentVariable(caseInsensitiveName) : null;
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
using System.Net;

namespace Microsoft.ComponentDetection.Common.Telemetry.Records
{
/// <summary>
/// Telemetry record describing how the PyPi response cache was used.
/// PyPiClient creates one record per client instance, increments <see cref="NumCacheHits"/>
/// each time a request is answered from its cache, and sets <see cref="FinalCacheSize"/>
/// from its finalizer just before disposing the record.
/// </summary>
public class PypiCacheTelemetryRecord : BaseDetectionTelemetryRecord
{
/// <summary>
/// Gets the name of this record, "PyPiCache".
/// </summary>
public override string RecordName => "PyPiCache";

/// <summary>
/// Gets or sets total number of PyPi requests that hit the cache instead of PyPi APIs.
/// </summary>
public int NumCacheHits { get; set; }

/// <summary>
/// Gets or sets the size of the PyPi cache at class destruction.
/// PyPiClient assigns this from the entry count of its cache.
/// </summary>
public int FinalCacheSize { get; set; }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,7 @@ namespace Microsoft.ComponentDetection.Contracts
/// <summary>
/// Abstraction over reading environment variables, so that consumers do not
/// call <c>System.Environment</c> directly.
/// </summary>
public interface IEnvironmentVariableService
{
/// <summary>
/// Checks whether an environment variable is defined.
/// </summary>
/// <param name="name">Name of the variable to look for.</param>
/// <returns>True when the variable exists; false otherwise.</returns>
bool DoesEnvironmentVariableExist(string name);

/// <summary>
/// Gets the value of an environment variable.
/// </summary>
/// <param name="name">Name of the variable to read.</param>
/// <returns>
/// The variable's value. The EnvironmentVariableService implementation returns null
/// when no variable matches the name.
/// </returns>
string GetEnvironmentVariable(string name);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
<PackageReference Include="Polly" />
<PackageReference Include="Semver" />
<PackageReference Include="yamldotnet" />
<PackageReference Include="Microsoft.Extensions.Caching.Memory" />
<PackageReference Include="Newtonsoft.Json" />
<PackageReference Include="System.Composition.AttributedModel" />
<PackageReference Include="System.Composition.Convention" />
Expand Down
87 changes: 62 additions & 25 deletions src/Microsoft.ComponentDetection.Detectors/pip/IPyPiClient.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Composition;
using System.IO;
Expand All @@ -11,6 +10,7 @@
using System.Threading.Tasks;
using Microsoft.ComponentDetection.Common.Telemetry.Records;
using Microsoft.ComponentDetection.Contracts;
using Microsoft.Extensions.Caching.Memory;
using Newtonsoft.Json;
using Polly;

Expand All @@ -31,10 +31,18 @@ public class PyPiClient : IPyPiClient
[Import]
public ILogger Logger { get; set; }

[Import]
public IEnvironmentVariableService EnvironmentVariableService { get; set; }

private static HttpClientHandler httpClientHandler = new HttpClientHandler() { CheckCertificateRevocationList = true };

internal static HttpClient HttpClient = new HttpClient(httpClientHandler);

// Values used for cache creation
private const long CACHEINTERVALSECONDS = 60;
private const long DEFAULTCACHEENTRIES = 128;
Comment on lines +42 to +43
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Were these just estimates?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, they are currently estimates, but I introduced a new telemetry class to track cache usage statistics so we can adjust them in the future.

private bool checkedMaxEntriesVariable = false;

// time to wait before retrying a failed call to pypi.org
private static readonly TimeSpan RETRYDELAY = TimeSpan.FromSeconds(1);

Expand All @@ -45,52 +53,83 @@ public class PyPiClient : IPyPiClient
private long retries = 0;

/// <summary>
/// This cache is used mostly for consistency, to create a unified view of Pypi response.
/// A thread safe cache implementation which contains a mapping of URI -> HttpResponseMessage
/// and has a limited number of entries which will expire after the cache fills or a specified interval.
/// </summary>
private readonly ConcurrentDictionary<string, Task<HttpResponseMessage>> cachedResponses = new ConcurrentDictionary<string, Task<HttpResponseMessage>>();
private MemoryCache cachedResponses = new MemoryCache(new MemoryCacheOptions { SizeLimit = DEFAULTCACHEENTRIES });

// Keep telemetry on how the cache is being used for future refinements
private PypiCacheTelemetryRecord cacheTelemetry;

/// <summary>
/// Initializes a new instance of the <see cref="PyPiClient"/> class with a fresh
/// cache telemetry record whose counters start at zero.
/// </summary>
public PyPiClient()
{
    var telemetry = new PypiCacheTelemetryRecord();
    telemetry.NumCacheHits = 0;
    telemetry.FinalCacheSize = 0;

    cacheTelemetry = telemetry;
}

/// <summary>
/// Finalizer: stores the current number of cache entries on the telemetry record and
/// disposes the record (presumably what publishes it; confirm against BaseDetectionTelemetryRecord).
/// </summary>
~PyPiClient()
{
    // An exception that escapes a finalizer terminates the process, and the objects
    // touched here may be null (constructor failed) or already finalized themselves,
    // so this telemetry flush is strictly best-effort.
    try
    {
        if (cacheTelemetry == null)
        {
            return;
        }

        if (cachedResponses != null)
        {
            cacheTelemetry.FinalCacheSize = cachedResponses.Count;
        }

        cacheTelemetry.Dispose();
    }
    catch (Exception)
    {
        // Deliberately ignored: losing one telemetry record is preferable to crashing
        // the process from the finalizer thread.
    }
}

/// <summary>
/// Returns a cached response if it exists, otherwise returns the response from PyPi REST call.
/// Successful responses and 4xx client errors are automatically added to the cache; any other
/// response is returned without being cached so that a caller's retry reaches PyPi again
/// instead of being answered with the cached failure.
/// </summary>
/// <param name="uri">The REST Uri to call.</param>
/// <returns>The cached response or a new result from PyPi.</returns>
private async Task<HttpResponseMessage> GetAndCachePyPiResponse(string uri)
{
    // The cache size override is read lazily, on the first request.
    if (!checkedMaxEntriesVariable)
    {
        InitializeNonDefaultMemoryCache();
    }

    if (cachedResponses.TryGetValue(uri, out HttpResponseMessage result))
    {
        cacheTelemetry.NumCacheHits++;
        Logger.LogVerbose("Retrieved cached Python data from " + uri);
        return result;
    }

    Logger.LogInfo("Getting Python data from " + uri);
    var response = await HttpClient.GetAsync(uri);

    // NOTE(review): assumes callers only retry responses that are neither successful nor a
    // 4xx client error — confirm against the retry policy in GetReleases.
    var statusCode = (int)response.StatusCode;
    var isClientError = statusCode >= 400 && statusCode <= 499;
    if (!response.IsSuccessStatusCode && !isClientError)
    {
        return response;
    }

    // The `first - wins` response accepted into the cache. This might be different from the input if another caller wins the race.
    return await cachedResponses.GetOrCreateAsync(uri, cacheEntry =>
    {
        cacheEntry.SlidingExpiration = TimeSpan.FromSeconds(CACHEINTERVALSECONDS); // This entry will expire after CACHEINTERVALSECONDS seconds from last use
        cacheEntry.Size = 1; // Specify a size of 1 so a set number of entries can always be in the cache
        return Task.FromResult(response);
    });
}

/// <summary>
/// On the initial caching attempt, see if the user specified an override for
/// PyPiMaxCacheEntries and recreate the cache if needed.
/// A value that is not a non-negative integer is ignored and the existing cache is kept.
/// </summary>
private void InitializeNonDefaultMemoryCache()
{
    var maxEntriesVariable = EnvironmentVariableService.GetEnvironmentVariable("PyPiMaxCacheEntries");
    if (!string.IsNullOrEmpty(maxEntriesVariable))
    {
        // MemoryCacheOptions.SizeLimit rejects negative values with an
        // ArgumentOutOfRangeException, so a negative override must not reach it.
        if (long.TryParse(maxEntriesVariable, out var maxEntries) && maxEntries >= 0)
        {
            Logger.LogInfo($"Setting IPyPiClient max cache entries to {maxEntries}");
            cachedResponses = new MemoryCache(new MemoryCacheOptions { SizeLimit = maxEntries });
        }
        else
        {
            Logger.LogInfo($"Ignoring invalid PyPiMaxCacheEntries value '{maxEntriesVariable}'; keeping {DEFAULTCACHEENTRIES} cache entries");
        }
    }

    // Record that the override was examined so the environment is only consulted once.
    checkedMaxEntriesVariable = true;
}

public async Task<IList<PipDependencySpecification>> FetchPackageDependencies(string name, string version, PythonProjectRelease release)
{
var dependencies = new List<PipDependencySpecification>();

var uri = release.Url.ToString();
var response = await GetPypiResponse(uri);

response = await CachePypiResponse(uri, response);
var response = await GetAndCachePyPiResponse(uri);

if (!response.IsSuccessStatusCode)
{
Expand Down Expand Up @@ -169,11 +208,9 @@ public async Task<SortedDictionary<string, IList<PythonProjectRelease>>> GetRele
return Task.FromResult<HttpResponseMessage>(null);
}

return GetPypiResponse(requestUri);
return GetAndCachePyPiResponse(requestUri);
});

request = await CachePypiResponse(requestUri, request);

if (request == null)
{
using var r = new PypiMaxRetriesReachedTelemetryRecord { Name = spec.Name, DependencySpecifiers = spec.DependencySpecifiers?.ToArray() };
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public void SerializableProperties()
typeof(string),
typeof(string[]),
typeof(bool),
typeof(int),
typeof(int?),
typeof(TimeSpan?),
typeof(HttpStatusCode),
Expand Down
Loading