/
PuppeteerPageLoader.cs
94 lines (74 loc) · 3.27 KB
/
PuppeteerPageLoader.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
using System.Reflection;
using Microsoft.Extensions.Logging;
using PuppeteerSharp;
using WebReaper.Core.CookieStorage.Abstract;
using WebReaper.Core.Loaders.Abstract;
using WebReaper.Domain.PageActions;
using WebReaper.Extensions;
namespace WebReaper.Core.Loaders.Concrete;
/// <summary>
/// Page loader that renders a URL in a Chromium instance driven by PuppeteerSharp
/// and returns the resulting page content.
/// </summary>
public class PuppeteerPageLoader : BrowserPageLoader, IBrowserPageLoader
{
    private readonly ICookiesStorage _cookiesStorage;

    // Lets only one Load call on this instance run the browser download step at a time.
    private readonly SemaphoreSlim _semaphore = new(1, 1);

    /// <summary>
    /// Creates a loader that logs through <paramref name="logger"/> and reads cookies
    /// from <paramref name="cookiesStorage"/>.
    /// </summary>
    /// <param name="logger">Logger handed to the base loader.</param>
    /// <param name="cookiesStorage">Storage queried for cookies before each navigation.</param>
    public PuppeteerPageLoader(ILogger logger, ICookiesStorage cookiesStorage) : base(logger)
    {
        _cookiesStorage = cookiesStorage;
    }

    /// <summary>
    /// Opens <paramref name="url"/> in a freshly launched browser, optionally runs
    /// <paramref name="pageActions"/> in order, and returns the page content.
    /// </summary>
    /// <param name="url">Address to navigate to.</param>
    /// <param name="pageActions">Actions to perform after navigation; <c>null</c> to perform none.</param>
    /// <param name="headless">Passed to the browser launch options as the headless flag.</param>
    /// <returns>The content of the page after navigation and all page actions.</returns>
    public async Task<string> Load(string url, List<PageAction>? pageActions = null, bool headless = true)
    {
        Logger.LogInformation("{class}.{method}", nameof(PuppeteerPageLoader), nameof(Load));

        using var _ = Logger.LogMethodDuration();

        // The browser is downloaded into the directory of the executing assembly.
        var browserFetcher = new BrowserFetcher(new BrowserFetcherOptions
        {
            Path = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location)
        });

        await _semaphore.WaitAsync();

        try
        {
            Logger.LogInformation("{class}.{method}: Downloading browser...", nameof(PuppeteerPageLoader), nameof(Load));

            await browserFetcher.DownloadAsync(BrowserFetcher.DefaultChromiumRevision);

            Logger.LogInformation("{class}.{method}: Browser is downloaded", nameof(PuppeteerPageLoader), nameof(Load));
        }
        finally
        {
            // Release even when the download throws so later calls are not blocked forever.
            _semaphore.Release();
        }

        Logger.LogInformation("{class}.{method}: Launching a browser", nameof(PuppeteerPageLoader), nameof(Load));

        await using var browser = await Puppeteer.LaunchAsync(new LaunchOptions
        {
            Headless = headless,
            ExecutablePath = browserFetcher.RevisionInfo(BrowserFetcher.DefaultChromiumRevision).ExecutablePath
        });

        Logger.LogInformation("{class}.{method}: creating a new page", nameof(PuppeteerPageLoader), nameof(Load));

        await using var page = await browser.NewPageAsync();

        var cookies = await _cookiesStorage.GetAsync();

        if (cookies is not null)
        {
            // Cookies are installed before navigation, so the page has no URL yet from which
            // a cookie's scope could be inferred. Carry the domain, path and flags across
            // explicitly; a cookie with only a name and a value has no scope to match.
            // NOTE(review): assumes GetAllCookies() yields System.Net.Cookie items - confirm.
            var cookieParams = cookies.GetAllCookies().Select(c => new CookieParam
            {
                Name = c.Name,
                Value = c.Value,
                Domain = c.Domain,
                Path = c.Path,
                Secure = c.Secure,
                HttpOnly = c.HttpOnly
            }).ToArray();

            await page.SetCookieAsync(cookieParams);
        }

        // Navigation is awaited only up to DOMContentLoaded; network idle is not awaited.
        await page.GoToAsync(url, WaitUntilNavigation.DOMContentLoaded);

        if (pageActions is not null)
        {
            Logger.LogInformation("{class}.{method}: performing page actions", nameof(PuppeteerPageLoader), nameof(Load));

            for (int i = 0; i < pageActions.Count; i++)
            {
                var pageAction = pageActions[i];

                // Report a one-based position against the real total ("1 of 3", not "0 of 2").
                Logger.LogInformation("{class}.{method}: performing page action {current} of {count} with type {actionType}",
                    nameof(PuppeteerPageLoader),
                    nameof(Load),
                    i + 1,
                    pageActions.Count,
                    pageAction.Type);

                // PageActions maps an action type to the handler that runs it against the page.
                await PageActions[pageAction.Type](page, pageAction.Parameters);
            }
        }

        var html = await page.GetContentAsync();

        return html;
    }
}