Skip to content

Commit 39ec0a8

Browse files
committed
feat: back-fill post titles
1 parent 419c770 commit 39ec0a8

7 files changed

Lines changed: 247 additions & 3 deletions

ApplicationData/Services/RedditPostProvider.cs

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using FruityFoundation.Base.Structures;
1+
using System.Runtime.CompilerServices;
2+
using FruityFoundation.Base.Structures;
23
using FruityFoundation.DataAccess.Abstractions;
34

45
namespace ApplicationData.Services;
@@ -24,4 +25,38 @@ public async Task<Maybe<string>> GetPostTitleByPostId(string redditPostId, Cance
2425

2526
return reader.TryGetString(0);
2627
}
28+
29+
/// <summary>
/// Streams the <c>reddit_post_id</c> of every row in <c>reddit_posts</c> that has no
/// title stored and whose <c>is_title_fetched</c> flag is still 0.
/// </summary>
/// <param name="cancellationToken">Token passed to the query and to each row read.</param>
/// <returns>An async sequence of post ids, yielded one at a time as rows are read.</returns>
public async IAsyncEnumerable<string> GetPostIdsWithoutTitleFetched([EnumeratorCancellation] CancellationToken cancellationToken)
{
    const string pendingPostsSql = "SELECT reddit_post_id FROM reddit_posts WHERE post_title IS NULL AND is_title_fetched = 0";

    await using var readOnlyConnection = _dbConnectionFactory.CreateReadOnlyConnection();
    await using var rows = await readOnlyConnection.ExecuteReader(pendingPostsSql, cancellationToken: cancellationToken);

    while (true)
    {
        var hasRow = await rows.ReadAsync(cancellationToken);
        if (!hasRow)
            yield break;

        yield return rows.GetString(0);
    }
}
41+
42+
/// <summary>
/// Sets <c>is_title_fetched = 1</c> on the given post without touching its title.
/// </summary>
/// <param name="redditPostId">Value matched against <c>reddit_posts.reddit_post_id</c>.</param>
/// <param name="cancellationToken">Token passed to the UPDATE statement.</param>
public async Task SetPostIdAsTitleFetched(string redditPostId, CancellationToken cancellationToken)
{
    const string markFetchedSql = "UPDATE reddit_posts SET is_title_fetched = 1 WHERE reddit_post_id = @redditPostId";
    var parameters = new { redditPostId };

    await using var writableConnection = _dbConnectionFactory.CreateConnection();
    await writableConnection.Execute(markFetchedSql, parameters, cancellationToken);
}
49+
50+
/// <summary>
/// Stores the title for the given post and sets <c>is_title_fetched = 1</c> in the same UPDATE.
/// </summary>
/// <param name="redditPostId">Value matched against <c>reddit_posts.reddit_post_id</c>.</param>
/// <param name="title">Title written to <c>post_title</c>.</param>
/// <param name="cancellationToken">Token passed to the UPDATE statement.</param>
public async Task SetPostTitle(string redditPostId, string title, CancellationToken cancellationToken)
{
    const string storeTitleSql =
        """
        UPDATE reddit_posts
        SET
        post_title = @title
        ,is_title_fetched = 1
        WHERE reddit_post_id = @redditPostId
        """;
    var parameters = new { redditPostId, title };

    await using var writableConnection = _dbConnectionFactory.CreateConnection();
    await writableConnection.Execute(storeTitleSql, parameters, cancellationToken);
}
2762
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
using ApplicationData.Services;
2+
using SnooBrowser.Browsers;
3+
using SnooBrowser.Things;
4+
using FruityFoundation.Base.Structures;
5+
6+
namespace BackgroundProcessor.Processors;
7+
8+
/// <summary>
/// Background processor that back-fills missing post titles: it takes a small batch of
/// post ids that have no title recorded, looks each submission up, and stores the title
/// (or just marks the post as attempted when the submission cannot be retrieved).
/// </summary>
public class RedditPostTitleFetcher : IBackgroundProcessor
{
    /// <summary>Maximum number of posts handled per <see cref="Process"/> invocation.</summary>
    private const int BatchSize = 10;

    private readonly ILogger<RedditPostTitleFetcher> _logger;
    private readonly RedditPostProvider _redditPostProvider;
    private readonly SubmissionBrowser _submissionBrowser;

    public RedditPostTitleFetcher(
        ILogger<RedditPostTitleFetcher> logger,
        RedditPostProvider redditPostProvider,
        SubmissionBrowser submissionBrowser
    )
    {
        _logger = logger;
        _redditPostProvider = redditPostProvider;
        _submissionBrowser = submissionBrowser;
    }

    /// <inheritdoc />
    public async Task Process(CancellationToken cancellationToken)
    {
        var query = _redditPostProvider.GetPostIdsWithoutTitleFetched(cancellationToken)
            .Take(BatchSize)
            .WithCancellation(cancellationToken);

        await foreach (var redditPostId in query)
        {
            var maybeSubmission = await _submissionBrowser.GetSubmission(LinkThing.CreateFromShortId(redditPostId));

            if (!maybeSubmission.Try(out var submission))
            {
                // Flag the post as attempted so it is not selected again by the provider query.
                await _redditPostProvider.SetPostIdAsTitleFetched(redditPostId, cancellationToken);
                _logger.LogInformation("Submission title couldn't be fetched for {RedditPostId} because the submission has been removed", redditPostId);

                // Move on to the next post: one unavailable submission must not abandon
                // the remainder of the batch (this used to be a `return`).
                continue;
            }

            await _redditPostProvider.SetPostTitle(redditPostId, submission.Title, cancellationToken);
        }
    }
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
using FruityFoundation.DataAccess.Abstractions;
2+
3+
namespace WebApi.Scripts;
4+
5+
/// <summary>
/// Add the is_title_fetched INTEGER column to reddit_posts if it is not already present
/// </summary>
public class Script_2024_08_24_04_AddIsTitleFetchedColumn : IDbMaintenanceScript
{
    // Returns 1 when pragma_table_info lists an is_title_fetched column on reddit_posts.
    private const string ColumnExistsSql =
        "SELECT EXISTS (SELECT 1 FROM pragma_table_info('reddit_posts') WHERE name = 'is_title_fetched')";

    private const string AddColumnSql =
        """
        alter table reddit_posts
        add is_title_fetched INTEGER;
        """;

    /// <inheritdoc />
    public async Task Run(IDatabaseConnection<ReadWrite> dbConnection)
    {
        var alreadyPresent = await dbConnection.ExecuteScalar<bool>(ColumnExistsSql);

        // Only alter the table when the column is missing, so the script can be re-run.
        if (!alreadyPresent)
            await dbConnection.Execute(AddColumnSql);
    }
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
using FruityFoundation.DataAccess.Abstractions;
2+
3+
namespace WebApi.Scripts;
4+
5+
/// <summary>
/// Set reddit_posts.is_title_fetched to 0 on every row where it is currently NULL
/// </summary>
public class Script_2024_08_24_05_FillIsTitleFetchedWithZeros : IDbMaintenanceScript
{
    // Touches only NULL rows, so rows that already hold a value are left unchanged.
    private const string FillNullsSql =
        """
        UPDATE reddit_posts
        SET is_title_fetched = 0
        WHERE is_title_fetched IS NULL;
        """;

    /// <inheritdoc />
    public async Task Run(IDatabaseConnection<ReadWrite> dbConnection)
    {
        await dbConnection.Execute(FillNullsSql);
    }
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
using FruityFoundation.DataAccess.Abstractions;
2+
3+
namespace WebApi.Scripts;
4+
5+
/// <summary>
/// Add a partial index on reddit_posts.is_title_fetched covering the rows where it is 0
/// </summary>
public class Script_2024_08_24_06_AddIndexForPostsMissingTitle : IDbMaintenanceScript
{
    /// <inheritdoc />
    public async Task Run(IDatabaseConnection<ReadWrite> dbConnection)
    {
        var indexExists = await dbConnection.ExecuteScalar<bool>(
            "SELECT EXISTS (SELECT 1 FROM pragma_index_list('reddit_posts') WHERE name = 'IX_reddit_posts_is_title_fetched_false')");

        // The existence check was previously computed but never consulted, so a second
        // run would fail on "create index" because the index name is already taken.
        if (indexExists)
            return;

        await dbConnection.Execute(
            """
            create index IX_reddit_posts_is_title_fetched_false
            on reddit_posts (is_title_fetched)
            where reddit_posts.is_title_fetched = 0;

            """);
    }
}
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
using FruityFoundation.DataAccess.Abstractions;
2+
3+
namespace WebApi.Scripts;
4+
5+
/// <summary>
6+
/// Rebuild reddit_posts so that is_title_fetched is declared NOT NULL
7+
/// </summary>
/// <remarks>
/// Runs three SQL batches in order: (1) rebuild reddit_comments without its foreign key to
/// reddit_posts, (2) rebuild reddit_posts with is_title_fetched as NOT NULL and recreate the
/// partial index IX_reddit_posts_is_title_fetched_false, (3) rebuild reddit_comments again with
/// the foreign key restored. Each rebuild copies the rows into a *_dg_tmp table, drops the
/// original table and renames the temporary table into its place.
/// The script exits early when pragma_table_info already reports the column as NOT NULL.
/// NOTE(review): batch 2 copies is_title_fetched unchanged, so it presumably relies on an earlier
/// script having replaced every NULL with 0 - confirm the script ordering.
/// NOTE(review): no transaction is opened in this method; whether the caller wraps Run in one
/// cannot be told from here - confirm before relying on atomicity.
/// </remarks>
8+
public class Script_2024_08_24_07_MakeRedditPostsIsTitleFetchedNotNull : IDbMaintenanceScript
9+
{
10+
/// <inheritdoc />
11+
public async Task Run(IDatabaseConnection<ReadWrite> dbConnection)
12+
{
// Read the "notnull" flag that pragma_table_info reports for reddit_posts.is_title_fetched.
13+
var columnIsNotNull = await dbConnection.ExecuteScalar<bool>(
14+
"SELECT \"notnull\" FROM pragma_table_info('reddit_posts') WHERE name = 'is_title_fetched'");
15+
16+
// Nothing to do when the column is already NOT NULL.
if (columnIsNotNull)
17+
return;
18+
19+
// First remove the foreign key on reddit_comments -> reddit_posts.reddit_post_id
// by recreating reddit_comments with the same columns and primary key but no "references" clause.
// Presumably this is what allows reddit_posts to be dropped in the next batch - TODO confirm.
20+
await dbConnection.Execute(
21+
"""
22+
DROP TABLE IF EXISTS reddit_comments_dg_tmp;
23+
create table reddit_comments_dg_tmp
24+
(
25+
reddit_post_id TEXT not null
26+
constraint PK_RedditComments_RedditPostId
27+
primary key,
28+
reddit_comment_id TEXT not null,
29+
posted_at TEXT not null,
30+
last_updated_at TEXT not null
31+
);
32+
33+
insert into reddit_comments_dg_tmp(reddit_post_id, reddit_comment_id, posted_at, last_updated_at)
34+
select reddit_post_id, reddit_comment_id, posted_at, last_updated_at
35+
from reddit_comments;
36+
37+
drop table reddit_comments;
38+
39+
alter table reddit_comments_dg_tmp
40+
rename to reddit_comments;
41+
""");
42+
43+
// Make is_title_fetched not null
// by recreating reddit_posts with the NOT NULL column, copying all rows across, and then
// creating the partial index IX_reddit_posts_is_title_fetched_false on the rebuilt table.
44+
await dbConnection.Execute(
45+
"""
46+
DROP TABLE IF EXISTS reddit_posts_dg_tmp;
47+
48+
create table reddit_posts_dg_tmp
49+
(
50+
reddit_post_id TEXT not null
51+
constraint PK_reddit_posts_reddit_post_id
52+
primary key,
53+
post_title TEXT,
54+
is_title_fetched INTEGER not null
55+
);
56+
57+
insert into reddit_posts_dg_tmp(reddit_post_id, post_title, is_title_fetched)
58+
select reddit_post_id, post_title, is_title_fetched
59+
from reddit_posts;
60+
61+
drop table reddit_posts;
62+
63+
alter table reddit_posts_dg_tmp
64+
rename to reddit_posts;
65+
66+
create index IX_reddit_posts_is_title_fetched_false
67+
on reddit_posts (is_title_fetched)
68+
where reddit_posts.is_title_fetched = 0;
69+
""");
70+
71+
// Re-add the foreign key on reddit_comments -> reddit_posts.reddit_post_id
// by recreating reddit_comments once more, this time with the FK constraint on reddit_post_id.
// Unlike the first two batches, this one does not start with DROP TABLE IF EXISTS for the temporary table.
72+
await dbConnection.Execute(
73+
"""
74+
create table reddit_comments_dg_tmp
75+
(
76+
reddit_post_id TEXT not null
77+
constraint PK_RedditComments_RedditPostId
78+
primary key
79+
constraint FK_reddit_comments_reddit_posts_reddit_post_id
80+
references reddit_posts,
81+
reddit_comment_id TEXT not null,
82+
posted_at TEXT not null,
83+
last_updated_at TEXT not null
84+
);
85+
86+
insert into reddit_comments_dg_tmp(reddit_post_id, reddit_comment_id, posted_at, last_updated_at)
87+
select reddit_post_id, reddit_comment_id, posted_at, last_updated_at
88+
from reddit_comments;
89+
90+
drop table reddit_comments;
91+
92+
alter table reddit_comments_dg_tmp
93+
rename to reddit_comments;
94+
""");
95+
}
96+
}

WebApi/WebApi.csproj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@
2727
<PackageReference Include="Microsoft.Extensions.Configuration.AzureAppConfiguration" Version="7.3.0" />
2828
<PackageReference Include="Sentry.AspNetCore" Version="4.10.2" />
2929
<PackageReference Include="SnooBrowser.Extensions.DependencyInjection" Version="3.1.2" />
30-
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.7.1" />
31-
<PackageReference Include="Swashbuckle.AspNetCore.Annotations" Version="6.7.1" />
30+
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.7.2" />
31+
<PackageReference Include="Swashbuckle.AspNetCore.Annotations" Version="6.7.2" />
3232
<PackageReference Include="System.Linq.Async" Version="6.0.1" />
3333
<PackageReference Include="System.Runtime.Caching" Version="8.0.0" />
3434
</ItemGroup>

0 commit comments

Comments
 (0)