* fix every existing entity will be in the Modified state since prop `RowVersionedEntity.Version` will always be reset to `0`

* narrow generic constraint for `TEntity` from `class` to `RowVersionedEntity`
* rename param `existingOrNewLookup` to `isExistingEntityLookup`
@ `SaverWithRevision.SaveEntitiesWithRevision()`

* rename primary ctor param `registeredLocksLookup` to `registeredLocksKeyByType` @ RetryCrawlWorker.cs
* inline `using LinqKit` @ (Sub)ReplySaver.cs
@ crawler

* now logs the number of times already retried instead of the exception @ `TbmDbContext.LogDbUpdateConcurrencyException()`
@ shared

* enable inspection `resharper_unnecessary_whitespace_highlighting` @ .editorconfig
@ c#
n0099 committed May 18, 2024
1 parent 6870be0 commit 1413923
Showing 8 changed files with 24 additions and 21 deletions.
1 change: 1 addition & 0 deletions c#/.editorconfig
@@ -143,6 +143,7 @@ resharper_suggest_var_or_type_simple_types_highlighting = hint
resharper_entity_framework_model_validation_unlimited_string_length_highlighting = none
resharper_move_local_function_after_jump_statement_highlighting = none
resharper_separate_local_functions_with_jump_statement_highlighting = none
+ resharper_unnecessary_whitespace_highlighting = warning

###############################
# .NET Coding Conventions #
3 changes: 2 additions & 1 deletion c#/crawler/src/Tieba/Crawl/Facade/CrawlFacade.cs
@@ -41,6 +41,7 @@ public virtual void Dispose()
[SuppressMessage("Major Bug", "S1751:Loops with at most one iteration should be refactored")]
public SaverChangeSet<TPost>? SaveCrawled(CancellationToken stoppingToken = default)
{
+ var retryTimes = 0;
while (true)
{
using var db = DbContextFactory(Fid); // dispose after each loop when retrying
@@ -66,7 +67,7 @@
}
catch (DbUpdateConcurrencyException e)
{
- db.LogDbUpdateConcurrencyException(e);
+ db.LogDbUpdateConcurrencyException(e, ref retryTimes);
}
finally
{
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs
@@ -66,7 +66,7 @@ public class AuthorRevisionSaver(
var newRevisionOfExistingUsers = existingRevisionOfExistingUsers

// filter out revisions with the same DiscoveredAt to prevent duplicate keys
- // when some fields get updated more than one time in a second
+ // when some fields get updated more than once in a second
.Where(t => t.Existing.DiscoveredAt != t.NewInPost.DiscoveredAt
&& isValueChangedPredicate(t.Existing.Value, t.NewInPost.Value))
.Select(t => (t.Uid, t.NewInPost.Value, t.NewInPost.DiscoveredAt));
4 changes: 1 addition & 3 deletions c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs
@@ -1,5 +1,3 @@
- using PredicateBuilder = LinqKit.PredicateBuilder;
-
namespace tbm.Crawler.Tieba.Crawl.Saver.Post;

public class ReplySaver(
@@ -49,7 +47,7 @@ public override SaverChangeSet<ReplyPost> Save(CrawlerDbContext db)
{
var changeSet = Save(db, r => r.Pid,
r => new ReplyRevision {TakenAt = r.UpdatedAt ?? r.CreatedAt, Pid = r.Pid},
- PredicateBuilder.New<ReplyPost>(r => Posts.Keys.Contains(r.Pid)));
+ LinqKit.PredicateBuilder.New<ReplyPost>(r => Posts.Keys.Contains(r.Pid)));

replyContentImageSaver.Save(db, changeSet.NewlyAdded);
PostSaveHandlers += AuthorRevisionSaver.SaveAuthorExpGradeRevisions(db, changeSet.AllAfter).Invoke;
4 changes: 1 addition & 3 deletions c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs
@@ -1,5 +1,3 @@
- using LinqKit;
-
namespace tbm.Crawler.Tieba.Crawl.Saver.Post;

public class SubReplySaver(
@@ -44,7 +42,7 @@ public override SaverChangeSet<SubReplyPost> Save(CrawlerDbContext db)
{
var changeSet = Save(db, sr => sr.Spid,
sr => new SubReplyRevision {TakenAt = sr.UpdatedAt ?? sr.CreatedAt, Spid = sr.Spid},
- PredicateBuilder.New<SubReplyPost>(sr => Posts.Keys.Contains(sr.Spid)));
+ LinqKit.PredicateBuilder.New<SubReplyPost>(sr => Posts.Keys.Contains(sr.Spid)));
PostSaveHandlers += AuthorRevisionSaver.SaveAuthorExpGradeRevisions(db, changeSet.AllAfter).Invoke;

return changeSet;
12 changes: 6 additions & 6 deletions c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs
@@ -25,21 +25,21 @@ public abstract partial class SaverWithRevision<TBaseRevision>
protected void SaveEntitiesWithRevision<TEntity, TRevision>(
CrawlerDbContext db,
Func<TEntity, TRevision> revisionFactory,
- ILookup<bool, TEntity> existingOrNewLookup,
+ ILookup<bool, TEntity> isExistingEntityLookup,
Func<TEntity, TEntity> existingSelector,
UserSaver.FieldChangeIgnorance? userFieldUpdateIgnorance = null,
UserSaver.FieldChangeIgnorance? userFieldRevisionIgnorance = null)
- where TEntity : class
+ where TEntity : RowVersionedEntity
where TRevision : BaseRevisionWithSplitting
{
- db.Set<TEntity>().AddRange(existingOrNewLookup[false]); // newly added
- var newRevisions = existingOrNewLookup[true].Select(newEntity =>
+ db.Set<TEntity>().AddRange(isExistingEntityLookup[false]); // newly added
+ var newRevisions = isExistingEntityLookup[true].Select(newEntity =>
{
var entityInTracking = existingSelector(newEntity);
var entityEntry = db.Entry(entityInTracking);
- // this will mutate existingEntity which is referenced by entry
- entityEntry.CurrentValues.SetValues(newEntity);
+ entityEntry.CurrentValues.SetValues(newEntity); // mutate existingEntity that referenced by entry
+ entityEntry.Property(e => e.Version).IsModified = false; // newEntity.Version will always be default 0
bool IsTimestampingFieldName(string name) => name is nameof(BasePost.LastSeenAt)
or nameof(TimestampedEntity.CreatedAt) or nameof(TimestampedEntity.UpdatedAt);
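
The hunk above is the core of the fix in the commit title: `CurrentValues.SetValues(newEntity)` copies every scalar property from the freshly crawled entity onto the tracked one, including the concurrency token `Version`, which a crawled entity never carries and therefore still holds its default `0`. That flags `Version` as modified on every existing entity, so all of them would be written back. The sketch below illustrates the pattern with hypothetical types (`SomePost`, a `uint Version` token); it is not the repository's actual `RowVersionedEntity`.

```csharp
using Microsoft.EntityFrameworkCore;

// Hypothetical stand-ins for the repository's row-versioned entities.
public abstract class RowVersionedEntity
{
    public uint Version { get; set; } // assumed to be mapped as a concurrency token
}

public class SomePost : RowVersionedEntity
{
    public ulong Pid { get; set; }
    public string? Content { get; set; }
}

public static class UpdateExistingEntitySketch
{
    // trackedEntity was loaded from the database and carries the current Version;
    // newEntity comes from the crawler and leaves Version at its default 0.
    public static void Apply(DbContext db, SomePost trackedEntity, SomePost newEntity)
    {
        var entry = db.Entry(trackedEntity);

        // Copies all scalar values, including Version = 0, onto the tracked entity,
        // which marks the Version property as modified...
        entry.CurrentValues.SetValues(newEntity);

        // ...so without this line EF Core would save every existing entity as changed
        // and run its optimistic-concurrency check against the bogus 0.
        entry.Property(e => e.Version).IsModified = false;
    }
}
```

As for the renamed `isExistingEntityLookup` parameter, callers presumably split the crawled batch with something like `entities.ToLookup(e => existingKeys.Contains(keySelector(e)))`: the `[false]` group holds genuinely new rows for `AddRange`, while the `[true]` group goes through the `SetValues` path above. The exact construction is not part of this diff.
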
4 changes: 2 additions & 2 deletions c#/crawler/src/Worker/RetryCrawlWorker.cs
@@ -2,7 +2,7 @@ namespace tbm.Crawler.Worker;

public class RetryCrawlWorker(
ILogger<RetryCrawlWorker> logger,
- IIndex<CrawlerLocks.Type, CrawlerLocks> registeredLocksLookup,
+ IIndex<CrawlerLocks.Type, CrawlerLocks> registeredLocksKeyByType,
CrawlPost crawlPost,
Func<Owned<CrawlerDbContext.NewDefault>> dbContextDefaultFactory,
Func<Owned<ThreadLateCrawlFacade.New>> threadLateCrawlFacadeFactory,
@@ -16,7 +16,7 @@ protected override async Task DoWork(CancellationToken stoppingToken)
foreach (var lockType in Enum.GetValues<CrawlerLocks.Type>())
{
if (stoppingToken.IsCancellationRequested) return;
- var failed = registeredLocksLookup[lockType].RetryAllFailed();
+ var failed = registeredLocksKeyByType[lockType].RetryAllFailed();
if (failed.Count == 0) continue; // skip current lock type if there's nothing needs to retry
if (lockType == CrawlerLocks.Type.ThreadLate)
{
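
`IIndex<CrawlerLocks.Type, CrawlerLocks>` is Autofac's keyed-service index, so the new name `registeredLocksKeyByType` reflects that the collection is keyed by the lock-type enum rather than being a LINQ `ILookup`. Below is a rough sketch of how such an index is typically wired up; the project's actual registration is not part of this diff, and the `"lockType"` constructor parameter name is an assumption.

```csharp
using System;
using Autofac;

var builder = new ContainerBuilder();

// One CrawlerLocks instance per enum value, registered under that value as its key
// (CrawlerLocks and CrawlerLocks.Type are the crawler project's own types).
foreach (var lockType in Enum.GetValues<CrawlerLocks.Type>())
    builder.RegisterType<CrawlerLocks>()
        .Keyed<CrawlerLocks>(lockType)
        .WithParameter("lockType", lockType) // assumed ctor parameter name
        .SingleInstance();

// With keyed registrations in place, Autofac can inject IIndex<CrawlerLocks.Type, CrawlerLocks>,
// and registeredLocksKeyByType[lockType] resolves the instance registered under that key.
```
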
15 changes: 10 additions & 5 deletions c#/shared/src/Db/TbmDbContext.cs
@@ -11,14 +11,18 @@ namespace tbm.Shared.Db;

public abstract class TbmDbContext(ILogger<TbmDbContext> logger) : DbContext
{
- public void LogDbUpdateConcurrencyException(DbUpdateConcurrencyException e) =>
- logger.LogWarning(e, "DbUpdateConcurrencyException: {}",
- SharedHelper.UnescapedJsonSerialize(e.Entries
+ public void LogDbUpdateConcurrencyException(DbUpdateConcurrencyException e, ref int retryTimes)
+ {
+ retryTimes++;
+ logger.LogWarning("Failed to update following entities after {} times: {}",
+ retryTimes, SharedHelper.UnescapedJsonSerialize(e.Entries
.GroupBy(ee => ee.Entity.GetType().Name)
.ToDictionary(g => g.Key, g => g.Count())));
+ }

public int SaveChangesForUpdate()
{
+ var retryTimes = 0;
while (true)
{
try
@@ -27,7 +31,7 @@ public int SaveChangesForUpdate()
}
catch (DbUpdateConcurrencyException e)
{
- LogDbUpdateConcurrencyException(e);
+ LogDbUpdateConcurrencyException(e, ref retryTimes);
foreach (var entry in e.Entries)
{
var existing = entry.GetDatabaseValues();
@@ -40,6 +44,7 @@

public async Task<int> SaveChangesForUpdateAsync(CancellationToken stoppingToken = default)
{
+ var retryTimes = 0;
while (true)
{
try
@@ -48,7 +53,7 @@ public async Task<int> SaveChangesForUpdateAsync(CancellationToken stoppingToken
}
catch (DbUpdateConcurrencyException e)
{
- LogDbUpdateConcurrencyException(e);
+ LogDbUpdateConcurrencyException(e, ref retryTimes);
foreach (var entry in e.Entries)
{
var existing = await entry.GetDatabaseValuesAsync(stoppingToken);
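
The reworked `LogDbUpdateConcurrencyException` takes the retry counter by `ref`: each retry loop declares its own `var retryTimes = 0;`, and the shared method both increments and reports it, instead of logging the exception object itself. A minimal sketch of that counter-by-ref shape with hypothetical names, not the actual `TbmDbContext` code:

```csharp
using System;

static class RetrySketch
{
    // The callee owns the increment, so every call site that logs a conflict
    // also advances the caller's counter.
    static void LogConflict(ref int retryTimes)
    {
        retryTimes++;
        Console.WriteLine($"concurrency conflict, retried {retryTimes} time(s) so far");
    }

    static void SaveWithRetry(Action save)
    {
        var retryTimes = 0; // each retry loop owns its own counter
        while (true)
        {
            try { save(); return; }
            catch (InvalidOperationException) // stand-in for DbUpdateConcurrencyException
            {
                LogConflict(ref retryTimes);
                // reload conflicting entries here, then loop and retry
            }
        }
    }
}
```
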
