/
CommonInSavers.cs
127 lines (113 loc) · 7.61 KB
/
CommonInSavers.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
using Microsoft.EntityFrameworkCore.ChangeTracking;
namespace tbm.Crawler.Tieba.Crawl.Saver;
public abstract class CommonInSavers<TBaseRevision> : StaticCommonInSavers
where TBaseRevision : class, IRevision
{
/// <summary>
/// Logger used by SavePostsOrUsers() to warn about a changed field
/// that has no matching property on the revision type.
/// </summary>
private readonly ILogger<CommonInSavers<TBaseRevision>> _logger;

/// <summary>Keeps the injected logger for later use.</summary>
/// <param name="logger">Logger categorized by this closed generic type.</param>
protected CommonInSavers(ILogger<CommonInSavers<TBaseRevision>> logger)
{
    _logger = logger;
}

/// <summary>
/// Maps a property name to the bit that SavePostsOrUsers() ORs into the NullFieldsBitMask of a revision
/// when the previous value of that property was null.
/// The base implementation always throws <see cref="NotImplementedException"/>,
/// so a derived saver has to override it before SavePostsOrUsers() reads it.
/// </summary>
protected virtual Dictionary<string, ushort> RevisionNullFieldsBitMasks
{
    get { throw new NotImplementedException(); }
}

/// <summary>
/// Maps the key of each split entity (as exposed by RevisionWithSplitting.SplitEntities)
/// to the action that SavePostsOrUsers() invokes with the db context and all split entities grouped under that key.
/// The base implementation always throws <see cref="NotImplementedException"/>,
/// so a derived saver has to override it before SavePostsOrUsers() reads it.
/// </summary>
protected virtual Dictionary<Type, Action<TbmDbContext, IEnumerable<TBaseRevision>>> RevisionSplitEntitiesUpsertPayloads
{
    get { throw new NotImplementedException(); }
}
/// <summary>
/// Adds the new posts/users of a batch to the context, copies the incoming values onto the tracked instances
/// of the already existing ones, and upserts a revision holding the previous value of every changed field.
/// </summary>
/// <remarks>
/// For each existing entity every modified property goes through these steps, in order:
/// <list type="number">
/// <item>timestamping fields (CreatedAt, UpdatedAt, LastSeenAt) are never treated as modified;</item>
/// <item>when an "Update" ignorance callback matches, the change itself is rolled back;</item>
/// <item>when a "Revision" ignorance callback matches, the change is kept but not recorded in the revision;</item>
/// <item>when the entity turns out to be a placeholder latest replier, no revision is produced for the entity at all;</item>
/// <item>otherwise the previous value is written to the revision property with the same name,
/// or a warning is logged when the revision type has no such property.</item>
/// </list>
/// This method does not call SaveChanges() itself: added and modified entities stay in the change tracker of
/// <paramref name="db"/>. Revisions and split entities are written through the upsert calls at the end.
/// </remarks>
/// <typeparam name="TPostOrUser">Entity type being saved. User-only ignorance rules apply when it is TiebaUser.</typeparam>
/// <typeparam name="TRevision">Revision type that receives the previous values of the changed fields.</typeparam>
/// <param name="db">Context that tracks the existing entities and receives the new ones.</param>
/// <param name="userFieldChangeIgnorance">Additional ignorance callbacks, only consulted when saving users.</param>
/// <param name="revisionFactory">
/// Creates the revision for a tracked entity. It is invoked lazily, at most once per entity,
/// when the first field that has to be recorded is found.
/// </param>
/// <param name="existingOrNewLookup">Incoming entities: key <c>false</c> holds the new ones, key <c>true</c> the existing ones.</param>
/// <param name="existingSelector">Returns the tracked instance that corresponds to an incoming existing entity.</param>
/// <exception cref="NotImplementedException">
/// Thrown by the base RevisionNullFieldsBitMasks / RevisionSplitEntitiesUpsertPayloads when they are needed but not overridden.
/// </exception>
protected void SavePostsOrUsers<TPostOrUser, TRevision>(
    TbmDbContext db,
    FieldChangeIgnoranceCallbackRecord userFieldChangeIgnorance,
    Func<TPostOrUser, TRevision> revisionFactory,
    ILookup<bool, TPostOrUser> existingOrNewLookup,
    Func<TPostOrUser, TPostOrUser> existingSelector)
    where TPostOrUser : class where TRevision : class, IRevision
{
    static bool IsTimestampingFieldName(string name) => name is nameof(IPost.LastSeenAt)
        or nameof(ITimestampingEntity.CreatedAt) or nameof(ITimestampingEntity.UpdatedAt);

    // only used by the warning below: renders byte arrays as lowercase hex so they are readable in logs
    static object? ToHexWhenByteArray(object? value) =>
        value is byte[] bytes ? "0x" + Convert.ToHexString(bytes).ToLowerInvariant() : value;

    // both are the same for every entity of the batch, so resolve them once instead of once per entity
    var whichPostType = typeof(TPostOrUser);
    var entryIsUser = whichPostType == typeof(TiebaUser);

    db.Set<TPostOrUser>().AddRange(existingOrNewLookup[false]); // newly added
    var newRevisions = existingOrNewLookup[true].Select(newPostOrUser =>
    {
        var postOrUserInTracking = existingSelector(newPostOrUser);
        var entry = db.Entry(postOrUserInTracking);
        entry.CurrentValues.SetValues(newPostOrUser); // this will mutate postOrUserInTracking which is referenced by entry

        // rollback changes that overwrite original values with the default value 0 or null
        // for all fields of ITimestampingEntity and IPost.LastSeenAt
        // this will also affect the entity instance which postOrUserInTracking references to it
        entry.Properties.Where(p => IsTimestampingFieldName(p.Metadata.Name))
            .Where(p => p.IsModified).ForEach(p => p.IsModified = false);

        var revision = default(TRevision);
        var revisionNullFieldsBitMask = 0;
        foreach (var p in entry.Properties)
        {
            var pName = p.Metadata.Name;
            if (!p.IsModified || IsTimestampingFieldName(pName)) continue;

            if (FieldChangeIgnorance.Update(whichPostType, pName, p.OriginalValue, p.CurrentValue)
                || (entryIsUser && userFieldChangeIgnorance.Update(whichPostType, pName, p.OriginalValue, p.CurrentValue)))
            {
                p.IsModified = false;
                continue; // skip following revision check
            }
            if (FieldChangeIgnorance.Revision(whichPostType, pName, p.OriginalValue, p.CurrentValue)
                || (entryIsUser && userFieldChangeIgnorance.Revision(whichPostType, pName, p.OriginalValue, p.CurrentValue))) continue;

            // returning null drops the revision of the entire entity, including fields collected so far
            // the entity itself stays modified in the change tracker
            if (IsLatestReplierUser(pName, p, entry)) return null;

            if (!RevisionPropertiesCache[typeof(TRevision)].TryGetValue(pName, out var revisionProp))
            {
                _logger.LogWarning("Updating field {} is not existing in revision table, " +
                                   "newValue={}, oldValue={}, newObject={}, oldObject={}",
                    pName, ToHexWhenByteArray(p.CurrentValue), ToHexWhenByteArray(p.OriginalValue),
                    Helper.UnescapedJsonSerialize(newPostOrUser), Helper.UnescapedJsonSerialize(entry.OriginalValues.ToObject()));
            }
            else
            {
                revision ??= revisionFactory(postOrUserInTracking);

                // quote from MSDN https://learn.microsoft.com/en-us/dotnet/api/system.reflection.propertyinfo.setvalue
                // If the property type of this PropertyInfo object is a value type and value is null
                // the property will be set to the default value for that type.
                // https://stackoverflow.com/questions/3049477/propertyinfo-setvalue-and-nulls
                // this is a desired behavior to convert null values produced by ExtensionMethods.NullIfZero()
                // back to zeros for some revision fields that had been entity splitting
                // these split tables will only contain two Superkeys: the Candidate/Primary Key and the field gets split out
                // so it's no longer necessary to use NullFieldsBitMasks to identify between
                // the real null values and unchanged fields that have null as a placeholder
                revisionProp.SetValue(revision, p.OriginalValue);

                if (p.OriginalValue != null) continue;
                // fields that have already split out will not exist in RevisionNullFieldsBitMasks
                if (RevisionNullFieldsBitMasks.TryGetValue(pName, out var whichBitToMask))
                { // mask the corresponding field bit with 1
                    revisionNullFieldsBitMask |= whichBitToMask;
                }
            }
        }
        if (revision != null) revision.NullFieldsBitMask = (ushort?)revisionNullFieldsBitMask.NullIfZero();
        return revision;
    }).OfType<TRevision>().ToList();
    if (newRevisions.Count == 0) return; // quick exit to prevent execute sql with WHERE FALSE clause

    // the filter may remove every revision (e.g. when only split out fields have changed)
    // so the emptiness has to be checked again on the filtered set before executing the upsert
    // do not return here: split entities of the filtered out revisions still have to be upserted below
    var revisionsToUpsert = newRevisions.Where(r => !r.IsAllFieldsIsNullExceptSplit()).ToList();
    if (revisionsToUpsert.Count != 0)
    {
        _ = db.Set<TRevision>().UpsertRange(revisionsToUpsert).NoUpdate().Run();
    }

    // hand the split entities of all revisions over to the payload registered for their key
    newRevisions.OfType<RevisionWithSplitting<TBaseRevision>>()
        .SelectMany(r => r.SplitEntities)
        .GroupBy(p => p.Key, p => p.Value)
        .ForEach(g => RevisionSplitEntitiesUpsertPayloads[g.Key](db, g));
}
/// <summary>
/// Tells whether the entity behind <paramref name="entry"/> was, before this update,
/// a placeholder user created for a latest replier.
/// </summary>
/// <param name="pName">Name of the property currently being inspected.</param>
/// <param name="p">Change tracking entry of that property.</param>
/// <param name="entry">Change tracking entry of the whole entity.</param>
/// <returns>
/// <c>true</c> only when the inspected property is the portrait, its previous value was an empty string
/// and the previous state of the user equals a freshly built latest replier with the same uid, name and display name.
/// </returns>
private static bool IsLatestReplierUser(string pName, PropertyEntry p, EntityEntry entry)
{
    // ThreadCrawlFacade.ParseLatestRepliers() saves users with an empty string as their portrait
    // the (sub) reply crawler fills them in soon after it finds out the latest reply
    // so revisions of such users should be ignored for all of their fields
    // FieldChangeIgnorance.Revision() can't do that since it only judges one field at a time
    var portraitWasEmpty = pName == nameof(TiebaUser.Portrait) && p.OriginalValue is "";
    if (!portraitWasEmpty) return false;

    // OriginalValues.ToObject() builds a new instance from the previous values
    // the tracked instance can't be used here as it already carries the changed values
    var originalUser = (TiebaUser)entry.OriginalValues.ToObject();

    // a user that has nothing but the fields of a latest replier filled
    var expectedLatestReplier = ThreadCrawlFacade.LatestReplierFactory(
        originalUser.Uid, originalUser.Name, originalUser.DisplayName);

    // equal field values mean the previous state was generated by ParseLatestRepliers()
    return IsSameUser(originalUser, expectedLatestReplier);
}
/// <summary>
/// Compares two users field by field: Uid, Name, DisplayName, Portrait, PortraitUpdatedAt,
/// Gender, FansNickname, Icon and IpGeolocation.
/// </summary>
/// <returns><c>true</c> when every compared field is equal, otherwise <c>false</c>.</returns>
private static bool IsSameUser(TiebaUser a, TiebaUser b)
{
    var sameIdentity = a.Uid == b.Uid
        && a.Name == b.Name
        && a.DisplayName == b.DisplayName;
    if (!sameIdentity) return false;

    var samePortrait = a.Portrait == b.Portrait
        && a.PortraitUpdatedAt == b.PortraitUpdatedAt;
    if (!samePortrait) return false;

    var sameProfile = a.Gender == b.Gender
        && a.FansNickname == b.FansNickname;
    if (!sameProfile) return false;

    // icons count as equal when == already says so (which covers both being null)
    // or when both are present and contain the same sequence of elements
    var sameIcon = a.Icon == b.Icon
        || (a.Icon != null && b.Icon != null && a.Icon.SequenceEqual(b.Icon));
    return sameIcon && a.IpGeolocation == b.IpGeolocation;
}
}