Inoreader sync: work harder to avoid dupes

When Inoreader gets an item first and we add it, the item carries an Inoreader id. When canto later receives the same item, it has a real id (or one canto assigned to it), which is different. As such, canto doesn't recognize that these items are identical (nor should it - that's the whole point of the id), and you end up with dupes: one from the feed and one from Inoreader. To resolve this, when synchronizing with Inoreader, remove old items that were only seen in Inoreader content and attempt to re-add them, which will properly match the items if canto has found a real copy in the meantime.
themoken · Jun 18, 2015 · c64c49e · c64c49e
1 parent 20aa822
commit c64c49e
Showing 1 changed file with 31 additions and 1 deletion.
diff --git a/plugins/sync-inoreader.py b/plugins/sync-inoreader.py
@@ -299,6 +299,7 @@ def additems_inoreader(self, **kwargs):
         newcontent = kwargs["newcontent"]
         tags_to_add = kwargs["tags_to_add"]
         tags_to_remove = kwargs["tags_to_remove"]
+        remove_items = kwargs["remove_items"]
 
         stream_id = quote("feed/" + feed.URL, [])
 
@@ -318,8 +319,34 @@ def additems_inoreader(self, **kwargs):
         except Exception as e:
             log.debug("EXCEPT: %s", traceback.format_exc())
 
+        # Find items that were inserted last time, and remove them, potentially
+        # adding them to our fresh Inoreader data.
+
+        # This keeps us from getting dupes when Inoreader finds an item, we
+        # insert it, and then a real copy comes to canto but canto doesn't
+        # detect the dupe since the ids are different.
+
+        for canto_entry in newcontent["entries"][:]:
+            if "from_inoreader" not in canto_entry:
+                continue
+
+            remove_ids.append(canto_entry)
+            newcontent["entries"].remove(canto_entry)
+
+            for ino_entry in self.ino_data[:]:
+                if canto_entry["id"] == ino_entry["id"]:
+                    break
+            else:
+                self.ino_data.append(canto_entry)
+
+        # Now insert (or re-insert) items that aren't already in our data.
+
+        # NOTE: It's okay if re-inserted items are also in remove_ids, since
+        # that's processed first, and will be cancelled out by adding the tags
+        # afterwards.
+
         for ino_entry in self.ino_data:
-            for canto_entry in newcontent["entries"][:]:
+            for canto_entry in newcontent["entries"]:
                 if ino_entry["canonical"][0]["href"] != canto_entry["link"]:
                     continue
                 break
@@ -328,6 +355,9 @@ def additems_inoreader(self, **kwargs):
                 ino_entry["summary"] = ino_entry["summary"]["content"]
                 ino_entry["link"] = ino_entry["canonical"][0]["href"]
 
+                # mark this item as from inoreader (missing from feed)
+                ino_entry["from_inoreader"] = True
+
                 newcontent["entries"].append(ino_entry)
                 tags_to_add.append((ino_entry, "maintag:" + feed.name ))