Skip to content

Commit

Permalink
Daemon.jsonrpc_file_read: read claims from a file and download them
Browse files Browse the repository at this point in the history
With `lbrynet file summary` we are able to produce a file with a list
of claims.

With `lbrynet file read` we are able to parse that file,
get the claim IDs, and then download each of the streams.
```
lbrynet file read --file=summary.txt
```
  • Loading branch information
belikor committed Sep 14, 2021
1 parent a8fa654 commit d9acdb8
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 1 deletion.
63 changes: 62 additions & 1 deletion lbry/extras/daemon/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
from lbry.schema.claim import Claim
from lbry.schema.url import URL, normalize_name
from lbry.wallet.server.db.elasticsearch.constants import RANGE_FIELDS, REPLACEMENTS
from lbry.extras.daemon.files import print_items
from lbry.extras.daemon.files import print_items, parse_claim_file
MY_RANGE_FIELDS = RANGE_FIELDS - {"limit_claims_per_channel"}

if typing.TYPE_CHECKING:
Expand Down Expand Up @@ -2304,6 +2304,67 @@ async def jsonrpc_file_summary(self, show=None,
return {"total_items": n_out,
"file": file_out}

@requires(WALLET_COMPONENT, EXCHANGE_RATE_MANAGER_COMPONENT, BLOB_COMPONENT, DATABASE_COMPONENT,
          FILE_MANAGER_COMPONENT)
async def jsonrpc_file_read(self, file=None, download_directory=None,
                            timeout=None, save_file=None, wallet_id=None,
                            sep=";", start=1, end=0):
    """
    Read the claims from a file, and download the corresponding claims.

    Usage:
        file_read [--file=<file>] [--download_directory=<download_directory>]
                  [--timeout=<timeout>] [--save_file=<save_file>] [--wallet_id=<wallet_id>]
                  [--sep=<sep>] [--start=<start>] [--end=<end>]

    Options:
        --file=<file>  : (str) name of the input file from which to read the claim_ids
        --download_directory=<download_directory>  : (str) full path to the directory to download into
        --timeout=<timeout>  : (int) download timeout in number of seconds
        --save_file=<save_file>  : (bool) save the file to the downloads directory
        --wallet_id=<wallet_id>  : (str) wallet to check for claim purchase receipts
        --sep=<sep>  : (str) string used as separator for fields (default ;)
        --start=<start>  : (int) show claims starting from this index (default 1)
        --end=<end>  : (int) show claims until and including this index (default 0);
                       if it is 0, it is the same as the last index
    """
    # Guard clauses: a readable input file is mandatory.
    if not file:
        return {"error": "No file to read"}
    if not os.path.exists(file):
        return {"error": f'File does not exist "{file}"'}

    parsed = parse_claim_file(file=file, sep=sep)
    if not parsed or len(parsed) < 1:
        return {"error": f'File does not contain claim IDs, "{file}"'}

    downloaded = []

    # Indices are 1-based; `end == 0` means "until the last item".
    for index, entry in enumerate(parsed, start=1):
        if index < start:
            continue
        if end != 0 and index > end:
            break

        # Resolve the claim ID to a claim; skip IDs that match nothing.
        search = await self.jsonrpc_claim_search(claim_id=entry["claim_id"],
                                                wallet_id=wallet_id)
        if search["total_items"] < 1:
            continue

        # Use the canonical URL of the newest matching result to download.
        canonical = search["items"][-1].meta["canonical_url"]
        stream = await self.jsonrpc_get(canonical, file_name=None,
                                        download_directory=download_directory,
                                        timeout=timeout, save_file=save_file,
                                        wallet_id=wallet_id)
        if stream:
            downloaded.append(stream)

    return {"total_items": len(downloaded),
            "file": file}

PURCHASE_DOC = """
List and make purchases of claims.
"""
Expand Down
65 changes: 65 additions & 0 deletions lbry/extras/daemon/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,68 @@ def print_items(items=None, release_times=None, show="all",
print("\n".join(out_list))

return len(out_list), file


def parse_claim_file(file=None, sep=";", start=1, end=0):
    """
    Parse a CSV-like file and extract the claim IDs it contains.

    Each non-empty, non-comment line is split on `sep`; the first field
    that is a 40-character alphanumeric string is taken as the claim ID.
    A one-line summary per parsed line is printed to standard output.

    Parameters
    ----------
    file: str
        Path to the input file. Lines that are blank or start with '#'
        are skipped.
    sep: str, optional
        Field separator used to split each line (default ';').
    start: int, optional
        1-based line number from which to start parsing (default 1).
    end: int, optional
        Last 1-based line number to parse, inclusive; 0 means parse
        until the end of the file (default 0).

    Returns
    -------
    list of dict
        One ``{"claim_id": <40-char string>}`` per line where an ID was
        found; may be empty.
    bool
        ``False`` if `file` is falsy or the file has no lines at all
        (kept for backward compatibility with existing callers that
        only test truthiness).
    """
    if not file:
        return False

    with open(file, "r") as fdescriptor:
        lines = fdescriptor.readlines()

    n_lines = len(lines)
    claims = []

    if n_lines < 1:
        return False

    out_list = []

    for num, line in enumerate(lines, start=1):
        # Skip lines with only whitespace, and starting with # (comments)
        line = line.strip()
        if not line or line.startswith("#"):
            continue

        # `num` counts physical lines, so skipped lines still consume indices.
        if num < start:
            continue
        if end != 0 and num > end:
            break

        out = "{:4d}/{:4d}".format(num, n_lines) + f"{sep} "

        # Split by using the separator, and remove whitespaces
        clean_parts = [part.strip() for part in line.split(sep)]

        # A claim ID is a 40-character alphanumeric (hexadecimal) string.
        # `isalnum` enforces the "alphanumeric" contract the old code only
        # claimed: it also rules out URI symbols like '/', '@', '#', ':',
        # so 'lbry://@some/video#4' can never be mistaken for an ID.
        claim_id = next((part for part in clean_parts
                         if len(part) == 40 and part.isalnum()),
                        None)

        if claim_id:
            claims.append({"claim_id": claim_id})
            out_list.append(out + f"claim_id: {claim_id}")
        else:
            out_list.append(out + "no 'claim_id' found, "
                            "it must be a 40-character alphanumeric string "
                            "without special symbols like '/', '@', '#', ':'")

    print(f'Read summary: "{file}"')
    print("\n".join(out_list))
    n_claims = len(claims)
    print(f"Effective claims found: {n_claims}")
    return claims

0 comments on commit d9acdb8

Please sign in to comment.