# lsad.ipynb -- logseq assets downloader

Go through all the .md files in a logseq graph, find all uploaded images and pdf files, download them to the ../assets/ folder, and change the link to the local file.

Typical image link: `![](https://firebasestorage.googleapis.com/v0/b/firescript-577a2.appspot.com/o/imgs%2Fapp%2Fmorawa%2FGhQGPfOOrV.png?alt=media&token=4abeaf62-63c5-4859-97a5-91ace19e5dd7)`  
Should be changed to `![](../assets/<page name>_img<image number>.(png|jpg))`  

Typical pdf link: `{{pdf(https://firebasestorage.googleapis.com/v0/b/firescript-577a2.appspot.com/o/imgs%2Fapp%2Fmorawa%2FgAFZMChTVX.%20Standard%20notations%20for%20Deep%20Learning.pdf?alt=media&token=3ce4577b-bc6a-431c-9988-a40c20104af2)}}`  
Should be changed to `![pdf](../assets/<page name>_pdf<pdf number>.pdf)`

It is expected that the folder with the graph will have a `pages` and an `assets` folder.  
Only one link per line is handled. Only images with the `.png`, `.jpg`, or `.jpeg` extensions are handled.

---
To use this script, first make a backup copy of your graph and store it somewhere else.  
Next set the right value in the `path` variable of the second cell. This variable
should be the path to the `pages` folder in the Logseq graph.  
Once this is done run all four cells. The fourth cell will print for each file the line substitutions that would be done.
If these substitutions look OK, then set `dry_run = False` in the fourth cell and run it again.

In [None]:
import os
import requests
import re

In [None]:
# User configuration: edit this value before running the notebook.
path = "/home/z/Dropbox (OIST)/logseq/morawa/pages" # path for the "pages" folder in the graph
# IPython magics (not plain Python): change the working directory to `path`,
# then display the resulting working directory as a sanity check.
%cd $path
%pwd

In [None]:
def download_file(url, save_path, timeout=30):
    """Download ``url`` and write the response body to ``save_path``.

    Network and HTTP errors are reported on stdout instead of being raised,
    so one dead link does not abort a whole run. Errors while writing the
    file are NOT caught: they propagate to the caller.

    Args:
        url: Address of the file to fetch.
        save_path: Destination file path; an existing file is overwritten.
        timeout: Seconds to wait for the server (connect / between bytes)
            before giving up. Without a timeout ``requests`` may wait
            forever on a stalled connection.

    Returns:
        True if the file was downloaded and saved, False if the request
        failed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn HTTP 4xx/5xx into an exception
    except requests.exceptions.RequestException as e:
        print(f"Error downloading the file: {e}")
        return False

    # Kept outside the try block so that only request errors are swallowed.
    with open(save_path, 'wb') as file:
        file.write(response.content)

    print(f"File downloaded and saved as '{save_path}'")
    return True

In [None]:
# set dry_run = False after checking the line substitutions that will be made

dry_run = True

# Both patterns are applied with .match(), so group 1 is the text that precedes
# the link on the line. The URL groups stop at the first closing delimiter so
# that text following the link is never swallowed (and then lost) by the
# substitution.
#   pdf_re: group 2 is the raw macro argument, e.g. "(https://...)".
#   img_re: group 2 is the literal "![]", group 3 is the link target.
pdf_re = re.compile(r"(.*)\{\{pdf[ ]*(.*?)\}\}")
img_re = re.compile(r"(.*)(!\[\])\(([^)]*)\)")


def _clean_url(raw):
    """Return the bare URL from a raw link/macro argument.

    Drops surrounding whitespace, a leading colon, and one pair of wrapping
    parentheses, so "{{pdf(url)}}", "{{pdf: url}}" and "{{pdf url}}" all
    yield the same URL.
    """
    url = raw.strip().lstrip(":").strip()
    if url.startswith("(") and url.endswith(")"):
        url = url[1:-1].strip()
    return url


def _is_remote(url):
    """Return True if ``url`` is an http(s) address, i.e. something to download."""
    return url.startswith(("http://", "https://"))


def _image_extension(url):
    """Return the extension to save an image under, based on its URL.

    Raises:
        Exception: if the URL contains none of .png, .jpg or .jpeg.
    """
    lowered = url.lower()  # accept .PNG / .JPG as well
    if ".png" in lowered:
        return ".png"
    if ".jpg" in lowered or ".jpeg" in lowered:
        return ".jpg"
    raise Exception("Unknown format for putative image file")


# sorted() only makes the printed report deterministic; counters are per file.
for filename in sorted(os.listdir(path)):
    file_path = os.path.join(path, filename)
    # Only Markdown pages are processed; skip sub-folders and other files.
    if not filename.endswith(".md") or not os.path.isfile(file_path):
        continue

    # splitext keeps dots inside the page name ("a.b.md" -> "a.b"), so that
    # distinct pages can not end up sharing asset names.
    page_name = os.path.splitext(filename)[0]
    pdf_counter = 0
    img_counter = 0

    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    new_lines = []
    for line in lines:
        # Try the pdf macro first, then the image link. Links that are not
        # remote (e.g. already pointing into ../assets/) are left untouched,
        # which makes it safe to run the cell more than once.
        match = pdf_re.match(line)
        url = _clean_url(match[2]) if match else ""
        if _is_remote(url):
            pdf_counter += 1
            save_name = f"{page_name}_pdf{pdf_counter}.pdf"
            label = "pdf"
        else:
            match = img_re.match(line)
            url = _clean_url(match[3]) if match else ""
            if not _is_remote(url):
                new_lines.append(line)
                continue
            ext = _image_extension(url)
            img_counter += 1
            save_name = f"{page_name}_img{img_counter}{ext}"
            label = ""

        # Path written into the page (relative to the pages folder) ...
        link_path = f"../assets/{save_name}"
        # ... and path used for saving, independent of the working directory.
        local_path = os.path.join(path, os.pardir, "assets", save_name)

        # The match starts at the beginning of the line, so everything after
        # match.end() is trailing text that must be preserved.
        new_line = match[1] + f"![{label}]({link_path})" + line[match.end():]
        print(line)
        print(new_line)

        if not dry_run:
            downloaded = download_file(url, local_path)
            # Never replace a working remote link by a link to a file that
            # is not there: keep the original line if the download failed.
            if downloaded is False or not os.path.isfile(local_path):
                print(f"Download failed, keeping the original link in '{filename}'")
                new_line = line
        new_lines.append(new_line)

    # Leave files without substitutions untouched on disk.
    if not dry_run and new_lines != lines:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.writelines(new_lines)

    if pdf_counter > 0:
        print(f"---------(pdf) {filename}: {pdf_counter} --------\n")
    if img_counter > 0:
        print(f"---------(img) {filename}: {img_counter} --------\n")