# Config

In [141]:
from dotenv import dotenv_values

In [142]:
config = dotenv_values('../.env')

In [None]:
config.keys()

dict_keys(['SRC_DIR', 'DST_BUCKET_NAME', 'APP_KEY_ID', 'APP_KEY'])

# Tutorial

[Docs](https://b2-sdk-python.readthedocs.io/en/master/tutorial.html)

In [6]:
from b2sdk.v2 import InMemoryAccountInfo
from b2sdk.v2 import B2Api

In [7]:
info = InMemoryAccountInfo()  # store credentials, tokens and cache in memory
b2_api = B2Api(info)

In [27]:
# See keys on https://secure.backblaze.com/app_keys.htm?bznetid=166296201634409690118
# Used key: all-buckets-key
application_key_id = config['APP_KEY_ID']
application_key = config['APP_KEY']
b2_api.authorize_account("production", application_key_id, application_key)

In [28]:
b2_api.list_buckets()

[Bucket<ab9f3175869688ae78c20316,maz-test,allPrivate>,
 Bucket<9b1f916556c688ae78c20316,rasppi4,allPrivate>]

In [29]:
bucket_name = 'maz-test'
bucket = b2_api.get_bucket_by_name(bucket_name)
bucket

Bucket<ab9f3175869688ae78c20316,maz-test,allPrivate>

# Generate src file list

While repecting `.gitignore` files

In [35]:
import re
from pathlib import Path
from typing import Iterable, Union

from gitignore_parser import parse_gitignore

Types from [https://b2-sdk-python.readthedocs.io/en/master/api/sync.html#public-api-classes](https://b2-sdk-python.readthedocs.io/en/master/api/sync.html#public-api-classes)

In [135]:
Path(src_path / 'folder1').relative_to(src_path)

WindowsPath('folder1')

In [138]:
src_path: Path = Path('./src')
verbose: bool = True

excluded_files: Iterable[Union[str, re.Pattern]] = [] # see exclude_file_regexes
# excluded_dirs : Iterable[Union[str, re.Pattern]] = [] # see exclude_dir_regexes

def path_to_regex(path: Path) -> str:
    return path.as_posix().replace('/', '\/').replace('.', '\.')

def gen_exclude_lists(curr_dir: Path = src_path):
    paths_in_curr_dir = list(curr_dir.iterdir())
    possible_gitignore_path = curr_dir / '.gitignore'

    has_gitignore = possible_gitignore_path in paths_in_curr_dir
    if has_gitignore:
        # if verbose: print(f'{curr_dir} has a .gitignore')
        matches = parse_gitignore(possible_gitignore_path)

    for path in paths_in_curr_dir:
        if path.is_dir():
            gen_exclude_lists(path)
        elif path.is_file():
            if has_gitignore and matches(path):
                if verbose: print(f'ign {path}')
                # remove first part of the path as it is the source
                path = path.relative_to(src_path)
                excluded_files.append(path_to_regex(path))
        else:
            raise ValueError(f'{path} is neither file nor dir! not sure how to proceed!')

In [139]:
gen_exclude_lists()

ign src\folder1\file2.py
ign src\folder2\file2.txt
ign src\folder2\nested_folder1\file2.txt
ign src\folder3\file1.txt
ign src\folder3\file3.txt


In [140]:
excluded_files

['folder1\\/file2\\.py',
 'folder2\\/file2\\.txt',
 'folder2\\/nested_folder1\\/file2\\.txt',
 'folder3\\/file1\\.txt',
 'folder3\\/file3\\.txt']

# Synchronizer

[Docs](https://b2-sdk-python.readthedocs.io/en/master/api/sync.html)

In [8]:
import time
import sys

In [23]:
from b2sdk.v2 import ScanPoliciesManager
from b2sdk.v2 import parse_sync_folder
from b2sdk.v2 import SyncReport
from b2sdk.v2 import Synchronizer
from b2sdk.v2 import KeepOrDeleteMode, CompareVersionMode, NewerFileSyncMode

In [123]:
src = parse_sync_folder(str(src_path), b2_api)
dst = parse_sync_folder('b2://maz-test', b2_api,)

In [124]:
policies_manager = ScanPoliciesManager(
    exclude_all_symlinks=True,
    exclude_file_regexes=excluded_files,
)

In [127]:
synchronizer = Synchronizer(
    max_workers=10,
    policies_manager=policies_manager,
    dry_run=False,
    allow_empty_source=True,
    compare_version_mode=CompareVersionMode.MODTIME,
    compare_threshold=10,
    newer_file_mode=NewerFileSyncMode.REPLACE,
    keep_days_or_delete=KeepOrDeleteMode.DELETE, # delete old folders
    keep_days=10,
)

In [128]:
no_progress = True
with SyncReport(sys.stdout, no_progress) as reporter:
    synchronizer.sync_folders(
        source_folder=src,
        dest_folder=dst,
        now_millis=int(round(time.time() * 1000)),
        reporter=reporter,
    )

upload folder1/.gitignore
upload folder2/nested_folder1/.gitignore
upload folder3/.gitignore
upload folder1/file1.txt
upload folder2/nested_folder1/file1.txt
upload folder2/.gitignore
upload folder3/file2.txt
upload folder2/file1.txt


# List files in a bucket

[Docs](https://b2-sdk-python.readthedocs.io/en/master/quick_start.html#list-files)

In [106]:
bucket_name = 'maz-test'
bucket = b2_api.get_bucket_by_name(bucket_name)
bucket

Bucket<ab9f3175869688ae78c20316,maz-test,allPrivate>

## Only top-level

In [129]:
for file_version, folder_name in bucket.ls(latest_only=True):
    print(file_version.file_name, file_version.upload_timestamp, folder_name)

folder1/.gitignore 1634414393000 folder1/
folder2/.gitignore 1634414393000 folder2/
folder3/.gitignore 1634414393000 folder3/


## Recursive

In [130]:
for file_version, folder_name in bucket.ls(latest_only=True, recursive=True):
    print(file_version.file_name, file_version.upload_timestamp, folder_name)

folder1/.gitignore 1634414393000 None
folder1/file1.txt 1634414393000 None
folder2/.gitignore 1634414393000 None
folder2/file1.txt 1634414394000 None
folder2/nested_folder1/.gitignore 1634414393000 None
folder2/nested_folder1/file1.txt 1634414393000 None
folder3/.gitignore 1634414393000 None
folder3/file2.txt 1634414393000 None


# Restore

In [132]:
src = parse_sync_folder('b2://maz-test', b2_api)
dst = parse_sync_folder('./restored', b2_api,)

policies_manager = ScanPoliciesManager(
    exclude_all_symlinks=True,
)

synchronizer = Synchronizer(
    max_workers=10,
    policies_manager=policies_manager,
    dry_run=False,
    allow_empty_source=True,
    compare_version_mode=CompareVersionMode.MODTIME,
    compare_threshold=10,
    newer_file_mode=NewerFileSyncMode.REPLACE,
    keep_days_or_delete=KeepOrDeleteMode.DELETE, # delete old folders
    keep_days=10,
)

no_progress = True
with SyncReport(sys.stdout, no_progress) as reporter:
    synchronizer.sync_folders(
        source_folder=src,
        dest_folder=dst,
        now_millis=int(round(time.time() * 1000)),
        reporter=reporter,
    )

dnload folder1/.gitignore
dnload folder2/nested_folder1/file1.txt
dnload folder3/file2.txt
dnload folder2/nested_folder1/.gitignore
dnload folder1/file1.txt
dnload folder3/.gitignore
dnload folder2/file1.txt
dnload folder2/.gitignore
