In [None]:
import hashlib
import sys
from pathlib import Path

import py_fast_rsync
from faker import Faker

# Seed Faker's shared random generator so that "Restart Kernel -> Run All"
# regenerates the same sample text on every run instead of fresh random data.
Faker.seed(0)
fake = Faker()

In [None]:
# Directory (relative to the working directory) that holds the generated
# sample files; creating it is a no-op when it already exists.
data_folder = Path("./data")
data_folder.mkdir(parents=False, exist_ok=True)

In [None]:
def create_txt(size_in_kb):
    """Build random text whose UTF-8 encoding is at least ``size_in_kb`` KiB.

    Paragraphs from the module-level ``fake`` generator are appended, each
    followed by a newline, until the accumulated UTF-8 size reaches
    ``size_in_kb * 1024`` bytes. The result may overshoot the target by up to
    one paragraph.

    Args:
        size_in_kb: Minimum size of the encoded text, in units of 1024 bytes.
            A value of zero or less yields an empty string.

    Returns:
        The generated text as a ``str``.
    """
    target_bytes = size_in_kb * 1024
    paragraphs = []
    total_bytes = 0
    # Keep a running byte count instead of re-encoding the whole text on every
    # iteration, which made generation quadratic in the requested size.
    while total_bytes < target_bytes:
        paragraph = fake.text() + "\n"
        paragraphs.append(paragraph)
        total_bytes += len(paragraph.encode("utf-8"))
    return "".join(paragraphs)


# Write one sample file per size (in KB) into the data folder.
for size in [1, 10, 100, 1000]:
    file_content = create_txt(size)
    file_path = data_folder / f"{size}KB.txt"
    # Write explicit UTF-8 bytes: create_txt measures the size in UTF-8, while
    # write_text() without an encoding uses the platform default and newline
    # translation, so the on-disk size could differ from the requested one.
    file_path.write_bytes(file_content.encode("utf-8"))

In [None]:
from py_fast_rsync import signature

In [None]:
# The "server" copy is the raw content of the largest sample file.
data_server = Path("./data/1000KB.txt").read_bytes()

# The "local" copy is the same content with its last 100 bytes cut off.
data_local = data_server[:-100]

# Show both lengths: data_server is 100 bytes longer than data_local.
(len(data_server), len(data_local))

In [None]:
# full pipeline
# 1. detect which files differ and enqueue them
# 2. pop from the queue and decide which operation to perform
    # - look at deleted/modified state, locally and on the remote
    # - look at permissions, locally and on the remote
    # - decide
# rsync or upload/download

In [None]:
# Scenario 1: client pulls changes from server
#
# Payload sizes are measured with len(): sys.getsizeof() reports the in-memory
# size of the Python object (object header included), not the number of bytes
# that would travel over the network.
# NOTE(review): assumes py_fast_rsync returns plain `bytes` for signatures and
# diffs -- confirm against the library.

# 1. client sends signature to server
# POST /rsync/get_diff {path: str, signature: blob}
sig_local = signature.calculate(data_local)
print(f"sending {len(sig_local)} bytes to server")

# 2. server calculates diff and hash for verification
diff = py_fast_rsync.diff(sig_local, data_server)
hash_server = hashlib.sha256(data_server).digest()

# 3. server returns diff to client
print(f"sending {len(diff)} + {len(hash_server)} bytes to client")

# 4. client applies diff
result = py_fast_rsync.apply(data_local, diff)

# 5. client verifies the result
hash_result = hashlib.sha256(result).digest()
assert hash_result == hash_server
assert data_server == result

# Calculate bytes saved compared with downloading the whole server file.
# Everything exchanged counts against the saving, including the signature the
# client had to upload first.
bytes_saved = len(data_server) - len(sig_local) - len(diff) - len(hash_server)
print("bytes saved", bytes_saved)

In [None]:
# Scenario 2: client pushes changes to server
#
# Payload sizes are measured with len(): sys.getsizeof() reports the in-memory
# size of the Python object (object header included), not the number of bytes
# that would travel over the network.
# NOTE(review): assumes py_fast_rsync returns plain `bytes` for signatures and
# diffs -- confirm against the library.

# 1. client requests signature from server
# GET /rsync/get_signature?path={path}
sig_server = signature.calculate(data_server)
print(f"sending {len(sig_server)} bytes to client")

# 2. client calculates diff and hash for verification
diff = py_fast_rsync.diff(sig_server, data_local)
hash_local = hashlib.sha256(data_local).digest()

# 3. client sends diff to server
# POST /rsync/apply_diff
print(f"sending {len(diff)} + {len(hash_local)} bytes to server")

# 4. server applies diff
result = py_fast_rsync.apply(data_server, diff)

# 5. server verifies the result
hash_result = hashlib.sha256(result).digest()
assert hash_result == hash_local
assert data_local == result

# Calculate bytes saved compared with uploading the whole local file: a plain
# push would send data_local, so that is the baseline (not data_server).
# Everything exchanged counts against the saving, including the signature the
# client had to download first.
bytes_saved = len(data_local) - len(sig_server) - len(diff) - len(hash_local)
print("bytes saved", bytes_saved)

# TODO

In [None]:
from fastapi import FastAPI
from pydantic import BaseModel

# Application object on which the /rsync/* routes below are registered.
app = FastAPI()


class SignatureRequest(BaseModel):
    """One item of the request body of ``POST /rsync/get_diff``."""

    # Path identifying the file the signature belongs to.
    path: str
    # Signature bytes; presumably the output of signature.calculate() on the
    # client's copy of the file -- confirm once the endpoint is implemented.
    signature: bytes


class DiffResponse(BaseModel):
    """One item of the response of ``POST /rsync/get_diff``."""

    # Path identifying the file the diff applies to.
    path: str
    # Binary delta. Declared as bytes (not str) because the rsync diff is
    # arbitrary binary data, matching the `signature` and `hash` fields.
    # NOTE(review): raw bytes in a JSON body need an encoding such as base64
    # -- decide on the wire format when implementing the endpoint.
    diff: bytes
    # Digest used to verify the patched file; presumably SHA-256 of the
    # server copy, as in the pull scenario -- confirm.
    hash: bytes


class SignatureResponse(BaseModel):
    """One item of the response of ``GET /rsync/get_signature``."""

    # Path identifying the file the signature belongs to.
    path: str
    # Signature bytes; presumably the output of signature.calculate() on the
    # server's copy of the file -- confirm once the endpoint is implemented.
    signature: bytes


class FileRequest(BaseModel):
    """One item of the request of ``GET /rsync/get_signature``."""

    # Path identifying the file whose signature is requested.
    path: str


class DiffRequest(BaseModel):
    """One item of the request body of ``POST /rsync/apply_diff``."""

    # Path identifying the file the diff applies to.
    path: str
    # Binary delta. Declared as bytes (not str) because the rsync diff is
    # arbitrary binary data, matching the `signature` and `hash` fields.
    # NOTE(review): raw bytes in a JSON body need an encoding such as base64
    # -- decide on the wire format when implementing the endpoint.
    diff: bytes
    # Digest used to verify the patched file; presumably SHA-256 of the
    # client copy, as in the push scenario -- confirm.
    hash: bytes


# Endpoint definitions


@app.post("/rsync/get_diff")
async def get_diffs(
    signature_requests: list[SignatureRequest],
) -> list[DiffResponse]:
    """Placeholder for the "client pulls" endpoint; not implemented yet.

    Declared to take a list of ``SignatureRequest`` items and to return a
    list of ``DiffResponse`` items. The body is empty, so the handler
    currently returns ``None``.
    """
    ...


@app.get("/rsync/get_signature")
async def get_signatures(
    file_requests: list[FileRequest],
) -> list[SignatureResponse]:
    """Placeholder for the signature lookup endpoint; not implemented yet.

    Declared to take a list of ``FileRequest`` items and to return a list of
    ``SignatureResponse`` items. The body is empty, so the handler currently
    returns ``None``.

    NOTE(review): a list of models is normally read from the request body,
    which is unusual for a GET route -- confirm the intended method or
    parameter style before implementing.
    """
    ...


@app.post("/rsync/apply_diff")
async def apply_diffs(diff_requests: list[DiffRequest]) -> None:
    """Placeholder for the "client pushes" endpoint; not implemented yet.

    Declared to take a list of ``DiffRequest`` items. The body is empty, so
    nothing is applied and the handler returns ``None``.
    """
    ...

# TODO: endpoints still to be designed. They are kept as comments because a
# decorator that is not followed by a function definition is a SyntaxError,
# which prevents this whole cell from running.
#   POST /rsync/upload
#   POST /rsync/download
#   POST /rsync/delete
#   POST /rsync/bulk  (later?)

