In [70]:
import xarray as xr
import zarr


d = xr.DataArray([1, 2], dims=["a"], coords={"a": [1, 2]})

In [71]:
store = zarr.store.RemoteStore("local://test_zarr", asynchronous=True)

AttributeError: module 'zarr' has no attribute 'store'

In [72]:
import fsspec

mapper = fsspec.get_mapper("test_zarr")

d.to_zarr(mapper, mode="w")

<xarray.backends.zarr.ZarrStore at 0x1e90f92b140>

In [73]:
d.to_zarr("test_zarr", mode="w")

<xarray.backends.zarr.ZarrStore at 0x1e91147b440>

In [74]:
from pydantic import BaseModel, ConfigDict


class A(BaseModel):
    a: int
    model_config = ConfigDict(extra="allow")


class B(A):
    b: int
    model_config = ConfigDict(extra="allow")


class C(BaseModel):
    a: A
    model_config = ConfigDict(extra="allow")


C(a=B(a=1, b=2)).model_dump()

{'a': {'a': 1}}

In [75]:
import asyncio 
from zarr.sync import sync


async def gen(n):
    for i in range(n):
        yield i
        await asyncio.sleep(0.0)


def sync_gen(n):
    return gen(n)
    
def sync_iter(async_iterator) -> list:
    async def iter_to_list() -> list:
        return [item async for item in async_iterator]

    return sync(iter_to_list())


[v for v in [v for v in sync_iter(sync_gen(3))]]

ImportError: cannot import name 'sync' from 'zarr.sync' (C:\Users\josep\.conda\envs\tensordb\Lib\site-packages\zarr\sync.py)

In [None]:
z = zarr.zeros((10000, 10000), chunk_shape=(1000, 1000), dtype='i4')


In [None]:
dir(z._async_array)

In [None]:
z.metadata

In [None]:
z.save("test_zarr")

In [None]:
from pydantic import BaseModel, ConfigDict
import fsspec 


class A(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)
    a: int = 1
    b: fsspec.AbstractFileSystem

A(b=fsspec.filesystem("local")).model_dump()

In [None]:
import numpy as np
import dask.array as da
import time 

import fsspec 

a = fsspec.filesystem("local", use_listings_cache=False)


arr = da.zeros(shape=(100000000, ), chunks=(10000, ))

# arr.to_zarr(fsspec.FSMap(root="test-zarr", fs=a), overwrite=True)
with a.transaction:
    arr.to_zarr(fsspec.FSMap(root="test-zarr", fs=a), overwrite=True)
    time.sleep(5)
    # a.pipe_file("a", maybe_convert(np.array([1] * 100000000).tobytes()))


In [3]:
from lakefs_spec import LakeFSFileSystem

lfs = LakeFSFileSystem(
    host="127.0.0.1:8000",
    username="AKIAIOSFOLQUICKSTART",
    password="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
    access_key_id="AKIAIOSFOLQUICKSTART",
    secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
    # endpoint_url="http://127.0.0.1:8000",
    use_listings_cache=False
)

In [16]:
import xarray as xr
import dask.array as da
import s3fs
import fsspec
import time


for folder in ["test-zarr2", "test-zarr2"]:
    start = time.time()
    # The first execution is going to work
    path = f"s3://quickstart/main/{folder}"
    lfs = s3fs.S3FileSystem(
        key="AKIAIOSFOLQUICKSTART",
        secret="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", 
        endpoint_url="http://127.0.0.1:8000"
    )
    print(path)
    arr = da.zeros(shape=(100, 30), chunks=(2, 1)) + 7
    arr = xr.DataArray(
        arr, 
        dims=["a", "b"], 
        coords={
            "a": list(range(arr.shape[0])), 
            "b": list(range(arr.shape[1]))
        }
    ).to_dataset(name="data")
    
    
    # The error comes when it tries to clean the whole directory to rewrite the data
    # with lfs.transaction("quickstart", "main") as tx:
    fs_map = fsspec.FSMap(root=path, fs=lfs)
    arr.to_zarr(fs_map, mode="w")
        # tx.commit("Modifying the array")

    print(xr.open_zarr(fsspec.FSMap(root=path, fs=lfs)).compute())
    print(time.time() - start)

    # time.sleep(5)

s3://quickstart/main/test-zarr2
<xarray.Dataset> Size: 25kB
Dimensions:  (a: 100, b: 30)
Coordinates:
  * a        (a) int32 400B 0 1 2 3 4 5 6 7 8 9 ... 91 92 93 94 95 96 97 98 99
  * b        (b) int32 120B 0 1 2 3 4 5 6 7 8 9 ... 21 22 23 24 25 26 27 28 29
Data variables:
    data     (a, b) float64 24kB 7.0 7.0 7.0 7.0 7.0 7.0 ... 7.0 7.0 7.0 7.0 7.0
8.788392066955566
s3://quickstart/main/test-zarr2
<xarray.Dataset> Size: 25kB
Dimensions:  (a: 100, b: 30)
Coordinates:
  * a        (a) int32 400B 0 1 2 3 4 5 6 7 8 9 ... 91 92 93 94 95 96 97 98 99
  * b        (b) int32 120B 0 1 2 3 4 5 6 7 8 9 ... 21 22 23 24 25 26 27 28 29
Data variables:
    data     (a, b) float64 24kB 7.0 7.0 7.0 7.0 7.0 7.0 ... 7.0 7.0 7.0 7.0 7.0
9.125701904296875


In [None]:
# with lfs.transaction("quickstart", "main") as tx:
import fsspec
from zarr.storage import FSStore
fs_map = FSStore(url="lakefs://quickstart/test_zarr/test-zarr/", fs=lfs)
fs_map.fs.invalidate_cache()
print(list(fs_map))
print(xr.open_zarr(fs_map).compute())

In [None]:
lfs

In [None]:
dir(lfs.client.storage_config)

In [None]:
print(lfs.ls(f"quickstart/main/"))


In [None]:
REPO, BRANCH = "repo", "main"

with lfs.transaction("quickstart", "main") as tx:
    lfs.put(str("holaaa"), f"{REPO}/{tx.branch.id}/data")
    print(lfs.listdir(""))
    tx.commit()

In [None]:
list(lfs.ls("lakefs://test-zarr/main/here"))

In [None]:
def a():
    yield 1

def b():
    return a()

next(b())