### Connecting to the service

To begin working with the Flight Fusion service, first start a local instance:

```sh
fusion server start
```

In [None]:
from flight_fusion import FusionServiceClient, ClientOptions, AssetKey
from flight_fusion.ipc.v1alpha1 import SaveMode
import numpy as np
import pandas as pd
import warnings
import dagster
from numpy.random import default_rng

# suppress dagster's ExperimentalWarning for the rest of the session
warnings.filterwarnings("ignore", category=dagster.ExperimentalWarning)

# seed the generator so the "part" column below is reproducible across runs;
# an unseeded default_rng() draws fresh OS entropy on every execution
rng = default_rng(42)

# let's also define some test data to interact with the service
# (the legacy global seed is kept so col1..col3 retain their previous values)
np.random.seed(42)
df_example = pd.DataFrame(np.random.randn(100, 3), columns=["col1", "col2", "col3"])
# partition key with values drawn from {0, 1, 2}
df_example["part"] = rng.choice(3, size=100, replace=True)

# and create an instance of the service client
ffc = FusionServiceClient(ClientOptions(host="localhost", port=50051))

### Discovery and metadata

In [None]:
# ask the service which datasets it knows about
available_datasets = ffc.list_datasets()

# NOTE(review): index 3 is hard-coded and raises IndexError when fewer than
# four datasets are listed - confirm which dataset is meant here
selected_key = available_datasets[3].asset_key
fds = ffc.get_dataset_client(selected_key)

# show the metadata of the selected dataset
fds.get_metadata()

In [None]:
# display the datasets returned by the service
ffc.list_datasets()

### Writing and reading data

#### Dataset Client

In [None]:
# when interacting with a single dataset, you need a dataset client;
# it is obtained from the service client using the dataset's asset key
fds = ffc.get_dataset_client(AssetKey(["root", "partitioned"]))

In [None]:
# write the example data using the overwrite save mode, partitioned by the "part" column
fds.write_into(df_example, save_mode=SaveMode.SAVE_MODE_OVERWRITE, partition_by=["part"])

In [None]:
# load the dataset back from the service and display its shape
loaded = fds.load()
loaded.shape

In [None]:
# display the schema of the loaded data
loaded.schema

In [None]:
# write data into the dataset, this time without partitioning;
# the save mode is passed by keyword, as in the partitioned write
fds.write_into(df_example, save_mode=SaveMode.SAVE_MODE_OVERWRITE)

In [None]:
# to retrieve data, you can load the entire table ...
df = fds.load().to_pandas()

# ... or execute a query against it
# NOTE(review): the query references a table named `new_dataset`, while this
# client was created for AssetKey(["root", "partitioned"]) - confirm the table
# name under which the service exposes this dataset
df_query = fds.query("SELECT avg(col1) FROM new_dataset").to_pandas()

# compare the shapes of the full table and the query result
df.shape, df_query.shape

In [None]:
# load only the requested columns and convert the result to pandas
fds.load(columns=["col2", "col3"]).to_pandas()

#### Versioned Dataset Client

In [None]:
# getting a client for versioned or regular datasets works the same way
vds = ffc.get_dataset_client(AssetKey(["demo", "new_versioned_dataset"]))

# write the example data; the save mode is passed by keyword, as in the
# partitioned write earlier in this notebook
vds.write_into(df_example, save_mode=SaveMode.SAVE_MODE_OVERWRITE)

In [None]:
df = vds.load(columns=["col1", "col3"])
df.to_pandas()

#### Querying multiple datasets

In [None]:
# asset keys of the two datasets that will be queried together
key_1 = AssetKey(["table_1"])
key_2 = AssetKey(["table_2"])

# write the same example data into both datasets
# NOTE(review): no save mode is given here, so the service default applies -
# confirm what happens when this cell is re-run
fds_1 = ffc.get_dataset_client(key_1)
fds_1.write_into(df_example)

fds_2 = ffc.get_dataset_client(key_2)
fds_2.write_into(df_example)

# a context built from several asset keys lets one query span all of them
ctx = ffc.get_context([key_1, key_2])
query = "SELECT * FROM table_1 UNION ALL SELECT * FROM table_2"
result = ctx.query(query)
df = result.to_pandas()

df


In [None]:
import pyarrow as pa
import pyarrow.dataset as ds

from datafusion import column, literal, SessionContext

# NOTE: this rebinds `ctx`, replacing the context obtained from
# ffc.get_context in the previous cell with a local datafusion session
ctx = SessionContext()

# build a small two-column record batch ...
col_a = pa.array([1, 2, 3])
col_b = pa.array([4, 5, 6])
batch = pa.RecordBatch.from_arrays([col_a, col_b], names=["a", "b"])

# ... wrap it in a pyarrow dataset and register it under the table name "t"
dataset = ds.dataset([batch])
ctx.register_dataset("t", dataset)

# run a SQL query against the registered table and collect the result
query_frame = ctx.sql("SELECT a+b, a-b FROM t")
result = query_frame.collect()
result