In [None]:
from flight_fusion import FusionServiceClient, ClientOptions, TableClient
from flight_fusion.ipc.v1alpha1 import AreaSourceMetadata, SaveMode
import numpy as np
import pandas as pd

### Connection

To connect to the service, we need to know its connection parameters. Each parameter listed below can also be configured via an environment variable.

| parameter | env key | description |
| --- | --- | --- |
| `host` | `FF_HOST` | host to connect to |
| `port` | `FF_PORT` | port to connect to |

In [None]:
# connection settings for the service (host and port)
options = ClientOptions(
    host="localhost",
    port=50051,
)

# let's also build a small example frame to send to the service;
# the seed is fixed so the random values are the same on every run
np.random.seed(42)
df_example = pd.DataFrame(
    data=np.random.randn(5, 3),
    columns=["col1", "col2", "col3"],
)

# the service client is created from the connection options
ffc = FusionServiceClient(options)

# a dataset client is obtained from the service client and is used
# when interacting with a single dataset
fds = ffc.get_dataset_client(
    name="new_dataset",
    areas=["asd", "fgh"],
)

### Dataset Client

The `DatasetClient` exposes operations to interact with datasets defined in flight fusion.

In [None]:
# write the example DataFrame into the dataset through the dataset client
fds.write_into(df_example)

# get the schema for the just written data - also when previously registered :)
# NOTE(review): the call below is commented out in the original - confirm
# whether `schema()` is expected to work before re-enabling it
# fds.schema()

In [None]:
# option 1: load the entire table, then convert the result to pandas
table = fds.load()
df = table.to_pandas()

# option 2: run a SQL query against the dataset and convert that result
sql = "SELECT avg(col1) FROM new_dataset"
df_query = fds.query(sql).to_pandas()

# display the shapes of both results as the cell output
(df.shape, df_query.shape)

In [None]:
# create an AreaSourceMetadata instance without passing any arguments
metadata = AreaSourceMetadata()
# set it as the metadata of the dataset through the dataset client
fds.set_metadata(metadata=metadata)

# read the dataset's metadata back through the dataset client
meta = fds.get_metadata()

# request statistics for the dataset
# NOTE(review): the shape/content of the returned statistics is not visible
# here - confirm against the client API
stats = fds.statistics()

In [None]:
# write the same example frame into two separately named datasets
fds_1 = ffc.get_dataset_client(
    name="table_1",
    areas=[],
)
fds_1.write_into(df_example)

fds_2 = ffc.get_dataset_client(
    name="table_2",
    areas=[],
)
fds_2.write_into(df_example)

# obtain a context that references both tables
# (each entry presumably pairs a dataset name with its areas - TODO confirm)
sources = [("table_1", []), ("table_2", [])]
ctx = ffc.get_context(sources)

# query across both tables; adjacent literals concatenate to one SQL string
query = (
    "SELECT * FROM table_1 "
    "UNION ALL "
    "SELECT * FROM table_2"
)
result = ctx.query(query)
df = result.to_pandas()

# display the combined frame as the cell output
df

### Organizing data



### Exploring registered datasets

One of the main purposes of flight fusion is to help you organize and explore your data - locally or in production. As such, you can query the service to get information about available data.