In [None]:
from flight_fusion import FlightFusionClient, ClientOptions, TableClient
from flight_fusion.ipc.v1alpha1 import AreaSourceMetadata
import numpy as np
import pandas as pd

### Connection

To connect to the service we need to know the connection parameters. These can also be configured via the environment.

| parameter | env key | description |
| --- | --- | --- |
| `host` | `FF_HOST` | host to connect to |
| `port` | `FF_PORT` | port to connect to |

In [None]:
# Connection options for the service (host and port).
options = ClientOptions(
    host="localhost",
    port=50051,
)

# Test data for interacting with the service: a seeded 5x3 frame of
# random normal samples, so the values are identical on every run.
np.random.seed(42)
df_example = pd.DataFrame(
    np.random.randn(5, 3),
    columns=["col1", "col2", "col3"],
)

# Service client built from the options above.
ffc = FlightFusionClient(options)

### Dataset Client

The `DatasetClient` exposes operations for interacting with datasets defined in Flight Fusion.

In [None]:
# create a basic dataset client
fds = ffc.get_dataset_client(name="new_dataset", areas=[])

# write data into dataset
fds.write_into(df_example)

# get the schema for the just written data - also when previously registered :)
fds.schema()

In [None]:
# Load the dataset through the client and convert the result to pandas.
df = fds.load().to_pandas()
# Preview the first rows.
df.head()

In [None]:
# Run a SQL aggregate against the dataset; the table name in the query
# matches the dataset name used when creating `fds`.
df_query = fds.query("SELECT avg(col1) FROM new_dataset")

# Display the result - an assignment alone renders no output for the cell.
df_query

In [None]:
# Display the dataset's metadata as returned by `get_metadata`.
fds.get_metadata()

In [None]:
# Build an `AreaSourceMetadata` with no arguments and pass it to the dataset client.
# NOTE(review): presumably this replaces the stored metadata with default values - confirm.
metadata = AreaSourceMetadata()
fds.set_metadata(metadata=metadata)

In [None]:
# Display the result of `statistics()` for the dataset.
fds.statistics()

### Organizing data



### Exploring registered datasets

One of the main purposes of Flight Fusion is to help you organize and explore your data - locally or in production. To that end, you can query the service for information about the available data.

### Data Ingestion

The fusion server supports registering locally available data as a managed dataset, as well as registering data that resides in an external system.
Local data is written to the configured object storage and can be versioned if desired. For versioned datasets, `Delta` tables are used.

- `register_memory_table`
- `register_dataset`
- `register_location`


In [None]:
# Run a SQL query through the service client and show the result as pandas:
# at most 100 rows where column `A` is greater than 0.
# NOTE(review): `catalog.schema.upload_remote` is not registered by any cell
# visible in this notebook - presumably it is set up beforehand; confirm.
query = 'SELECT * FROM catalog.schema.upload_remote WHERE A > 0 LIMIT 100'
ffc.execute_query(query).to_pandas()

In [None]:
# Count the values of column `l` in `catalog.schema.upload` and show the result as pandas.
# NOTE(review): this table is not registered by any cell visible in this
# notebook - presumably it is set up beforehand; confirm.
query = "SELECT count(l) FROM catalog.schema.upload"
ffc.execute_query(query).to_pandas()

In [None]:
# Select everything from `information_schema.columns` and show the result as pandas.
# NOTE(review): assumes the service's query engine exposes an
# `information_schema` describing the registered tables' columns - confirm.
query = "select * from information_schema.columns"
ffc.execute_query(query).to_pandas()

In [None]:
# Extract the "x_axis" column from `table`, convert it to pandas, and display it.
# NOTE(review): `table` is not defined in any cell visible in this notebook, and
# the example data only has col1..col3 - on a fresh kernel this cell would
# raise NameError unless `table` is created elsewhere; confirm and add the
# missing definition.
arr = table.column("x_axis").to_pandas()
arr