In [None]:
from flight_fusion import FlightFusionClient, ClientOptions, TableClient
import numpy as np
import pandas as pd

### Connection

To connect to the service we need to know the connection parameters. These can also be configured via the environment.

| parameter | env key | description |
| --- | --- | --- |
| `host` | `FF_HOST` | host to connect to |
| `port` | `FF_PORT` | port to connect to |

In [None]:
# Connection options for the service (host "localhost", port 50051).
options = ClientOptions(host="localhost", port=50051)

# Let's also build a small test frame to interact with the service.
# The global NumPy seed is fixed so the random values are the same on every run.
np.random.seed(42)
example_values = np.random.randn(5, 3)
df_example = pd.DataFrame(example_values, columns=["col1", "col2", "col3"])

### Service Client

The most common way to start interacting with flight fusion is to create a service client. This client lets you explore the high level entities defined in the services. 

In [None]:
# Create the service client from the connection options defined earlier;
# `ffc` is reused by the ingestion and query cells further down.
ffc = FlightFusionClient(options)

### Dataset Client

The `TableClient` exposes operations to interact with datasets defined in flight fusion.

In [None]:
# Build a table client for "new_dataset" (no areas) from the shared connection options.
fds = TableClient.from_options(
    name="new_dataset",
    areas=[],
    options=options,
)

# Write the example frame through the table client.
fds.write_into(df_example)

In [None]:
# Load the data through the table client, convert it to pandas,
# and preview the first rows.
loaded = fds.load()
df = loaded.to_pandas()
df.head()

In [None]:
# Call drop() on the table client created earlier.
# NOTE(review): presumably this removes "new_dataset" from the service — confirm
# against the TableClient documentation.
fds.drop()

### Data Ingestion

The fusion server supports registering data available locally as a managed dataset, or registering data which resides in an external system.
Local data will be written to the configured object storage and can be versioned if desired. For versioned datasets, `Delta` tables are used.

- `register_memory_table`
- `register_dataset`
- `register_location`


In [None]:
# Build a 500000 x 20 frame of seeded random values, one column per letter,
# and register it with the service under the name "upload".
np.random.seed(42)
column_names = list('qwertzuiopasdfghjkly')
upload_values = np.random.randn(500000, 20)
df = pd.DataFrame(upload_values, columns=column_names)
ffc.register_dataset(None, None, "upload", df)

In [None]:
# Build a 500 x 3 frame of seeded random values and write it via write_into_delta
# using the overwrite save mode.
# NOTE(review): `delta_path` and `SaveMode` are not defined or imported in any
# visible cell, so this cell raises NameError on a fresh kernel — define
# `delta_path` and import `SaveMode` before running it.
np.random.seed(42)  
df = pd.DataFrame(np.random.randn(500, 3), columns=["col1", "col2", "col3"])
ffc.write_into_delta(None, None, delta_path, SaveMode.SAVE_MODE_OVERWRITE, df)

In [None]:
# Select everything from test_table, convert the result to pandas,
# and display a random sample of five rows.
query = "SELECT * FROM catalog.schema.test_table"
query_result = ffc.execute_query(query)
table = query_result.to_pandas()
table.sample(5)

In [None]:
# Register the parquet file at "test/flight/largish2.parquet" under the name
# "upload_remote". The first two positional arguments are passed as None;
# their meaning is not visible in this notebook.
ffc.register_remote_dataset(None, None, "upload_remote", "test/flight/largish2.parquet")

In [None]:
# Query the "upload_remote" dataset: rows with A > 0, limited to 100,
# shown as a pandas frame.
query = 'SELECT * FROM catalog.schema.upload_remote WHERE A > 0 LIMIT 100'
remote_result = ffc.execute_query(query)
remote_result.to_pandas()

In [None]:
# Run a count over column `l` of the "upload" dataset and show the result as pandas.
query = "SELECT count(l) FROM catalog.schema.upload"
count_result = ffc.execute_query(query)
count_result.to_pandas()

In [None]:
# Inspect information_schema.columns through the query interface.
query = "select * from information_schema.columns"
schema_result = ffc.execute_query(query)
schema_result.to_pandas()

In [None]:
# `table` is the pandas DataFrame produced by the earlier test_table query
# (it is the result of `.to_pandas()`), so the column is selected with
# DataFrame indexing. The previous `.column(...).to_pandas()` call is a
# pyarrow Table API and raises AttributeError on a DataFrame.
# NOTE(review): assumes the query result contains an "x_axis" column — confirm.
arr = table["x_axis"]
arr