### Running API data ingest against Tableau hyper volume

- Date: 2024-05-06
- Author: MvS

[Snippet](https://github.com/tableau/hyper-api-samples/tree/main/Community-Supported/flights-data-incremental-refresh),  
[OpenSkyAPI](https://openskynetwork.github.io/opensky-api/rest.html),  
[Geospatial Bounding Box](http://bboxfinder.com/)



#### Install

1. Create a Python virtual environment that includes Jupyter.
2. Activate the environment and run `pip install -r requirements-openskyapi.txt`.
3. Start Jupyter, open this notebook, and run all cells from top to bottom.

In [3]:
from tableauhyperapi import HyperProcess, Connection, Telemetry, TableDefinition, TableName, CreateMode, SqlType, Nullability, Inserter
from opensky_api import OpenSkyApi
import tableauserverclient as TSC
import uuid
import argparse

In [2]:
def create_hyper_database_with_flights_data(
    database_path,
    bbox=(51.971346, 52.918839, 12.079468, 14.414063),
):
    """
    Leverages the OpenSkyAPI (https://github.com/openskynetwork/opensky-api) to create a
    Hyper database with flights data.

    The current state vectors inside ``bbox`` are fetched from OpenSky and written to a
    freshly created "public"."flights" table in the Hyper file at ``database_path``.
    An existing file at that path is replaced.

    Args:
        database_path: Path of the .hyper file to create.
        bbox: Bounding box passed to ``OpenSkyApi.get_states`` as
            (min_latitude, max_latitude, min_longitude, max_longitude).
            Defaults to the Berlin area; for Switzerland use
            (45.8389, 47.8229, 5.9962, 10.5226).

    Raises:
        RuntimeError: If OpenSky returned no data (``get_states`` yields ``None`` when the
            request fails or is rate-limited). Raised before the Hyper file is touched,
            so an existing database is not replaced by an empty one.
    """
    # Create an instance of the opensky api to retrieve data from OpenSky via HTTP.
    opensky = OpenSkyApi()
    # Get the most recent state vector. Note that we can only call this method every
    # 10 seconds as we are using the free version of the API.
    states = opensky.get_states(bbox=bbox)
    if states is None:
        raise RuntimeError(
            "OpenSky returned no state vectors (request failed or was rate-limited); "
            "wait at least 10 seconds and try again."
        )

    # Start up a local Hyper process.
    with HyperProcess(telemetry=Telemetry.SEND_USAGE_DATA_TO_TABLEAU) as hyper:
        # Create a connection to the Hyper process and connect to a hyper file
        # (create the file and replace if it exists).
        with Connection(endpoint=hyper.endpoint, database=database_path, create_mode=CreateMode.CREATE_AND_REPLACE) as connection:
            # Create a table definition with table name "flights" in the "public" schema
            # and columns for the flight state data.
            table_definition = TableDefinition(
                table_name=TableName("public", "flights"),
                columns=[
                    TableDefinition.Column('baro_altitude', SqlType.double(), Nullability.NULLABLE),
                    TableDefinition.Column('callsign', SqlType.text(), Nullability.NOT_NULLABLE),
                    TableDefinition.Column('latitude', SqlType.double(), Nullability.NULLABLE),
                    TableDefinition.Column('longitude', SqlType.double(), Nullability.NULLABLE),
                    TableDefinition.Column('on_ground', SqlType.bool(), Nullability.NOT_NULLABLE),
                    TableDefinition.Column('origin_country', SqlType.text(), Nullability.NOT_NULLABLE),
                    TableDefinition.Column('time_position', SqlType.int(), Nullability.NULLABLE),
                    TableDefinition.Column('velocity', SqlType.double(), Nullability.NULLABLE),
                ])
            # Create the flights table.
            connection.catalog.create_table(table_definition)

            # Insert each of the states into the table.
            with Inserter(connection, table_definition) as inserter:
                for s in states.states:
                    # OpenSky may report no callsign for an aircraft, but the column is
                    # NOT_NULLABLE; store an empty string so one such aircraft does not
                    # abort the whole insert (the table schema stays unchanged).
                    callsign = s.callsign if s.callsign is not None else ""
                    inserter.add_row([s.baro_altitude, callsign, s.latitude, s.longitude, s.on_ground, s.origin_country, s.time_position, s.velocity])
                inserter.execute()

            num_flights = connection.execute_scalar_query(query=f"SELECT COUNT(*) from {table_definition.table_name}")
            print(f"Inserted {num_flights} flights into {database_path}.")

In [6]:
# Driver cell: builds the local Hyper file from live OpenSky data.
# The command-line parsing and the publish-to-server step are commented out, so
# running this cell only writes the local file.
# NOTE(review): re-enabling the publish step needs `args` (from the argparse block) and a
# `publish_to_server` helper; neither is defined in the cells shown here — confirm first.
# argparser = argparse.ArgumentParser(description="Incremental refresh with flights data.")
# argparser.add_argument("server_url", help="The url of Tableau Server / Cloud, e.g. 'https://us-west-2a.online.tableau.com'")
# argparser.add_argument("site_name", help="The name of your site, e.g., use 'default' for your default site. Note that you cannot use 'default' in Tableau Cloud but must use the site name.", default='default')
# argparser.add_argument("project_name", help="The name of your project, e.g., use an empty string ('') for your default project.", default="")
# argparser.add_argument("token_name", help="The name of your authentication token for Tableau Server/Cloud. See this url for more details: https://help.tableau.com/current/server/en-us/security_personal_access_tokens.htm")
# argparser.add_argument("token_value", help="The value of your authentication token for Tableau Server/Cloud. See this url for more details: https://help.tableau.com/current/server/en-us/security_personal_access_tokens.htm")
# args = argparser.parse_args()

# First create the hyper database with flights data.
# The file name is a relative path; the function opens it with CREATE_AND_REPLACE,
# so every run overwrites the previous file.
database_path = "flights.hyper"
create_hyper_database_with_flights_data(database_path)

# # Then publish the data to server.
# datasource_name_on_server = 'flights_data_set'
# # Create credentials to sign into Tableau Server.
# tableau_auth = TSC.PersonalAccessTokenAuth(args.token_name, args.token_value, args.site_name)
# publish_to_server(args.server_url, tableau_auth, args.project_name, database_path, datasource_name_on_server)

Inserted 17 flights into flights.hyper.
