In [None]:
import os
from getpass import getpass

from stelar.client import Client, TaskSpec

# Connect to the STELAR KLMS API.
# Credentials are taken from the STELAR_USERNAME / STELAR_PASSWORD environment
# variables rather than being written into the notebook, so they cannot leak
# through version control or shared copies. When a variable is unset or empty,
# the value is requested interactively (the password prompt does not echo).
c = Client(
    base_url="https://klms.stelar.gr",
    username=os.environ.get("STELAR_USERNAME") or input("STELAR username: "),
    password=os.environ.get("STELAR_PASSWORD") or getpass("STELAR password: "),
)

In [2]:
# Dataset shared by every example below; each task spec registers it under the
# alias "d0".
profile_example_dataset = c.datasets["stelardataprofiler-datasets-examples"]

## Tabular Resources (CSV, XLS, XLSX, SHP)

### Example for CSV - No geometries

In [None]:
# Task spec for profiling a CSV resource that has no geometry columns.
tabular_t = TaskSpec(tool="data-profiler", name="Tabular Profiling")

# Input resource, referenced by id (presumably the tabular_example.csv file
# named in the output below -- confirm against the catalogue).
tabular_t.i(data="426fd66c-543b-4f3e-a8ff-b1ca94e6c6ac")

# Make the examples dataset available under the alias "d0"; the output spec
# refers to it through its "dataset" key.
tabular_t.d(alias="d0", dset=profile_example_dataset)

# Profiler parameters. Options left disabled in this example:
#   ts_mode=False
#   time_column=DATE
tabular_t.p(header=0, sep="|")

# One output, named "profile": the target URL plus the description of the
# resource entry (presumably created in dataset "d0" -- confirm).
tabular_t.o(
    profile={
        "url": "s3://test-bucket/stelardataprofiler-experiments/tabular-profile.json",
        "resource": {
            "name": "Profile for tabular_example.csv",
            "relation": "profile",
            "format": "json",
        },
        "dataset": "d0",
    },
)

# Submit the spec through the process with this id. Note that `tabular_t` is
# rebound from the spec to whatever `run` returns.
proc = c.processes["d66764f1-c310-4751-9aa3-816703d5d316"]
tabular_t = proc.run(tabular_t)

### Example CSV - Geometries (+ extra_geometry_columns)

In [None]:
# Task spec for profiling a CSV resource whose coordinates are stored in
# separate longitude/latitude columns.
tabular_t = TaskSpec(tool="data-profiler", name="Tabular Profiling")

# Input resource, referenced by id (presumably tabular_vector_example.csv,
# as named in the output below -- confirm against the catalogue).
tabular_t.i(data="73c32f1d-16c4-4543-a0be-ee7b9d8ce8b8")

# Make the examples dataset available under the alias "d0"; the output spec
# refers to it through its "dataset" key.
tabular_t.d(alias="d0", dset=profile_example_dataset)

# Profiler parameters.
tabular_t.p(
    header=0,
    sep="|",
    # Column pair holding the coordinates ("lon" / "lat" in this file).
    extra_geometry_columns=[{"longitude": "lon", "latitude": "lat"}],
    light_mode=False,
    crs="EPSG:4326",
    num_cat_perc_threshold=0.5,
    max_freq_distr=10,
    eps_distance=1000,
)

# One output, named "profile": the target URL plus the description of the
# resource entry (presumably created in dataset "d0" -- confirm).
tabular_t.o(
    profile={
        "url": "s3://test-bucket/stelardataprofiler-experiments/tabular-vector-profile.json",
        "resource": {
            "name": "Profile for tabular_vector_example.csv",
            "relation": "profile",
            "format": "json",
        },
        "dataset": "d0",
    },
)

# Submit the spec through the process with this id. Note that `tabular_t` is
# rebound from the spec to whatever `run` returns.
proc = c.processes["d66764f1-c310-4751-9aa3-816703d5d316"]
tabular_t = proc.run(tabular_t)

### Example SHP

In [3]:
# Task spec for profiling a shapefile.
tabular_t = TaskSpec(tool="data-profiler", name="Tabular Profiling - SHP")

# Five input resources passed together under "data" (presumably the component
# files that make up vector_example.shp -- confirm against the catalogue).
tabular_t.i(
    data=[
        "dc39a393-2ab9-4dfa-896f-db9c29087f16",
        "cb669577-f063-46e9-98b9-fdba00823b9c",
        "5b0434d3-b4fc-4388-8a7b-4ee45136c4eb",
        "fe3b7925-920f-411f-b448-79c081447ebe",
        "56656255-3d72-4eec-b310-3384175602d4",
    ]
)

# Make the examples dataset available under the alias "d0"; the output spec
# refers to it through its "dataset" key.
tabular_t.d(alias="d0", dset=profile_example_dataset)

# Profiler parameters.
tabular_t.p(
    header=0,
    sep="|",
    # Column pair holding the coordinates ("POINT_X" / "POINT_Y" in this file).
    extra_geometry_columns=[{"longitude": "POINT_X", "latitude": "POINT_Y"}],
    light_mode=False,
    crs="EPSG:4326",
    num_cat_perc_threshold=0.5,
    max_freq_distr=10,
    eps_distance=1000,
)

# One output, named "profile": the target URL plus the description of the
# resource entry (presumably created in dataset "d0" -- confirm).
tabular_t.o(
    profile={
        "url": "s3://test-bucket/stelardataprofiler-experiments/vector_example.json",
        "resource": {
            "name": "Profile for vector_example.shp",
            "relation": "profile",
            "format": "json",
        },
        "dataset": "d0",
    },
)

# Submit the spec through the process with this id. Note that `tabular_t` is
# rebound from the spec to whatever `run` returns.
proc = c.processes["d66764f1-c310-4751-9aa3-816703d5d316"]
tabular_t = proc.run(tabular_t)

### Example Type Detection + Custom Profile

#### Type Detection

In [7]:
# Task spec that runs the profiler with type_detection_mode=True; its single
# output is named "types" instead of "profile".
tabular_t = TaskSpec(tool="data-profiler", name="Type Detection")

# Same five input resources as the SHP profiling example.
tabular_t.i(
    data=[
        "dc39a393-2ab9-4dfa-896f-db9c29087f16",
        "cb669577-f063-46e9-98b9-fdba00823b9c",
        "5b0434d3-b4fc-4388-8a7b-4ee45136c4eb",
        "fe3b7925-920f-411f-b448-79c081447ebe",
        "56656255-3d72-4eec-b310-3384175602d4",
    ]
)

# Make the examples dataset available under the alias "d0"; the output spec
# refers to it through its "dataset" key.
tabular_t.d(alias="d0", dset=profile_example_dataset)

# Profiler parameters: identical to the SHP example, plus the type-detection
# switch.
tabular_t.p(
    type_detection_mode=True,
    header=0,
    sep="|",
    # Column pair holding the coordinates ("POINT_X" / "POINT_Y" in this file).
    extra_geometry_columns=[{"longitude": "POINT_X", "latitude": "POINT_Y"}],
    light_mode=False,
    crs="EPSG:4326",
    num_cat_perc_threshold=0.5,
    max_freq_distr=10,
    eps_distance=1000,
)

# One output, named "types": the target URL plus the description of the
# resource entry (presumably created in dataset "d0" -- confirm).
tabular_t.o(
    types={
        "url": "s3://test-bucket/stelardataprofiler-experiments/vector_example_td.json",
        "resource": {
            "name": "Type Detection for vector_example.shp",
            "relation": "profile_type_detection",
            "format": "json",
        },
        "dataset": "d0",
    },
)

# Submit the spec through the process with this id. Note that `tabular_t` is
# rebound from the spec to whatever `run` returns.
proc = c.processes["d66764f1-c310-4751-9aa3-816703d5d316"]
tabular_t = proc.run(tabular_t)

#### Custom Profile

In [8]:
# Task spec for profiling the shapefile with a type-detection file supplied as
# an extra input.
tabular_t = TaskSpec(tool="data-profiler", name="Tabular Profiling - SHP with custom types")

# Same five "data" resources as the SHP example, plus a "type_detection_file"
# resource (presumably a type-detection result, possibly hand-edited, such as
# the one produced by the "Type Detection" task -- confirm).
tabular_t.i(
    data=[
        "dc39a393-2ab9-4dfa-896f-db9c29087f16",
        "cb669577-f063-46e9-98b9-fdba00823b9c",
        "5b0434d3-b4fc-4388-8a7b-4ee45136c4eb",
        "fe3b7925-920f-411f-b448-79c081447ebe",
        "56656255-3d72-4eec-b310-3384175602d4",
    ],
    type_detection_file="3f72a69b-63b5-4ef1-9050-dd1705f689b2",
)

# Make the examples dataset available under the alias "d0"; the output spec
# refers to it through its "dataset" key.
tabular_t.d(alias="d0", dset=profile_example_dataset)

# Profiler parameters.
tabular_t.p(
    header=0,
    sep="|",
    # Column pair holding the coordinates ("POINT_X" / "POINT_Y" in this file).
    extra_geometry_columns=[{"longitude": "POINT_X", "latitude": "POINT_Y"}],
    light_mode=False,
    crs="EPSG:4326",
    num_cat_perc_threshold=0.5,
    max_freq_distr=10,
    eps_distance=1000,
)

# One output, named "profile": the target URL plus the description of the
# resource entry (presumably created in dataset "d0" -- confirm).
tabular_t.o(
    profile={
        "url": "s3://test-bucket/stelardataprofiler-experiments/vector_example_custom.json",
        "resource": {
            "name": "Profile with type detection for vector_example.shp",
            "relation": "profile",
            "format": "json",
        },
        "dataset": "d0",
    },
)

# Submit the spec through the process with this id. Note that `tabular_t` is
# rebound from the spec to whatever `run` returns.
proc = c.processes["d66764f1-c310-4751-9aa3-816703d5d316"]
tabular_t = proc.run(tabular_t)