In [None]:
# --- Standard library ---
import sys
import warnings

# --- Third-party ---
import panel as pn

# Initialise Panel for use inside the notebook.
pn.extension()

# Hide every warning category. The filter is installed before the
# stelardataprofiler import below, so it is already active while that
# package is being imported.
warnings.filterwarnings("ignore")

# --- Profiler API used throughout this notebook ---
from stelardataprofiler import (
    prepare_mapping,
    profile_tabular,
    profile_tabular_with_config,
    read_config,
    write_to_json,
)


## 1st Approach - Give parameters in the code

### Dataset Path and parameters

In [None]:
# Location of the example dataset, relative to this notebook.
my_file_path = '../datasets/tabular_vector_example.csv'

# CSV layout: pipe-separated values, column names on the first row (index 0).
sep = '|'
header = 0

# One extra geometry built from a coordinate pair:
# the "lon" column supplies the longitude and "lat" the latitude.
extra_geometry_columns = [{'longitude': 'lon', 'latitude': 'lat'}]

### Type Detection (Optional)

#### Declare inside the code

In [None]:
# Type-detection entry points: the first takes keyword arguments (used here),
# the second takes a configuration dictionary (used in the next section).
from stelardataprofiler import type_detection, type_detection_with_config

# Detect the column types using the dataset settings from the parameters cell.
types_dict = type_detection(
    input_path=my_file_path,
    header=header,
    sep=sep,
    extra_geometry_columns=extra_geometry_columns,
)

# Save the detected types to JSON so they can be inspected and reused later.
write_to_json(types_dict, './output/tabular_vector_types_dict.json')

# Last expression of the cell: display the detected types.
types_dict

#### Using JSON

In [None]:
# Configuration supplied inline as a JSON-formatted string.
# The "input" section repeats the dataset path, separator, header row and
# lon/lat geometry columns used by the keyword-argument approach above, plus
# optional tuning parameters; the "output" section names the JSON file that is
# read back at the end of this cell.
config_json = '''{
	"input": {
		"files": "../datasets/tabular_vector_example.csv",
		"sep": "|",
		"header": 0,
		"extra_geometry_columns" : [
			{
			"longitude": "lon",
			"latitude": "lat"
			}
		],
		"_comment": "Optional parameters",
		"light_mode": false,
		"crs": "EPSG:4326",
		"num_cat_perc_threshold": 0.5,
		"max_freq_distr": 10,
		"eps_distance": 1000
	},
	"output": {
		"json": "./output/tabular_vector_types_dict.json"
	}
}'''

# Alternative: point config_json at a JSON configuration file instead of an
# inline string (read_config is also called with a file path below),
# e.g. config_json: str = './config_template.json'

# Extract configuration settings in a dictionary
config_dict = read_config(config_json)

# Run type detection driven entirely by the configuration dictionary.
# NOTE(review): the result is expected to be written to the "output" -> "json"
# path of the configuration - confirm against the stelardataprofiler docs.
type_detection_with_config(config=config_dict)

# Load the types file from disk and display it. This rebinds types_dict,
# replacing whatever the keyword-argument cell above stored in it.
types_dict = read_config('./output/tabular_vector_types_dict.json')
types_dict

#### The user can view and edit the automatically detected type of each column, e.g. change Textual to Categorical, Numeric to Categorical, Geometry to Categorical, etc.

#### <font color='red'>Warning: </font> Each type must have its corresponding parameters if the type requires them.


In [None]:
# Re-type three columns as Categorical. Types they are being changed from:
#   housenumber : Textual
#   wkt         : Geometry
#   postcode    : Numeric
for column_name in ('housenumber', 'wkt', 'postcode'):
    types_dict[column_name]['type'] = 'Categorical'

# Overwrite the saved types file with the edited dictionary.
write_to_json(types_dict, './output/tabular_vector_types_dict.json')

# Display the edited types.
types_dict

### Run Profiler

In [None]:
# Dataset settings (same values as the parameters cell at the top of this approach).
my_file_path = '../datasets/tabular_vector_example.csv'
sep = '|'
header = 0

# Extra geometry built from the longitude/latitude column names "lon" and "lat".
extra_geometry_columns = [{'longitude': 'lon', 'latitude': 'lat'}]

# Profile the dataset, including the extra lon/lat geometry.
profile_dict = profile_tabular(
    input_path=my_file_path,
    header=header,
    sep=sep,
    extra_geometry_columns=extra_geometry_columns,
)

# Variant that also passes the custom types_dict edited above:
# profile_dict = profile_tabular(
#     input_path=my_file_path,
#     header=header,
#     sep=sep,
#     extra_geometry_columns=extra_geometry_columns,
#     types_dict=types_dict,
# )

# Display the resulting profile.
profile_dict

### Write resulting dictionary

In [None]:
# Save the profile produced by the previous cell, then show it as the cell output.
profile_output_path = './output/tabular_vector_profile.json'
write_to_json(profile_dict, profile_output_path)

profile_dict

## 2nd Approach - Give JSON configuration

### Extract configuration settings

In [None]:
# Configuration supplied inline as a JSON-formatted string.
# "files" is a one-element list holding only the dataset; the remaining
# "input" keys mirror the parameters of the 1st approach, and "output" -> "json"
# is the same profile path used by the 1st approach.
config_json = '''{
	"input": {
		"files": ["../datasets/tabular_vector_example.csv"],
		"sep": "|",
		"header": 0,
		"extra_geometry_columns" : [
			{
			"longitude": "lon",
			"latitude": "lat"
			}
		],
		"_comment": "Optional parameters",
		"light_mode": false,
		"crs": "EPSG:4326",
		"num_cat_perc_threshold": 0.5,
		"max_freq_distr": 10,
		"eps_distance": 1000
	},
	"output": {
		"json": "./output/tabular_vector_profile.json"
	}
}'''

# Alternative: point config_json at a JSON configuration file instead of an
# inline string,
# e.g. config_json: str = './config_template.json'

# If we want to add the custom types_dict:
# identical to config_json except that "files" has a second entry, the types
# JSON saved during the optional type-detection step.
config_json2 = '''{
	"input": {
		"files": ["../datasets/tabular_vector_example.csv", "./output/tabular_vector_types_dict.json"],
		"sep": "|",
		"header": 0,
		"extra_geometry_columns" : [
			{
			"longitude": "lon",
			"latitude": "lat"
			}
		],
		"_comment": "Optional parameters",
		"light_mode": false,
		"crs": "EPSG:4326",
		"num_cat_perc_threshold": 0.5,
		"max_freq_distr": 10,
		"eps_distance": 1000
	},
	"output": {
		"json": "./output/tabular_vector_profile.json"
	}
}'''

# Extract configuration settings in a dictionary
config_dict = read_config(config_json)
# Swap in the line below to profile with the custom types instead.
# config_dict = read_config(config_json2)

# Display the parsed configuration; the next cell passes it to the profiler.
config_dict

### Run profiler

In [None]:
# Run the profiler using only the configuration dictionary built in the previous cell.
# NOTE(review): the profile is expected to be written to the "output" -> "json"
# path of that configuration rather than returned - confirm against the
# stelardataprofiler docs.
profile_tabular_with_config(config_dict)

## Write mapping .ttl

In [None]:
# Configuration for the mapping step, supplied inline as a JSON-formatted string.
# "output" -> "json" is the profile path written by the profiling cells above,
# "rdf" is a second output path, and "serialization" selects "turtle";
# "profile" -> "type" declares the profile kind as "tabular".
config_json = '''{
	"output": {
		"json": "./output/tabular_vector_profile.json",
		"rdf": "./output/tabular_vector_results.rdf",
		"serialization" : "turtle"
	},
	"profile": {
		"type": "tabular"
	}
}'''

# Alternative: point config_json at a JSON configuration file instead of an
# inline string,
# e.g. config_json: str = './config_template.json'

# Extract configuration settings in a dictionary
# (this rebinds config_dict, replacing the profiling configuration from the 2nd approach)
config_dict = read_config(config_json)

# NOTE(review): presumably reads the profile JSON and prepares the RDF mapping
# for the paths/serialization configured above - confirm against the
# stelardataprofiler docs.
prepare_mapping(config_dict)