In [None]:
import sys
import warnings
import panel as pn
pn.extension()
warnings.filterwarnings("ignore")
from stelardataprofiler import (
    profile_tabular,
    write_to_json,
    read_config,
    profile_tabular_with_config,
    prepare_mapping,
    type_detection,
    type_detection_with_config,
)

## 1st Approach - Give parameters in the code

### Dataset Path and parameters

In [None]:
# Shapefile that the 1st-approach cells below operate on.
my_file_path = '../datasets/vector_example.shp'

# One mapping per coordinate pair: the column holding the longitude and the
# column holding the latitude.
extra_geometry_columns = [dict(longitude='POINT_X', latitude='POINT_Y')]

### Type Detection (Optional)

#### Declare inside the code

In [None]:
# type_detection and type_detection_with_config are imported in the notebook's
# first cell, so the "Using JSON" alternative below also works when this
# optional cell is skipped.

# Detect the column types of the dataset, passing the longitude/latitude
# column pair declared in extra_geometry_columns.
types_dict = type_detection(
    input_path=my_file_path,
    extra_geometry_columns=extra_geometry_columns,
)

# Save the detected types so they can be inspected, edited and reused later.
write_to_json(types_dict, './output/vector_shp_types_dict.json')

types_dict

#### Using JSON

In [None]:
# Configuration given as a JSON formatted string.
config_json = '''{
	"input": {
		"files": "../datasets/vector_example.shp",
		"extra_geometry_columns" : [
            {
            "longitude": "POINT_X",
            "latitude": "POINT_Y"
            }
		],
		"_comment": "Optional parameters",
		"light_mode": false,
		"crs": "EPSG:4326",
		"num_cat_perc_threshold": 0.5,
		"max_freq_distr": 10,
		"eps_distance": 1000
	},
	"output": {
		"json": "./output/vector_shp_types_dict.json"
	}
}'''

# Alternatively, point config_json at a JSON configuration file,
# e.g. config_json: str = './config_template.json'

# Extract configuration settings in a dictionary
config_dict = read_config(config_json)

# Run type detection driven entirely by the configuration dictionary.
type_detection_with_config(config=config_dict)

# Read the detected types back from the output path declared in the config,
# so the path is written in one place only and the two cannot drift apart.
types_dict = read_config(config_dict['output']['json'])
types_dict

#### The user can view and edit the automatically detected type of each column, e.g. change Textual to Categorical, Numeric to Categorical, Geometry to Categorical, etc.

#### <font color='red'>Warning: </font> Each type must be accompanied by its corresponding parameters, if the type requires any.


In [None]:
# Override the detected type of three columns, turning each into Categorical:
#   ADDR_GR  - from Textual
#   geometry - from Geometry
#   PHONE    - from Numeric
for column_name in ('ADDR_GR', 'geometry', 'PHONE'):
    types_dict[column_name]['type'] = 'Categorical'

# Save the edited types over the previously written types file.
write_to_json(types_dict, './output/vector_shp_types_dict.json')

types_dict

### Run Profiler

In [None]:
# Dataset path and coordinate columns, set again here so this cell carries
# its own inputs.
my_file_path = '../datasets/vector_example.shp'
extra_geometry_columns = [dict(longitude='POINT_X', latitude='POINT_Y')]

# Profile the dataset; extra_geometry_columns names the longitude/latitude
# columns (POINT_X, POINT_Y) to include.
profile_dict = profile_tabular(
    input_path=my_file_path,
    extra_geometry_columns=extra_geometry_columns,
)

# To profile with the custom types_dict instead, use:
# profile_dict = profile_tabular(input_path=my_file_path, extra_geometry_columns=extra_geometry_columns, types_dict=types_dict)

profile_dict

### Write resulting dictionary

In [None]:
# Save the profile dictionary as JSON, then show it as the cell output.
write_to_json(
    profile_dict,
    './output/vector_shp_profile.json',
)
profile_dict

## 2nd Approach - Give JSON configuration

### Extract configuration settings

In [None]:
# Profiler configuration as a JSON formatted string: "input" lists the dataset
# file, the longitude/latitude column pair and the optional parameters;
# "output" holds the path of the JSON file for the profile.
config_json = '''{
	"input": {
		"files": ["../datasets/vector_example.shp"],
		"extra_geometry_columns" : [
			{
			"longitude": "POINT_X",
			"latitude": "POINT_Y"
			}
		],
		"_comment": "Optional parameters",
		"light_mode": false,
		"crs": "EPSG:4326",
		"num_cat_perc_threshold": 0.5,
		"max_freq_distr": 10,
		"eps_distance": 1000
	},
	"output": {
		"json": "./output/vector_shp_profile.json"
	}
}'''

# Alternatively, config_json can be the path of a JSON configuration file,
# e.g. config_json: str = './config_template.json'

# Variant that adds the custom types_dict: identical to config_json except
# that "files" has a second entry, the types JSON written earlier.
config_json2 = '''{
	"input": {
		"files": ["../datasets/vector_example.shp", "./output/vector_shp_types_dict.json"],
		"extra_geometry_columns" : [
			{
			"longitude": "POINT_X",
			"latitude": "POINT_Y"
			}
		],
		"_comment": "Optional parameters",
		"light_mode": false,
		"crs": "EPSG:4326",
		"num_cat_perc_threshold": 0.5,
		"max_freq_distr": 10,
		"eps_distance": 1000
	},
	"output": {
		"json": "./output/vector_shp_profile.json"
	}
}'''

# Extract configuration settings in a dictionary.
# Swap in the commented line to use the variant with the custom types instead.
config_dict = read_config(config_json)
# config_dict = read_config(config_json2)

config_dict

### Run Profiler

In [None]:
# Run the profiler from the configuration dictionary. Nothing is captured
# here; presumably the profile is written to the "output" -> "json" path of
# the configuration - TODO confirm.
profile_tabular_with_config(config_dict)

## Write mapping .ttl

In [None]:
# Mapping configuration as a JSON formatted string: "output" names the
# profile JSON, the RDF results file and the serialization ("turtle");
# "profile" declares the profile type ("tabular").
config_json = '''{
	"output": {
		"json": "./output/vector_shp_profile.json",
		"rdf": "./output/vector_shp_results.rdf",
		"serialization" : "turtle"
	},
	"profile": {
		"type": "tabular"
	}
}'''

# Alternatively, config_json can be the path of a JSON configuration file,
# e.g. config_json: str = './config_template.json'

# Extract configuration settings in a dictionary.
# NOTE: this rebinds config_json / config_dict used by the profiler cells above.
config_dict = read_config(config_json)

# Prepare the mapping described by the configuration.
prepare_mapping(config_dict)