# Importa librerías

In [1]:
# Import libraries
import os
import requests
import pandas as pd
import tensorflow as tf
from tfx.components import CsvExampleGen, StatisticsGen, SchemaGen, Transform
from tfx.v1.components import ImportSchemaGen, ExampleValidator
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.orchestration.metadata import sqlite_metadata_connection_config
from ml_metadata.metadata_store import metadata_store
from tensorflow_metadata.proto.v0 import statistics_pb2, schema_pb2
import tensorflow_data_validation as tfdv
from sklearn.model_selection import train_test_split
print('TF version:', tf.__version__)
print('TFDV version:', tfdv.version.__version__)

2025-03-02 00:41:44.174950: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-02 00:41:44.175686: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-02 00:41:44.177713: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-02 00:41:44.183058: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-02 00:41:44.201917: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registe

TF version: 2.16.2
TFDV version: 1.16.1


In [2]:
from data_preparation import *
from model_creation import *
from sklearn.feature_selection import SelectKBest, chi2, f_classif

# 2.1 Lectura de datos

In [3]:
## download the dataset
# Directory of the raw data files
_data_root = './data/covertype'
# Path to the raw training data
_data_filepath = os.path.join(_data_root, 'covertype_train.csv')

# Download data
os.makedirs(_data_root, exist_ok=True)
if not os.path.isfile(_data_filepath):
    #https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/
    url = 'https://docs.google.com/uc?export= \
    download&confirm={{VALUE}}&id=1lVF1BCWLH4eXXV_YOJzjR7xZjj-wAGj9'
    r = requests.get(url, allow_redirects=True, stream=True)
    open(_data_filepath, 'wb').write(r.content)
    
df = cargar_datos(_data_filepath)

Elevation                              int64
Aspect                                 int64
Slope                                  int64
Horizontal_Distance_To_Hydrology       int64
Vertical_Distance_To_Hydrology         int64
Horizontal_Distance_To_Roadways        int64
Hillshade_9am                          int64
Hillshade_Noon                         int64
Hillshade_3pm                          int64
Horizontal_Distance_To_Fire_Points     int64
Wilderness_Area                       object
Soil_Type                             object
Cover_Type                             int64
dtype: object


In [4]:
# Se seleccionan solo las variables numéricas
df_numerico = df.select_dtypes(include=['number'])

In [5]:
df_numerico.dtypes

Elevation                             int64
Aspect                                int64
Slope                                 int64
Horizontal_Distance_To_Hydrology      int64
Vertical_Distance_To_Hydrology        int64
Horizontal_Distance_To_Roadways       int64
Hillshade_9am                         int64
Hillshade_Noon                        int64
Hillshade_3pm                         int64
Horizontal_Distance_To_Fire_Points    int64
Cover_Type                            int64
dtype: object

In [6]:
X = df_numerico.drop(['Cover_Type'], axis=1)
y = df_numerico['Cover_Type']

# 3. Selección de características

In [7]:
# Aplicar SelectKBest
selector = SelectKBest(f_classif, k=8)
X_new = selector.fit_transform(X, y)

# Obtener los nombres de las columnas seleccionadas
columnas_seleccionadas = X.columns[selector.get_support()]

# Convertir nuevamente en DataFrame
X_new = pd.DataFrame(X_new, columns=columnas_seleccionadas, index=X.index)

In [8]:
# Suponiendo que X_train es un DataFrame y y_train es una Serie o DataFrame
df_new = pd.concat([X_new, y], axis=1)

In [9]:
# Se guardan los datos en ruta definida
root_path = './data/data_new'
file_path = os.path.join(root_path,"cover_new.csv")
df_new.to_csv(file_path)

# 4. Data Pipeline

## 4.1. Configurar el contexto interactivo


In [10]:
# Define un directorio persistente para los artefactos del pipeline
pipeline_root = "/ruta/persistente/para/pipeline_outputs"

# Inicia el contexto interactivo con la ruta definida
context = InteractiveContext(pipeline_root=pipeline_root)

# Define una ruta para la base de datos SQLite
metadata_path = "/ruta/persistente/para/metadata.sqlite"

# Configura la conexión a la metadata
metadata_config = sqlite_metadata_connection_config(metadata_path)

# Inicia el contexto con metadata persistente
context = InteractiveContext(pipeline_root=pipeline_root, metadata_connection_config=metadata_config)




## 4.2. Generando Ejemplos

In [11]:
# Crear el componente ExampleGen
example_gen = CsvExampleGen(input_base=root_path)
context.run(example_gen)







0,1
.execution_id,1
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } CsvExampleGen at 0x7fe5f194ad50.inputs{}.outputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0.exec_properties['input_base']./data/data_new['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:1,total_bytes:4647660,xor_checksum:1740876120,sum_checksum:1740876120"
.component.inputs,{}
.component.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.inputs,{}
.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"
.exec_properties,"['input_base']./data/data_new['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:1,total_bytes:4647660,xor_checksum:1740876120,sum_checksum:1740876120"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['input_base'],./data/data_new
['input_config'],"{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }"
['output_config'],"{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }"
['output_data_format'],6
['output_file_format'],5
['custom_config'],
['range_config'],
['span'],0
['version'],
['input_fingerprint'],"split:single_split,num_files:1,total_bytes:4647660,xor_checksum:1740876120,sum_checksum:1740876120"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1
.span,0
.split_names,"[""train"", ""eval""]"
.version,0


## 4.3. Estadísticas


In [12]:
# Crear el componente StatisticsGen
statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
context.run(statistics_gen)


0,1
.execution_id,2
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } StatisticsGen at 0x7fe5e6517850.inputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0.outputs['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5e66c8c90.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""].exec_properties['stats_options_json']None['exclude_splits'][]"
.component.inputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"
.component.outputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5e66c8c90.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.inputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"
.outputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5e66c8c90.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"
.exec_properties,['stats_options_json']None['exclude_splits'][]

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5e66c8c90.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['stats_options_json'],
['exclude_splits'],[]

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5e66c8c90.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"


In [13]:
stats_path = os.path.join(statistics_gen.outputs['statistics'].get()[0].uri,'Split-train','FeatureStats.pb')

# Leer el archivo en formato protobuf
with open(stats_path, "rb") as f:
    stats_proto = statistics_pb2.DatasetFeatureStatisticsList()
    stats_proto.ParseFromString(f.read())

# Visualizar estadísticas
tfdv.visualize_statistics(stats_proto)

## 4.4. Inferir el esquema


In [14]:
schema_gen = SchemaGen(
    statistics=statistics_gen.outputs['statistics'])
context.run(schema_gen)


0,1
.execution_id,3
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } SchemaGen at 0x7fe5e5a67990.inputs['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5e66c8c90.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""].outputs['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e5ad9550.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3) at 0x7fe5e5ad93d0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3.exec_properties['infer_feature_shape']1['exclude_splits'][]"
.component.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5e66c8c90.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"
.component.outputs,['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e5ad9550.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3) at 0x7fe5e5ad93d0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3

0,1
.inputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5e66c8c90.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"
.outputs,['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e5ad9550.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3) at 0x7fe5e5ad93d0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3
.exec_properties,['infer_feature_shape']1['exclude_splits'][]

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5e66c8c90.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e5ad9550.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3) at 0x7fe5e5ad93d0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3) at 0x7fe5e5ad93d0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3) at 0x7fe5e5ad93d0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3

0,1
['infer_feature_shape'],1
['exclude_splits'],[]

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5e66c8c90.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2) at 0x7fe5e66cb8d0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/2
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e5ad9550.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3) at 0x7fe5e5ad93d0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3) at 0x7fe5e5ad93d0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3) at 0x7fe5e5ad93d0.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/ruta/persistente/para/pipeline_outputs/SchemaGen/schema/3


## 4.5. Curando el esquema


In [15]:
# Obtener la URI del esquema generado por SchemaGen
schema_uri = schema_gen.outputs['schema'].get()[0].uri

# Cargar el esquema en formato protobuf
schema = tfdv.load_schema_text(schema_uri + "/schema.pbtxt")
# Definir rangos para variables numéricas
tfdv.set_domain(schema, "Hillshade_9am", schema_pb2.IntDomain(min=0, max=255))
tfdv.set_domain(schema, "Hillshade_Noon", schema_pb2.IntDomain(min=0, max=255))
tfdv.set_domain(schema, "Slope", schema_pb2.IntDomain(min=0, max=90))
tfdv.set_domain(schema, "Cover_Type", schema_pb2.IntDomain(is_categorical=True,min=0, max=6,name='Cover_Type'))

# # Definir Cover_Type como categórica
# cover_type_domain = schema_pb2.Schema().feature.add()
# cover_type_domain.name = "Cover_Type"
# cover_type_domain.type = schema_pb2.INT  # Mantiene el tipo INT
# cover_type_domain.int_domain.CopyFrom(schema_pb2.IntDomain(min=0, max=6))  # Rango permitido
# cover_type_domain.annotation.tag.append("categorical")  # Declarar como categórico

## 4.6. Entornos de esquema


In [16]:
# Seleccionar un subconjunto aleatorio (por ejemplo, el 20% de los datos)
df_service = df_new.sample(frac=0.2, random_state=42).copy()

# Eliminar la columna de la etiqueta (Cover_Type)
df_service.drop(columns=["Cover_Type"], inplace=True)

service_path = "./data/data_service"
file_service_path = os.path.join(service_path,'service_data.csv')
# Guardar el conjunto de datos de servicio
df_service.to_csv(file_service_path)

In [17]:
# Se valida que sin entorno se marca la columna Cover_Type
options = tfdv.StatsOptions(schema=schema, infer_type_from_schema=True)
serving_stats = tfdv.generate_statistics_from_csv(file_service_path, stats_options=options)
serving_anomalies = tfdv.validate_statistics(serving_stats, schema)

tfdv.display_anomalies(serving_anomalies)

Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Unnamed: 0_level_0,Anomaly short description,Anomaly long description
Feature name,Unnamed: 1_level_1,Unnamed: 2_level_1
'Cover_Type',Column dropped,Column is completely missing


In [18]:
# Se utilizan los entornos
# All features are by default in both TRAINING and SERVING environments.
schema.default_environment.append('TRAINING')
schema.default_environment.append('SERVING')

# Specify that 'Cover_Type' feature is not in SERVING environment.
tfdv.get_feature(schema, 'Cover_Type').not_in_environment.append('SERVING')
serving_anomalies_with_env = tfdv.validate_statistics(serving_stats, schema, environment='SERVING')

tfdv.display_anomalies(serving_anomalies_with_env)

In [19]:
# Guardo el esquema
schema_file = 'schema.pb'
schema_folder = './schema'
schema_path = os.path.join(schema_folder,schema_file)
tfdv.write_schema_text(schema, schema_path)

In [20]:
# Muestro el esquema y los cambios que se introducieron
schema = tfdv.load_schema_text(schema_path)  # Para el archivo de texto

In [21]:
tfdv.display_schema(schema)

Unnamed: 0_level_0,Type,Presence,Valency,Domain
Feature name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'',INT,required,,-
'Cover_Type',INT,required,,min: 0; max: 6
'Elevation',INT,required,,-
'Hillshade_9am',INT,required,,min: 0; max: 255
'Hillshade_Noon',INT,required,,min: 0; max: 255
'Horizontal_Distance_To_Fire_Points',INT,required,,-
'Horizontal_Distance_To_Hydrology',INT,required,,-
'Horizontal_Distance_To_Roadways',INT,required,,-
'Slope',INT,required,,min: 0; max: 90
'Vertical_Distance_To_Hydrology',INT,required,,-


In [22]:
schema.default_environment

['TRAINING', 'SERVING']

In [23]:
schema_gen_actual = ImportSchemaGen(schema_file=schema_path)
context.run(schema_gen_actual)

0,1
.execution_id,4
.component,function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } ImportSchemaGen at 0x7fe6e421e210.inputs{}.outputs['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e6d78890.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4.exec_properties['schema_file']./schema/schema.pb
.component.inputs,{}
.component.outputs,['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e6d78890.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
.inputs,{}
.outputs,['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e6d78890.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4
.exec_properties,['schema_file']./schema/schema.pb

0,1
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e6d78890.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
['schema_file'],./schema/schema.pb

0,1
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e6d78890.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4


In [24]:
schema_channel = schema_gen_actual.outputs['schema']

In [25]:
# Crear el componente StatisticsGen
statistics_gen_actual = StatisticsGen(schema = schema_channel, examples=example_gen.outputs['examples'])
context.run(statistics_gen_actual)


0,1
.execution_id,5
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } StatisticsGen at 0x7fe5e6dc4c90.inputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e6d78890.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4.outputs['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5f04ba410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5) at 0x7fe5e6dc6dd0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5.span0.split_names[""train"", ""eval""].exec_properties['stats_options_json']None['exclude_splits'][]"
.component.inputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e6d78890.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4"
.component.outputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5f04ba410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5) at 0x7fe5e6dc6dd0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5.span0.split_names[""train"", ""eval""]"

0,1
.inputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0['schema'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e6d78890.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4"
.outputs,"['statistics'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5f04ba410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5) at 0x7fe5e6dc6dd0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5.span0.split_names[""train"", ""eval""]"
.exec_properties,['stats_options_json']None['exclude_splits'][]

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e6d78890.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5f04ba410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5) at 0x7fe5e6dc6dd0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5) at 0x7fe5e6dc6dd0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5) at 0x7fe5e6dc6dd0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5
.span,0
.split_names,"[""train"", ""eval""]"

0,1
['stats_options_json'],
['exclude_splits'],[]

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fe5f15f75d0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"
['schema'],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Schema' (1 artifact) at 0x7fe5e6d78890.type_nameSchema._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1) at 0x7fe5f15f6210.type<class 'tfx.types.standard_artifacts.Examples'>.uri/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/ruta/persistente/para/pipeline_outputs/CsvExampleGen/examples/1
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
.type_name,Schema
._artifacts,[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
[0],function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Schema' (uri: /ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4) at 0x7fe5e6d7a890.type<class 'tfx.types.standard_artifacts.Schema'>.uri/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
.type,<class 'tfx.types.standard_artifacts.Schema'>
.uri,/ruta/persistente/para/pipeline_outputs/ImportSchemaGen/schema/4

0,1
['statistics'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'ExampleStatistics' (1 artifact) at 0x7fe5f04ba410.type_nameExampleStatistics._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5) at 0x7fe5e6dc6dd0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5.span0.split_names[""train"", ""eval""]"

0,1
.type_name,ExampleStatistics
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5) at 0x7fe5e6dc6dd0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5.span0.split_names[""train"", ""eval""]"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'ExampleStatistics' (uri: /ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5) at 0x7fe5e6dc6dd0.type<class 'tfx.types.standard_artifacts.ExampleStatistics'>.uri/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5.span0.split_names[""train"", ""eval""]"

0,1
.type,<class 'tfx.types.standard_artifacts.ExampleStatistics'>
.uri,/ruta/persistente/para/pipeline_outputs/StatisticsGen/statistics/5
.span,0
.split_names,"[""train"", ""eval""]"


In [26]:
os.listdir(os.path.join(statistics_gen_actual.outputs['statistics'].get()[0].uri,'Split-train'))

['FeatureStats.pb']

In [27]:
stats_path_actual = os.path.join(statistics_gen_actual.outputs['statistics'].get()[0].uri,'Split-train','FeatureStats.pb')

# Leer el archivo en formato protobuf
with open(stats_path_actual, "rb") as f:
    stats_proto_actual = statistics_pb2.DatasetFeatureStatisticsList()
    stats_proto_actual.ParseFromString(f.read())

# Visualizar estadísticas
tfdv.visualize_statistics(stats_proto_actual)

In [28]:
example_validator = ExampleValidator(
    statistics=statistics_gen_actual.outputs["statistics"],
    schema=schema_channel
)
context.run(example_validator)
context.show(example_validator.outputs["anomalies"])

In [31]:
preprocessing_code = """
import tensorflow as tf
import tensorflow_transform as tft
 
# Declaración de constantes para características numéricas y categóricas.
NUMERIC_FEATURES = [
    'Elevation', 'Aspect', 'Slope',
    'Horizontal_Distance_To_Hydrology',
    'Vertical_Distance_To_Hydrology',
    'Horizontal_Distance_To_Roadways',
    'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm',
    'Horizontal_Distance_To_Fire_Points'
]
 
CATEGORICAL_FEATURES = [
    'Wilderness_Area', 'Soil_Type'
]
 
# Llave para la etiqueta
LABEL_KEY = 'Cover_Type'
 
def preprocessing_fn(inputs):
    outputs = {}
 
    # Transformaciones para características numéricas:
    # Las primeras tres características se escalan a un rango [0, 1].
    for key in NUMERIC_FEATURES[:3]:
        outputs[key + '_scaled_0_1'] = _fillna(tft.scale_to_0_1(
            tft.fill_missing(tf.cast(inputs[key], tf.float32))
        ))
 
    # Las siguientes tres características se escalan usando min-max scaling.
    for key in NUMERIC_FEATURES[3:6]:
        outputs[key + '_scaled_minmax'] = _fillna(tft.scale_by_min_max(
            tft.fill_missing(tf.cast(inputs[key], tf.float32))
        ))
 
    # Las restantes se escalan usando z-score normalization.
    for key in NUMERIC_FEATURES[6:]:
        outputs[key + '_scaled_zscore'] = _fillna(tft.scale_to_z_score(
            tft.fill_missing(tf.cast(inputs[key], tf.float32))
        ))
 
    # Transformaciones para características categóricas:
    for key in CATEGORICAL_FEATURES:
        # Convertir cadenas a índices de vocabulario, similar a la codificación en A.
        vocab = tft.compute_and_apply_vocabulary(
            tft.fill_missing(inputs[key])
        )
        outputs[key + '_indexed'] = vocab
        # Además, aplicar hashing para obtener una representación adicional.
        outputs[key + '_hashed'] = _fillna(tft.hash_strings(
            tft.fill_missing(inputs[key]), hash_buckets=100
        ))
 
    # La etiqueta se mantiene sin transformación.
    outputs[LABEL_KEY] = _fillna(tft.fill_missing(inputs[LABEL_KEY]))
 
    return outputs
 
def _fillna(t, value=0):
    if not isinstance(t, tf.sparse.SparseTensor):
        return t
    return tf.squeeze(tf.sparse.to_dense(
        tf.SparseTensor(t.indices, t.values, [t.dense_shape[0], 1]),
        value), axis=1)
 
"""
 
# Guardar
with open("preprocessing.py", "w") as f:
    f.write(preprocessing_code)
 

In [32]:
transform = Transform(
    examples=example_gen.outputs["examples"],
    schema=schema_channel,
    module_file=os.path.abspath("preprocessing.py"),
)
context.run(transform, enable_cache=False)

running bdist_wheel
running build
running build_py
creating build/lib
copying data_preparation.py -> build/lib
copying main-train.py -> build/lib
copying model_creation.py -> build/lib
copying preprocessing.py -> build/lib
installing to /tmp/tmph__mq8hb
running install
running install_lib
copying build/lib/data_preparation.py -> /tmp/tmph__mq8hb/.
copying build/lib/main-train.py -> /tmp/tmph__mq8hb/.
copying build/lib/model_creation.py -> /tmp/tmph__mq8hb/.
copying build/lib/preprocessing.py -> /tmp/tmph__mq8hb/.
running install_egg_info
running egg_info
creating tfx_user_code_Transform.egg-info
writing tfx_user_code_Transform.egg-info/PKG-INFO
writing dependency_links to tfx_user_code_Transform.egg-info/dependency_links.txt
writing top-level names to tfx_user_code_Transform.egg-info/top_level.txt
writing manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
reading manifest file 'tfx_user_code_Transform.egg-info/SOURCES.txt'
writing manifest file 'tfx_user_code_Transform.egg-in

!!

        ********************************************************************************
        Please avoid running ``setup.py`` directly.
        Instead, use pypa/build, pypa/installer or other
        standards-based tools.

        See https://blog.ganssle.io/articles/2021/10/setup-py-deprecated.html for details.
        ********************************************************************************

!!
  self.initialize_options()


Processing /ruta/persistente/para/pipeline_outputs/_wheels/tfx_user_code_transform-0.0+e62d5f3c4ffd70016f6a39c5c5fc77b3217bfe34baa762b1412e84a469f5e9bb-py3-none-any.whl
Installing collected packages: tfx-user-code-transform
Successfully installed tfx-user-code-transform-0.0+e62d5f3c4ffd70016f6a39c5c5fc77b3217bfe34baa762b1412e84a469f5e9bb
Processing /ruta/persistente/para/pipeline_outputs/_wheels/tfx_user_code_transform-0.0+e62d5f3c4ffd70016f6a39c5c5fc77b3217bfe34baa762b1412e84a469f5e9bb-py3-none-any.whl
Installing collected packages: tfx-user-code-transform
Successfully installed tfx-user-code-transform-0.0+e62d5f3c4ffd70016f6a39c5c5fc77b3217bfe34baa762b1412e84a469f5e9bb
Processing /ruta/persistente/para/pipeline_outputs/_wheels/tfx_user_code_transform-0.0+e62d5f3c4ffd70016f6a39c5c5fc77b3217bfe34baa762b1412e84a469f5e9bb-py3-none-any.whl
Installing collected packages: tfx-user-code-transform
Successfully installed tfx-user-code-transform-0.0+e62d5f3c4ffd70016f6a39c5c5fc77b3217bfe34baa76

AttributeError: module 'tensorflow_transform' has no attribute 'fill_missing'

In [34]:
store = MetadataStore(metadata_config)

NameError: name 'MetadataStore' is not defined