In [None]:

import os
from typing import List

import tensorflow as tf
import tensorflow_transform as tft
from tfx.components import CsvExampleGen, StatisticsGen, SchemaGen, ExampleValidator, Transform, Trainer, InfraValidator
from tfx.components.example_gen import utils
from tfx.components.trainer.executor import GenericExecutor
from tfx.dsl.components.base import executor_spec
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.proto import infra_validator_pb2, trainer_pb2
from transformers import AutoTokenizer




In [None]:

from tfx.proto import example_gen_pb2  # Import from tfx.proto


In [None]:

# Pipeline identity and the directory that receives its outputs
_pipeline_name = "qa_pipeline"
_pipeline_root = os.path.join("tfx_pipelines", _pipeline_name)

# Input location: data directory and the speech file inside it
_data_path = "./data/"
_speech_data_file = "sjs.txt"

In [None]:

# Create the interactive TFX context, rooted at the pipeline output directory
context = InteractiveContext(
    pipeline_root=_pipeline_root,
)


In [None]:

# Step 1: ExampleGen - ingest the speech file as a single 'train' split
_train_split = example_gen_pb2.Input.Split(name='train', pattern=_speech_data_file)
_input_config = example_gen_pb2.Input(splits=[_train_split])
example_gen = CsvExampleGen(input_base=_data_path, input_config=_input_config)

In [None]:

# Execute ExampleGen through the interactive context
context.run(example_gen)


In [None]:

# Step 2: StatisticsGen - compute dataset statistics from the ingested examples
statistics_gen = StatisticsGen(examples=example_gen.outputs['examples'])
context.run(statistics_gen)

# Step 3: SchemaGen - infer a schema from those statistics
_statistics = statistics_gen.outputs['statistics']
schema_gen = SchemaGen(statistics=_statistics)
context.run(schema_gen)

# Step 4: ExampleValidator - check the statistics against the inferred schema
example_validator = ExampleValidator(
    schema=schema_gen.outputs['schema'],
    statistics=_statistics,
)
context.run(example_validator)

# Step 5: Transform - Tokenize input text
def preprocessing_fn(inputs):
    """Tokenize the `text` feature with the SQuAD-finetuned BERT tokenizer.

    Args:
        inputs: Mapping of feature name to value; only `inputs['text']` is read.

    Returns:
        Dict mapping each tokenizer output key to its tensor reshaped to 1-D.
    """
    # NOTE(review): Hugging Face tokenizers run in Python on concrete strings,
    # while TFT traces preprocessing_fn into a TF graph where inputs['text'] is
    # symbolic. This call likely needs a TF-native tokenizer - confirm.
    tokenizer = AutoTokenizer.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")

    # Pad/truncate every example to 256 tokens and return TensorFlow tensors
    input_tokens = tokenizer(
        inputs['text'],
        padding="max_length",
        truncation=True,
        max_length=256,
        return_tensors="tf"
    )

    # NOTE(review): reshaping to [-1] also merges the leading (presumably batch)
    # dimension - confirm this is intended.
    return {key: tf.reshape(tensor, [-1]) for key, tensor in input_tokens.items()}


# Transform accepts exactly one of `module_file` / `preprocessing_fn`, and the
# latter is a dotted-path string rather than a callable, so only the module
# file is passed. `transform_module.py` must define `preprocessing_fn`; keep it
# in sync with the definition above.
transform = Transform(
    examples=example_gen.outputs['examples'],
    schema=schema_gen.outputs['schema'],
    module_file=os.path.abspath("transform_module.py"),
)

# Run Transform
context.run(transform)

# Step 6: Trainer - Train the model
# `train_module.py` must define `run_fn`, the entry point used by GenericExecutor.
# NOTE(review): ExampleGen above emits only a 'train' split - confirm the
# training module does not require an 'eval' split.
trainer = Trainer(
    module_file=os.path.abspath("train_module.py"),
    # The executor is selected with an ExecutorClassSpec; trainer_pb2 only
    # carries the TrainArgs / EvalArgs messages.
    custom_executor_spec=executor_spec.ExecutorClassSpec(GenericExecutor),
    examples=transform.outputs['transformed_examples'],
    # Gives the training code access to the transformed feature spec, which the
    # raw-data schema below does not describe.
    transform_graph=transform.outputs['transform_graph'],
    schema=schema_gen.outputs['schema'],
    train_args=trainer_pb2.TrainArgs(),
    eval_args=trainer_pb2.EvalArgs(),
)

# Run Trainer
context.run(trainer)

# Step 7: InfraValidator - Validate the serving infrastructure
infra_validator = InfraValidator(
    model=trainer.outputs['model'],
    serving_spec=infra_validator_pb2.ServingSpec(
        # Serving binary: TensorFlow Serving image tagged `latest`
        tensorflow_serving=infra_validator_pb2.TensorFlowServing(
            tags=["latest"]
        ),
        # Serving platform: a ServingSpec needs one in addition to the binary;
        # a local Docker daemon is assumed for this notebook - TODO confirm.
        local_docker=infra_validator_pb2.LocalDockerConfig(),
    ),
)

# Run InfraValidator
context.run(infra_validator)


In [1]:
import os
from tfx.components.example_gen.csv_example_gen.component import CsvExampleGen
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.proto import example_gen_pb2


In [4]:

# Use the current working directory as the project root
# (assumes the notebook is launched from the project directory)
_project_dir = os.getcwd()


In [5]:
# Display the resolved project directory
_project_dir

'c:\\Users\\stefa\\3AInformatica_Prjs\\009.sensors_data_analysis\\3AAI\\SensorAnalysis\\ML_Training\\TFX\\practice01'

In [7]:

# Absolute path of the data directory inside the project
_data_path = os.path.join(_project_dir, "./data")


In [8]:
# Display the resolved data directory
_data_path 

In [None]:
# Name of the speech file expected inside the data directory
_speech_data_file = "sjs.txt"


In [3]:

# Check that the speech file exists before handing it to ExampleGen.
# Only reports the result; `_data_path` is left untouched so later cells use
# the same location that was checked here.
file_path = os.path.join(_data_path, _speech_data_file)
if os.path.exists(file_path):
    print(f"File found: {file_path}")
else:
    print(f"File not found: {file_path}")




File not found: c:\Users\stefa\3AInformatica_Prjs\009.sensors_data_analysis\3AAI\SensorAnalysis\ML_Training\TFX\practice01\data\sjs.txt


In [None]:

# Start a fresh InteractiveContext (no pipeline_root is passed here)
context = InteractiveContext()

# Build ExampleGen over the single 'train' split, then run it
_train_split = example_gen_pb2.Input.Split(name='train', pattern=_speech_data_file)
example_gen = CsvExampleGen(
    input_base=_data_path,
    input_config=example_gen_pb2.Input(splits=[_train_split]),
)
context.run(example_gen)