# Example summary

- pipeline: translation --> sentiment 

- example applications: sentiment analysis of non-English product reviews and tweets

## Code walkthrough

In [1]:
# Notebook setup: make the parent directory importable, read credentials from the
# environment, and initialize the krixik client.
import sys 
# the parent directory is appended before `krixik` is imported below —
# presumably so a local checkout of the package is found; confirm
sys.path.append('..')
from dotenv import load_dotenv
import os
# called with no arguments — presumably picks up a local .env file; verify
load_dotenv()

# os.getenv returns None when a variable is not set
TEST_DUMMY_API_KEY = os.getenv('TEST_DUMMY_API_KEY_DEV')
TEST_DUMMY_API_URL = os.getenv('TEST_DUMMY_API_URL_DEV')

from krixik import krixik
# initialize the client with the key/url read above; the cell output reports
# whether authentication succeeded
krixik.init(api_key = TEST_DUMMY_API_KEY, 
            api_url = TEST_DUMMY_API_URL)

import json
def json_print(data):
    """Pretty-print ``data`` as JSON with a two-space indent.

    Non-ASCII characters (for example the accented letters in the non-English
    text this notebook works with) are printed as-is instead of being turned
    into escape sequences, so the printed output stays readable.

    Args:
        data: a JSON-serializable object (dict, list, str, number, ...).

    Raises:
        TypeError: if ``data`` contains a value ``json.dumps`` cannot serialize.
    """
    print(json.dumps(data, indent=2, ensure_ascii=False))
    
# folder holding the local files that get passed to pipelines
input_directory = "input_files"

# folder where downloaded process outputs are written
output_directory = "output_files"

# folder for pipeline_configs
pipeline_configs_directory = "pipeline_configs"

# enable IPython's autoreload extension; mode 2 reloads imported modules before
# code is executed, so edits to local source files are picked up without
# restarting the kernel
%load_ext autoreload
%autoreload 2 

SUCCESS: You are now authenticated.


In [2]:
from krixik.pipeline_builder.module import Module
from krixik.pipeline_builder.pipeline import CreatePipeline

# build a single-module pipeline around the "ocr" module
module_1 = Module(name="ocr")
custom = CreatePipeline(
    name="ocr-pipeline",
    module_chain=[module_1],
)

# hand the pipeline object to the krixik operator
# (note: its config can be passed instead of the object)
pipeline = krixik.load_pipeline(pipeline=custom)

# run the pipeline's input test on a file from the input_files directory
test_file = "open_sign.jpeg"
pipeline.test_input(local_file_path=f"{input_directory}/{test_file}")

SUCCESS: local file input_files/open_sign.jpeg passed pipeline input test passed


In [3]:
# send the test file through the pipeline and keep the response
output = pipeline.process(
    local_file_path=f"{input_directory}/{test_file}",
    local_save_directory=output_directory,  # downloaded output is saved here
    expire_time=5 * 60,  # seconds until the file is removed from the account
    wait_for_process=True,
    verbose=True,
)

INFO: hydrated input modules: {'ocr': {'model': 'tesseract-en', 'params': {}}}
INFO: symbolic_directory_path was not set by user - setting to default of /etc
INFO: file_name was not set by user - setting to random file name: krixik_generated_dxjgnyvjlx.jpeg
INFO: wait_for_process is set to True.
INFO: file will expire and be removed from you account in 300 seconds, at Thu Apr 18 13:36:41 2024 UTC
INFO: ocr-pipeline file process and input processing started...
INFO: metadata can be updated using the .update api.
INFO: This file's process_id is: 41ba67c7-cfd3-cabc-b3b8-9a1bcdcd1be0
INFO: File process and processing status:
SUCCESS: module 1 (of 1) - ocr processing complete.
SUCCESS: pipeline process complete.
SUCCESS: process output downloaded


In [4]:
# display the response returned by pipeline.process
output

{'status_code': 200,
 'pipeline': 'ocr-pipeline',
 'request_id': 'd99947a2-cf90-49e3-973e-3a6102366e69',
 'file_id': 'd7e39119-ef6c-42b1-8eca-7fbcc59b414a',
 'message': 'SUCCESS - output fetched for file_id d7e39119-ef6c-42b1-8eca-7fbcc59b414a.Output saved to location(s) listed in process_output_files.',
 'process_output': {'text': '', 'detections': []},
 'process_output_files': ['output_files/d7e39119-ef6c-42b1-8eca-7fbcc59b414a.json']}

In [None]:
from krixik.pipeline_builder.module import Module
from krixik.pipeline_builder.pipeline import CreatePipeline

# create the modules for this pipeline
# NOTE(review): the chain starts with "ocr", but the input written below is a
# plain-text file and the notebook summary describes translation --> sentiment;
# confirm the intended module chain before running.
module_1 = Module(name="ocr")
module_2 = Module(name="translate")

# create your custom pipeline
custom = CreatePipeline(name='sign-translator-pipeline',
                        module_chain=[module_1, module_2])

# pass the custom object to the krixik operator (note: its config can be passed instead)
pipeline = krixik.load_pipeline(pipeline=custom)

# text of review
review_text = "Para los trabajos que estoy haciendo me resultó muy bueno. En una hora carga la batería y dura más de 3 horas de trabajo continuo. Un golazo contar con una segunda batería. Cómodo y con buen torque. Estoy conforme."

# define a test file in your input_files directory
test_file = "review_data.txt"
test_file_path = input_directory + "/" + test_file

# save the review at the same path that test_input reads from; the directory is
# created if it is missing, and UTF-8 is set explicitly because the text
# contains accented characters
os.makedirs(input_directory, exist_ok=True)
with open(test_file_path, "w", encoding="utf-8") as text_file:
    text_file.write(review_text)

pipeline.test_input(local_file_path=test_file_path)