# Example summary

Description: analyze the sentiment of product reviews, tweets, and similar short texts in any language (non-English text is translated to English before its sentiment is scored).

Pipelines described: 

- sentiment

- translation --> sentiment (translate the text to English, then score its sentiment).


## Code walkthrough

### boilerplate setup

In [1]:
import os
import sys

# extend the import path with the parent directory before any non-stdlib import
# (presumably so the krixik package in the parent directory is importable — TODO confirm)
sys.path.append("..")

from dotenv import load_dotenv

# load variables from a local .env file, then read the API credentials from the environment
load_dotenv()
TEST_DUMMY_API_KEY = os.getenv("TEST_DUMMY_API_KEY_DEV")
TEST_DUMMY_API_URL = os.getenv("TEST_DUMMY_API_URL_DEV")

from krixik import krixik

# authenticate this session with the credentials read above
krixik.init(
    api_key=TEST_DUMMY_API_KEY,
    api_url=TEST_DUMMY_API_URL,
)

import json
def json_print(data):
    """Print ``data`` to stdout as JSON, indented two spaces per nesting level."""
    formatted = json.dumps(data, indent=2)
    print(formatted)
    
# define directory for input files
input_directory = 'input_files'

# define directory for output files
output_directory = 'output_files'

# define directory for pipeline_configs
pipeline_configs_directory = 'pipeline_configs'

# Later cells write review_data.json into input_directory with open(..., "w"),
# which raises FileNotFoundError when the directory is missing (e.g. on a fresh
# checkout), so create it up front. output_directory is created as well because
# it is passed to pipeline.process as local_save_directory.
# NOTE(review): whether krixik creates local_save_directory itself is not
# visible here — creating it is harmless either way.
for _directory in (input_directory, output_directory):
    os.makedirs(_directory, exist_ok=True)

# IPython autoreload extension, mode 2: re-import modules that changed on disk
# before each cell executes, so edits to local packages are picked up without
# restarting the kernel
%load_ext autoreload
%autoreload 2 

SUCCESS: You are now authenticated.


### sentiment pipeline

In [8]:
from krixik.pipeline_builder.module import Module
from krixik.pipeline_builder.pipeline import CreatePipeline

# single-module chain: sentiment analysis only
module_1 = Module(name="sentiment")

# assemble the custom pipeline from the module chain
custom = CreatePipeline(
    name="simple-sentiment-pipeline",
    module_chain=[module_1],
)

# pass the custom object to the krixik operator (its config can be passed instead)
pipeline = krixik.load_pipeline(pipeline=custom)

# text of review - taken from first review of: https://www.amazon.com/Hunters-Dune-Universe-Book-18-ebook/dp/B001AY2I7I/?_encoding=UTF8&pd_rd_w=7wKHl&content-id=amzn1.sym.a6c319f7-de37-40ac-aac0-dba5df97534f&pf_rd_p=a6c319f7-de37-40ac-aac0-dba5df97534f&pf_rd_r=JE112WCZDH52EZHTAHE0&pd_rd_wg=vY7To&pd_rd_r=d92cd8b4-6b9b-4983-b1e2-9635cb7d0f17&ref_=pd_gw_reads_byrv2#customerReviews
review_text = "The plot is good and realistic, and it’s very entertaining and relaxing to read this book. The end is though accessible only to the readers of the Buttlerian Jihad."

# name of the test file inside the input_files directory
test_file = "review_data.json"

# store the review as a one-element list of {"snippet": ...} records
with open(f"{input_directory}/{test_file}", "w") as outfile:
    json.dump([{"snippet": review_text}], outfile)

# check that the file is acceptable input for this pipeline before processing it
pipeline.test_input(local_file_path=f"{input_directory}/{test_file}")

SUCCESS: local file input_files/review_data.json passed pipeline input test passed


In [9]:
# process the file
output = pipeline.process(local_file_path = input_directory + "/" + test_file,
                          expire_time=60*5, 
                          local_save_directory=output_directory, 
                          wait_for_process=True,
                          verbose=True)  

INFO: hydrated input modules: {'sentiment': {'model': 'distilbert-base-uncased-finetuned-sst-2-english', 'params': {}}}
INFO: symbolic_directory_path was not set by user - setting to default of /etc
INFO: file_name was not set by user - setting to random file name: krixik_generated_beenawuolu.json
INFO: wait_for_process is set to True.
INFO: file will expire and be removed from you account in 300 seconds, at Thu Apr 18 10:42:39 2024 UTC
INFO: simple-sentiment-pipeline file process and input processing started...
INFO: metadata can be updated using the .update api.
INFO: This file's process_id is: 3207560e-8cea-c786-364e-56f170582373
INFO: File process and processing status:
SUCCESS: module 1 (of 1) - sentiment processing complete.
SUCCESS: pipeline process complete.
SUCCESS: process output downloaded


In [10]:
# display the full response dictionary returned by pipeline.process
output

{'status_code': 200,
 'pipeline': 'simple-sentiment-pipeline',
 'request_id': '320fcb76-68fb-408b-9aed-c6f3e9e12d3d',
 'file_id': 'ef47cacc-1ffd-452e-9e93-099e928e574b',
 'message': 'SUCCESS - output fetched for file_id ef47cacc-1ffd-452e-9e93-099e928e574b.Output saved to location(s) listed in process_output_files.',
 'process_output': [{'snippet': 'The plot is good and realistic, and it’s very entertaining and relaxing to read this book. The end is though accessible only to the readers of the Buttlerian Jihad.',
   'positive': 1.0,
   'negative': 0.0,
   'neutral': 0.0}],
 'process_output_files': ['output_files/ef47cacc-1ffd-452e-9e93-099e928e574b.json']}

### translation --> sentiment

In [2]:
from krixik.pipeline_builder.module import Module
from krixik.pipeline_builder.pipeline import CreatePipeline

# two-module chain: translate the text first, then run sentiment analysis on it
module_1 = Module(name="translate")
module_2 = Module(name="sentiment")

# assemble the custom pipeline from the module chain
custom = CreatePipeline(
    name="translate-review-sentiment-pipeline",
    module_chain=[module_1, module_2],
)

# pass the custom object to the krixik operator (its config can be passed instead)
pipeline = krixik.load_pipeline(pipeline=custom)

# text of review - taken from first review of:  https://articulo.mercadolibre.com.ar/MLA-1432580188-taladro-atornillador-percutor-2-baterias-gp-by-lusqtoff-_JM#polycard_client=homes-korribanSearchTodayPromotions&position=42&search_layout=grid&type=item&tracking_id=b6eefafa-515b-4ea5-8822-36d1e8c16f72&c_id=/home/today-promotions-recommendations/element&c_uid=fb69c84c-ad40-4f95-b394-7ca41a3d29fe
review_text = "Para los trabajos que estoy haciendo me resultó muy bueno. En una hora carga la batería y dura más de 3 horas de trabajo continuo. Un golazo contar con una segunda batería. Cómodo y con buen torque. Estoy conforme."

# name of the test file inside the input_files directory
test_file = "review_data.json"

# store the review as a one-element list of {"snippet": ...} records
with open(f"{input_directory}/{test_file}", "w") as outfile:
    json.dump([{"snippet": review_text}], outfile)

# check that the file is acceptable input for this pipeline before processing it
pipeline.test_input(local_file_path=f"{input_directory}/{test_file}")

SUCCESS: local file input_files/review_data.json passed pipeline input test passed


In [3]:
# process the file
output = pipeline.process(local_file_path = input_directory + "/" + test_file,
                          modules={"translate": {"model": "opus-mt-es-en"}},
                          expire_time=60*5, 
                          local_save_directory=output_directory, 
                          wait_for_process=True,
                          verbose=True)  

INFO: hydrated input modules: {'translate': {'model': 'opus-mt-es-en', 'params': {}}, 'sentiment': {'model': 'distilbert-base-uncased-finetuned-sst-2-english', 'params': {}}}
INFO: symbolic_directory_path was not set by user - setting to default of /etc
INFO: file_name was not set by user - setting to random file name: krixik_generated_bdziwtfwau.json
INFO: wait_for_process is set to True.
INFO: file will expire and be removed from you account in 300 seconds, at Thu Apr 18 09:30:34 2024 UTC
INFO: translate-review-sentiment-pipeline file process and input processing started...
INFO: metadata can be updated using the .update api.
INFO: This file's process_id is: 207abaaa-4a5c-643e-0541-e40b3c65f7f9
INFO: File process and processing status:
SUCCESS: module 1 (of 2) - translate processing complete.
SUCCESS: module 2 (of 2) - sentiment processing complete.
SUCCESS: pipeline process complete.
SUCCESS: process output downloaded


In [6]:
# show only the processed records: the translated snippet with its
# positive / negative / neutral scores
output["process_output"]

[{'snippet': "For the jobs I'm doing I turned out very good. In an hour load the batter and lasts more than 3 hours of continuous work. A stroke have a second batter. Cmodo and with good torque. I agree.",
  'positive': 0.999,
  'negative': 0.001,
  'neutral': 0.0}]