In [1]:
from decouple import config
from aimped.utils import process_payload
from aimped.s3_file_manager import S3FileManager


In [2]:
# Credentials and bucket name are read from the environment via
# python-decouple. config() raises UndefinedValueError when a key is not
# declared and no default is given.
from decouple import config

aws_access_key_id = config('AWS_ACCESS_KEY_ID')
aws_secret_access_key = config('AWS_SECRET_ACCESS_KEY')
bucket_name = config('PRIVATE_BUCKET_NAME')

# The region is a fixed literal in this notebook, not an environment value.
region_name = "us-east-1"

UndefinedValueError: AWS_ACCESS_KEY_ID not found. Declare it as envvar or define a default value.

In [3]:
# Build the S3 helper from the two credentials and the region.
# Note: bucket_name is not passed at construction time.
file_manager = S3FileManager(
    aws_access_key_id,
    aws_secret_access_key,
    region_name,
)


# Data JSON Test

In [4]:
# Happy path: 'text' holds a list of strings, so no file manager is needed.
sample_texts = [
    "sample text with a date 2020-01-01 and a doctor Dr. John Doe and an age 30 years old",
    "Sample text with a date 2020-01-01 and a doctor Dr. John Doe and an age 30 years old",
]
payload = {
    'text': sample_texts,
    'masked': True,
    'faked': True,
    'entity': ['DATE', 'DOCTOR', 'AGE'],
    'model_id': 17,
    'user_id': 3097,
    'unit_price': 0.0,
}

process_payload(payload, file_manager=None)

['sample text with a date 2020-01-01 and a doctor Dr. John Doe and an age 30 years old',
 'Sample text with a date 2020-01-01 and a doctor Dr. John Doe and an age 30 years old']

In [5]:
# Negative test: input data must be a list of strings. Here 'text' is a
# single string, which process_payload rejects with ValueError.
payload = {
    'text': "sample text with a date 2020-01-01 and a doctor Dr. John Doe and an age 30 years old Sample text with a date 2020-01-01 and a doctor Dr. John Doe and an age 30 years old",
    'masked': True,
    'faked': True,
    'entity': ['DATE', 'DOCTOR', 'AGE'],
    'model_id': 17,
    'user_id': 3097,
    'unit_price': 0.0,
}

# Catch and display the expected error so that Restart & Run All continues
# past this cell instead of halting on an uncaught traceback.
try:
    process_payload(payload, file_manager=None)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")
else:
    # The demonstration is only meaningful if the call is actually rejected.
    raise AssertionError("expected ValueError for a non-list 'text' value")

ValueError: input must be a list of strings

In [None]:
# Negative test: the list is stored under 'key' instead of 'text', so the
# required 'text' key is missing and process_payload raises ValueError.
payload = {
    'key': [
        "sample text with a date 2020-01-01 and a doctor Dr. John Doe and an age 30 years old",
        "Sample text with a date 2020-01-01 and a doctor Dr. John Doe and an age 30 years old",
    ],
    'masked': True,
    'faked': True,
    'entity': ['DATE', 'DOCTOR', 'AGE'],
    'model_id': 17,
    'user_id': 3097,
    'unit_price': 0.0,
}

# Catch and display the expected error so that Restart & Run All continues
# past this cell instead of halting on an uncaught traceback.
try:
    process_payload(payload, file_manager=None)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")
else:
    # The demonstration is only meaningful if the call is actually rejected.
    raise AssertionError("expected ValueError when the 'text' key is missing")

ValueError: text key is required in the payload

# Data TXT

In [6]:
# TXT input given as an S3 key. The recorded output is a local path under
# input_data_folder/ (presumably fetched through file_manager — confirm
# against the aimped implementation).
s3_uri = "input/text/model_114/user_3079/20dd919d638_medicalcoding.txt"
payload = {
    'file_type': "txt",
    "txt": [s3_uri],
    'masked': True,
    'faked': True,
    'entity': ['DATE', 'DOCTOR', 'AGE'],
    'model_id': 17,
    'user_id': 3097,
    'unit_price': 0.0,
}
process_payload(payload, file_manager=file_manager)

['input_data_folder/6ee1a5b6-9c09-4186-bc02-3afadbce4bf5.txt']

In [7]:
# TXT input given as a public HTTP(S) URL.
url = "https://filesampleshub.com/download/document/txt/sample1.txt"
payload = {
    'file_type': "txt",
    "txt": [url],
    'masked': True,
    'faked': True,
    'entity': ['DATE', 'DOCTOR', 'AGE'],
    'model_id': 17,
    'user_id': 3097,
    'unit_price': 0.0,
}
process_payload(payload, file_manager=file_manager)

['input_data_folder/75258407-6ad1-4ff3-a205-d9ff53ce99d6.txt']

In [10]:
# TXT input given as a base64-encoded string.
import base64

text = "This is a sample text with a date 2020-01-01 and a doctor Dr. John Doe and an age 30 years old"

# str -> bytes -> base64 bytes -> str, so the payload carries plain text.
encoded_bytes = base64.b64encode(text.encode())
base64_text = encoded_bytes.decode()

payload = {
    'file_type': "txt",
    "txt": [base64_text],
    'masked': True,
    'faked': True,
    'entity': ['DATE', 'DOCTOR', 'AGE'],
    'model_id': 17,
    'user_id': 3097,
    'unit_price': 0.0,
}
process_payload(payload, file_manager=file_manager)

['input_data_folder/38c3f257-335d-46fb-91b1-44d793383d63.txt']

In [9]:
# TXT input given as a path to a file that already exists locally; the
# recorded output returns the same path unchanged.
file_path = "input_data_folder/38e41736-94a2-448e-8681-bc7ae17bbe5e.txt"
payload = {
    'file_type': "txt",
    "txt": [file_path],
    'masked': True,
    'faked': True,
    'entity': ['DATE', 'DOCTOR', 'AGE'],
    'model_id': 17,
    'user_id': 3097,
    'unit_price': 0.0,
}
process_payload(payload, file_manager=file_manager)

['input_data_folder/38e41736-94a2-448e-8681-bc7ae17bbe5e.txt']

In [11]:
# Mixed TXT inputs in one request: local file, base64 string and URL.
# Relies on file_path, base64_text and url defined by the earlier TXT cells.
mixed_txt_inputs = [file_path, base64_text, url]
payload = {
    'file_type': "txt",
    "txt": mixed_txt_inputs,
    'masked': True,
    'faked': True,
    'entity': ['DATE', 'DOCTOR', 'AGE'],
    'model_id': 17,
    'user_id': 3097,
    'unit_price': 0.0,
}
process_payload(payload, file_manager=file_manager)

['input_data_folder/38e41736-94a2-448e-8681-bc7ae17bbe5e.txt',
 'input_data_folder/95864c64-689e-4bb5-b15a-fb8367c83285.txt',
 'input_data_folder/d3129149-3384-4b04-9fe1-ccbcd4b3c167.txt']

# Data PDF

In [13]:
# PDF input given as an S3 key, repeated three times; the recorded output
# contains three distinct local file names.
s3_uri = "input/application/model_1/user_3092/563fb7c6780_28283823_2303236616.pdf"
payload = {
    'file_type': "pdf",
    "pdf": [s3_uri]*3,
    'masked': True,
    'faked': True,
    'entity': ['DATE', 'DOCTOR', 'AGE'],
    'model_id': 17,
    'user_id': 3097,
    'unit_price': 0.0,
}
process_payload(payload, file_manager=file_manager)

['input_data_folder/6e3815b1-659e-4c8d-a725-1ddfd21df171.pdf',
 'input_data_folder/9d05b4aa-f6b5-45bf-b135-7c1db8c980fa.pdf',
 'input_data_folder/685175bf-75d3-4517-a423-459ec67bc204.pdf']

In [18]:
# PDF input given as a public HTTP(S) URL.
url = "https://pdfobject.com/pdf/sample.pdf"
payload = {
    'file_type': "pdf",
    "pdf": [url],
    'masked': True,
    'faked': True,
    'entity': ['DATE', 'DOCTOR', 'AGE'],
    'model_id': 17,
    'user_id': 3097,
    'unit_price': 0.0,
}
process_payload(payload, file_manager=file_manager)

['input_data_folder/b3062400-6422-4c89-86b8-392e9433e673.pdf']

In [20]:
# PDF input given as a local path, listed twice; the recorded output
# returns the same path for both entries.
file_path = "input_data_folder/test.pdf"
payload = {
    'file_type': "pdf",
    "pdf": [file_path]*2,
    'masked': True,
    'faked': True,
    'entity': ['DATE', 'DOCTOR', 'AGE'],
    'model_id': 17,
    'user_id': 3097,
    'unit_price': 0.0,
}
process_payload(payload, file_manager=file_manager)

['input_data_folder/test.pdf', 'input_data_folder/test.pdf']

In [21]:
# PDF input given as a base64-encoded string, built from the local PDF at
# file_path (set by the preceding local-file PDF cell).
import base64

with open(file_path, "rb") as pdf_handle:
    pdf_bytes = pdf_handle.read()
    base64_pdf = base64.b64encode(pdf_bytes).decode()

payload = {
    'file_type': "pdf",
    "pdf": [base64_pdf],
    'masked': True,
    'faked': True,
    'entity': ['DATE', 'DOCTOR', 'AGE'],
    'model_id': 17,
    'user_id': 3097,
    'unit_price': 0.0,
}
process_payload(payload, file_manager=file_manager)

['input_data_folder/04fc4a37-c32b-414e-bceb-8c1b1d0eb771.pdf']