# Install Dependencies

In [7]:
%pip install privateai_client

Note: you may need to restart the kernel to use updated packages.


# Set up Private AI Client
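This notebook connects to the hosted Private AI API at `https://api.private-ai.com/deid` and authenticates with an API key. If you run your own Private AI container instead, start it in an environment this notebook can reach and update the client URL (protocol, host, and port) accordingly.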

In [8]:
from privateai_client import PAIClient
from privateai_client.objects import request_objects
from IPython.display import Audio, display, Markdown
import base64
import json
import os
import requests

# Create the client against the hosted Private AI de-identification endpoint.
client = PAIClient(url="https://api.private-ai.com/deid")

# Read the API key from the PRIVATEAI_API_KEY environment variable so the real key never
# has to be saved inside the notebook. If the variable is not set, the placeholder below is
# used: replace <YOUR_API_KEY> with your API Key.
# Get a free demo API key here: https://portal.private-ai.com/
api_key = os.environ.get("PRIVATEAI_API_KEY", "<YOUR_API_KEY>")
client.add_api_key(api_key=api_key)

# Sanity check: print the result of client.ping() before processing any files.
print(f'Able to access Private AI container: {client.ping()}')

Able to access Private AI container: True


# Process Audio Files

In [9]:

# Path to the original (unredacted) audio file
input_audio_file_path = 'Original_Files/Medical-Report-Audio-Unredacted.mp3'

# The request carries the file contents as a base64-encoded string
with open(input_audio_file_path, 'rb') as input_file:
    file_data = base64.b64encode(input_file.read()).decode()

# Make a request call
response = client.process_files_base64(
    request_object={
        'file': {
            'data': file_data,
            'content_type': 'audio/mp3'
        }
    }
)

# Path to save the redacted audio output file
output_audio_file_path = 'Redacted_Files/Medical-Report-Audio-redacted.mp3'

# Create the output folder if it does not exist yet; otherwise open(..., 'wb')
# raises FileNotFoundError (e.g. on a fresh checkout without Redacted_Files/).
os.makedirs(os.path.dirname(output_audio_file_path), exist_ok=True)

# The processed file comes back base64-encoded; decode it before writing the bytes
with open(output_audio_file_path, 'wb') as redacted_mp3:
    redacted_mp3.write(base64.b64decode(response.processed_file))

# Display both audio files in inline players so they can be compared
display(Markdown('**Original Audio:**'))
display(Audio(input_audio_file_path))

display(Markdown('**Redacted Audio:**'))
display(Audio(output_audio_file_path))

# Print the processed text returned alongside the redacted audio
print('***** PROCESSED TEXT *****')
print(response.processed_text)

**Original Audio:**

**Redacted Audio:**

***** PROCESSED TEXT *****
 Good morning. This is [NAME_MEDICAL_PROFESSIONAL_1] from the [ORGANIZATION_MEDICAL_FACILITY_1]. Am I speaking with Mr. [NAME_1]? Good morning. Yes, this is [NAME_GIVEN_1]. Hi [NAME_GIVEN_1]. I have your [MEDICAL_PROCESS_1] results here from your test back on the [DATE_1]. We did detect some abnormalities, which may be linked to the seizures you've experienced, but I'd like to do some more testing. So I'll need to make another appointment? Yes, we'll arrange that. First, I need to ask you a few things. Alright, sure. Have you ever had [CONDITION_1]? No, I don't think so. Just headaches sometimes. Any panic attacks or hyperventilation? No, never. Good. Okay. You mentioned that there were some family history of seizures. Your mother? Yes, my mother also had seizures, I think three different times when she was in her [AGE_1].


# Process Word Document (DOC)

In [10]:
# Read the original Word document and base64-encode it for the request
with open('Original_Files/PAI_SYNTH_EN_doctor-note.doc', 'rb') as input_file:
    file_data = base64.b64encode(input_file.read()).decode()

# Submit the document for de-identification
response = client.process_files_base64(
    request_object={
        'file': {
            'data': file_data,
            'content_type': 'application/msword'
        }
    }
)

# Text of the document with detected entities replaced by placeholders
print('***** PROCESSED TEXT *****')
print(response.processed_text)
# List of detected entities, pretty-printed as JSON
print('***** REDACTED TEXT *****')
print(json.dumps(response.entities, indent=2))

# Path to save the redacted document
output_doc_file_path = 'Redacted_Files/PAI_SYNTH_EN_doctor-note_redacted.doc'

# Create the output folder if it does not exist yet; otherwise open(..., 'wb')
# raises FileNotFoundError (e.g. on a fresh checkout without Redacted_Files/).
os.makedirs(os.path.dirname(output_doc_file_path), exist_ok=True)

# The processed file comes back base64-encoded; decode it before writing the bytes
with open(output_doc_file_path, 'wb') as redacted_doc:
    redacted_doc.write(base64.b64decode(response.processed_file))


***** PROCESSED TEXT *****

 
 
 [LANGUAGE_1]-[LANGUAGE_2]
 
 2
 
 
 [ORGANIZATION_MEDICAL_FACILITY_1]
 [LOCATION_ADDRESS_STREET_1]
 [LOCATION_ADDRESS_STREET_2]
  [LOCATION_ADDRESS_STREET_3]
 [LOCATION_ADDRESS_1]
 [LOCATION_ZIP_1]
 FROM THE DESK OF
 [NAME_MEDICAL_PROFESSIONAL_1]
 [DATE_1]
 Re: 
 Mr. [NAME_1]
 DOB: 
 [DOB_1]
 To whom it may concern:
 Dear sir/madam,
 The above patient would like to report confirmation that they were unable to attend their final exam on the [DATE_2]
 [DATE_3]
  [DATE_4] as they were feeling unwell. They attended a check-up appointment at my office on this day and were advised to take [DRUG_1] and rest at home.
 Please do not hesitate to contact me if you need further information.
 [NAME_MEDICAL_PROFESSIONAL_2]
 [ORGANIZATION_MEDICAL_FACILITY_1]
***** REDACTED TEXT *****
[
  {
    "processed_text": "LANGUAGE_1",
    "text": "en",
    "location": {
      "stt_idx": 0,
      "end_idx": 2,
      "stt_idx_processed": 0,
      "end_idx_processed": 12
    },
  

# Process PDF

In [11]:
# Read the original PDF and base64-encode it for the request
with open('Original_Files/PAI_SYNTH_EN_medical-referral_2.pdf', 'rb') as pdf_file:
    encoded_pdf = base64.b64encode(pdf_file.read()).decode('ascii')

# Submit the PDF for de-identification
response = client.process_files_base64(
    request_object={
        'file': {
            'data': encoded_pdf,
            'content_type': 'application/pdf'
        }
    }
)

# Text of the PDF with detected entities replaced by placeholders
print('***** PROCESSED TEXT *****')
print(response.processed_text)

# List of detected entities, pretty-printed as JSON
print('***** REDACTED TEXT *****')
print(json.dumps(response.entities, indent=2))

# Path to save the redacted PDF
output_pdf_file_path = 'Redacted_Files/PAI_SYNTH_EN_medical-referral_2_redacted.pdf'

# Create the output folder if it does not exist yet; otherwise open(..., 'wb')
# raises FileNotFoundError (e.g. on a fresh checkout without Redacted_Files/).
os.makedirs(os.path.dirname(output_pdf_file_path), exist_ok=True)

# The processed file comes back base64-encoded; decode it before writing the bytes
with open(output_pdf_file_path, 'wb') as redacted_pdf:
    redacted_pdf.write(base64.b64decode(response.processed_file))

***** PROCESSED TEXT *****
Clearly Imprint Patient Identification FATHERS Name: [NAME_1] MENTAL HEALTH DOB: [DOB_1] Postal Code: [LOCATION_ADDRESS_STREET_1] Referral Form CHIP: [NUMERICAL_PII_1] [ORGANIZATION_MEDICAL_FACILITY_1] [LOCATION_ADDRESS_1] Tel: [PHONE_NUMBER_1] Tel: [PHONE_NUMBER_2] Fax: [PHONE_NUMBER_3] [DATE_1] Email: [EMAIL_ADDRESS_1].[EMAIL_ADDRESS_2] Date: YYYY / MM / DD Are telephone messages OK? DYes ONO ** PLEASE ENSURE PATIENT [OCCUPATION_1] AND [OCCUPATION_2] REFERRAL INFORMATION IS COMPLETE + PREVIOUS [OCCUPATION_3] RECORDS ARE ATTACHED ** INCOMPLETE/UNCLEAR FORMS WILL BE RETURNED Referring [OCCUPATION_2] Information Family [OCCUPATION_2] Information (if not referring [OCCUPATION_2]) Name [NAME_MEDICAL_PROFESSIONAL_1] Name : [NAME_MEDICAL_PROFESSIONAL_2] Billing # [NUMERICAL_PII_2] Address [LOCATION_ADDRESS_2] Address [LOCATION_ADDRESS_3] Phone [PHONE_NUMBER_4] Phone [PHONE_NUMBER_5] A Fax ( Fax Email Father's Demographic Data Reason for Referral (Psychiatric Conce

# Process CSV

In [12]:
# Read the original CSV and base64-encode it for the request
with open('Original_Files/HEALTHCARE_oliverjakeconnor_health-care-system-analysis-Patient_records.csv', 'rb') as csv_file:
    encoded_csv = base64.b64encode(csv_file.read()).decode('ascii')

# Submit the CSV for de-identification
response = client.process_files_base64(
    request_object={
        'file': {
            'data': encoded_csv,
            'content_type': 'text/csv'
        }
    }
)

# Processed text returned for the file
print('***** PROCESSED TEXT *****')
print(response.processed_text)

# List of detected entities, pretty-printed as JSON
print('***** REDACTED TEXT *****')
print(json.dumps(response.entities, indent=2))

# Path to save the redacted CSV
output_csv_file_path = 'Redacted_Files/HEALTHCARE_oliverjakeconnor_health-care-system-analysis-Patient_records_redacted.csv'

# Create the output folder if it does not exist yet; otherwise open(..., 'wb')
# raises FileNotFoundError (e.g. on a fresh checkout without Redacted_Files/).
os.makedirs(os.path.dirname(output_csv_file_path), exist_ok=True)

# The processed file comes back base64-encoded; decode it before writing the bytes
with open(output_csv_file_path, 'wb') as redacted_csv:
    redacted_csv.write(base64.b64decode(response.processed_file))

***** PROCESSED TEXT *****

***** REDACTED TEXT *****
[
  {
    "processed_text": "NUMERICAL_PII_1",
    "text": "187158",
    "location": {
      "stt_idx": 0,
      "end_idx": 6,
      "stt_idx_processed": 0,
      "end_idx_processed": 17
    },
    "best_label": "NUMERICAL_PII",
    "labels": {
      "NUMERICAL_PII": 0.9117,
      "HEALTHCARE_NUMBER": 0.409
    }
  },
  {
    "processed_text": "NAME_GIVEN_1",
    "text": "Harbir",
    "location": {
      "stt_idx": 0,
      "end_idx": 6,
      "stt_idx_processed": 0,
      "end_idx_processed": 14
    },
    "best_label": "NAME_GIVEN",
    "labels": {
      "NAME": 0.9958,
      "NAME_GIVEN": 0.9255,
      "NAME_FAMILY": 0.1271
    }
  },
  {
    "processed_text": "GENDER_SEXUALITY_1",
    "text": "Female",
    "location": {
      "stt_idx": 0,
      "end_idx": 6,
      "stt_idx_processed": 0,
      "end_idx_processed": 20
    },
    "best_label": "GENDER_SEXUALITY",
    "labels": {
      "GENDER_SEXUALITY": 0.9961
    }
  },
  {
   