# Language Translation for Bank Incident Reports using Microsoft Translator

Microsoft Translator translates text from one language to another using the Translator REST API.


### Overview
*Bank Incident Reports Dataset*: International branches of Woodgrove Bank record data on safety incidents that occurred at their branches in their local languages. For global consolidation, these reports can easily be translated into English in real time through the Microsoft Translator API.

### Notebook Organization 
+ Fetch the injury report JSON files, written in Spanish, from a folder.

+ Translate the JSON files to English by sending a POST request to the Microsoft Translator service.

+ Store the translated JSON files in a folder.


## Disclaimer

By accessing this code, you acknowledge the code is made available for presentation and demonstration purposes only and that the code: (1) is not subject to SOC 1 and SOC 2 compliance audits; (2) is not designed or intended to be a substitute for the professional advice, diagnosis, treatment, or judgment of a certified financial services professional; (3) is not designed, intended or made available as a medical device; and (4) is not designed or intended to be a substitute for professional medical advice, diagnosis, treatment or judgement. Do not use this code to replace, substitute, or provide professional financial advice or judgment, or to replace, substitute or provide medical advice, diagnosis, treatment or judgement. You are solely responsible for ensuring the regulatory, legal, and/or contractual compliance of any use of the code, including obtaining any authorizations or consents, and any solution you choose to build that incorporates this code in whole or in part.

© 2021 Microsoft Corporation. All rights reserved

## Importing Relevant Libraries

In [1]:
import pandas as pd
import requests
import json
import os
from os import listdir
from os.path import isfile, join, splitext
import glob
import shutil

## Create Local Folders

- *input-json-files* is the folder from which the input JSON files are read to be translated into English.
- *translated_json* contains all the translated JSON files.

In [2]:
import os
import glob
import shutil

# Stage the input reports: every *.json file sitting directly in the current
# working directory is moved into the FOLDER_NAME sub-folder, which is created
# first if it does not exist yet.
BASE_PATH = os.getcwd()
print('BASE_PATH: ',BASE_PATH)

FOLDER_NAME = 'input-json-files'

NEW_PATH = os.path.join(BASE_PATH, FOLDER_NAME)
print('YOUR_PATH: ',NEW_PATH)

# File extension (without the leading dot) of the files to stage.
EXT = 'json'
# Keep only the bare file names. os.path.basename is used instead of splitting
# on "/" so the result does not depend on the separator glob returns.
LIST_OF_FILES = [os.path.basename(f) for f in glob.glob(os.path.join(BASE_PATH, '*.' + EXT))]


if os.path.isdir(NEW_PATH):
    print('exists!')
else:
    # Create the folder before reporting it, so the message is only printed
    # when the creation actually succeeded.
    os.mkdir(NEW_PATH)
    print('created path: ',NEW_PATH)


for f in LIST_OF_FILES:
    new_file_path = os.path.join(NEW_PATH, f)
    old_file_path = os.path.join(BASE_PATH, f)
    print('old_file_path: ',old_file_path)
    print('new_file_path: ',new_file_path)
    if os.path.isfile(new_file_path):
        # A file of the same name is already staged: leave both copies alone.
        print(new_file_path,' already exists!')
    else:
        # The file is moved (not copied), so it disappears from BASE_PATH.
        print('moving file ',f,' to folder ',FOLDER_NAME)
        shutil.move(old_file_path, new_file_path)

BASE_PATH:  /mnt/batch/tasks/shared/LS_root/mounts/clusters/demo-fsi-user/code/Users
YOUR_PATH:  /mnt/batch/tasks/shared/LS_root/mounts/clusters/demo-fsi-user/code/Users/input-json-files
exists!
old_file_path:  /mnt/batch/tasks/shared/LS_root/mounts/clusters/demo-fsi-user/code/Users/202045000.json
new_file_path:  /mnt/batch/tasks/shared/LS_root/mounts/clusters/demo-fsi-user/code/Users/input-json-files/202045000.json
/mnt/batch/tasks/shared/LS_root/mounts/clusters/demo-fsi-user/code/Users/input-json-files/202045000.json  already exists!
old_file_path:  /mnt/batch/tasks/shared/LS_root/mounts/clusters/demo-fsi-user/code/Users/202045001.json
new_file_path:  /mnt/batch/tasks/shared/LS_root/mounts/clusters/demo-fsi-user/code/Users/input-json-files/202045001.json
/mnt/batch/tasks/shared/LS_root/mounts/clusters/demo-fsi-user/code/Users/input-json-files/202045001.json  already exists!
old_file_path:  /mnt/batch/tasks/shared/LS_root/mounts/clusters/demo-fsi-user/code/Users/202045002.json
new_fil

In [3]:
# filess = [f for f in listdir(os.getcwd()+"/input-json-files") if isfile(join(os.getcwd()+"/input-json-files", f))]
# print(filess)

# for f in filess:
#     new_file_path = BASE_PATH+'/'+f
#     old_file_path = NEW_PATH+'/'+f    
#     print('old_file_path: ',old_file_path)
#     print('new_file_path: ',new_file_path)
#     if os.path.isfile(new_file_path):
#         print(new_file_path,' already exists!')
#     else:
#         print('copying file ',f,' to folder ',FOLDER_NAME)
#         shutil.copy(old_file_path,BASE_PATH)

In [4]:
# Resolve the two working folders relative to the current directory:
#   local_path  - folder the input JSON files are read from
#   output_path - folder that will contain all the translated JSON files
# Both values keep their trailing separator.
working_dir = os.getcwd()
translated_dir = working_dir + "/translated_json"

# Create the output folder only when it is missing.
if not os.path.isdir(translated_dir):
    os.makedirs(translated_dir)

local_path = os.path.join(working_dir, "input-json-files//")
output_path = os.path.join(working_dir, "translated_json//")
print(local_path)

/mnt/batch/tasks/shared/LS_root/mounts/clusters/demo-fsi-user/code/Users/input-json-files//


## Translator Resource

In [5]:
import GlobalVariables

In [6]:
# Translator resource configuration.
# The request URL and the subscription key are read from the GlobalVariables
# module; the key is sent in the 'Ocp-Apim-Subscription-Key' request header.
url = GlobalVariables.TRANSLATION_URL
apim_key = GlobalVariables.TRANSLATION_API_KEY
headers = {
    'Ocp-Apim-Subscription-Key': apim_key,
    "Content-Type": "application/json; charset=UTF-8",
}
# Target language code: English.
lang = "en"

In [7]:
# Translate every JSON report found in the input folder and save the result.
# Uses names defined by earlier cells: local_path, output_path, url, headers
# and lang.
# Sorted so the files are processed in a deterministic order.
files = sorted(f for f in listdir(local_path) if isfile(join(local_path, f)))

# Loop through all the JSON files and translate them one by one
for file in files:
    # Anything in the folder that is not a .json file is ignored.
    if not file.lower().endswith('.json'):
        continue

    input_file = join(local_path, file)
    print(input_file)
    # Read as UTF-8 explicitly so accented characters do not depend on the
    # platform's default encoding.
    with open(input_file, encoding='utf-8') as f:
        data = f.readlines()
    print(data)

    # Every non-blank line is parsed as its own JSON document; only the first
    # document is translated. Blank lines are skipped instead of crashing.
    # NOTE(review): presumably each file holds a single-line JSON object - confirm.
    data = [json.loads(line) for line in data if line.strip()]
    if not data:
        print(input_file, ' contains no JSON data, skipping')
        continue

    # Underscores in the keys are replaced by spaces before translation.
    pd_data = {k.replace('_', ' ') : v for k, v in data[0].items()}
    # ensure_ascii=False keeps accented characters readable instead of \u escapes.
    esp_data = json.dumps(pd_data, ensure_ascii=False).encode('utf8')
    esp_decod = esp_data.decode('utf-8')
    print("\nOriginal JSON\n")
    print(esp_decod)

    # Sending a post request to the translator. The timeout keeps a stalled
    # connection from hanging the loop, and raise_for_status surfaces HTTP
    # errors here instead of as an obscure indexing error below.
    resp = requests.post(url+lang, json=[{'Text':esp_decod}], headers=headers, timeout=30)
    resp.raise_for_status()
    resp_text = json.loads(resp.text)
    en_val = resp_text[0]['translations'][0]['text']

    try:
        en_dict = json.loads(en_val)
    except json.JSONDecodeError:
        # Fallback: retry with the translated text wrapped in braces. If this
        # also fails, the error propagates.
        en_str = f"{{{en_val}}}"
        en_dict = json.loads(en_str)
    print("\nTranslated JSON\n")
    print(en_dict)

    # Save the translated text to "<original name>-translated.json".
    output_file = join(output_path, splitext(file)[0] + "-translated" + ".json")
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(en_dict, outfile)

/mnt/batch/tasks/shared/LS_root/mounts/clusters/demo-fsi-user/code/Users/input-json-files//202045000.json
['{"CaseId": "202045000", "EventDate": "01-02-15", "Empleador": "Banco Woodgrove", "Ubicaci\\u00f3n": "Los \\u00c1ngeles", "Hospitalizado": 1, "Amputaci\\u00f3n": 0, "FinalNarrative": "Empleado tropez\\u00f3 en una pavimentadora y se fractur\\u00f3 la mu\\u00f1eca, requiriendo hospitalizaci\\u00f3n.", "NatureTitle": "Fracturas", "PartofBodyTitle": "Mu\\u00f1eca(s)", "EventTitle": "Ca\\u00edda en el mismo nivel debido a tropezar en una superficie irregular", "SourceTitle": "Irregularidad del suelo"}']

Original JSON

{"CaseId": "202045000", "EventDate": "01-02-15", "Empleador": "Banco Woodgrove", "Ubicación": "Los Ángeles", "Hospitalizado": 1, "Amputación": 0, "FinalNarrative": "Empleado tropezó en una pavimentadora y se fracturó la muñeca, requiriendo hospitalización.", "NatureTitle": "Fracturas", "PartofBodyTitle": "Muñeca(s)", "EventTitle": "Caída en el mismo nivel debido a trope