# GCP Demo 3: Text translation using AutoML

### Import Modules

In [None]:
import os
import shutil

from gcpdemo3 import etl

from google.oauth2 import service_account

### Run ETL

In [None]:
# get credentials for translation api
# The path is assembled with os.path.join so the string contains no backslash
# escape sequences ('\c' and '\m' are invalid escapes) and also resolves on
# platforms whose path separator is not a backslash.
credentials = service_account.Credentials.from_service_account_file(
    os.path.join('..', 'credentials', 'ml-sandbox-1-191918-b473cb40490b.json')
)

# copy raw book files from GCS
os.system('gcloud config set project ml-sandbox-1-191918')
# exist_ok=True lets this cell be re-run without failing on a leftover ./temp
os.makedirs('./temp', exist_ok=True)
# os.system only reports failure through its return value, so check it here:
# every later step reads files from ./temp/opus and would otherwise fail with
# a much less obvious "file not found" error.
copy_status = os.system('gsutil -m cp -r gs://gcp-cert-demo-3/opus ./temp')
if copy_status != 0:
    raise RuntimeError(
        'gsutil copy of gs://gcp-cert-demo-3/opus failed with status {}'.format(
            copy_status
        )
    )

# process native english books and concatenate
# each entry is (raw input path, processed output path)
native_books = [
    (
        './temp/opus/Books/raw/en/Austen_Jane-Pride_and_Prejudice.xml',
        './temp/Austen_Jane-Pride_and_Prejudice_en_processed.xml',
    ),
    (
        './temp/opus/Books/raw/en/Twain_Mark-Tom_Sawyer.xml',
        './temp/Twain_Mark-Tom_Sawyer_en_processed.xml',
    ),
    (
        './temp/opus/Books/raw/en/Doyle_Arthur_Conan-Adventures_of_Sherlock_Holmes.xml',
        './temp/Doyle_Arthur_Conan-Adventures_of_Sherlock_Holmes_en_processed.xml',
    ),
]
for raw_path, processed_path in native_books:
    etl.process_book(in_path=raw_path, out_path=processed_path)

# the processed outputs, in the same order, are combined under the 'native' label
etl.concat_label_files(
    in_paths=[processed_path for _, processed_path in native_books],
    out_path='./temp/native.csv',
    label='native'
)

# process professionally translated books and concatenate
# each entry is (raw input path, processed output path)
translated_books = [
    (
        './temp/opus/Books/raw/en/Cervantes_Miguel-Don_Quijote.xml',
        './temp/Cervantes_Miguel-Don_Quijote_en_processed.xml',
    ),
    (
        './temp/opus/Books/raw/en/Hugo_Victor-Notre_Dame_de_Paris.xml',
        './temp/Hugo_Victor-Notre_Dame_de_Paris_en_processed.xml',
    ),
    (
        './temp/opus/Books/raw/en/Flaubert_Gustave-Madame_Bovary.xml',
        './temp/Flaubert_Gustave-Madame_Bovary_en_processed.xml',
    ),
]
for raw_path, processed_path in translated_books:
    etl.process_book(in_path=raw_path, out_path=processed_path)

# the processed outputs, in the same order, are combined under the 'translated' label
etl.concat_label_files(
    in_paths=[processed_path for _, processed_path in translated_books],
    out_path='./temp/translated.csv',
    label='translated'
)

# process and translate the native spanish and native french books
# each entry is (raw input, processed output, translated output, source language)
translation_jobs = [
    (
        './temp/opus/Books/raw/es/Cervantes_Miguel-Don_Quijote.xml',
        './temp/Cervantes_Miguel-Don_Quijote_es_processed.xml',
        './temp/cervantes_translated.txt',
        'es',
    ),
    (
        './temp/opus/Books/raw/fr/Hugo_Victor-Notre_Dame_de_Paris.xml',
        './temp/Hugo_Victor-Notre_Dame_de_Paris_fr_processed.xml',
        './temp/victorhugo_translated.txt',
        'fr',
    ),
]
for raw_path, processed_path, translated_path, source_lang in translation_jobs:
    # a book is processed first; its processed file is the translation input
    etl.process_book(in_path=raw_path, out_path=processed_path)
    etl.translate_book(
        credentials=credentials,
        in_path=processed_path,
        out_path=translated_path,
        source=source_lang,
        target='en',
        chunk_size=10
    )

# concatenate translated books
# inputs are the two translate_book outputs, combined under the 'machine' label
machine_translated_inputs = [
    './temp/cervantes_translated.txt',
    './temp/victorhugo_translated.txt',
]
etl.concat_label_files(
    label='machine',
    in_paths=machine_translated_inputs,
    out_path='./temp/machine_translated.csv'
)

### Train Model

In [None]:
# TODO: train model

### Remove Temporary Directory

In [None]:
# remove temporary directory
# Guarded so the cell can be re-run (or run after a failed ETL that never
# created ./temp) without raising FileNotFoundError; any other error raised
# while deleting an existing directory still propagates.
if os.path.isdir('./temp'):
    shutil.rmtree('./temp')

### Deploy Model

In [None]:
# TODO: deploy model

### Make Predictions

In [None]:
# TODO: make predictions