# Google Drive setup
The script below switches to the project root directory (`root_dir`, e.g. a folder on your Google Drive or local disk) and clones the Git repositories that we need into it.

Note that you need to change `gender_bias_dir` depending on whether you use our fork (which works with NL input) or the original repository (which works with EN input).

In [18]:
import os

root_dir = '/mnt/d/Libraries/University/bachelorarbeit'
gender_bias_dir = '/mnt/d/Libraries/University/bachelorarbeit/mt_gender_german'

os.chdir(root_dir)
# Auto install the required git repositories:
# fast_align module required in the evaluations
!git clone https://github.com/clab/fast_align.git

fast_align_dir = root_dir + '/fast_align';

fatal: destination path 'fast_align' already exists and is not an empty directory.


In [19]:
# List the contents of both projects to see if it worked.
print(gender_bias_dir, fast_align_dir)
!ls {gender_bias_dir} -al
!ls {fast_align_dir} -al

/mnt/d/Libraries/University/bachelorarbeit/mt_gender_german /mnt/d/Libraries/University/bachelorarbeit/fast_align
total 71000
drwxrwxrwx 1 michelle michelle     4096 Jul 13 14:05 .
drwxrwxrwx 1 michelle michelle     4096 Jul 11 14:24 ..
drwxrwxrwx 1 michelle michelle     4096 Jul 13 10:44 .git
-rwxrwxrwx 1 michelle michelle     1279 Jul 10 17:15 .gitignore
-rwxrwxrwx 1 michelle michelle     1074 Jul  4 16:17 LICENSE
-rwxrwxrwx 1 michelle michelle      242 Jul 10 17:15 Pipfile
-rwxrwxrwx 1 michelle michelle    40546 Jul 10 17:15 Pipfile.lock
-rwxrwxrwx 1 michelle michelle     2703 Jul  4 16:17 README.md
drwxrwxrwx 1 michelle michelle     4096 Jul 10 17:15 czech-morfflex-pdt-161115
-rwxrwxrwx 1 michelle michelle 72538619 Jul 10 17:18 czech-morfflex-pdt-161115.zip
drwxrwxrwx 1 michelle michelle     4096 Jul 13 13:03 data
-rwxrwxrwx 1 michelle michelle      513 Jul  4 16:17 install.sh
drwxrwxrwx 1 michelle michelle     4096 Jul 13 13:06 logs
-rwxrwxrwx 1 michelle michelle   110527 Jul 13 1

# Functions

In [20]:
import os
import time
from IPython import get_ipython
ipython = get_ipython()

def evaluate_language(source, stereotype, destination, translator):
    """Run the repo's bash evaluation script to determine gender bias for one source file and target language.

    For example, evaluating 'en' to 'es' with translator 'google' looks like:
    1. It translates the input file first if it does not exist yet, which creates /translations/google/en-es.txt.
    2. It then tries to align the translated file using fast_align, which creates /src/forward/en-es.align. I believe the
       alignment process is about mapping words from one language to another.
    3. The alignments are evaluated, and the script's standard output is stored in
       /output/<translator>/<destination>/<source>/<timestamp>.txt inside the GenderBias project.

    Side effects: creates the output directory if needed, changes the current working directory to
    ``<gender_bias_dir>/src`` (the script is invoked relative to it) and prints the output file path.

    :param str source: The input, which is a language file from the /data/aggregates folder, e.g. use "en" for the "en.txt" file.
    :param str stereotype: Passed unchanged as the second argument of ``scripts/evaluate_language.sh``
        (e.g. "none", "anti" or "pro").
    :param str destination: Target language code, passed unchanged to the script and used in the output path, e.g. "es".
    :param str translator: One of "google", "bing", "aws", "sota", "systran".
    """
    import shlex  # Local import: only needed to quote the shell arguments below.

    timestamp = time.strftime("%Y%m%d-%H%M%S")

    # The source file is a txt file in the aggregates folder of the GenderBias project.
    source_file = gender_bias_dir + '/data/aggregates/' + source + '.txt'

    # Results go to output/<translator>/<destination>/<source>/ in the root of the GenderBias project.
    os.makedirs(os.path.join(gender_bias_dir, 'output', translator, destination, source), exist_ok=True)

    # Path relative to the src directory, because the script is started from there.
    output_file = '../output/' + translator + '/' + destination + '/' + source + '/' + timestamp + '.txt'

    os.chdir(gender_bias_dir + '/src')

    # Execute script from the repo; arguments are quoted so paths with spaces survive the shell.
    # NOTE: IPython still applies its own {var}/$var expansion to the command string.
    command = ' '.join([
        '../scripts/evaluate_language.sh',
        shlex.quote(source_file),
        shlex.quote(stereotype),
        shlex.quote(destination),
        shlex.quote(translator),
        '>',
        shlex.quote(output_file),
    ])
    ipython.system(command)

    print('Output file: ' + gender_bias_dir + '/src/' + output_file)

## Setup API keys
If you are going to evaluate a language pair that has no translations yet (e.g. `/translations/google/en-es.txt` does not exist), you need to add an API key for the given service.

After a translation task, the output is automatically saved in the project folder, so it does not need to be run again (unless you remove it).

In [21]:
# Google Cloud credentials so we can use the translation API. GOOGLE_APPLICATION_CREDENTIALS normally holds
# the path to a service-account JSON key file (not the key itself); fill it in after the "=".
# The secret values in this cell are intentionally left blank - do not commit real credentials.
%env GOOGLE_APPLICATION_CREDENTIALS=

# Set default region for AWS services (eu-west-2, London in this case).
%env AWS_DEFAULT_REGION=eu-west-2
# Set AWS user credentials (the author's user was restricted to using AWS Translate only).
%env AWS_ACCESS_KEY_ID=
%env AWS_SECRET_ACCESS_KEY=

# Set Bing/Microsoft Translator credentials. Make sure that these credentials have a paid plan, because the free plan has a request limit.
%env BING_TRANSLATOR_TEXT_KEY=
%env BING_TRANSLATOR_REGION_KEY=westeurope

env: GOOGLE_APPLICATION_CREDENTIALS=
env: AWS_DEFAULT_REGION=eu-west-2
env: AWS_ACCESS_KEY_ID=
env: AWS_SECRET_ACCESS_KEY=
env: BING_TRANSLATOR_TEXT_KEY=
env: BING_TRANSLATOR_REGION_KEY=westeurope


## Evaluation

In [22]:
# Older example calls below (here and at the end of the cell) predate the required `stereotype`
# argument of evaluate_language; add it before uncommenting any of them.
# Evaluate en-es (requires original GenderBias project, see very first code block).
# evaluate_language(source='en', destination='es', translator='google')

# Evaluate every German ("de*") source file against each target language with DeepL.
# NOTE(review): presumably this needs the forked GenderBias project configured in the very first code block.
target_languages = ('es', 'it', 'fr', 'uk', 'ru')
# (source file, stereotype) pairs, evaluated in this order for every target language.
test_sets = (
    ('de', 'none'),
    ('de_anti', 'anti'),
    ('de_pro', 'pro'),
    ('de_anti_wmt', 'anti_wmt'),
    ('de_pro_wmt', 'pro_wmt'),
)
for lang in target_languages:
    for source_name, stereotype_name in test_sets:
        evaluate_language(source=source_name, stereotype=stereotype_name, destination=lang, translator='deepl')

# evaluate_language(source='nl', destination='ar', translator='aws')
# evaluate_language(source='nl', destination='ar', translator='bing')
# evaluate_language(source='nl', destination='es', translator='deepl')


ARG=i
ARG=d
ARG=o
ARG=v
INITIAL PASS 
expected target length = source length * 1.0382
ITERATION 1
  log_e likelihood: -88716.3
  log_2 likelihood: -127991
     cross entropy: 29.8974
        perplexity: 1e+09
      posterior p0: 0.08
 posterior al-feat: -0.173132
       size counts: 78
ITERATION 2
  log_e likelihood: -20457.5
  log_2 likelihood: -29514
     cross entropy: 6.89418
        perplexity: 118.947
      posterior p0: 0.04831
 posterior al-feat: -0.100361
       size counts: 78
  1  model al-feat: -0.166006 (tension=4)
  2  model al-feat: -0.138031 (tension=5.3129)
  3  model al-feat: -0.125236 (tension=6.06629)
  4  model al-feat: -0.117846 (tension=6.56378)
  5  model al-feat: -0.113092 (tension=6.91349)
  6  model al-feat: -0.109838 (tension=7.16811)
  7  model al-feat: -0.107522 (tension=7.35766)
  8  model al-feat: -0.105829 (tension=7.50087)
     final tension: 7.61023
ITERATION 3
  log_e likelihood: -11425.1
  log_2 likelihood: -16483
     cross entropy: 3.85026
       