# Google Drive setup
The script below creates a subdirectory on your Google Drive, which is populated with various Git repositories that we need.

Note that you need to change `gender_bias_dir` depending on whether you use our fork (which works with NL input) or the original repository (which works with EN input).

In [None]:
import os

root_dir = 'PATH_ROOT/bachelorarbeit'
gender_bias_dir = 'PATH_ROOT/mt_gender_german'

os.chdir(root_dir)
# Auto install the required git repositories:
# fast_align module required in the evaluations
!git clone https://github.com/clab/fast_align.git

fast_align_dir = root_dir + '/fast_align';

In [None]:
# List the contents of both projects to see if it worked.
print(gender_bias_dir, fast_align_dir)
!ls {gender_bias_dir} -al
!ls {fast_align_dir} -al

# Functions

In [None]:
import os
import time
from IPython import get_ipython
# Handle to the running IPython shell. evaluate_language() uses its
# transform_cell() to turn `!command` lines into executable Python, which lets
# shell commands be run from inside a regular function.
ipython = get_ipython()

def evaluate_language(source, stereotype, destination, translator):
  """Run the repo's bash evaluation script to determine gender bias for one source/destination/translator combination.

  For example, evaluating 'en' to 'es' with translator 'google' looks like:
  1. It translates the input file first if it does not exist yet, which creates /translations/google/en-es.txt.
  2. It then tries to align the translated file using fast_align, which creates /src/forward/en-es.align. I believe the alignment process
  is about mapping words from one language to another.
  3. The alignments are evaluated, and the output is stored in /output/en-es.txt.

  The script's standard output is redirected to
  <gender_bias_dir>/output/<translator>/<destination>/<source>/<timestamp>.txt
  (the directory is created if needed), and that path is printed afterwards.
  The working directory is left at <gender_bias_dir>/src.

  :param str source: The input, which is a language file from the /data/aggregates folder, e.g. use "en" for the "en.txt" file.
  :param str stereotype: Passed unchanged as the second argument to evaluate_language.sh. The calls in this notebook use
    "none", "anti", "pro", "anti_wmt" and "pro_wmt".
  :param str destination: The target language, e.g. "es" (as in the "en-es" file names above).
  :param str translator: The translation service, e.g. "google", "bing", "aws", "sota", "systran" (this notebook also uses "deepl").
  """
  timestamp = time.strftime("%Y%m%d-%H%M%S")

  # The evaluation script is invoked relative to src/. Changing directory first
  # also makes a wrong gender_bias_dir fail before any directory is created.
  os.chdir(gender_bias_dir + '/src')

  # Create output/<translator>/<destination>/<source>/ in a single call
  # (equivalent to nested `mkdir -p`, without spawning shells or changing directory).
  os.makedirs(gender_bias_dir + '/output/' + translator + '/' + destination + '/' + source, exist_ok=True)

  # NOTE: `source_file` and `output_file` are expanded *by name* by IPython
  # inside the `!` command below, so these locals must keep their names.
  # Define the source file to be a txt file in the aggregates folder.
  source_file = '/data/aggregates/' + source + '.txt'
  # Define the output file (relative to src/) inside the "output" directory of the GenderBias project.
  output_file = '../output/' + translator + '/' + destination + '/' + source + '/' + timestamp + '.txt'

  # Execute script from the repo. The source path is quoted so that a
  # gender_bias_dir containing spaces is passed as a single argument.
  exec(ipython.transform_cell('!../scripts/evaluate_language.sh "{gender_bias_dir}{source_file}" {stereotype} {destination} {translator} > {output_file}'))

  print('Output file: ' + gender_bias_dir + '/src/' + output_file)

## Setup API keys
If you are going to evaluate a language pair that has no translations yet (like `/translations/google/en-es.txt` etc), you need to add an API key for the given service. 

After a translation task, the output is automatically saved in the project folder, so it does not need to be run again (unless you remove it).

In [None]:
# Set default region for AWS services (eu-central-1 is Europe (Frankfurt)).
%env AWS_DEFAULT_REGION=eu-central-1
# Set AWS user (restricted to using AWS Translate only).
# Both values are blank in this copy of the notebook; fill in your own
# credentials before running a translation with the "aws" translator.
%env AWS_ACCESS_KEY_ID=
%env AWS_SECRET_ACCESS_KEY=

## Evaluation

In [29]:
# (source file, stereotype) pairs that are evaluated, in this order, for every
# language/translator combination.
datasets = (
    ('de', 'none'),
    ('de_anti', 'anti'),
    ('de_pro', 'pro'),
    ('de_anti_wmt', 'anti_wmt'),
    ('de_pro_wmt', 'pro_wmt'),
)

# Target languages paired with the translators they are evaluated with.
# The second group ('ar', 'he') is run without 'deepl'.
runs = (
    (('es', 'it', 'fr', 'uk', 'ru'), ('deepl', 'bing', 'google', 'systran', 'aws')),
    (('ar', 'he'), ('bing', 'google', 'systran', 'aws')),
)

for languages, translators in runs:
    for lang in languages:
        for model in translators:
            for src, kind in datasets:
                evaluate_language(source=src, stereotype=kind, destination=lang, translator=model)

# To evaluate a single combination instead, run for example:
#evaluate_language(source='de', stereotype='none', destination='es', translator='deepl')


ARG=i
ARG=d
ARG=o
ARG=v
INITIAL PASS 
expected target length = source length * 1.03797
ITERATION 1
  log_e likelihood: -88695.6
  log_2 likelihood: -127961
     cross entropy: 29.8974
        perplexity: 1e+09
      posterior p0: 0.08
 posterior al-feat: -0.173144
       size counts: 78
ITERATION 2
  log_e likelihood: -20350.7
  log_2 likelihood: -29359.8
     cross entropy: 6.85978
        perplexity: 116.144
      posterior p0: 0.0476997
 posterior al-feat: -0.100356
       size counts: 78
  1  model al-feat: -0.166045 (tension=4)
  2  model al-feat: -0.138052 (tension=5.31378)
  3  model al-feat: -0.125251 (tension=6.0677)
  4  model al-feat: -0.117859 (tension=6.56561)
  5  model al-feat: -0.113102 (tension=6.91567)
  6  model al-feat: -0.109846 (tension=7.1706)
  7  model al-feat: -0.107528 (tension=7.3604)
  8  model al-feat: -0.105833 (tension=7.50385)
     final tension: 7.61339
ITERATION 3
  log_e likelihood: -11392.2
  log_2 likelihood: -16435.4
     cross entropy: 3.84005
  