<a href="https://colab.research.google.com/github/mille-s/GEM24_CheckSystemOutputs/blob/main/GEM24_checkOutputs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title 1- Run to create out folder
import os
# Folder (relative to the current working directory) where the submission
# files have to be uploaded before running the checks.
out_folder = 'out'
# exist_ok=True makes re-running this cell harmless when the folder is already
# there, without a separate "does it exist?" test. If a regular file named
# "out" is in the way, this raises FileExistsError instead of silently going on.
os.makedirs(out_folder, exist_ok=True)

### 2- After running the first cell, upload your (unzipped) files to the "out" folder that was just created (click on "Files" in the left panel to see the folder); then run the first cell below to check your output file(s).

In [None]:
#@title Run to check uploaded files
import glob
import codecs
import json

# Every entry found directly inside the upload folder. glob returns matches in
# arbitrary (file-system dependent) order, so sort them to get the same report
# order on every run.
paths_submissions = sorted(glob.glob(os.path.join(out_folder, '*')))

def check_GEM_submissions(path_submission, sys_output):
  """Check the filename and the contents of one submitted output file.

  The filename is expected to look like
  ``<system name><task suffix><language suffix><extension>``, for instance
  ``mySystem_D2T-1-FA_en.txt``. It is validated from right to left (extension,
  language suffix, task suffix, system name); the first problem found is
  printed and the check stops there. Only when the whole name is valid are the
  contents inspected:

  * ``.txt`` files: D2T-1 files must have 1,779 lines and D2T-2 files 1,800
    lines; any other task ID is reported as OK without a line-count check.
  * ``.jsonl`` files: every non-blank line must be a well-formed JSON value.

  Args:
    path_submission: Path of the submitted file; only its basename is used.
    sys_output: Open text stream over the file contents. It is read here but
      not closed; closing it is the caller's responsibility.

  Returns:
    None. The filename and the verdict are printed to standard output.
  """
  task_suffixes = ('_D2T-1-FA', '_D2T-1-FI', '_D2T-1-CFA', '_D2T-2-FA', '_D2T-2-FI', '_D2T-2-CFA', '_Summ-1', '_Summ-2', '_Summ-3')
  d2t1_IDs = ['D2T-1-FA', 'D2T-1-FI', 'D2T-1-CFA']
  d2t2_IDs = ['D2T-2-FA', 'D2T-2-FI', 'D2T-2-CFA']
  languages = ('_en', '_zh', '_de', '_ru', '_es', '_ko', '_hi', '_sw', '_ar')
  extensions = ('.txt', '.jsonl')

  filename = os.path.basename(path_submission)
  print(filename)

  # Check extension
  if not filename.endswith(extensions):
    print(f'  Error filename extension!\n\t{filename} should have one of these extensions (according to task): {extensions}.')
    return
  filename_noExt, extension = filename.rsplit('.', 1)

  # Check language ID
  if not filename_noExt.endswith(languages):
    print(f'  Error filename language suffix!\n\t{filename_noExt} should end with one of these language suffixes: {languages}.')
    return
  filename_noExt_noLang = filename_noExt.rsplit('_', 1)[0]

  # Check task identifier
  if not filename_noExt_noLang.endswith(task_suffixes):
    print(f'  Error filename task suffix!\n\t{filename_noExt} should contain one of these task suffixes: {task_suffixes}.')
    return
  # Task suffixes contain a single underscore (the leading one), so splitting
  # on the last underscore separates the system name from the task ID.
  system_name, task_ID = filename_noExt_noLang.rsplit('_', 1)

  # There must be a system name in front of the task suffix
  if not system_name:
    print(f'  Error filename system name!\n\t{filename_noExt} should have a name before the task suffix.')
    return

  # NOTE(review): the extension/task pairing itself (txt for D2T, jsonl for
  # Summ) is not enforced here - confirm whether it should be.
  if extension == 'txt':
    # txt files are for the D2T task; D2T-1 should have 1,779 lines, D2T-2 should have 1,800 lines.
    n_lines = len(sys_output.readlines())
    if task_ID in d2t1_IDs and n_lines != 1779:
      print(f'  Error line numbers!\n\t{filename} should have 1,779 lines (found {n_lines}).')
    elif task_ID in d2t2_IDs and n_lines != 1800:
      print(f'  Error line numbers!\n\t{filename} should have 1,800 lines (found {n_lines}).')
    else:
      print('  OK!')
  elif extension == 'jsonl':
    # jsonl files are for the summ task; check well-formedness. A JSON Lines
    # file holds one JSON value per line, so each line is parsed on its own
    # (parsing the whole stream at once would reject any multi-record file).
    for line_number, line in enumerate(sys_output, start=1):
      if not line.strip():
        # Tolerate blank lines, e.g. a trailing empty line at the end of the file.
        continue
      try:
        json.loads(line)
      except json.JSONDecodeError as error:
        print(f'  Error json formatting! Check {filename_noExt}.\n\tLine {line_number}: {error.msg}.')
        return
    # TODO: also check the number of outputs in the submitted file.
    print('  OK!')

# Check every uploaded entry in turn; a problem with one file must not prevent
# the remaining files from being checked.
for path_submission in paths_submissions:
  # The '*' pattern also matches sub-folders, which cannot be opened as text.
  if not os.path.isfile(path_submission):
    print(os.path.basename(path_submission))
    print('  Error not a file!\n\tUpload the unzipped files directly in the "out" folder.')
    continue

  # The built-in open() only treats \n, \r and \r\n as line ends, whereas
  # codecs.open() also splits on other Unicode line boundaries and can thus
  # inflate the line count of a submission. The "with" block closes the file
  # once the check is done.
  with open(path_submission, 'r', encoding='utf-8') as sys_output:
    try:
      check_GEM_submissions(path_submission, sys_output)
    except UnicodeDecodeError:
      # Decoding happens lazily while the file is read inside the check.
      print(f'  Error encoding!\n\t{os.path.basename(path_submission)} should be encoded in UTF-8.')



In [None]:
#@title 3- Empty "out" folder if needed
import shutil

def clear_files(folder):
  """Delete everything inside *folder*, leaving the folder itself in place.

  Regular files and symbolic links are unlinked; sub-folders are removed
  recursively. Nothing happens when *folder* is not an existing directory.
  A failure on one entry is printed and the remaining entries are still
  processed.
  """
  # isdir() is already False for a path that does not exist.
  if not os.path.isdir(folder):
    return

  for entry_name in os.listdir(folder):
    entry_path = os.path.join(folder, entry_name)
    try:
      if os.path.isfile(entry_path) or os.path.islink(entry_path):
        os.unlink(entry_path)
        continue
      if os.path.isdir(entry_path):
        shutil.rmtree(entry_path)
    except Exception as error:
      print('Failed to delete %s. Reason: %s' % (entry_path, error))

# Empty the upload folder: its files, links and sub-folders are deleted, the
# folder itself is kept.
clear_files(out_folder)