* **where to run**: thesis-remote (Workspace) [SSH: login*.pegasus.kl.dfki.de]
* **kernel**: appraise-env (Python 3.10.6)
* **directory**: `~/Appraise`
* The Appraise server (`manage.py runserver`) must be running while this notebook is executed

In [2]:
import sys
sys.path.append("..")
sys.path.append("../experiments")

from experiments.constants import * 
from experiments.utils import *

from random import randint
from collections import OrderedDict
import json

In [3]:
# Language pair under evaluation. To switch pairs, use the commented line instead:
# SRC, TGT = SPANISH, BASQUE
SRC = ENGLISH
TGT = MALTESE

# Folder holding the evaluation set for the selected pair; echoed as a sanity check.
LP_FOLDER = f"/netscratch/falcao/data/{SRC}-{TGT}/eval-set"
print("lp folder:", LP_FOLDER)

lp folder: /netscratch/falcao/data/en-mt/eval-set


## Create batches

In [10]:
# Batch composition. Insertion order matters: the values are later joined with ":"
# in exactly this order to build the --task-definition argument.
# NOTE(review): presumably the number of candidate / check / reference / bad-reference
# items per batch — confirm against the Appraise command's help text.
_task_definition = OrderedDict(
    [
        ("TGT", 80),
        ("CHK", 0),
        ("REF", 10),
        ("BAD", 10),
    ]
)

# File name of the JSON file the batches are written to.
BATCHES_FILENAME = f"batches.{SRC}-{TGT}.json"

In [11]:
# Serialise the per-batch item counts as a colon-separated string.
TASK_DEFINITION = ":".join(str(count) for count in _task_definition.values())

# Input files and output location, all derived from the language-pair folder.
SRC_FILE = f"{LP_FOLDER}/src.{SRC}"
REF_FILE = f"{LP_FOLDER}/ref.{TGT}"
SYSTEMS_FOLDER = f"{LP_FOLDER}/systems/"
BATCHES_PATH = f"{LP_FOLDER}/{BATCHES_FILENAME}"

# Echo the resolved paths and list the system outputs found on disk.
# NOTE(review): `os` is not imported explicitly in the visible cells; it presumably
# arrives through one of the star imports — confirm, or import it explicitly.
print("src file:", SRC_FILE)
print("ref file:", REF_FILE)
system_files = os.listdir(SYSTEMS_FOLDER)
print("/systems:", system_files)

src file: /netscratch/falcao/data/en-mt/eval-set/src.en
ref file: /netscratch/falcao/data/en-mt/eval-set/ref.mt
/systems: ['etranslate.mt', 'nllb.mt', 'kurt.mt']


In [28]:
# call the command from bash but using variables set in Python
! python manage.py CreateDirectAssessmentData \
    100 \
    $SRC.code3 \
    $TGT.code3 \
    $LP_FOLDER/src.$SRC \
    $LP_FOLDER/ref.$TGT \
    $SYSTEMS_FOLDER \
    $BATCHES_PATH \
    --task-definition $TASK_DEFINITION \
    --required-annotations 3 \
    --source-based \
    --all-batches

Using task definition: (80, 0, 10, 10)
Loaded 400 source segments
Loaded 400 reference segments
character_based = False
Loaded 400 system nllb.mt segments
Loaded 400 system etranslate.mt segments
Loaded 400 system kurt.mt segments
Creating /netscratch/falcao/data/en-mt/eval-set/batches.en-mt.json.segments ... OK
Missing items is 80/80/1200
Added 80 missing items rotating keys
Total number of batches is 16
0 10 10
chk_items: 0
ref_items: 10
bad_items: 10
chk_ids: []
ref_ids: [48, 18, 10, 44, 20, 22, 0, 38, 46, 24]
bad_ids: [9, 27, 31, 39, 13, 37, 19, 6, 15, 45]
empty_slots [51, 52, 53, 54, 55, 57, 58, 61, 62, 64, 66, 67, 71, 73, 75, 76, 78, 79, 80, 82, 83, 84, 85, 86, 90, 91, 92, 93, 97, 99]
len(batch_items): 100
len(batch_items) == None: 0
0 10 10
chk_items: 0
ref_items: 10
bad_items: 10
chk_ids: []
ref_ids: [20, 48, 15, 11, 26, 32, 36, 29, 28, 5]
bad_ids: [49, 38, 10, 42, 21, 14, 7, 45, 44, 3]
empty_slots [50, 51, 52, 54, 56, 58, 59, 62, 63, 66, 67, 68, 69, 72, 73, 74, 75, 77, 80, 81,

## Create campaign

In [15]:
# configuration
# Campaign identifier; the manifest cell reuses it for both CAMPAIGN_NAME and CAMPAIGN_KEY.
CAMPAIGN_NAME = "testcampaignafter"

In [16]:
# Campaign manifest that is later dumped to JSON; key order is kept as written.
# CAMPAIGN_NO is drawn at random from 0..100 (inclusive), so it differs between runs.
# NOTE(review): the meaning of the "uniform", 1, 1 fields is defined by Appraise's
# manifest format — confirm against its documentation.
manifest = dict(
    CAMPAIGN_URL="http://127.0.0.1:8000/dashboard/sso/",
    CAMPAIGN_NAME=CAMPAIGN_NAME,
    CAMPAIGN_KEY=CAMPAIGN_NAME,
    CAMPAIGN_NO=randint(0, 100),
    REDUNDANCY=1,
    TASKS_TO_ANNOTATORS=[
        [SRC.code3, TGT.code3, "uniform", 1, 1],
    ],
)


In [17]:
# Sanity-check the manifest shape before it is written to disk: TASKS_TO_ANNOTATORS
# must be a non-empty list whose entries are themselves lists.
# isinstance() replaces the exact type comparison, and an empty list now fails with a
# readable assertion message instead of an IndexError.
_tasks = manifest["TASKS_TO_ANNOTATORS"]
assert isinstance(_tasks, list) and _tasks, "TASKS_TO_ANNOTATORS must be a non-empty list"
assert all(isinstance(entry, list) for entry in _tasks), "every TASKS_TO_ANNOTATORS entry must be a list"

In [18]:
# Location of the manifest file, stored inside the language-pair folder.
MANIFEST_PATH = f"{LP_FOLDER}/manifest.json"

# Write-only mode is sufficient (nothing is read back), and the encoding is pinned so
# the file does not depend on the machine's locale settings.
with open(MANIFEST_PATH, mode="w", encoding="utf-8") as manifest_file:
    json.dump(manifest, manifest_file, indent=4)

In [19]:
! python manage.py StartNewCampaign \
    $MANIFEST_PATH \
    --batches-json $BATCHES_PATH

JSON manifest path: '/netscratch/falcao/data/en-mt/eval-set/manifest.json'
CSV output path: None
Excel output path: None
No task type found in the manifest file, assuming it is "Direct". If this is incorrect, define "TASK_TYPE" in the manifest file.
### Running InitCampaign
All languages: [('eng', 'mlt')]
Identified superuser: falcao
Processed Market/Metadata instances
### Creating a new campaign
- '/netscratch/falcao/data/en-mt/eval-set/batches.en-mt.json'
Batch: /netscratch/falcao/data/en-mt/eval-set/batches.en-mt.json
  Market: eng_mlt_testcampaignafter
  Metadata: eng->mlt/testcampaignafter["1.0"]
Uploaded file name: Batches/batches.en-mt_jtKAkcI.json
Campaign name: testcampaignafter
### Running validatecampaigndata
Campaign name: testcampaignafter
Batch name: Batches/batches.en-mt_jtKAkcI.json
Validated 1 batches
### Running ProcessCampaignData
Batches/batches.en-mt_jtKAkcI.json 1
7 kurt.mt
198 b"il-kors inkluda valutazzjoni ta' elementi kwalitattivi g\xc4\xa7al kull modulu, b\xc4