In [None]:
import os
import csv
from typing import List
import random

# Runtime configuration is taken from environment variables.
# - TIRA_INFERENCE_SERVER: its mere presence (any value) switches off the
#   batch run at the bottom of the notebook.
# - TIRA_INPUT_DIRECTORY / TIRA_OUTPUT_DIRECTORY: directories used by the
#   batch run; both are None when the variable is not set.
runs_as_inference_server = 'TIRA_INFERENCE_SERVER' in os.environ
dataset_dir = os.environ.get('TIRA_INPUT_DIRECTORY')
output_dir = os.environ.get('TIRA_OUTPUT_DIRECTORY')

In [None]:
# Names of the value categories that can be assigned to an instance. The order
# matters: probabilities[i] belongs to values[i], and the run file columns
# follow this order.
values = [
    "Self-direction: thought",
    "Self-direction: action",
    "Stimulation",
    "Hedonism",
    "Achievement",
    "Power: dominance",
    "Power: resources",
    "Face",
    "Security: personal",
    "Security: societal",
    "Tradition",
    "Conformity: rules",
    "Conformity: interpersonal",
    "Humility",
    "Benevolence: caring",
    "Benevolence: dependability",
    "Universalism: concern",
    "Universalism: nature",
    "Universalism: tolerance",
    "Universalism: objectivity",
]

# Per-value probability with which predict() assigns the value at the same
# index of `values` (five entries per row, same order as above).
probabilities = [
    0.17, 0.26, 0.06, 0.04, 0.27,
    0.09, 0.11, 0.07, 0.38, 0.31,
    0.11, 0.23, 0.04, 0.08, 0.29,
    0.15, 0.38, 0.07, 0.14, 0.18,
]

In [None]:
def predict(input_list: List) -> List:
    """Randomly assign values to every element of ``input_list``.

    The content of the elements is ignored. For each element, every entry of
    the module-level ``values`` list is included independently when a fresh
    ``random.random()`` draw is less than or equal to the entry of
    ``probabilities`` at the same index.

    Args:
        input_list: the instances to label; only its length is used.

    Returns:
        A list with one list of value names per input element, each in the
        order of ``values``.
    """
    predictions = []
    for _ in input_list:
        sampled = []
        # One random draw per value, in the order of `values`.
        for position, name in enumerate(values):
            if random.random() <= probabilities[position]:
                sampled.append(name)
        predictions.append(sampled)
    return predictions

In [None]:
# "instance" is a dict with keys "Argument ID", "Conclusion", "Stance", and "Premise"
# return value is the list of values detected for that instance
def labelInstance(instance):
    """Label a single instance by running predict() on a one-element batch."""
    singleBatch = [instance]
    batchLabels = predict(singleBatch)
    return batchLabels[0]

In [None]:
# generic code for reading and writing

def readInstances(directory):
    """Read all instances from the ``arguments*.tsv`` files in ``directory``.

    Only files whose base name starts with "arguments" and ends with ".tsv"
    are considered. A file is skipped entirely (with one message per missing
    column) unless its header contains all of "Argument ID", "Conclusion",
    "Stance" and "Premise"; an empty file has no header and is skipped too.

    Args:
        directory: path of the directory to scan (not recursive).

    Returns:
        A list with one dict per data row, keyed by the column names of the
        file the row came from. Files are processed in sorted name order.
    """
    requiredFields = ["Argument ID", "Conclusion", "Stance", "Premise"]
    instances = []
    # os.listdir() returns entries in arbitrary order; sort so that the order
    # of the returned instances is reproducible.
    for instancesBaseName in sorted(os.listdir(directory)):
        if not (instancesBaseName.startswith("arguments") and instancesBaseName.endswith(".tsv")):
            continue
        instancesFileName = os.path.join(directory, instancesBaseName)
        # newline='' lets the csv module do its own newline handling.
        with open(instancesFileName, "r", newline='') as instancesFile:
            print("Reading " + instancesFileName)
            reader = csv.DictReader(instancesFile, delimiter = "\t")
            # fieldnames is None when the file has no header line at all.
            header = reader.fieldnames or []
            missingFields = [fieldName for fieldName in requiredFields if fieldName not in header]
            if missingFields:
                for fieldName in missingFields:
                    print("Skipping file " + instancesFileName + " due to missing field '" + fieldName + "'")
                # Skip the whole file, not just the field check.
                continue
            for row in reader:
                instances.append(row)
    return instances

In [None]:
def labelInstances(instances):
    """Label every instance and index the results by argument ID.

    Args:
        instances: sequence of dicts, each with at least an "Argument ID" key.

    Returns:
        A dict mapping each "Argument ID" to the result of labelInstance() for
        that instance. If an ID occurs more than once, the last one wins.
    """
    total = len(instances)
    print("Labeling " + str(total) + " instances")
    labelsById = {}
    for current in instances:
        # Label first, then look up the ID (same order as a plain assignment).
        detected = labelInstance(current)
        labelsById[current["Argument ID"]] = detected
    return labelsById

In [None]:
def writeRun(labels, outputDataset):
    """Write the predicted labels to ``run.tsv`` inside ``outputDataset``.

    The file is tab-separated. Its first column is "Argument ID"; it is
    followed by one column for each value that was predicted for at least one
    instance, in the order of the module-level ``values`` list. A cell is "1"
    when the value was predicted for that argument and "0" otherwise.

    Args:
        labels: dict mapping an argument ID to the collection of value names
            predicted for it.
        outputDataset: directory to write to; created (including parents)
            when it does not exist yet.

    Raises:
        SystemExit: with exit code 1 when a predicted value is not contained
            in ``values``. Nothing is written in that case.
    """
    os.makedirs(outputDataset, exist_ok=True)

    usedValues = set()
    for instanceValues in labels.values():
        usedValues.update(instanceValues)

    unknownValues = [usedValue for usedValue in usedValues if usedValue not in values]
    if unknownValues:
        for usedValue in unknownValues:
            print("Unknown value: '" + usedValue + "'")
        # Raise explicitly instead of calling exit(): the exit() helper is
        # meant for interactive use and is not guaranteed to stop execution
        # inside a notebook kernel.
        raise SystemExit(1)

    print("Detected values: " + str(usedValues))

    # Keep the column order of `values`, but only for values actually used.
    valueColumns = [value for value in values if value in usedValues]
    fieldNames = [ "Argument ID" ] + valueColumns

    print("Writing run file")
    # newline='' is required by the csv module; otherwise the writer's "\r\n"
    # line endings get translated a second time on some platforms.
    with open(os.path.join(outputDataset, "run.tsv"), "w", newline='') as runFile:
        writer = csv.DictWriter(runFile, fieldnames = fieldNames, delimiter = "\t")
        writer.writeheader()
        for (argumentId, instanceValues) in labels.items():
            row = { "Argument ID": argumentId }
            for value in valueColumns:
                row[value] = "1" if value in instanceValues else "0"
            writer.writerow(row)

In [None]:
# Batch run: executed only when TIRA_INFERENCE_SERVER is not set.
# Reads every instance from dataset_dir, labels it and writes the run file
# to output_dir.
if not runs_as_inference_server:
    writeRun(
        labelInstances(readInstances(dataset_dir)),
        output_dir,
    )