# Random-Baseline model from ACL'22

In [None]:
import os
import csv
import pandas as pd
from typing import List, Dict
import random

# Runtime configuration, read from environment variables.
# The inference-server flag is on whenever TIRA_INFERENCE_SERVER is defined,
# whatever its value (an empty string still counts as defined).
runs_as_inference_server = 'TIRA_INFERENCE_SERVER' in os.environ
# Directory the input file is read from; defaults to ./dataset.
dataset_dir = os.getenv('TIRA_INPUT_DIRECTORY', './dataset')
# Directory the run file is written to; defaults to ./output.
output_dir = os.getenv('TIRA_OUTPUT_DIRECTORY', './output')

## Setup

In [None]:
# Selects what predict() dispatches to: 1 uses predict_subtask_1, any other
# value falls through to predict_subtask_2.
active_subtask = 1  # either 1 or 2

In [None]:
# Value names. Their order fixes the label columns of the run file, and the
# two probability lists below are index-aligned with this list.
values = [
    "Self-direction: thought",
    "Self-direction: action",
    "Stimulation",
    "Hedonism",
    "Achievement",
    "Power: dominance",
    "Power: resources",
    "Face",
    "Security: personal",
    "Security: societal",
    "Tradition",
    "Conformity: rules",
    "Conformity: interpersonal",
    "Humility",
    "Benevolence: caring",
    "Benevolence: dependability",
    "Universalism: concern",
    "Universalism: nature",
    "Universalism: tolerance",
]
# Per-value probability of a positive label in predict_subtask_1.
probabilities_resorted = [
    0.17, 0.26, 0.06, 0.04, 0.27,
    0.09, 0.11, 0.07, 0.38, 0.31,
    0.11, 0.23, 0.04, 0.08, 0.29,
    0.15, 0.38, 0.07, 0.14,
]
# Per-value probability of 'attained' in predict_subtask_2: a fair coin flip
# for every value.
probabilities_attained = [0.5] * len(values)

## Predict function

In [None]:
def predict_subtask_1(input_list: List) -> List[Dict]:
    """Draw random subtask-1 labels, one dict per input item.

    The content of the items is never inspected; only the number of items
    matters. For each item, every name in ``values`` is mapped to
    ``'attained'`` when a fresh ``random.random()`` draw is at most the
    matching entry of ``probabilities_resorted``, and to ``'none'`` otherwise.

    Args:
        input_list: The instances to label.

    Returns:
        A list with one ``{value name: label}`` dict per item, in input order.
    """
    predictions = []
    for _ in input_list:
        row = {}
        for index, value in enumerate(values):
            threshold = probabilities_resorted[index]
            if random.random() <= threshold:
                row[value] = 'attained'
            else:
                row[value] = 'none'
        predictions.append(row)
    return predictions

In [None]:
def predict_subtask_2(input_list: List) -> List[Dict]:
    """Draw random subtask-2 labels, one dict per input item.

    The content of the items is never inspected; only the number of items
    matters. For each item, every name in ``values`` is mapped to
    ``'attained'`` when a fresh ``random.random()`` draw is at most the
    matching entry of ``probabilities_attained``, and to ``'constrained'``
    otherwise.

    Args:
        input_list: The instances to label.

    Returns:
        A list with one ``{value name: label}`` dict per item, in input order.
    """
    predictions = []
    for _ in input_list:
        row = {}
        for index, value in enumerate(values):
            threshold = probabilities_attained[index]
            if random.random() <= threshold:
                row[value] = 'attained'
            else:
                row[value] = 'constrained'
        predictions.append(row)
    return predictions

In [None]:
# Compatibility function for running as inference server
def predict(input_list: List) -> List[Dict]:
    """Label the instances for the subtask chosen by ``active_subtask``.

    Outside of inference-server mode a progress line is printed first.

    Args:
        input_list: The instances to label.

    Returns:
        The result of ``predict_subtask_1`` when ``active_subtask`` equals 1,
        otherwise the result of ``predict_subtask_2``.
    """
    running_standalone = not runs_as_inference_server
    if running_standalone:
        print(f'Labeling {len(input_list)} instances under subtask {active_subtask}')

    if active_subtask == 1:
        return predict_subtask_1(input_list)
    return predict_subtask_2(input_list)

## Classification on TIRA

In [None]:
# "instance" is a dict with keys "Text-ID", "Sentence", and "Text"
def labelInstances(instances: List[Dict]):
    """Attach predicted labels to the identifying fields of each instance.

    Args:
        instances: Dicts that each provide at least the keys "Text-ID" and
            "Sentence".

    Returns:
        A list with one dict per instance holding its "Text-ID" and
        "Sentence" followed by the label entries that ``predict`` returned
        for it.
    """
    # All labels are predicted in a single call, then paired up by position.
    all_labels = predict(instances)

    predictions = []
    for instance, labels in zip(instances, all_labels):
        row = {"Text-ID": instance["Text-ID"], "Sentence": instance["Sentence"]}
        row.update(labels)
        predictions.append(row)
    return predictions

In [None]:
def writeRun(labels, output_dir):
    """Write the labeled rows to ``predictions.tsv`` inside ``output_dir``.

    The file is tab-separated, with a header row made of "Text-ID",
    "Sentence", and every name in ``values``, in that order.

    Args:
        labels: Iterable of row dicts keyed by the header names.
        output_dir: Directory to write into; it is created (including
            parents) when it does not exist yet.

    Raises:
        ValueError: Raised by ``csv.DictWriter`` when a row contains a key
            that is not one of the header names.
        OSError: When the directory or the file cannot be created.
    """
    # exist_ok avoids the race between a separate existence check and the
    # directory creation.
    os.makedirs(output_dir, exist_ok=True)

    field_names = ["Text-ID", "Sentence"] + values

    print("Writing run file")
    output_file = os.path.join(output_dir, "predictions.tsv")

    # The csv module requires newline="" so that it alone controls the row
    # terminator; without it, platforms that translate "\n" on write emit
    # "\r\r\n". The encoding is pinned so the output does not depend on the
    # locale of the machine.
    with open(output_file, "w", newline="", encoding="utf-8") as runFile:
        writer = csv.DictWriter(runFile, fieldnames=field_names, delimiter="\t")
        writer.writeheader()
        writer.writerows(labels)

In [None]:
# Standalone run (not inference-server mode): read the input sentences, label
# them, and write the run file.
if not runs_as_inference_server:
    input_file = os.path.join(dataset_dir, "sentences.tsv")
    # Tab-separated input whose first line is the header; every data row
    # becomes one dict keyed by column name.
    sentences = pd.read_csv(input_file, sep='\t', header=0, index_col=None)
    instances = sentences.to_dict('records')
    labeled = labelInstances(instances)
    writeRun(labeled, output_dir)