# Random-Baseline model from ACL'22

In [None]:
import os
import pandas as pd
from typing import List, Dict, Tuple
import random

# Runtime configuration, read from the environment.
# The inference-server flag is set whenever TIRA_INFERENCE_SERVER is defined
# (its value is not inspected); the directories fall back to local defaults.
runs_as_inference_server = 'TIRA_INFERENCE_SERVER' in os.environ
dataset_dir = os.getenv('TIRA_INPUT_DIRECTORY', './dataset')
output_dir = os.getenv('TIRA_OUTPUT_DIRECTORY', './output')

## Setup

In [None]:
# Names of the value categories; list order is shared with the probability
# lists below (entry i of each list belongs to values[i]).
values = [
    "Self-direction: thought",
    "Self-direction: action",
    "Stimulation",
    "Hedonism",
    "Achievement",
    "Power: dominance",
    "Power: resources",
    "Face",
    "Security: personal",
    "Security: societal",
    "Tradition",
    "Conformity: rules",
    "Conformity: interpersonal",
    "Humility",
    "Benevolence: caring",
    "Benevolence: dependability",
    "Universalism: concern",
    "Universalism: nature",
    "Universalism: tolerance",
]

# Per-value probability that a value is resorted to at all.
probabilities_resorted = [
    0.17, 0.26, 0.06, 0.04, 0.27,
    0.09, 0.11, 0.07, 0.38, 0.31,
    0.11, 0.23, 0.04, 0.08, 0.29,
    0.15, 0.38, 0.07, 0.14,
]

# Per-value probability of "attained" (vs. "constrained") once a value is
# resorted to; the same 0.5 for every value.
probabilities_attained = [0.5] * len(values)

## Predict function

In [None]:
def choose_values_by_probability(prob_resort: float, prob_attained: float) -> Tuple[float, float]:
    """Randomly draw one label pair for a single value.

    Args:
        prob_resort: Threshold for the first draw; the value is dropped when
            the draw exceeds it.
        prob_attained: Threshold for the second draw, only made when the
            value was not dropped.

    Returns:
        ``(0.0, 0.0)`` when the first draw exceeds ``prob_resort``,
        ``(0.0, 1.0)`` when the second draw exceeds ``prob_attained``,
        and ``(1.0, 0.0)`` otherwise. ``predict`` maps the two positions to
        the ' attained' and ' constrained' columns, in that order.
    """
    not_resorted = random.random() > prob_resort
    if not_resorted:
        # No second draw in this case, so only one random number is consumed.
        return 0.0, 0.0

    constrained = random.random() > prob_attained
    return (0.0, 1.0) if constrained else (1.0, 0.0)

In [None]:
# Compatibility function for running as inference server
def predict(input_list: List) -> List[Dict]:
    """Produce one randomly drawn label dict per input instance.

    Compatibility function for running as inference server. The instances
    themselves are not inspected (the sentence text would be available as
    ``instance['Text']``); only their count matters.

    Args:
        input_list: The instances to label.

    Returns:
        A list with one dict per instance. For every entry of ``values`` the
        dict holds two confidences, keyed ``'<value> attained'`` and
        ``'<value> constrained'``. Confidences for 'attained', 'constrained'
        and 'none' have to add up to 1.0, so 'none' is omitted.
    """
    if not runs_as_inference_server:
        print(f'Labeling {len(input_list)} instances')

    suffixes = (' attained', ' constrained')
    predictions = []
    for _ in input_list:
        labels = {}
        for i, value in enumerate(values):
            # One draw per value; the pair is (attained, constrained).
            confidences = choose_values_by_probability(
                probabilities_resorted[i], probabilities_attained[i]
            )
            for suffix, confidence in zip(suffixes, confidences):
                labels[value + suffix] = confidence
        predictions.append(labels)
    return predictions

## Classification on TIRA

In [None]:
# "instance" is a dict with keys "Text-ID", "Sentence", and "Text"
def labelInstances(instances: List[Dict]):
    """Attach predicted labels to the identifying fields of each instance.

    Args:
        instances: Dicts that each provide at least the keys "Text-ID" and
            "Sentence".

    Returns:
        A list of dicts, one per instance, containing "Text-ID" and
        "Sentence" followed by the label entries returned by ``predict``.
    """
    predictions = []
    for instance, labels in zip(instances, predict(instances)):
        row = {"Text-ID": instance["Text-ID"], "Sentence": instance["Sentence"]}
        row.update(labels)
        predictions.append(row)
    return predictions

In [None]:
def writeRun(labels, output_dir):
    """Write the labelled instances to ``predictions.tsv`` in ``output_dir``.

    Args:
        labels: A list of dicts (one per row); the dict keys become the
            column headers of the TSV file.
        output_dir: Directory to write into. It is created, including any
            missing parent directories, if it does not exist yet.
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists
    # first and is a no-op when the directory is already there.
    os.makedirs(output_dir, exist_ok=True)

    # A list of records goes straight into the DataFrame constructor.
    df = pd.DataFrame(labels)

    print("Writing run file")
    output_file = os.path.join(output_dir, "predictions.tsv")
    df.to_csv(output_file, header=True, index=False, sep='\t')

In [None]:
# Batch mode: when not serving inference requests, label every row of
# sentences.tsv from the dataset directory and write the run file.
if not runs_as_inference_server:
    input_file = os.path.join(dataset_dir, "sentences.tsv")
    # Each row of the TSV becomes one dict keyed by the header columns.
    instances = pd.read_csv(input_file, sep='\t', header=0, index_col=None).to_dict('records')
    writeRun(labelInstances(instances), output_dir)