In [8]:
import os
import json
import csv

In [9]:
# Both locations are relative paths, so they resolve against the directory the
# notebook kernel is running in.
# utils_folder: holds the per-axis annotation files (`all_<axis>_data.json`).
utils_folder = '../../utils'
# experiments_folder: each entry inside it is treated as one experiment.
experiments_folder = '../data'

In [10]:
# Axes to process; each name is interpolated into the input and output file
# names (e.g. `<axis>_results.json`).
axes = [
    'region',
    'religion',
]

### Compiling the results for each of the experiments

In [4]:
# Translates the subcategory keys found in the raw `<axis>_results.json` files
# into the shorter names used as keys in the compiled result files.
subcategory_map = dict([
    ('professions', 'professions'),
    ('subjects_of_study', 'subjects'),
    ('action_verbs', 'verbs'),
    ('behaviour_adjectives', 'adjectives'),
    ('socio_economic_status_adjectives', 'socio_economic_status'),
    ('food_habits_adjectives', 'food'),
    ('clothing_preferences_adjectives', 'clothes'),
    ('general', 'general'),
])

In [6]:
def _compile_predictions(data, name_map):
    """Aggregate raw predictions into per-token counts and mean scores.

    Parameters
    ----------
    data : dict
        ``{id_term: {subcategory: [datum, ...]}}`` where every ``datum`` is an
        iterable of dicts carrying a ``'token_str'`` and a ``'score'`` entry.
    name_map : dict
        Maps each raw subcategory key to the name used in the output.

    Returns
    -------
    (counts, mean_scores) : tuple of dict
        Both are ``{id_term: {mapped_subcategory: {token: value}}}``.
        ``counts`` holds how often each lower-cased token occurred and
        ``mean_scores`` the average of its scores; the innermost dicts are
        ordered by value, highest first.

    Raises
    ------
    KeyError
        If a subcategory in ``data`` has no entry in ``name_map``.
    """
    counts = {}
    mean_scores = {}

    for id_term, by_subcategory in data.items():
        counts[id_term] = {}
        mean_scores[id_term] = {}

        for subcategory, predictions in by_subcategory.items():
            subcategory_term = name_map[subcategory]

            token_counts = {}
            score_totals = {}
            for datum in predictions:
                for top_result in datum:
                    # Tokens are case-folded so that e.g. "Doctor" and
                    # "doctor" are tallied together.
                    token = top_result['token_str'].lower()
                    token_counts[token] = token_counts.get(token, 0) + 1
                    score_totals[token] = score_totals.get(token, 0) + top_result['score']

            # Average score of each token within this subcategory.
            token_means = {
                token: score_totals[token] / token_counts[token]
                for token in token_counts
            }

            counts[id_term][subcategory_term] = dict(
                sorted(token_counts.items(), key=lambda item: item[1], reverse=True)
            )
            mean_scores[id_term][subcategory_term] = dict(
                sorted(token_means.items(), key=lambda item: item[1], reverse=True)
            )

    return counts, mean_scores


# os.listdir returns entries in arbitrary order and also lists plain files;
# sort for a reproducible log and skip anything that is not a directory so a
# stray file (e.g. `.DS_Store`) cannot make `os.makedirs` fail.
for experiment in sorted(os.listdir(experiments_folder)):
    if not os.path.isdir(f'{experiments_folder}/{experiment}'):
        continue

    print(f'Processing {experiment}')
    os.makedirs(f'{experiments_folder}/{experiment}/results', exist_ok=True)

    for axis in axes:
        file_path = f'{experiments_folder}/{experiment}/{axis}_results.json'
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        results, scores = _compile_predictions(data, subcategory_map)

        # Token occurrence counts.
        with open(f'{experiments_folder}/{experiment}/results/{axis}_results.json', 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=4)
        # Mean score per token.
        with open(f'{experiments_folder}/{experiment}/results/scores_{axis}_results.json', 'w', encoding='utf-8') as f:
            json.dump(scores, f, indent=4)

Processing negative_framing
Processing positive_framing
Processing adv_inc
Processing adv_perspective_shift
Processing adv_dec
Processing adv_future
Processing vanilla
Processing adv_present
Processing neutral_framing
Processing adv_past


### Calculating stereotype scores for all the experiments

In [15]:
# Per-axis token annotations, keyed by axis name.
# As consumed by the scoring cell, each file maps an id_term to a list of
# records with the keys 'token', 'annotation', 'stereotype' and
# 'annotation_confidence'.
token_annotations = {}

for axis in axes:
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(f'{utils_folder}/all_{axis}_data.json', 'r', encoding='utf-8') as f:
        token_annotations[axis] = json.load(f)

In [20]:
# For every experiment and axis, combine the compiled token counts with the
# token annotations:
#   stereotype_scores[id_term][annotation] = sum over annotated stereotype
#       tokens present in the compiled results of (count * annotation_confidence)
#   stereotype_tokens[id_term][annotation] = the tokens that contributed
#
# os.listdir returns entries in arbitrary order and also lists plain files;
# sort for a reproducible log and skip anything that is not a directory.
for experiment in sorted(os.listdir(experiments_folder)):
    if not os.path.isdir(f'{experiments_folder}/{experiment}'):
        continue

    print(f'Processing {experiment}')
    os.makedirs(f'{experiments_folder}/{experiment}/final_results', exist_ok=True)

    for axis in axes:
        file_path = f'{experiments_folder}/{experiment}/results/{axis}_results.json'

        # Context manager so the handle is closed right after parsing.
        with open(file_path, 'r', encoding='utf-8') as f:
            results_data = json.load(f)

        stereotype_scores = {}
        stereotype_tokens = {}

        for id_term in token_annotations[axis]:
            stereotype_scores[id_term] = {}
            stereotype_tokens[id_term] = {}
            for token_data in token_annotations[axis][id_term]:
                token = token_data['token']
                annotation = token_data['annotation']
                stereotype = token_data['stereotype']
                annotation_confidence = token_data['annotation_confidence']

                # NOTE(review): the compile step lower-cases tokens before
                # counting, while the annotation token is matched verbatim
                # here - confirm the annotation files use lower-case tokens.
                try:
                    if stereotype and token in results_data[id_term][annotation]:
                        # Compute the contribution before touching the
                        # accumulators so a failure cannot leave a
                        # half-initialised entry behind.
                        weighted_count = results_data[id_term][annotation][token] * annotation_confidence
                        stereotype_scores[id_term][annotation] = (
                            stereotype_scores[id_term].get(annotation, 0) + weighted_count
                        )
                        stereotype_tokens[id_term].setdefault(annotation, []).append(token)
                except (KeyError, TypeError) as e:
                    # KeyError: the id_term or annotation category is absent
                    # from the compiled results (the message is the missing
                    # key). TypeError: malformed count/confidence values.
                    # Either way the token is reported and skipped.
                    print(f'Exception occurred for experiment {experiment}, axis {axis}, id_term {id_term}, token {token} : {e}')

        # Write once per axis, after every id_term has been processed, rather
        # than rewriting both files on each id_term iteration.
        with open(f'{experiments_folder}/{experiment}/final_results/{axis}_results.json', 'w', encoding='utf-8') as f:
            json.dump(stereotype_scores, f, indent=4)
        with open(f'{experiments_folder}/{experiment}/final_results/stereotype_tokens_{axis}_results.json', 'w', encoding='utf-8') as f:
            json.dump(stereotype_tokens, f, indent=4)

Processing negative_framing
Exception occurred for experiment negative_framing, axis religion, id_term Muslim, token terrorist : 'offensive_terms'
Processing positive_framing
Exception occurred for experiment positive_framing, axis religion, id_term Muslim, token terrorist : 'offensive_terms'
Processing adv_inc
Exception occurred for experiment adv_inc, axis religion, id_term Muslim, token terrorist : 'offensive_terms'
Processing adv_perspective_shift
Exception occurred for experiment adv_perspective_shift, axis religion, id_term Muslim, token terrorist : 'offensive_terms'
Processing adv_dec
Exception occurred for experiment adv_dec, axis religion, id_term Muslim, token terrorist : 'offensive_terms'
Processing adv_future
Exception occurred for experiment adv_future, axis religion, id_term Muslim, token terrorist : 'offensive_terms'
Processing vanilla
Exception occurred for experiment vanilla, axis religion, id_term Muslim, token terrorist : 'offensive_terms'
Processing adv_present
Exce