In [16]:
import json
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [11]:
experiments_folder = '../data'
utils_folder = '../../utils'
results_folder = '../results'

In [6]:
axes = ['region', 'religion']

In [7]:
experiment_sets = {
    'original': ['vanilla'],
    'framing': ['neutral_framing', 'positive_framing', 'negative_framing'],
    'temporal': ['adv_past', 'adv_present', 'adv_future'],
    'perspective': ['adv_perspective_shift'],
    'quantifier': ['adv_inc', 'adv_dec']
}

### Getting results for each experiment set and storing it

Here I am making final json files for each experiment set. This is to ensure that it can be accessed easily later. 

_Note: I had earlier done this with the help of Gemini to get the markdown tables, after the initial preprocessing step which was completely same. Then I realised I an just leverage pandas and it's amazingness._

In [14]:
for axis in axes:
    for experiment_set in experiment_sets:
        results = {}
        for experiment in experiment_sets[experiment_set]:
            data = json.load(open(f'{experiments_folder}/{experiment}/final_results/stereotypes/{axis}_tokens.json'))
            for id_term in data:
                if data[id_term]:
                    if id_term not in results:
                        results[id_term] = {}
                    results[id_term][experiment] = []
                    for subcategory in data[id_term]:
                        results[id_term][experiment].extend(data[id_term][subcategory])
        
        results_df = pd.DataFrame.from_dict(results, orient='index')
        results_df.index.name = 'id_term'
        os.makedirs(f'{results_folder}/{axis}/stereotype_csv', exist_ok=True)
        results_df.to_csv(f'{results_folder}/{axis}/stereotype_csv/{experiment_set}.csv')

### Calculating cumulative "stereotype scores" and plotting graphs for each experiment set

In [18]:
for axis in axes:
    for experiment_set in experiment_sets:
        results = {}
        for experiment in experiment_sets[experiment_set]:
            data = json.load(open(f'{experiments_folder}/{experiment}/final_results/stereotypes/{axis}_token_scores.json'))
            for id_term in data:
                if data[id_term]:
                    if id_term not in results:
                        results[id_term] = {}
                    results[id_term][experiment] = 0
                    for subcategory in data[id_term]:
                        results[id_term][experiment] += data[id_term][subcategory]

        results_df = pd.DataFrame.from_dict(results, orient='index')
        results_df.index.name = 'id_term'
        os.makedirs(f'{results_folder}/{axis}/stereotype_scores_csv', exist_ok=True)
        results_df.to_csv(f'{results_folder}/{axis}/stereotype_scores_csv/{experiment_set}.csv')

        # plotting the results
        results_df = results_df.reset_index()
        results_df = results_df.melt(id_vars='id_term', var_name='experiment', value_name='score')
        fig = px.bar(results_df, x='id_term', y='score', color='experiment', barmode='group')
        fig.update_layout(title=f'{axis} stereotypes scores for {experiment_set} experiments')
        # save the plot
        os.makedirs(f'{results_folder}/{axis}/stereotype_scores_plots', exist_ok=True)
        fig.write_image(f'{results_folder}/{axis}/stereotype_scores_plots/{experiment_set}.png')
        fig.write_html(f'{results_folder}/{axis}/stereotype_scores_plots/{experiment_set}.html')

### Doing the above two things for non stereotypes

In [19]:
for axis in axes:
    for experiment_set in experiment_sets:
        results = {}
        for experiment in experiment_sets[experiment_set]:
            data = json.load(open(f'{experiments_folder}/{experiment}/final_results/non_stereotypes/{axis}_tokens.json'))
            for id_term in data:
                if data[id_term]:
                    if id_term not in results:
                        results[id_term] = {}
                    results[id_term][experiment] = []
                    for subcategory in data[id_term]:
                        results[id_term][experiment].extend(data[id_term][subcategory])
        
        results_df = pd.DataFrame.from_dict(results, orient='index')
        results_df.index.name = 'id_term'
        os.makedirs(f'{results_folder}/{axis}/non_stereotype_csv', exist_ok=True)
        results_df.to_csv(f'{results_folder}/{axis}/non_stereotype_csv/{experiment_set}.csv')

In [20]:
for axis in axes:
    for experiment_set in experiment_sets:
        results = {}
        for experiment in experiment_sets[experiment_set]:
            data = json.load(open(f'{experiments_folder}/{experiment}/final_results/non_stereotypes/{axis}_token_scores.json'))
            for id_term in data:
                if data[id_term]:
                    if id_term not in results:
                        results[id_term] = {}
                    results[id_term][experiment] = 0
                    for subcategory in data[id_term]:
                        results[id_term][experiment] += data[id_term][subcategory]

        results_df = pd.DataFrame.from_dict(results, orient='index')
        results_df.index.name = 'id_term'
        os.makedirs(f'{results_folder}/{axis}/non_stereotype_scores_csv', exist_ok=True)
        results_df.to_csv(f'{results_folder}/{axis}/non_stereotype_scores_csv/{experiment_set}.csv')

        # plotting the results
        results_df = results_df.reset_index()
        results_df = results_df.melt(id_vars='id_term', var_name='experiment', value_name='score')
        fig = px.bar(results_df, x='id_term', y='score', color='experiment', barmode='group')
        fig.update_layout(title=f'{axis} non stereotype scores for {experiment_set} experiments')
        # save the plot
        os.makedirs(f'{results_folder}/{axis}/non_stereotype_scores_plots', exist_ok=True)
        fig.write_image(f'{results_folder}/{axis}/non_stereotype_scores_plots/{experiment_set}.png')
        fig.write_html(f'{results_folder}/{axis}/non_stereotype_scores_plots/{experiment_set}.html')

### Finding the tokens which are least likely to be generated by the model

- Find the frequency of each of the tokens 
- Find the least frequent tokens 
- See which identity terms those tokens are being linked to

In [59]:
for axis in axes:
    for experiment_set in ['original']:
        results = {}
        data = pd.read_csv(f'{results_folder}/{axis}/non_stereotype_csv/{experiment_set}.csv')
        for row in data.iterrows():
            token_list = row[1].values[1:]
            token_list = token_list[0][1:-1].split(', ')
            for token in token_list:
                if token not in results:
                    results[token] = 0
                results[token] += 1

        results_df = pd.DataFrame.from_dict(results, orient='index')
        results_df.index.name = 'id_term'
        results_df.columns = ['count']
        results_df = results_df.sort_values(by='count', ascending=False)
        results_df = results_df.reset_index()

        top_5 = results_df.head(5)
        bottom_5 = results_df.tail(5)

        top_tokens_map = {}
        # find identity terms which do not have the top 5 non-stereotypes
        for row in data.iterrows():
            id_term = row[1].values[0]
            token_list = row[1].values[1:]
            token_list = token_list[0][1:-1].split(', ')
            for token in top_5['id_term'].values:
                if token not in token_list:
                    if token not in top_tokens_map:
                        top_tokens_map[token] = []
                    top_tokens_map[token].append(id_term)
        for token in top_tokens_map:
            top_tokens_map[token] = ', '.join(top_tokens_map[token])
        top_tokens_map = pd.DataFrame.from_dict(top_tokens_map, orient='index')
        top_tokens_map.index.name = 'token'
        top_tokens_map.columns = ['id_terms']
        print(f'{axis} {experiment_set} top 5 non-stereotypes')
        print(top_tokens_map.to_markdown())

        bottom_tokens_map = {}
        # find identity terms which have the bottom 5 non-stereotypes
        for row in data.iterrows():
            id_term = row[1].values[0]
            token_list = row[1].values[1:]
            token_list = token_list[0][1:-1].split(', ')
            for token in bottom_5['id_term'].values:
                if token in token_list:
                    if token not in bottom_tokens_map:
                        bottom_tokens_map[token] = []
                    bottom_tokens_map[token].append(id_term)
        for token in bottom_tokens_map:
            bottom_tokens_map[token] = ', '.join(bottom_tokens_map[token])
        bottom_tokens_map = pd.DataFrame.from_dict(bottom_tokens_map, orient='index')
        bottom_tokens_map.index.name = 'token'
        bottom_tokens_map.columns = ['id_terms']
        print(f'{axis} {experiment_set} bottom 5 non-stereotypes')
        print(bottom_tokens_map.to_markdown())

region original top 5 non-stereotypes
| token        | id_terms                                               |
|:-------------|:-------------------------------------------------------|
| 'science'    | Gujarati, Tamilian                                     |
| 'writer'     | Himachali                                              |
| 'journalist' | Himachali, Telugu                                      |
| 'history'    | Himachali, Madhya pradeshi, Rajasthani, Uttar pradeshi |
| 'teacher'    | Tripuri                                                |
region original bottom 5 non-stereotypes
| token      | id_terms                                                  |
|:-----------|:----------------------------------------------------------|
| 'musician' | Goan, Kannadiga, Tripuri                                  |
| 'painter'  | Goan, Rajasthani                                          |
| 'poor'     | Jharkhandi, Madhya pradeshi, Uttar pradeshi, Uttarakhandi |
| 'artist'   | Tripuri      