In [1]:
# Copyright (c) 2021 Maryan Rizinski, Kostadin Mishev, Hristijan Peshov
 
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this library except in compliance with the License.
# You may obtain a copy of the License at
 
# [www.apache.org/licenses/LICENSE-2.0](http://www.apache.org/licenses/LICENSE-2.0)
 
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

# 1. Using transformers to calculate cosine similarity between ethical principles

In [None]:
cd /content/drive/MyDrive/MLFinanceEthics/results

In [None]:
mkdir results

In [None]:
!ls

In [5]:
import pandas as pd
from itertools import product

## Read dataset

In [6]:
# Retrieve long and short definitions of ethical principles in finance and machine learning
fin_dataset = pd.read_excel("dataset_ethics.xlsx",sheet_name='Finance')
ml_dataset = pd.read_excel("dataset_ethics.xlsx",sheet_name='ML')

In [7]:
# Print the ethical principles of machine learning
ml_dataset.head()

Unnamed: 0,Principle,Long definition,Short definition
0,"Inclusive growth, sustainable development and ...",This principle states that AI should be develo...,Trustworthy AI should contribute to overall gr...
1,Human-centered values and fairness,"Based on this principle, AI should be develope...",AI systems should be designed in a way that re...
2,Transparency and explainability,Transparency defined in this principle has two...,Transparent and responsible disclosure around ...
3,"Robustness, security and safety",This principle states that AI systems must be ...,"AI systems must function in a robust, secure a..."
4,Accountability,"\nAccording to this principle, organisations a...","Organisations and individuals developing, depl..."


In [8]:
# Print the ethical principles of finance
fin_dataset.head()

Unnamed: 0,Principle,Long definition,Short definition
0,Integrity,Acting with integrity is one of the main princ...,"Moral self-governance, autonomy, trustworthine..."
1,Objectivity,Objectivity is ground on the subordination of ...,Protecting and advancing the interests of clie...
2,Competence,Professionals are obligated by to maintain the...,Rendering competent financial services to clie...
3,Fairness,The principle of fairness is an integral part ...,"Treating customers equitably, consistently app..."
4,Confidentiality,Confidentiality is the obligation to hold clie...,"Handling client relationships with confidence,..."


In [9]:
ds_principles = pd.DataFrame(list(product(fin_dataset['Principle'], ml_dataset['Principle'])),columns=["Finance","ML"])
ds_short_defs = pd.DataFrame(list(product(fin_dataset['Short definition'], ml_dataset['Short definition'])),columns=["Finance","ML"])
ds_long_defs = pd.DataFrame(list(product(fin_dataset['Long definition'], ml_dataset['Long definition'])),columns=["Finance","ML"])

In [None]:
!pip install transformers
!pip install sentence-transformers

In [11]:
from sentence_transformers import SentenceTransformer, util
import numpy as np

## Define transformer models

In [13]:
# Define transformer models that will be used in the experiment
models=['sentence-transformers/LaBSE','sentence-transformers/allenai-specter',
        'sentence-transformers/average_word_embeddings_glove.6B.300d','sentence-transformers/average_word_embeddings_glove.840B.300d',
        'sentence-transformers/average_word_embeddings_komninos','sentence-transformers/average_word_embeddings_levy_dependency',
        'sentence-transformers/bert-base-nli-cls-token','sentence-transformers/bert-base-nli-max-tokens',
        'sentence-transformers/bert-base-nli-mean-tokens','sentence-transformers/bert-base-nli-stsb-mean-tokens',
        'sentence-transformers/bert-base-wikipedia-sections-mean-tokens','sentence-transformers/bert-large-nli-cls-token',
        'sentence-transformers/bert-large-nli-max-tokens','sentence-transformers/bert-large-nli-mean-tokens',
        'sentence-transformers/bert-large-nli-stsb-mean-tokens','sentence-transformers/bert-large-nli-mean-tokens',
        'sentence-transformers/distilbert-base-nli-max-tokens',
        'sentence-transformers/distilbert-base-nli-mean-tokens','sentence-transformers/distilbert-base-nli-stsb-mean-tokens',
        'sentence-transformers/distilbert-base-nli-stsb-quora-ranking','sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking',
        'sentence-transformers/distilroberta-base-msmarco-v1','sentence-transformers/distilroberta-base-msmarco-v2',
        'sentence-transformers/distilroberta-base-paraphrase-v1','sentence-transformers/distiluse-base-multilingual-cased-v1',
        'sentence-transformers/distiluse-base-multilingual-cased-v2','sentence-transformers/distiluse-base-multilingual-cased',
        'sentence-transformers/facebook-dpr-ctx_encoder-multiset-base','sentence-transformers/facebook-dpr-ctx_encoder-single-nq-base',
        'sentence-transformers/facebook-dpr-question_encoder-multiset-base','sentence-transformers/facebook-dpr-question_encoder-single-nq-base',
        'sentence-transformers/msmarco-MiniLM-L-12-v3','sentence-transformers/msmarco-MiniLM-L-6-v3',
        'sentence-transformers/msmarco-distilbert-base-dot-prod-v3','sentence-transformers/msmarco-distilbert-base-tas-b',
        'sentence-transformers/msmarco-distilbert-base-v2','sentence-transformers/msmarco-distilbert-base-v3',
        'sentence-transformers/msmarco-distilbert-base-v4','sentence-transformers/msmarco-distilbert-multilingual-en-de-v2-tmp-lng-aligned',
        'sentence-transformers/msmarco-distilbert-multilingual-en-de-v2-tmp-trained-scratch','sentence-transformers/msmarco-distilroberta-base-v2',
        'sentence-transformers/msmarco-roberta-base-ance-firstp','sentence-transformers/msmarco-roberta-base-v2',
        'sentence-transformers/msmarco-roberta-base-v3','sentence-transformers/nli-bert-base-cls-pooling',
        'sentence-transformers/nli-bert-base-max-pooling','sentence-transformers/nli-bert-base',
        'sentence-transformers/nli-bert-large-cls-pooling','sentence-transformers/nli-bert-large-max-pooling',
        'sentence-transformers/nli-bert-large','sentence-transformers/nli-distilbert-base-max-pooling',
        'sentence-transformers/nli-distilbert-base','sentence-transformers/nli-distilroberta-base-v2',
        'sentence-transformers/nli-mpnet-base-v2','sentence-transformers/nli-roberta-base-v2',
        'sentence-transformers/nli-roberta-base','sentence-transformers/nli-roberta-large',
        'sentence-transformers/nq-distilbert-base-v1','sentence-transformers/paraphrase-MiniLM-L12-v2',
        'sentence-transformers/paraphrase-MiniLM-L3-v2','sentence-transformers/paraphrase-MiniLM-L6-v2',
        'sentence-transformers/paraphrase-TinyBERT-L6-v2','sentence-transformers/paraphrase-albert-base-v2',
        'sentence-transformers/paraphrase-albert-small-v2','sentence-transformers/paraphrase-distilroberta-base-v1',
        'sentence-transformers/paraphrase-distilroberta-base-v2','sentence-transformers/paraphrase-mpnet-base-v2',
        'sentence-transformers/stsb-bert-base','sentence-transformers/stsb-bert-large',
        'sentence-transformers/stsb-distilbert-base','sentence-transformers/stsb-distilroberta-base-v2',
        'sentence-transformers/stsb-mpnet-base-v2','sentence-transformers/stsb-roberta-base-v2',
        'sentence-transformers/stsb-roberta-base','sentence-transformers/stsb-roberta-base',
        'sentence-transformers/stsb-roberta-large','sentence-transformers/stsb-xlm-r-multilingual',
        'sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens','sentence-transformers/xlm-r-100langs-bert-base-nli-stsb-mean-tokens',
        'sentence-transformers/xlm-r-bert-base-nli-mean-tokens','sentence-transformers/xlm-r-bert-base-nli-stsb-mean-tokens',
        'sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1',
        'all-mpnet-base-v2']

## Calculate cosine similarity across all transformers

In [14]:
# Calculate cosine similarity between ethical principles across all transformers
def calc_sim(row, model,label):
  sentence1 = row['Finance']
  sentence2 = row['ML']
  embedding1 = model.encode(sentence1, convert_to_tensor=True)
  embedding2 = model.encode(sentence2, convert_to_tensor=True)
  cosine_scores = util.pytorch_cos_sim(embedding1, embedding2)
  row[label]=cosine_scores.item()
  return row;

In [None]:
# Perform the experiment
for model_name in models:
  print("MODEL NAME:"+model_name)
  model = SentenceTransformer(model_name)
  ds_principles = ds_principles.apply(lambda row: calc_sim(row,model,model_name),axis=1)
  ds_short_defs = ds_short_defs.apply(lambda row: calc_sim(row,model,model_name),axis=1)
  ds_long_defs = ds_long_defs.apply(lambda row: calc_sim(row,model,model_name),axis=1)
  ds_principles.to_pickle("./results/ds_principles.pickle")
  ds_short_defs.to_pickle("./results/ds_short_defs.pickle")
  ds_long_defs.to_pickle("./results/ds_long_defs.pickle")

In [16]:
ds_principles = ds_principles.apply(lambda row: calc_sim(row,model,'stsb-roberta-large'),axis=1)
ds_short_defs = ds_short_defs.apply(lambda row: calc_sim(row,model,'stsb-roberta-large'),axis=1)
ds_long_defs = ds_long_defs.apply(lambda row: calc_sim(row,model,'stsb-roberta-large'),axis=1)

In [17]:
ds_short_defs.head()

Unnamed: 0,Finance,ML,sentence-transformers/LaBSE,sentence-transformers/allenai-specter,sentence-transformers/average_word_embeddings_glove.6B.300d,sentence-transformers/average_word_embeddings_glove.840B.300d,sentence-transformers/average_word_embeddings_komninos,sentence-transformers/average_word_embeddings_levy_dependency,sentence-transformers/bert-base-nli-cls-token,sentence-transformers/bert-base-nli-max-tokens,sentence-transformers/bert-base-nli-mean-tokens,sentence-transformers/bert-base-nli-stsb-mean-tokens,sentence-transformers/bert-base-wikipedia-sections-mean-tokens,sentence-transformers/bert-large-nli-cls-token,sentence-transformers/bert-large-nli-max-tokens,sentence-transformers/bert-large-nli-mean-tokens,sentence-transformers/bert-large-nli-stsb-mean-tokens,sentence-transformers/distilbert-base-nli-max-tokens,sentence-transformers/distilbert-base-nli-mean-tokens,sentence-transformers/distilbert-base-nli-stsb-mean-tokens,sentence-transformers/distilbert-base-nli-stsb-quora-ranking,sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,sentence-transformers/distilroberta-base-msmarco-v1,sentence-transformers/distilroberta-base-msmarco-v2,sentence-transformers/distilroberta-base-paraphrase-v1,sentence-transformers/distiluse-base-multilingual-cased-v1,sentence-transformers/distiluse-base-multilingual-cased-v2,sentence-transformers/distiluse-base-multilingual-cased,sentence-transformers/facebook-dpr-ctx_encoder-multiset-base,sentence-transformers/facebook-dpr-ctx_encoder-single-nq-base,sentence-transformers/facebook-dpr-question_encoder-multiset-base,sentence-transformers/facebook-dpr-question_encoder-single-nq-base,sentence-transformers/msmarco-MiniLM-L-12-v3,sentence-transformers/msmarco-MiniLM-L-6-v3,sentence-transformers/msmarco-distilbert-base-dot-prod-v3,sentence-transformers/msmarco-distilbert-base-tas-b,sentence-transformers/msmarco-distilbert-base-v2,sentence-transformers/msmarco-distilbert-base-v3,sentence-transformers/msmarco-distilbert-base-v4,sentence-transformers/msmarco-distilbert-multilingual-en-de-v2-tmp-lng-aligned,...,sentence-transformers/msmarco-roberta-base-v3,sentence-transformers/nli-bert-base-cls-pooling,sentence-transformers/nli-bert-base-max-pooling,sentence-transformers/nli-bert-base,sentence-transformers/nli-bert-large-cls-pooling,sentence-transformers/nli-bert-large-max-pooling,sentence-transformers/nli-bert-large,sentence-transformers/nli-distilbert-base-max-pooling,sentence-transformers/nli-distilbert-base,sentence-transformers/nli-distilroberta-base-v2,sentence-transformers/nli-mpnet-base-v2,sentence-transformers/nli-roberta-base-v2,sentence-transformers/nli-roberta-base,sentence-transformers/nli-roberta-large,sentence-transformers/nq-distilbert-base-v1,sentence-transformers/paraphrase-MiniLM-L12-v2,sentence-transformers/paraphrase-MiniLM-L3-v2,sentence-transformers/paraphrase-MiniLM-L6-v2,sentence-transformers/paraphrase-TinyBERT-L6-v2,sentence-transformers/paraphrase-albert-base-v2,sentence-transformers/paraphrase-albert-small-v2,sentence-transformers/paraphrase-distilroberta-base-v1,sentence-transformers/paraphrase-distilroberta-base-v2,sentence-transformers/paraphrase-mpnet-base-v2,sentence-transformers/stsb-bert-base,sentence-transformers/stsb-bert-large,sentence-transformers/stsb-distilbert-base,sentence-transformers/stsb-distilroberta-base-v2,sentence-transformers/stsb-mpnet-base-v2,sentence-transformers/stsb-roberta-base-v2,sentence-transformers/stsb-roberta-base,sentence-transformers/stsb-roberta-large,sentence-transformers/stsb-xlm-r-multilingual,sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens,sentence-transformers/xlm-r-100langs-bert-base-nli-stsb-mean-tokens,sentence-transformers/xlm-r-bert-base-nli-mean-tokens,sentence-transformers/xlm-r-bert-base-nli-stsb-mean-tokens,sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1,all-mpnet-base-v2,stsb-roberta-large
0,"Moral self-governance, autonomy, trustworthine...",Trustworthy AI should contribute to overall gr...,0.313011,0.788788,0.525359,0.571049,0.720031,0.788346,0.641809,0.726695,0.5971,0.537367,0.987444,0.554548,0.669974,0.564894,0.288724,0.653201,0.605308,0.37411,0.395251,0.73863,0.293121,0.235861,0.340354,0.149044,0.171142,0.171142,0.767432,0.701668,0.725447,0.786437,0.362587,0.219561,0.538891,0.793902,0.387327,0.326462,0.363784,0.384942,...,0.133909,0.641809,0.726695,0.5971,0.554548,0.669974,0.564894,0.653201,0.605308,0.314517,0.398265,0.317112,0.556002,0.593743,0.28995,0.391302,0.33266,0.320597,0.303291,0.369229,0.282471,0.340354,0.300089,0.397865,0.537367,0.288724,0.37411,0.241276,0.298516,0.21535,0.458783,0.250536,0.539143,0.610239,0.539143,0.610239,0.539143,0.416844,0.261389,0.261389
1,"Moral self-governance, autonomy, trustworthine...",AI systems should be designed in a way that re...,0.467257,0.782522,0.678282,0.629304,0.748433,0.814385,0.789429,0.825633,0.748991,0.666821,0.98548,0.691178,0.785428,0.712844,0.37282,0.716211,0.708477,0.45445,0.4322,0.76368,0.184442,0.100919,0.411624,0.170563,0.195778,0.195778,0.770675,0.712685,0.770666,0.824064,0.274236,0.265393,0.490012,0.763096,0.343952,0.324036,0.248636,0.332289,...,0.011504,0.789429,0.825633,0.748991,0.691178,0.785428,0.712844,0.716211,0.708477,0.373913,0.463654,0.461325,0.727007,0.71184,0.355073,0.33359,0.366719,0.338117,0.344973,0.329935,0.302253,0.411624,0.29865,0.337958,0.666821,0.37282,0.45445,0.309416,0.419768,0.349737,0.456573,0.373851,0.692615,0.770124,0.692615,0.770124,0.692615,0.443529,0.343338,0.343338
2,"Moral self-governance, autonomy, trustworthine...",Transparent and responsible disclosure around ...,0.247537,0.769409,0.690555,0.606089,0.734101,0.805628,0.640556,0.779143,0.613223,0.477818,0.990017,0.629253,0.715772,0.637625,0.297339,0.658693,0.600226,0.395391,0.461196,0.750607,0.272992,0.36874,0.32655,0.181638,0.187057,0.187057,0.773864,0.715824,0.753906,0.79312,0.288414,0.291715,0.518242,0.793588,0.363083,0.310366,0.306164,0.419778,...,0.166891,0.640556,0.779143,0.613223,0.629253,0.715772,0.637625,0.658693,0.600226,0.407513,0.369859,0.457853,0.58502,0.596017,0.315365,0.283894,0.313246,0.346497,0.327832,0.418126,0.299105,0.32655,0.311737,0.362101,0.477818,0.297339,0.395391,0.367922,0.26738,0.400549,0.264902,0.257002,0.528633,0.638603,0.528633,0.638603,0.528633,0.361073,0.150961,0.150961
3,"Moral self-governance, autonomy, trustworthine...","AI systems must function in a robust, secure a...",0.460776,0.731835,0.541989,0.516778,0.677451,0.766191,0.777567,0.832374,0.69584,0.605844,0.979246,0.661998,0.724271,0.670636,0.371651,0.757499,0.696518,0.429193,0.479754,0.728421,0.201049,0.070818,0.325909,0.194365,0.1857,0.1857,0.763334,0.702749,0.738464,0.81652,0.225992,0.232814,0.461521,0.760147,0.365457,0.283609,0.298121,0.33019,...,0.065247,0.777567,0.832374,0.69584,0.661998,0.724271,0.670636,0.757499,0.696518,0.321908,0.329345,0.346306,0.63205,0.664886,0.367343,0.272017,0.249526,0.272324,0.252359,0.231022,0.20995,0.325909,0.234073,0.247353,0.605844,0.371651,0.429193,0.242168,0.307733,0.255203,0.339592,0.360726,0.607098,0.699053,0.607098,0.699053,0.607098,0.315796,0.192022,0.192022
4,"Moral self-governance, autonomy, trustworthine...","Organisations and individuals developing, depl...",0.394822,0.692191,0.603914,0.575361,0.713081,0.761288,0.719702,0.773742,0.652875,0.578754,0.986165,0.598234,0.708204,0.678364,0.503029,0.715415,0.656294,0.454105,0.483805,0.79773,0.175433,0.183246,0.339291,0.179304,0.22792,0.22792,0.758678,0.710985,0.739951,0.803706,0.234459,0.255711,0.545137,0.780395,0.350394,0.355264,0.305327,0.329521,...,0.186346,0.719702,0.773742,0.652875,0.598234,0.708204,0.678364,0.715415,0.656294,0.429125,0.485314,0.480559,0.595801,0.679975,0.424002,0.391191,0.293866,0.390476,0.346428,0.255765,0.230486,0.339291,0.38008,0.412788,0.578754,0.503029,0.454105,0.394403,0.465263,0.435198,0.375007,0.452732,0.595025,0.655949,0.595025,0.655949,0.595025,0.371844,0.277417,0.277417


## Save Results

In [18]:
import pandas as pd
ds_principles = pd.read_pickle("/content/results/ds_principles.pickle")
ds_long_defs = pd.read_pickle("/content/results/ds_long_defs.pickle")
ds_short_defs = pd.read_pickle("/content/results/ds_short_defs.pickle")

In [19]:
# Save the results
ds_principles.to_excel("ds_principles.xlsx")
ds_long_defs.to_excel("ds_long_defs.xlsx")
ds_short_defs.to_excel("ds_short_defs.xlsx")

# 2. Comparison between ethical principles of finance and machine learning

## Long and short definitions of ethical principles

In [20]:
long_defs = pd.read_excel('/content/ds_long_defs.xlsx')

In [21]:
long_defs.drop(labels=['Unnamed: 0'], axis=1, inplace=True)

In [22]:
long_defs

Unnamed: 0,Finance,ML,sentence-transformers/LaBSE,sentence-transformers/allenai-specter,sentence-transformers/average_word_embeddings_glove.6B.300d,sentence-transformers/average_word_embeddings_glove.840B.300d,sentence-transformers/average_word_embeddings_komninos,sentence-transformers/average_word_embeddings_levy_dependency,sentence-transformers/bert-base-nli-cls-token,sentence-transformers/bert-base-nli-max-tokens,sentence-transformers/bert-base-nli-mean-tokens,sentence-transformers/bert-base-nli-stsb-mean-tokens,sentence-transformers/bert-base-wikipedia-sections-mean-tokens,sentence-transformers/bert-large-nli-cls-token,sentence-transformers/bert-large-nli-max-tokens,sentence-transformers/bert-large-nli-mean-tokens,sentence-transformers/bert-large-nli-stsb-mean-tokens,sentence-transformers/distilbert-base-nli-max-tokens,sentence-transformers/distilbert-base-nli-mean-tokens,sentence-transformers/distilbert-base-nli-stsb-mean-tokens,sentence-transformers/distilbert-base-nli-stsb-quora-ranking,sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,sentence-transformers/distilroberta-base-msmarco-v1,sentence-transformers/distilroberta-base-msmarco-v2,sentence-transformers/distilroberta-base-paraphrase-v1,sentence-transformers/distiluse-base-multilingual-cased-v1,sentence-transformers/distiluse-base-multilingual-cased-v2,sentence-transformers/distiluse-base-multilingual-cased,sentence-transformers/facebook-dpr-ctx_encoder-multiset-base,sentence-transformers/facebook-dpr-ctx_encoder-single-nq-base,sentence-transformers/facebook-dpr-question_encoder-multiset-base,sentence-transformers/facebook-dpr-question_encoder-single-nq-base,sentence-transformers/msmarco-MiniLM-L-12-v3,sentence-transformers/msmarco-MiniLM-L-6-v3,sentence-transformers/msmarco-distilbert-base-dot-prod-v3,sentence-transformers/msmarco-distilbert-base-tas-b,sentence-transformers/msmarco-distilbert-base-v2,sentence-transformers/msmarco-distilbert-base-v3,sentence-transformers/msmarco-distilbert-base-v4,sentence-transformers/msmarco-distilbert-multilingual-en-de-v2-tmp-lng-aligned,...,sentence-transformers/msmarco-roberta-base-v2,sentence-transformers/msmarco-roberta-base-v3,sentence-transformers/nli-bert-base-cls-pooling,sentence-transformers/nli-bert-base-max-pooling,sentence-transformers/nli-bert-base,sentence-transformers/nli-bert-large-cls-pooling,sentence-transformers/nli-bert-large-max-pooling,sentence-transformers/nli-bert-large,sentence-transformers/nli-distilbert-base-max-pooling,sentence-transformers/nli-distilbert-base,sentence-transformers/nli-distilroberta-base-v2,sentence-transformers/nli-mpnet-base-v2,sentence-transformers/nli-roberta-base-v2,sentence-transformers/nli-roberta-base,sentence-transformers/nli-roberta-large,sentence-transformers/nq-distilbert-base-v1,sentence-transformers/paraphrase-MiniLM-L12-v2,sentence-transformers/paraphrase-MiniLM-L3-v2,sentence-transformers/paraphrase-MiniLM-L6-v2,sentence-transformers/paraphrase-TinyBERT-L6-v2,sentence-transformers/paraphrase-albert-base-v2,sentence-transformers/paraphrase-albert-small-v2,sentence-transformers/paraphrase-distilroberta-base-v1,sentence-transformers/paraphrase-distilroberta-base-v2,sentence-transformers/paraphrase-mpnet-base-v2,sentence-transformers/stsb-bert-base,sentence-transformers/stsb-bert-large,sentence-transformers/stsb-distilbert-base,sentence-transformers/stsb-distilroberta-base-v2,sentence-transformers/stsb-mpnet-base-v2,sentence-transformers/stsb-roberta-base-v2,sentence-transformers/stsb-roberta-base,sentence-transformers/stsb-roberta-large,sentence-transformers/stsb-xlm-r-multilingual,sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens,sentence-transformers/xlm-r-100langs-bert-base-nli-stsb-mean-tokens,sentence-transformers/xlm-r-bert-base-nli-mean-tokens,sentence-transformers/xlm-r-bert-base-nli-stsb-mean-tokens,sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1,all-mpnet-base-v2
0,Acting with integrity is one of the main princ...,This principle states that AI should be develo...,0.387797,0.680879,0.66716,0.801566,0.824382,0.888241,0.612051,0.832134,0.568487,0.437207,0.98359,0.589932,0.732759,0.607855,0.306911,0.771445,0.56832,0.286527,0.317793,0.74054,0.412875,0.436624,0.289736,0.24982,0.219538,0.219538,0.800962,0.783218,0.760515,0.805704,0.259434,0.212309,0.55788,0.799752,0.264852,0.230832,0.331237,0.456326,...,0.229421,0.275544,0.612051,0.832134,0.568487,0.589932,0.732759,0.607855,0.771445,0.56832,0.37448,0.399271,0.323093,0.70781,0.703064,0.41424,0.261045,0.267704,0.225396,0.299763,0.377759,0.252695,0.289736,0.312066,0.404549,0.437207,0.306911,0.286527,0.267032,0.307815,0.306581,0.46878,0.363594,0.413045,0.574273,0.413045,0.574273,0.413045,0.310636,0.400738
1,Acting with integrity is one of the main princ...,"Based on this principle, AI should be develope...",0.518087,0.745447,0.815029,0.876799,0.888299,0.91948,0.730444,0.870336,0.713398,0.620114,0.987712,0.69858,0.802397,0.767844,0.472762,0.836241,0.734272,0.516077,0.452976,0.807475,0.451479,0.440382,0.451179,0.296988,0.307216,0.307216,0.826488,0.812107,0.79034,0.825664,0.31429,0.301594,0.577851,0.815723,0.345322,0.307269,0.37294,0.374687,...,0.297298,0.304509,0.730444,0.870336,0.713398,0.69858,0.802397,0.767844,0.836241,0.734272,0.55932,0.5225,0.522096,0.836918,0.787684,0.485726,0.29728,0.427217,0.313201,0.391703,0.416407,0.387962,0.451179,0.394889,0.399563,0.620114,0.472762,0.516077,0.468546,0.44079,0.487328,0.562,0.520983,0.637795,0.719439,0.637795,0.719439,0.637795,0.466095,0.449621
2,Acting with integrity is one of the main princ...,Transparency defined in this principle has two...,0.377956,0.721431,0.716568,0.808795,0.811049,0.870325,0.580072,0.824998,0.571928,0.454645,0.981701,0.614626,0.772451,0.640742,0.379245,0.767338,0.579511,0.365446,0.300576,0.80177,0.407722,0.347968,0.377308,0.279468,0.324685,0.324685,0.786405,0.766192,0.738988,0.798564,0.302502,0.309135,0.523105,0.823649,0.217625,0.279715,0.15206,0.343326,...,0.275771,0.201596,0.580072,0.824998,0.571928,0.614626,0.772451,0.640742,0.767338,0.579511,0.492605,0.414489,0.432611,0.611698,0.644739,0.40188,0.312559,0.329449,0.308017,0.296618,0.411742,0.319232,0.377308,0.335215,0.327839,0.454645,0.379245,0.365446,0.434331,0.302299,0.376266,0.367677,0.283052,0.454105,0.606151,0.454105,0.606151,0.454105,0.359102,0.328345
3,Acting with integrity is one of the main princ...,This principle states that AI systems must be ...,0.493411,0.734399,0.723158,0.804302,0.822913,0.890237,0.733217,0.861202,0.743436,0.673234,0.986642,0.682535,0.795882,0.732904,0.506182,0.807374,0.678074,0.582924,0.543396,0.79514,0.367761,0.271717,0.433183,0.272005,0.287874,0.287874,0.834686,0.81724,0.807887,0.829477,0.298026,0.231046,0.522587,0.823028,0.301729,0.308602,0.233803,0.409416,...,0.203502,0.170379,0.733217,0.861202,0.743436,0.682535,0.795882,0.732904,0.807374,0.678074,0.482455,0.379897,0.441526,0.812843,0.743688,0.498508,0.302139,0.360286,0.309122,0.348532,0.360586,0.304126,0.433183,0.340385,0.385634,0.673234,0.506182,0.582924,0.455902,0.336424,0.433595,0.576557,0.468268,0.667177,0.750001,0.667177,0.750001,0.667177,0.446957,0.341357
4,Acting with integrity is one of the main princ...,"\nAccording to this principle, organisations a...",0.467611,0.76657,0.8046,0.867506,0.871983,0.897001,0.717485,0.851611,0.752373,0.657306,0.988333,0.7316,0.807958,0.751647,0.588504,0.793959,0.685275,0.502478,0.478264,0.802687,0.47119,0.481674,0.540175,0.329606,0.343724,0.343724,0.85177,0.815104,0.819486,0.848591,0.449724,0.368908,0.679676,0.842299,0.451963,0.418882,0.422799,0.550874,...,0.295836,0.326089,0.717485,0.851611,0.752373,0.7316,0.807958,0.751647,0.793959,0.685275,0.585663,0.531271,0.559919,0.740188,0.72561,0.534901,0.483986,0.434475,0.481189,0.448367,0.487895,0.445518,0.540175,0.463737,0.427151,0.657306,0.588504,0.502478,0.553022,0.468255,0.535214,0.58157,0.449862,0.638409,0.723817,0.638409,0.723817,0.638409,0.553596,0.406102
5,Objectivity is ground on the subordination of ...,This principle states that AI should be develo...,0.41149,0.648432,0.786892,0.872619,0.881102,0.920371,0.493206,0.82224,0.498365,0.39664,0.984469,0.514576,0.754581,0.588276,0.35603,0.732688,0.547229,0.368659,0.307048,0.77686,0.359096,0.321074,0.350701,0.183878,0.174416,0.174416,0.763654,0.740696,0.780312,0.835068,0.280097,0.205984,0.560946,0.777921,0.221136,0.18572,0.162397,0.263838,...,0.233769,0.220143,0.493206,0.82224,0.498365,0.514576,0.754581,0.588276,0.732688,0.547229,0.39694,0.41628,0.329921,0.549918,0.595609,0.251786,0.360677,0.309037,0.308767,0.193862,0.406477,0.299933,0.350701,0.272767,0.368569,0.39664,0.35603,0.368659,0.330451,0.33468,0.279052,0.431146,0.41686,0.44631,0.564351,0.44631,0.564351,0.44631,0.401252,0.306453
6,Objectivity is ground on the subordination of ...,"Based on this principle, AI should be develope...",0.523553,0.724341,0.832492,0.899356,0.888391,0.911564,0.549102,0.839616,0.552966,0.444221,0.984787,0.599553,0.809358,0.674446,0.456044,0.774099,0.623757,0.379541,0.381873,0.822344,0.348008,0.36457,0.41611,0.192155,0.164524,0.164524,0.787651,0.766024,0.795842,0.845375,0.274771,0.307047,0.539588,0.805242,0.292909,0.25579,0.213667,0.318056,...,0.279502,0.255445,0.549102,0.839616,0.552966,0.599553,0.809358,0.674446,0.774099,0.623757,0.530203,0.541435,0.536544,0.727439,0.746359,0.283811,0.37403,0.33563,0.346233,0.303425,0.451458,0.490477,0.41611,0.330126,0.376445,0.444221,0.456044,0.379541,0.503926,0.500065,0.476182,0.553692,0.491119,0.514107,0.636266,0.514107,0.636266,0.514107,0.447656,0.316687
7,Objectivity is ground on the subordination of ...,Transparency defined in this principle has two...,0.480815,0.704957,0.818543,0.856225,0.863575,0.890038,0.55228,0.803626,0.548412,0.437235,0.982947,0.53185,0.740086,0.603679,0.461033,0.741869,0.503705,0.316643,0.337661,0.814842,0.376841,0.337129,0.381256,0.194023,0.235297,0.235297,0.785262,0.748046,0.78152,0.834832,0.304861,0.37828,0.534861,0.770856,0.158657,0.18043,0.073876,0.289442,...,0.230191,0.212817,0.55228,0.803626,0.548412,0.53185,0.740086,0.603679,0.741869,0.503705,0.425981,0.42657,0.453285,0.524339,0.586293,0.255605,0.424088,0.248232,0.360281,0.195922,0.404623,0.303709,0.381256,0.265434,0.302952,0.437235,0.461033,0.316643,0.369529,0.276035,0.276857,0.399843,0.387697,0.456469,0.547271,0.456469,0.547271,0.456469,0.373878,0.290271
8,Objectivity is ground on the subordination of ...,This principle states that AI systems must be ...,0.454111,0.669547,0.797848,0.854988,0.875549,0.91183,0.410457,0.768204,0.460546,0.402887,0.986852,0.518325,0.767523,0.593896,0.403757,0.635652,0.424041,0.341494,0.469475,0.782074,0.3117,0.251054,0.367542,0.176541,0.190889,0.190889,0.776547,0.74544,0.802593,0.843513,0.283587,0.232192,0.504416,0.791505,0.153966,0.194245,0.101439,0.146217,...,0.260799,0.180331,0.410457,0.768204,0.460546,0.518325,0.767523,0.593896,0.635652,0.424041,0.324061,0.300466,0.388383,0.596888,0.525895,0.28464,0.341243,0.202243,0.304263,0.186065,0.328434,0.261931,0.367542,0.236081,0.335618,0.402887,0.403757,0.341494,0.265175,0.2438,0.22548,0.426671,0.280197,0.404855,0.450496,0.404855,0.450496,0.404855,0.385643,0.259552
9,Objectivity is ground on the subordination of ...,"\nAccording to this principle, organisations a...",0.423634,0.698198,0.789235,0.870432,0.876406,0.894781,0.459548,0.748359,0.517685,0.410678,0.984058,0.51982,0.704329,0.543469,0.392627,0.667846,0.499282,0.34462,0.363913,0.779582,0.288201,0.321082,0.358956,0.139811,0.129849,0.129849,0.779614,0.73038,0.779841,0.830716,0.238679,0.311024,0.617224,0.775055,0.272177,0.272224,0.215664,0.280891,...,0.283063,0.26105,0.459548,0.748359,0.517685,0.51982,0.704329,0.543469,0.667846,0.499282,0.296803,0.426043,0.370532,0.57545,0.569023,0.227619,0.351021,0.195929,0.39442,0.202646,0.341051,0.298241,0.358956,0.251108,0.340342,0.410678,0.392627,0.34462,0.168513,0.302483,0.188532,0.438751,0.21658,0.443657,0.52474,0.443657,0.52474,0.443657,0.370488,0.288626


In [23]:
short_defs = pd.read_excel('/content/ds_short_defs.xlsx')

In [24]:
short_defs.drop(labels=['Unnamed: 0'], axis=1, inplace=True)

In [25]:
short_defs

Unnamed: 0,Finance,ML,sentence-transformers/LaBSE,sentence-transformers/allenai-specter,sentence-transformers/average_word_embeddings_glove.6B.300d,sentence-transformers/average_word_embeddings_glove.840B.300d,sentence-transformers/average_word_embeddings_komninos,sentence-transformers/average_word_embeddings_levy_dependency,sentence-transformers/bert-base-nli-cls-token,sentence-transformers/bert-base-nli-max-tokens,sentence-transformers/bert-base-nli-mean-tokens,sentence-transformers/bert-base-nli-stsb-mean-tokens,sentence-transformers/bert-base-wikipedia-sections-mean-tokens,sentence-transformers/bert-large-nli-cls-token,sentence-transformers/bert-large-nli-max-tokens,sentence-transformers/bert-large-nli-mean-tokens,sentence-transformers/bert-large-nli-stsb-mean-tokens,sentence-transformers/distilbert-base-nli-max-tokens,sentence-transformers/distilbert-base-nli-mean-tokens,sentence-transformers/distilbert-base-nli-stsb-mean-tokens,sentence-transformers/distilbert-base-nli-stsb-quora-ranking,sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,sentence-transformers/distilroberta-base-msmarco-v1,sentence-transformers/distilroberta-base-msmarco-v2,sentence-transformers/distilroberta-base-paraphrase-v1,sentence-transformers/distiluse-base-multilingual-cased-v1,sentence-transformers/distiluse-base-multilingual-cased-v2,sentence-transformers/distiluse-base-multilingual-cased,sentence-transformers/facebook-dpr-ctx_encoder-multiset-base,sentence-transformers/facebook-dpr-ctx_encoder-single-nq-base,sentence-transformers/facebook-dpr-question_encoder-multiset-base,sentence-transformers/facebook-dpr-question_encoder-single-nq-base,sentence-transformers/msmarco-MiniLM-L-12-v3,sentence-transformers/msmarco-MiniLM-L-6-v3,sentence-transformers/msmarco-distilbert-base-dot-prod-v3,sentence-transformers/msmarco-distilbert-base-tas-b,sentence-transformers/msmarco-distilbert-base-v2,sentence-transformers/msmarco-distilbert-base-v3,sentence-transformers/msmarco-distilbert-base-v4,sentence-transformers/msmarco-distilbert-multilingual-en-de-v2-tmp-lng-aligned,...,sentence-transformers/msmarco-roberta-base-v2,sentence-transformers/msmarco-roberta-base-v3,sentence-transformers/nli-bert-base-cls-pooling,sentence-transformers/nli-bert-base-max-pooling,sentence-transformers/nli-bert-base,sentence-transformers/nli-bert-large-cls-pooling,sentence-transformers/nli-bert-large-max-pooling,sentence-transformers/nli-bert-large,sentence-transformers/nli-distilbert-base-max-pooling,sentence-transformers/nli-distilbert-base,sentence-transformers/nli-distilroberta-base-v2,sentence-transformers/nli-mpnet-base-v2,sentence-transformers/nli-roberta-base-v2,sentence-transformers/nli-roberta-base,sentence-transformers/nli-roberta-large,sentence-transformers/nq-distilbert-base-v1,sentence-transformers/paraphrase-MiniLM-L12-v2,sentence-transformers/paraphrase-MiniLM-L3-v2,sentence-transformers/paraphrase-MiniLM-L6-v2,sentence-transformers/paraphrase-TinyBERT-L6-v2,sentence-transformers/paraphrase-albert-base-v2,sentence-transformers/paraphrase-albert-small-v2,sentence-transformers/paraphrase-distilroberta-base-v1,sentence-transformers/paraphrase-distilroberta-base-v2,sentence-transformers/paraphrase-mpnet-base-v2,sentence-transformers/stsb-bert-base,sentence-transformers/stsb-bert-large,sentence-transformers/stsb-distilbert-base,sentence-transformers/stsb-distilroberta-base-v2,sentence-transformers/stsb-mpnet-base-v2,sentence-transformers/stsb-roberta-base-v2,sentence-transformers/stsb-roberta-base,sentence-transformers/stsb-roberta-large,sentence-transformers/stsb-xlm-r-multilingual,sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens,sentence-transformers/xlm-r-100langs-bert-base-nli-stsb-mean-tokens,sentence-transformers/xlm-r-bert-base-nli-mean-tokens,sentence-transformers/xlm-r-bert-base-nli-stsb-mean-tokens,sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1,all-mpnet-base-v2
0,"Moral self-governance, autonomy, trustworthine...",Trustworthy AI should contribute to overall gr...,0.313011,0.788788,0.525359,0.571049,0.720031,0.788346,0.641809,0.726695,0.5971,0.537367,0.987444,0.554548,0.669974,0.564894,0.288724,0.653201,0.605308,0.37411,0.395251,0.73863,0.293121,0.235861,0.340354,0.149044,0.171142,0.171142,0.767432,0.701668,0.725447,0.786437,0.362587,0.219561,0.538891,0.793902,0.387327,0.326462,0.363784,0.384942,...,0.208107,0.133909,0.641809,0.726695,0.5971,0.554548,0.669974,0.564894,0.653201,0.605308,0.314517,0.398265,0.317112,0.556002,0.593743,0.28995,0.391302,0.33266,0.320597,0.303291,0.369229,0.282471,0.340354,0.300089,0.397865,0.537367,0.288724,0.37411,0.241276,0.298516,0.21535,0.458783,0.250536,0.539143,0.610239,0.539143,0.610239,0.539143,0.416844,0.261389
1,"Moral self-governance, autonomy, trustworthine...",AI systems should be designed in a way that re...,0.467257,0.782522,0.678282,0.629304,0.748433,0.814385,0.789429,0.825633,0.748991,0.666821,0.98548,0.691178,0.785428,0.712844,0.37282,0.716211,0.708477,0.45445,0.4322,0.76368,0.184442,0.100919,0.411624,0.170563,0.195778,0.195778,0.770675,0.712685,0.770666,0.824064,0.274236,0.265393,0.490012,0.763096,0.343952,0.324036,0.248636,0.332289,...,0.159644,0.011504,0.789429,0.825633,0.748991,0.691178,0.785428,0.712844,0.716211,0.708477,0.373913,0.463654,0.461325,0.727007,0.71184,0.355073,0.33359,0.366719,0.338117,0.344973,0.329935,0.302253,0.411624,0.29865,0.337958,0.666821,0.37282,0.45445,0.309416,0.419768,0.349737,0.456573,0.373851,0.692615,0.770124,0.692615,0.770124,0.692615,0.443529,0.343338
2,"Moral self-governance, autonomy, trustworthine...",Transparent and responsible disclosure around ...,0.247537,0.769409,0.690555,0.606089,0.734101,0.805628,0.640556,0.779143,0.613223,0.477818,0.990017,0.629253,0.715772,0.637625,0.297339,0.658693,0.600226,0.395391,0.461196,0.750607,0.272992,0.36874,0.32655,0.181638,0.187057,0.187057,0.773864,0.715824,0.753906,0.79312,0.288414,0.291715,0.518242,0.793588,0.363083,0.310366,0.306164,0.419778,...,0.243732,0.166891,0.640556,0.779143,0.613223,0.629253,0.715772,0.637625,0.658693,0.600226,0.407513,0.369859,0.457853,0.58502,0.596017,0.315365,0.283894,0.313246,0.346497,0.327832,0.418126,0.299105,0.32655,0.311737,0.362101,0.477818,0.297339,0.395391,0.367922,0.26738,0.400549,0.264902,0.257002,0.528633,0.638603,0.528633,0.638603,0.528633,0.361073,0.150961
3,"Moral self-governance, autonomy, trustworthine...","AI systems must function in a robust, secure a...",0.460776,0.731835,0.541989,0.516778,0.677451,0.766191,0.777567,0.832374,0.69584,0.605844,0.979246,0.661998,0.724271,0.670636,0.371651,0.757499,0.696518,0.429193,0.479754,0.728421,0.201049,0.070818,0.325909,0.194365,0.1857,0.1857,0.763334,0.702749,0.738464,0.81652,0.225992,0.232814,0.461521,0.760147,0.365457,0.283609,0.298121,0.33019,...,0.066539,0.065247,0.777567,0.832374,0.69584,0.661998,0.724271,0.670636,0.757499,0.696518,0.321908,0.329345,0.346306,0.63205,0.664886,0.367343,0.272017,0.249526,0.272324,0.252359,0.231022,0.20995,0.325909,0.234073,0.247353,0.605844,0.371651,0.429193,0.242168,0.307733,0.255203,0.339592,0.360726,0.607098,0.699053,0.607098,0.699053,0.607098,0.315796,0.192022
4,"Moral self-governance, autonomy, trustworthine...","Organisations and individuals developing, depl...",0.394822,0.692191,0.603914,0.575361,0.713081,0.761288,0.719702,0.773742,0.652875,0.578754,0.986165,0.598234,0.708204,0.678364,0.503029,0.715415,0.656294,0.454105,0.483805,0.79773,0.175433,0.183246,0.339291,0.179304,0.22792,0.22792,0.758678,0.710985,0.739951,0.803706,0.234459,0.255711,0.545137,0.780395,0.350394,0.355264,0.305327,0.329521,...,0.187933,0.186346,0.719702,0.773742,0.652875,0.598234,0.708204,0.678364,0.715415,0.656294,0.429125,0.485314,0.480559,0.595801,0.679975,0.424002,0.391191,0.293866,0.390476,0.346428,0.255765,0.230486,0.339291,0.38008,0.412788,0.578754,0.503029,0.454105,0.394403,0.465263,0.435198,0.375007,0.452732,0.595025,0.655949,0.595025,0.655949,0.595025,0.371844,0.277417
5,Protecting and advancing the interests of clie...,Trustworthy AI should contribute to overall gr...,0.275948,0.686387,0.631883,0.620607,0.740241,0.786778,0.677746,0.767499,0.626476,0.566329,0.988757,0.532481,0.678867,0.533509,0.28901,0.664302,0.582922,0.41062,0.537651,0.777999,0.451523,0.434957,0.335402,0.128488,0.127384,0.127384,0.782101,0.706078,0.770041,0.804246,0.311413,0.288046,0.508696,0.820126,0.448505,0.297091,0.420537,0.445119,...,0.31396,0.209747,0.677746,0.767499,0.626476,0.532481,0.678867,0.533509,0.664302,0.582922,0.373488,0.411286,0.325334,0.680484,0.618876,0.367197,0.351122,0.38411,0.367785,0.317743,0.411957,0.391614,0.335402,0.343619,0.377965,0.566329,0.28901,0.41062,0.322106,0.339239,0.234009,0.492359,0.346075,0.573393,0.627063,0.573393,0.627063,0.573393,0.361687,0.209061
6,Protecting and advancing the interests of clie...,AI systems should be designed in a way that re...,0.376138,0.660218,0.698955,0.655111,0.770104,0.796319,0.802879,0.850157,0.764352,0.63114,0.985096,0.750927,0.803678,0.764054,0.480059,0.769547,0.689269,0.4661,0.518166,0.775894,0.282361,0.233378,0.327267,0.106019,0.107966,0.107966,0.779994,0.726769,0.769597,0.848392,0.252682,0.272394,0.464536,0.804299,0.242314,0.193341,0.261577,0.24357,...,0.278285,0.232374,0.802879,0.850157,0.764352,0.750927,0.803678,0.764054,0.769547,0.689269,0.445211,0.464914,0.466952,0.824126,0.782507,0.304163,0.175102,0.265741,0.193868,0.309297,0.301339,0.290522,0.327267,0.340152,0.351653,0.63114,0.480059,0.4661,0.441156,0.446789,0.38219,0.532464,0.38774,0.628684,0.747726,0.628684,0.747726,0.628684,0.366968,0.227346
7,Protecting and advancing the interests of clie...,Transparent and responsible disclosure around ...,0.286285,0.740661,0.710319,0.655116,0.8064,0.812329,0.761209,0.844591,0.725477,0.532332,0.993205,0.715169,0.745147,0.676467,0.383676,0.725004,0.685309,0.47904,0.623593,0.782976,0.379581,0.36487,0.321477,0.229467,0.227274,0.227274,0.825294,0.762191,0.808484,0.866834,0.220987,0.242412,0.497165,0.807827,0.291533,0.206202,0.318257,0.413034,...,0.297352,0.176634,0.761209,0.844591,0.725477,0.715169,0.745147,0.676467,0.725004,0.685309,0.40866,0.453398,0.427789,0.741259,0.687905,0.386595,0.349973,0.393295,0.382675,0.415854,0.48157,0.405982,0.321477,0.407085,0.471175,0.532332,0.383676,0.47904,0.358563,0.324011,0.33311,0.491293,0.23911,0.56112,0.710181,0.56112,0.710181,0.56112,0.349479,0.251653
8,Protecting and advancing the interests of clie...,"AI systems must function in a robust, secure a...",0.356274,0.655359,0.647909,0.598936,0.729477,0.75463,0.738885,0.825178,0.725352,0.596383,0.980654,0.717996,0.713054,0.681447,0.399574,0.761072,0.674899,0.468508,0.550945,0.751314,0.292428,0.268808,0.236923,0.117962,0.111546,0.111546,0.783084,0.704113,0.74474,0.818783,0.279458,0.275703,0.425235,0.803628,0.292431,0.223039,0.277577,0.274974,...,0.262814,0.189775,0.738885,0.825178,0.725352,0.717996,0.713054,0.681447,0.761072,0.674899,0.355187,0.38113,0.391686,0.753194,0.700167,0.264576,0.183099,0.267418,0.233815,0.309467,0.337768,0.272049,0.236923,0.287138,0.32135,0.596383,0.399574,0.468508,0.349344,0.356432,0.317775,0.389516,0.311065,0.603764,0.717859,0.603764,0.717859,0.603764,0.302115,0.172552
9,Protecting and advancing the interests of clie...,"Organisations and individuals developing, depl...",0.246275,0.67418,0.609709,0.640476,0.762009,0.799814,0.751374,0.816451,0.717644,0.545596,0.986605,0.617836,0.719139,0.649932,0.337371,0.733895,0.67738,0.453538,0.547922,0.799954,0.256322,0.201693,0.226934,-0.015101,-0.007553,-0.007553,0.807961,0.754478,0.750588,0.832191,0.233653,0.333314,0.493754,0.786363,0.263622,0.230635,0.280471,0.247189,...,0.190067,0.17869,0.751374,0.816451,0.717644,0.617836,0.719139,0.649932,0.733895,0.67738,0.273622,0.427259,0.297054,0.723314,0.732406,0.272464,0.364737,0.314122,0.36537,0.343937,0.384002,0.336881,0.226934,0.26307,0.369434,0.545596,0.337371,0.453538,0.133034,0.378564,0.163375,0.34991,0.296177,0.532946,0.683132,0.532946,0.683132,0.532946,0.263617,0.226106


## Helper functions

In [26]:
# Calculate percentiles
def get_percentile(dataset, column, percentile):
  return np.percentile(dataset[column].values, percentile)

In [27]:
# Annotate relationship based on percentiles
def annotate_strength_relationship(dataset):
  new_dataset = pd.DataFrame()

  for column in dataset.columns:
    l = []
    percentile33 = get_percentile(dataset, column, 33.333)
    percentile66 = get_percentile(dataset, column, 66.666)

    for item in dataset[column].values:
      if item <= percentile33:
        l.append(0)
      elif item <= percentile66:
        l.append(1)
      else:
        l.append(2)

    new_dataset[column] = np.asarray(l)

  return new_dataset

## Mappings between the ethical principles based on long and short definitions

In [28]:
links_long = annotate_strength_relationship(long_defs.iloc[:,2:])

In [29]:
links_long

Unnamed: 0,sentence-transformers/LaBSE,sentence-transformers/allenai-specter,sentence-transformers/average_word_embeddings_glove.6B.300d,sentence-transformers/average_word_embeddings_glove.840B.300d,sentence-transformers/average_word_embeddings_komninos,sentence-transformers/average_word_embeddings_levy_dependency,sentence-transformers/bert-base-nli-cls-token,sentence-transformers/bert-base-nli-max-tokens,sentence-transformers/bert-base-nli-mean-tokens,sentence-transformers/bert-base-nli-stsb-mean-tokens,sentence-transformers/bert-base-wikipedia-sections-mean-tokens,sentence-transformers/bert-large-nli-cls-token,sentence-transformers/bert-large-nli-max-tokens,sentence-transformers/bert-large-nli-mean-tokens,sentence-transformers/bert-large-nli-stsb-mean-tokens,sentence-transformers/distilbert-base-nli-max-tokens,sentence-transformers/distilbert-base-nli-mean-tokens,sentence-transformers/distilbert-base-nli-stsb-mean-tokens,sentence-transformers/distilbert-base-nli-stsb-quora-ranking,sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,sentence-transformers/distilroberta-base-msmarco-v1,sentence-transformers/distilroberta-base-msmarco-v2,sentence-transformers/distilroberta-base-paraphrase-v1,sentence-transformers/distiluse-base-multilingual-cased-v1,sentence-transformers/distiluse-base-multilingual-cased-v2,sentence-transformers/distiluse-base-multilingual-cased,sentence-transformers/facebook-dpr-ctx_encoder-multiset-base,sentence-transformers/facebook-dpr-ctx_encoder-single-nq-base,sentence-transformers/facebook-dpr-question_encoder-multiset-base,sentence-transformers/facebook-dpr-question_encoder-single-nq-base,sentence-transformers/msmarco-MiniLM-L-12-v3,sentence-transformers/msmarco-MiniLM-L-6-v3,sentence-transformers/msmarco-distilbert-base-dot-prod-v3,sentence-transformers/msmarco-distilbert-base-tas-b,sentence-transformers/msmarco-distilbert-base-v2,sentence-transformers/msmarco-distilbert-base-v3,sentence-transformers/msmarco-distilbert-base-v4,sentence-transformers/msmarco-distilbert-multilingual-en-de-v2-tmp-lng-aligned,sentence-transformers/msmarco-distilbert-multilingual-en-de-v2-tmp-trained-scratch,sentence-transformers/msmarco-distilroberta-base-v2,...,sentence-transformers/msmarco-roberta-base-v2,sentence-transformers/msmarco-roberta-base-v3,sentence-transformers/nli-bert-base-cls-pooling,sentence-transformers/nli-bert-base-max-pooling,sentence-transformers/nli-bert-base,sentence-transformers/nli-bert-large-cls-pooling,sentence-transformers/nli-bert-large-max-pooling,sentence-transformers/nli-bert-large,sentence-transformers/nli-distilbert-base-max-pooling,sentence-transformers/nli-distilbert-base,sentence-transformers/nli-distilroberta-base-v2,sentence-transformers/nli-mpnet-base-v2,sentence-transformers/nli-roberta-base-v2,sentence-transformers/nli-roberta-base,sentence-transformers/nli-roberta-large,sentence-transformers/nq-distilbert-base-v1,sentence-transformers/paraphrase-MiniLM-L12-v2,sentence-transformers/paraphrase-MiniLM-L3-v2,sentence-transformers/paraphrase-MiniLM-L6-v2,sentence-transformers/paraphrase-TinyBERT-L6-v2,sentence-transformers/paraphrase-albert-base-v2,sentence-transformers/paraphrase-albert-small-v2,sentence-transformers/paraphrase-distilroberta-base-v1,sentence-transformers/paraphrase-distilroberta-base-v2,sentence-transformers/paraphrase-mpnet-base-v2,sentence-transformers/stsb-bert-base,sentence-transformers/stsb-bert-large,sentence-transformers/stsb-distilbert-base,sentence-transformers/stsb-distilroberta-base-v2,sentence-transformers/stsb-mpnet-base-v2,sentence-transformers/stsb-roberta-base-v2,sentence-transformers/stsb-roberta-base,sentence-transformers/stsb-roberta-large,sentence-transformers/stsb-xlm-r-multilingual,sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens,sentence-transformers/xlm-r-100langs-bert-base-nli-stsb-mean-tokens,sentence-transformers/xlm-r-bert-base-nli-mean-tokens,sentence-transformers/xlm-r-bert-base-nli-stsb-mean-tokens,sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1,all-mpnet-base-v2
0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,2,2,0,2,1,1,1,1,0,0,1,1,1,1,1,0,2,2,0,2,...,1,2,1,0,0,0,0,0,1,0,1,1,0,1,1,2,0,1,0,1,2,0,0,1,2,0,0,0,0,1,1,1,1,0,0,0,0,0,0,2
1,2,2,2,2,2,2,2,2,2,2,2,2,1,2,1,2,2,2,1,1,2,2,2,2,2,2,2,2,1,0,2,1,1,2,2,2,2,2,1,2,...,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,1,2,1,2,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2
2,0,2,0,0,0,0,0,0,0,1,0,1,0,1,0,1,1,0,0,1,2,2,1,2,2,2,0,0,0,0,2,2,0,2,0,1,0,1,0,2,...,2,1,0,0,0,1,0,1,1,1,2,1,1,0,1,1,1,1,1,1,2,1,1,2,0,1,0,0,2,1,2,0,0,0,0,0,0,0,1,1
3,1,2,0,0,0,0,2,2,2,2,1,2,1,2,2,2,2,2,2,0,2,1,2,2,2,2,2,2,2,0,2,1,0,2,2,2,1,2,0,1,...,0,0,2,2,2,2,1,2,2,2,2,1,2,2,2,2,1,2,1,2,2,1,2,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
4,1,2,1,1,1,1,2,2,2,2,2,2,1,2,2,1,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2,...,2,2,2,2,2,2,1,2,1,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
5,0,0,0,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,1,1,...,1,2,0,0,0,0,0,0,0,0,1,1,0,0,0,0,2,1,1,0,2,1,1,1,1,0,0,0,1,2,0,0,1,0,0,0,0,0,1,1
6,2,2,2,2,2,2,0,1,0,0,1,0,1,1,1,1,1,1,0,2,2,2,2,1,0,0,1,0,1,2,1,1,0,2,1,1,1,1,1,2,...,2,2,0,1,0,0,1,1,1,1,2,2,2,1,2,0,2,1,1,1,2,2,2,2,1,0,1,1,2,2,2,2,2,1,1,1,1,1,2,1
7,1,2,2,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,2,2,1,1,1,1,0,0,0,1,2,2,0,0,0,0,0,0,2,2,...,1,1,0,0,0,0,0,0,0,0,1,2,2,0,0,0,2,0,2,0,2,1,1,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,1,1
8,1,0,1,1,2,2,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0,1,0,1,1,0,0,1,1,1,1,0,1,0,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0
9,0,2,1,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,1,2,2,0,1,1,1,0,0,1,...,2,2,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,2,0,1,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,1


In [30]:
links_short = annotate_strength_relationship(short_defs.iloc[:,2:])

In [31]:
links_short

Unnamed: 0,sentence-transformers/LaBSE,sentence-transformers/allenai-specter,sentence-transformers/average_word_embeddings_glove.6B.300d,sentence-transformers/average_word_embeddings_glove.840B.300d,sentence-transformers/average_word_embeddings_komninos,sentence-transformers/average_word_embeddings_levy_dependency,sentence-transformers/bert-base-nli-cls-token,sentence-transformers/bert-base-nli-max-tokens,sentence-transformers/bert-base-nli-mean-tokens,sentence-transformers/bert-base-nli-stsb-mean-tokens,sentence-transformers/bert-base-wikipedia-sections-mean-tokens,sentence-transformers/bert-large-nli-cls-token,sentence-transformers/bert-large-nli-max-tokens,sentence-transformers/bert-large-nli-mean-tokens,sentence-transformers/bert-large-nli-stsb-mean-tokens,sentence-transformers/distilbert-base-nli-max-tokens,sentence-transformers/distilbert-base-nli-mean-tokens,sentence-transformers/distilbert-base-nli-stsb-mean-tokens,sentence-transformers/distilbert-base-nli-stsb-quora-ranking,sentence-transformers/distilbert-multilingual-nli-stsb-quora-ranking,sentence-transformers/distilroberta-base-msmarco-v1,sentence-transformers/distilroberta-base-msmarco-v2,sentence-transformers/distilroberta-base-paraphrase-v1,sentence-transformers/distiluse-base-multilingual-cased-v1,sentence-transformers/distiluse-base-multilingual-cased-v2,sentence-transformers/distiluse-base-multilingual-cased,sentence-transformers/facebook-dpr-ctx_encoder-multiset-base,sentence-transformers/facebook-dpr-ctx_encoder-single-nq-base,sentence-transformers/facebook-dpr-question_encoder-multiset-base,sentence-transformers/facebook-dpr-question_encoder-single-nq-base,sentence-transformers/msmarco-MiniLM-L-12-v3,sentence-transformers/msmarco-MiniLM-L-6-v3,sentence-transformers/msmarco-distilbert-base-dot-prod-v3,sentence-transformers/msmarco-distilbert-base-tas-b,sentence-transformers/msmarco-distilbert-base-v2,sentence-transformers/msmarco-distilbert-base-v3,sentence-transformers/msmarco-distilbert-base-v4,sentence-transformers/msmarco-distilbert-multilingual-en-de-v2-tmp-lng-aligned,sentence-transformers/msmarco-distilbert-multilingual-en-de-v2-tmp-trained-scratch,sentence-transformers/msmarco-distilroberta-base-v2,...,sentence-transformers/msmarco-roberta-base-v2,sentence-transformers/msmarco-roberta-base-v3,sentence-transformers/nli-bert-base-cls-pooling,sentence-transformers/nli-bert-base-max-pooling,sentence-transformers/nli-bert-base,sentence-transformers/nli-bert-large-cls-pooling,sentence-transformers/nli-bert-large-max-pooling,sentence-transformers/nli-bert-large,sentence-transformers/nli-distilbert-base-max-pooling,sentence-transformers/nli-distilbert-base,sentence-transformers/nli-distilroberta-base-v2,sentence-transformers/nli-mpnet-base-v2,sentence-transformers/nli-roberta-base-v2,sentence-transformers/nli-roberta-base,sentence-transformers/nli-roberta-large,sentence-transformers/nq-distilbert-base-v1,sentence-transformers/paraphrase-MiniLM-L12-v2,sentence-transformers/paraphrase-MiniLM-L3-v2,sentence-transformers/paraphrase-MiniLM-L6-v2,sentence-transformers/paraphrase-TinyBERT-L6-v2,sentence-transformers/paraphrase-albert-base-v2,sentence-transformers/paraphrase-albert-small-v2,sentence-transformers/paraphrase-distilroberta-base-v1,sentence-transformers/paraphrase-distilroberta-base-v2,sentence-transformers/paraphrase-mpnet-base-v2,sentence-transformers/stsb-bert-base,sentence-transformers/stsb-bert-large,sentence-transformers/stsb-distilbert-base,sentence-transformers/stsb-distilroberta-base-v2,sentence-transformers/stsb-mpnet-base-v2,sentence-transformers/stsb-roberta-base-v2,sentence-transformers/stsb-roberta-base,sentence-transformers/stsb-roberta-large,sentence-transformers/stsb-xlm-r-multilingual,sentence-transformers/xlm-r-100langs-bert-base-nli-mean-tokens,sentence-transformers/xlm-r-100langs-bert-base-nli-stsb-mean-tokens,sentence-transformers/xlm-r-bert-base-nli-mean-tokens,sentence-transformers/xlm-r-bert-base-nli-stsb-mean-tokens,sentence-transformers/xlm-r-distilroberta-base-paraphrase-v1,all-mpnet-base-v2
0,1,2,0,0,0,1,0,0,0,1,2,0,0,0,0,0,1,0,0,0,2,2,2,2,2,2,1,1,0,1,2,1,2,2,2,2,2,2,2,2,...,1,1,0,0,0,0,0,0,0,1,1,2,1,0,0,1,2,2,2,1,2,1,2,1,2,1,0,0,0,2,0,2,0,1,0,1,0,1,2,2
1,2,2,1,0,1,2,2,2,2,2,1,1,2,2,1,1,2,2,1,1,1,0,2,2,2,2,1,2,2,2,2,2,1,0,2,2,1,2,2,0,...,1,0,2,2,2,1,2,2,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,1,1,2,1,2,1,2,2,2,2,2,2,2,2,2,2,2
2,0,2,2,0,1,2,0,1,0,0,2,1,0,1,0,0,0,1,1,0,2,2,2,2,2,2,1,2,2,1,2,2,2,2,2,2,2,2,0,2,...,2,2,0,1,0,1,0,1,0,0,2,2,2,0,0,2,1,2,2,1,2,2,2,2,2,0,0,1,2,1,2,0,0,1,1,1,1,1,2,0
3,2,2,0,0,0,0,2,2,2,2,0,1,1,2,1,2,2,1,1,0,1,0,2,2,2,2,1,1,1,2,1,1,1,0,2,2,2,2,0,0,...,0,0,2,2,2,1,1,2,2,2,1,1,1,0,1,2,1,1,1,0,0,0,2,0,0,2,1,1,0,2,1,0,2,2,2,2,2,2,1,1
4,2,2,0,0,0,0,1,1,1,2,2,0,0,2,2,1,1,2,1,2,1,1,2,2,2,2,1,1,1,2,1,2,2,1,2,2,2,2,0,1,...,1,2,1,1,1,0,0,2,1,1,2,2,2,0,1,2,2,1,2,2,0,0,2,2,2,2,2,2,2,2,2,1,2,2,1,2,1,2,2,2
5,0,1,0,0,1,1,1,0,1,2,2,0,0,0,0,0,0,1,2,2,2,2,2,1,1,1,2,1,2,2,2,2,2,2,2,2,2,2,2,2,...,2,2,1,0,1,0,0,0,0,0,2,2,1,1,0,2,2,2,2,1,2,2,2,2,2,2,0,1,1,2,0,2,1,2,0,2,0,2,2,2
6,2,0,2,0,2,1,2,2,2,2,1,2,2,2,2,2,2,2,2,1,2,1,2,1,1,1,1,2,2,2,2,2,1,2,1,1,1,0,2,1,...,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,0,1,0,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
7,1,2,2,0,2,2,2,2,2,1,2,2,1,2,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,1,2,2,2,1,2,2,2,2,...,2,2,2,2,2,2,1,2,1,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,1,1,2,2,2,2,2,0,1,2,1,2,1,2,2
8,2,0,1,0,0,0,2,2,2,2,0,2,0,2,2,2,2,2,2,0,2,2,0,1,1,1,2,1,2,2,2,2,0,2,2,2,2,1,1,2,...,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,1,0,1,1,1,2,1,0,1,1,2,2,2,2,2,2,1,1,2,2,2,2,2,1,1
9,0,1,0,0,2,2,2,2,2,1,2,0,0,1,1,2,2,2,2,2,1,1,0,0,0,0,2,2,2,2,1,2,1,1,1,2,2,1,1,1,...,1,2,2,2,2,0,0,1,2,2,0,2,0,2,2,1,2,2,2,2,2,2,0,0,2,1,1,2,0,2,0,0,0,1,1,1,1,1,1,2


## Determine overlaps across all transformers

In [32]:
# Calculate the number of overlaps between long and short mappings for each transformer
def calculate_overlaps(long_links, short_links):
  overlaps = pd.DataFrame(columns = ['transformer', 'number of overlaps'])

  for column in long_links.columns:
    num_overlaps = (short_links[column].values == long_links[column].values).sum() 
    new_data = {'transformer': column, 'number of overlaps': num_overlaps}
    overlaps = overlaps.append(new_data, ignore_index=True)
  
  return overlaps

In [33]:
overlaps = calculate_overlaps(links_long, links_short)

In [34]:
overlaps.sort_values(by=['number of overlaps'], ascending=False, inplace=True, ignore_index=True)

In [35]:
overlaps

Unnamed: 0,transformer,number of overlaps
0,sentence-transformers/nli-distilroberta-base-v2,23
1,sentence-transformers/LaBSE,21
2,sentence-transformers/stsb-roberta-base-v2,21
3,sentence-transformers/nli-roberta-large,21
4,sentence-transformers/nli-bert-large,20
...,...,...
76,sentence-transformers/nq-distilbert-base-v1,8
77,sentence-transformers/facebook-dpr-question_en...,8
78,sentence-transformers/msmarco-distilbert-base-...,7
79,sentence-transformers/facebook-dpr-ctx_encoder...,6


In [36]:
most_overlaps = overlaps.loc[0, 'transformer']

In [37]:
most_overlaps

'sentence-transformers/nli-distilroberta-base-v2'

## Results

In [38]:
def map_to_label(num_values):
  return np.asarray(['Weak' if item == 0 else 'Moderate' if item == 1 else 'Strong'  for item in num_values])

In [39]:
def create_mapping_table(label, values, finance_labels, ml_labels):
  new_dataset = pd.DataFrame(columns=[label] + finance_labels)

  new_dataset[label] = ml_labels
  length = len(finance_labels)

  for num in range(length):
    column = finance_labels[num]
    to_index = (num + 1) * len(ml_labels)
    from_index = to_index - len(ml_labels)
    new_dataset[column] = map_to_label(values[from_index:to_index])

  return new_dataset

In [40]:
# Define labels for the resulting tables
label = 'Mapping between finance and ML ethics'
finance_labels = ['Integrity', 'Objectivity', 'Competenece', 'Fairness', 'Confidentiality', 'Professionalism', 'Diligence']
ml_labels = ['Inclusive growth, sustainable development and weel-being', 'Human-centred values and fairness', 'Transparency and expalinability',
             'Robustness, security and safety', 'Accountability']

In [41]:
# Print results for mappings between ethical principles based on long definitions
values = links_long[most_overlaps].values
create_mapping_table(label, values, finance_labels, ml_labels)

Unnamed: 0,Mapping between finance and ML ethics,Integrity,Objectivity,Competenece,Fairness,Confidentiality,Professionalism,Diligence
0,"Inclusive growth, sustainable development and ...",Moderate,Moderate,Weak,Strong,Weak,Weak,Weak
1,Human-centred values and fairness,Strong,Strong,Moderate,Strong,Moderate,Strong,Moderate
2,Transparency and expalinability,Strong,Moderate,Strong,Strong,Moderate,Moderate,Moderate
3,"Robustness, security and safety",Strong,Weak,Weak,Moderate,Weak,Weak,Weak
4,Accountability,Strong,Weak,Weak,Strong,Weak,Moderate,Strong


In [42]:
# Print results for mappings between ethical principles based on short definitions
values = links_short[most_overlaps].values
create_mapping_table(label, values, finance_labels, ml_labels)

Unnamed: 0,Mapping between finance and ML ethics,Integrity,Objectivity,Competenece,Fairness,Confidentiality,Professionalism,Diligence
0,"Inclusive growth, sustainable development and ...",Moderate,Strong,Moderate,Strong,Weak,Weak,Weak
1,Human-centred values and fairness,Strong,Strong,Weak,Strong,Moderate,Moderate,Moderate
2,Transparency and expalinability,Strong,Strong,Weak,Strong,Strong,Moderate,Moderate
3,"Robustness, security and safety",Moderate,Strong,Weak,Moderate,Moderate,Weak,Weak
4,Accountability,Strong,Weak,Weak,Strong,Weak,Weak,Moderate


In [43]:
# Print results for mappings between ethical principles based on human-based annotations
values = [1,1,2,1,2, 2,2,0,0,0, 0,0,1,1,1, 2,2,1,1,2, 0,1,0,2,0, 0,0,1,2,2, 1,2,0,2,1]
create_mapping_table(label, values, finance_labels, ml_labels)

Unnamed: 0,Mapping between finance and ML ethics,Integrity,Objectivity,Competenece,Fairness,Confidentiality,Professionalism,Diligence
0,"Inclusive growth, sustainable development and ...",Moderate,Strong,Weak,Strong,Weak,Weak,Moderate
1,Human-centred values and fairness,Moderate,Strong,Weak,Strong,Moderate,Weak,Strong
2,Transparency and expalinability,Strong,Weak,Moderate,Moderate,Weak,Moderate,Weak
3,"Robustness, security and safety",Moderate,Weak,Moderate,Moderate,Strong,Strong,Strong
4,Accountability,Strong,Weak,Moderate,Strong,Weak,Strong,Moderate
