# Imports & Helper Functions

In [2]:
import pandas as pd
import numpy as np
import itertools
from plotly.subplots import make_subplots
import plotly.graph_objects as go

from dash import Dash, html, dcc, callback, Output, Input
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go

import random
import scipy.stats
import os

from transformers import AutoTokenizer

custom_template = dict(
    layout=go.Layout(title_font=dict(family="Times New Roman"))
)


def get_language_family(row, lang_fam_mapping):
    if row['iso_639_3'] in list(itertools.chain.from_iterable(lang_fam_mapping.values())):
        for key in lang_fam_mapping.keys():
            if row['iso_639_3'] in lang_fam_mapping[key]:
                return key
            
    else: 
        return 'unknown'
    

def get_answered_family(row):
    if row['input_family'] == row['output_family']:
        return 'Same'
    else:
        return 'Other'

def get_answered_language(row):
    if row['detected_language'] in row['iso_range']:
        return 'Same Language'
    elif row['detected_language'] == 'eng':
        return 'English'
    else:
        return 'Other'

def get_answered_correct(row):
    if row['detected_language'] in row['iso_range']:
        return True
    else:
        return False


# Analyse Language Fidelity

## Input & Output Language Detection

In [None]:
df_dict_v2 = {}

for model in os.listdir(f"../../data/model_language_fidelity/"):
        df_dict_v2[model[:-4]] = pd.read_csv(f"../../data/model_language_fidelity/{model}")
        print(f"Loaded {model} ({len(df_dict_v2[model[:-4]])} rows)")

In [95]:
for model in df_dict_v2:
    df_dict_v2[model]['answered_language'] = df_dict_v2[model].apply(lambda row: get_answered_language(row), axis = 1)
    df_dict_v2[model]['correct_language'] = df_dict_v2[model].apply(lambda row: get_answered_correct(row), axis = 1)
    df_dict_v2[model]['answered_family'] = df_dict_v2[model].apply(lambda row: get_answered_family(row), axis = 1)

## What is the percentage of response languages that could not be detected?

In [96]:
for model in df_dict_v2:
    print(model)
    print("Not able to detect in %")
    not_det = len(df_dict_v2[model][df_dict_v2[model]['detected_language'].isin(['Unknown'])])/len(df_dict_v2[model])
    print(str(not_det))
    print('-----')

Mistral-7B-Instruct-v0.1
Not able to detect in %
0.0
-----
Qwen1.5-7B-Chat
Not able to detect in %
0.0
-----
Mistral-7B-Instruct-v0.2
Not able to detect in %
0.00010948905109489051
-----
Llama-2-70b-chat-hf
Not able to detect in %
0.0
-----
Llama-2-13b-chat-hf
Not able to detect in %
0.0
-----
zephyr-7b-beta
Not able to detect in %
0.0
-----
Yi-6B-Chat
Not able to detect in %
0.0
-----
Mixtral-8x7B-Instruct-v0.1
Not able to detect in %
3.649635036496351e-05
-----
Llama-2-7b-chat-hf
Not able to detect in %
0.0
-----


## Overall percentage of Input & Ouput Language Correlation (Same, Different, English)

In [143]:
fig = go.Figure()

#colors = ['#8ed1c4', '#AB47BD', '#90cdff']
colors = ['#43A047', '#9667D0','#7CCEFD' ]
annotations=[]
for model in ['Mistral-7B-Instruct-v0.1', 'Qwen1.5-7B-Chat', 'Llama-2-13b-chat-hf', 'Llama-2-70b-chat-hf', 'Llama-2-7b-chat-hf', 'Mixtral-8x7B-Instruct-v0.1']:
    subset = df_dict_v2[model][~df_dict_v2[model]['answered_language'].isin(['Mni-mtei', 'doi']) ]
    group = subset['answered_language'].value_counts(normalize=True).reset_index()
    group['answered_language'] = round(group['answered_language'] * 100, 2)
    group = group.sort_values(by='index', ascending=False).reset_index()
    space = 0

    if 'Llama' in model:
        model_name = model.replace('-chat-hf', '')
    elif 'tral' in model:
        model_name = model.replace('-Instruct', '')
    elif 'Qwen' in model:
        model_name = model.replace('-Chat', '')
    else:
        model_name = model.replace('-beta', '')
        model_name = model_name.replace('z', 'Z')
    model_name = model_name.replace('b', 'B')

    for index, row in group.iterrows():
        fig.add_trace(go.Bar(
            y=[model_name],
            x=[row['answered_language']],
            marker=dict(
                    color=colors[index],
                    line=dict(color='rgb(248, 248, 249)', width=1)
                ),
            name=row['index'],
            orientation='h',

        ))
        if index == 0:
            annotations.append(dict(xref='x', yref='y',
                                x=row['answered_language'] / 2, y=model_name,
                                text=str(row['answered_language']) + '%',
                                font=dict(family='Arial', size=14,
                                        color='rgb(248, 248, 255)'),
                                showarrow=False))
            space = row['answered_language']
        else: 
            annotations.append(dict(xref='x', yref='y',
                                    x=space + (row['answered_language']/2), y=model_name,
                                    text=str(row['answered_language']) + '%',
                                    font=dict(family='Arial', size=14,
                                              color='rgb(248, 248, 255)'),
                                    showarrow=False))
            space += row['answered_language']

fig.update_yaxes(categoryorder='array', categoryarray= [ 'Qwen1.5-7B', 'Mixtral-8x7B-v0.1', 'Mistral-7B-v0.1', 'Llama-2-70B', 'Llama-2-13B', 'Llama-2-7B'])
fig.update_yaxes(ticksuffix = "  ")

fig.update_layout(
    yaxis_title="",
    xaxis_title="",
    font_family="Times New Roman",
    barmode='stack',
    width=750,
    font=dict(size=24),
    height=300,
    showlegend=False,
    template=custom_template,
    margin=dict(l=200, r=0, t=0, b=30),
    #annotations=annotations,
    )

fig.show()
fig.write_image("../../img/answer_proportion_multiq_v2.pdf", format="pdf")

## Analysis of Input & Output Families in Comparison with Input & Output Languages

In [144]:
for model in ['Mistral-7B-Instruct-v0.1', 'Qwen1.5-7B-Chat', 'Llama-2-7b-chat-hf', 'Mixtral-8x7B-Instruct-v0.1']:
    groups = df_dict_v2[model].output_family.value_counts(normalize=True)
    print(model)
    print('Unknown proportion: '+ str(groups['Unknown']))


Mistral-7B-Instruct-v0.1
Unknown proportion: 0.06974452554744526
Qwen1.5-7B-Chat
Unknown proportion: 0.05777372262773722
Llama-2-7b-chat-hf
Unknown proportion: 0.0012043795620437956
Mixtral-8x7B-Instruct-v0.1
Unknown proportion: 0.06102189781021898


In [145]:
#or model in ['Mistral-7B-Instruct-v0.1', 'Qwen1.5-7B-Chat', 'Llama-2-7b-chat-hf', 'Mixtral-8x7B-Instruct-v0.1']:
for model in df_dict_v2:
    fig = go.Figure()
    group = df_dict_v2[model].groupby('language')['answered_language'].value_counts(normalize=True).to_frame('perc').reset_index()

    group = group[group['answered_language'] == 'Other']
    print(model)
    print()

    print('Percentage answered in same language family')
    print(df_dict_v2[model][df_dict_v2[model]['answered_language'] == 'Other']['answered_family'].value_counts(normalize=True))

    print()
    print('Percentage answered language')
    print(df_dict_v2[model][df_dict_v2[model]['answered_language'] == 'Other']['detected_language'].value_counts(normalize=True)[:10])
    print()
    print('Number of distinct languages chosen')
    print(len(df_dict_v2[model][df_dict_v2[model]['answered_language'] == 'Other']['detected_language'].value_counts(normalize=True)))
    print()

    group.sort_values(by=["perc"], ascending=[False], inplace=True)


    lang_list = group.head(20).language.unique()

    subset = df_dict_v2[model][df_dict_v2[model]['language'].isin(lang_list)]

    if 'Llama' in model:
        model_name = model.replace('-chat-hf', '')
    elif 'tral' in model:
        model_name = model.replace('-Instruct', '')
    elif 'Qwen' in model:
        model_name = model.replace('-Chat', '')
    else:
        model_name = model.replace('-beta', '')
        model_name = model_name.replace('z', 'Z')
    model_name = model_name.replace('b', 'B')

    confusion_matrix = pd.crosstab(subset['iso_639_3'], subset['detected_language'])
    confusion_matrix_percentage = confusion_matrix.div(confusion_matrix.sum(axis=1), axis=0) * 100

    

    print(subset.input_family.value_counts())

    print(subset.output_family.value_counts())


    color_scale = [[0, '#16193C'], 
                [0.05, '#4c55be'], 
                [0.1, '#4D79C7'], 
                [0.2, '#7FB2F0'], 
                [0.3, '#ABD4F7'], 
                [0.4, '#E6DE00'], 
                [0.5, '#F2E826'], 
                [0.6, '#BDDB39'],
                [0.7, '#B6E656'],
                [0.8, '#68BB6C'],
                [0.9, '#44A248'],
                [1, '#32671D']]
    
    fig = px.imshow(confusion_matrix_percentage, 
                    range_color=(0,100),
                    color_continuous_scale=color_scale)

    fig.update_layout(
        yaxis_title="Input Language",
        xaxis_title="Output Language",
        font_family="Times New Roman",
        coloraxis_colorbar_x=0.9,
        coloraxis_showscale=False,
        title=model,
        title_x=0.5,
        title_y=0.97,
        template=custom_template,
        )

    fig.show()

    confusion_matrix = pd.crosstab(subset['input_family'], subset['output_family'])
    confusion_matrix_percentage = confusion_matrix.div(confusion_matrix.sum(axis=1), axis=0) * 100

    confusion_matrix_percentage = confusion_matrix_percentage.drop('other', axis = 1, errors='ignore')


    color_scale = [[0, '#16193C'], 
                [0.05, '#4c55be'], 
                [0.1, '#4D79C7'], 
                [0.2, '#7FB2F0'], 
                [0.3, '#ABD4F7'], 
                [0.4, '#E6DE00'], 
                [0.5, '#F2E826'], 
                [0.6, '#BDDB39'],
                [0.7, '#B6E656'],
                [0.8, '#68BB6C'],
                [0.9, '#44A248'],
                [1, '#32671D']]
    
    fig = px.imshow(confusion_matrix_percentage, 
                    range_color=(0,100),
                    color_continuous_scale=color_scale)

    fig.update_layout(
        #yaxis_title="Input Family",
        yaxis_title="",
        xaxis_title="Output Family",
        font_family="Times New Roman",
        coloraxis_colorbar_x=1,
        #coloraxis_showscale=False,
        width=700,
        font=dict(size=20),
        height=400,
        margin=dict(l=150, r=50, t=30, b=110),
        title=model_name,
        title_x=0.55,
        title_y=0.9,
        yaxis = dict(tickfont = dict(size=16)),
        xaxis = dict(tickfont = dict(size=16), tickangle=60),
        template=custom_template,
    )

    fig.show()


    fig.write_image("../../img/io_fam_"+model+".pdf", format="pdf")




Mistral-7B-Instruct-v0.1
Percentage answered in same language family
Same     0.545541
Other    0.454459
Name: answered_family, dtype: float64

Percentage answered language
hin    0.117895
ind    0.081340
nld    0.077129
tir    0.065837
por    0.059330
rus    0.055694
tur    0.042871
spa    0.041148
amh    0.040766
ukr    0.036555
Name: detected_language, dtype: float64
Number of distinct languages chosen
130
Indo-European     2800
Afro-Asiatic       400
Sino-Tibetan       400
Austro-Asiatic     200
Austronesian       200
Name: input_family, dtype: int64
Indo-European          2539
Afro-Asiatic            668
Unknown                 410
Austronesian            167
Sino-Tibetan            125
Korean                   33
Austro-Asiatic           19
Dravidian                11
Uralic                   10
Japanese                  9
Niger-Congo               5
Gumuz                     1
Mayan                     1
Northwest Caucasian       1
Altaic                    1
Name: output_family

Qwen1.5-7B-Chat
Percentage answered in same language family
Same     0.605967
Other    0.394033
Name: answered_family, dtype: float64

Percentage answered language
hin    0.106679
ind    0.097138
swh    0.085067
spa    0.069203
tur    0.069088
rus    0.048052
tgl    0.042419
zho    0.038510
por    0.033222
kaz    0.033107
Name: detected_language, dtype: float64
Number of distinct languages chosen
130
Indo-European    2400
Austronesian      800
Altaic            400
Niger-Congo       200
Aymaran           200
Name: input_family, dtype: int64
Indo-European     2451
Austronesian       768
Altaic             312
Afro-Asiatic       184
Niger-Congo        117
Unknown             98
Sino-Tibetan        41
Uralic              13
Basque               5
Austro-Asiatic       4
Mayan                3
Aymaran              2
Mixe-Zoque           1
other                1
Name: output_family, dtype: int64


Mistral-7B-Instruct-v0.2
Percentage answered in same language family
Same     0.704565
Other    0.295435
Name: answered_family, dtype: float64

Percentage answered language
hin    0.215326
ind    0.130599
amh    0.085807
por    0.047491
tgl    0.042634
tur    0.041554
rus    0.038856
bul    0.036158
dan    0.035078
ben    0.031840
Name: detected_language, dtype: float64
Number of distinct languages chosen
130
Indo-European    2400
Austronesian      800
Altaic            400
Afro-Asiatic      200
Tai-Kadai         200
Name: input_family, dtype: int64
Indo-European     3032
Unknown            316
Austronesian       299
Afro-Asiatic       160
Tai-Kadai           97
Altaic              91
Austro-Asiatic       3
other                1
Dravidian            1
Name: output_family, dtype: int64


Llama-2-70b-chat-hf
Percentage answered in same language family
Same     0.752513
Other    0.247487
Name: answered_family, dtype: float64

Percentage answered language
ind    0.208797
rus    0.100689
nld    0.091680
por    0.089560
dan    0.088500
spa    0.069952
ita    0.069952
bul    0.036036
ukr    0.031267
ces    0.030207
Name: detected_language, dtype: float64
Number of distinct languages chosen
130
Indo-European    2200
Austronesian     1000
Altaic            200
Quechuan          200
Aymaran           200
Tupian            200
Name: input_family, dtype: int64
Indo-European     3448
Austronesian       512
Unknown             12
Altaic               6
Austro-Asiatic       6
Niger-Congo          5
Aymaran              4
Afro-Asiatic         3
Tupian               3
Quechuan             1
Name: output_family, dtype: int64


Llama-2-13b-chat-hf
Percentage answered in same language family
Same     0.698892
Other    0.301108
Name: answered_family, dtype: float64

Percentage answered language
ind    0.168641
nld    0.136585
por    0.087108
spa    0.086411
mlt    0.080836
dan    0.072474
ita    0.052962
rus    0.049477
vie    0.044599
ces    0.032056
Name: detected_language, dtype: float64
Number of distinct languages chosen
130
Indo-European    2400
Austronesian     1000
Quechuan          200
Aymaran           200
Tupian            200
Name: input_family, dtype: int64
Indo-European     3645
Austronesian       293
Unknown             24
Afro-Asiatic        16
Austro-Asiatic      12
Niger-Congo          5
Tupian               3
Aymaran              1
Uralic               1
Name: output_family, dtype: int64


zephyr-7b-beta
Percentage answered in same language family
Same     0.589056
Other    0.410944
Name: answered_family, dtype: float64

Percentage answered language
hin    0.160081
rus    0.124779
spa    0.120299
ind    0.086354
por    0.059878
tur    0.059742
tgl    0.037882
nld    0.031093
deu    0.027020
pes    0.025526
Name: detected_language, dtype: float64
Number of distinct languages chosen
130
Indo-European    2000
Altaic            800
Austronesian      600
Quechuan          200
Aymaran           200
Tupian            200
Name: input_family, dtype: int64
Indo-European     3105
Austronesian       524
Altaic             209
Unknown             99
Afro-Asiatic        17
Tupian              10
Sino-Tibetan         9
Aymaran              7
Uralic               5
Niger-Congo          3
Austro-Asiatic       3
Dravidian            3
Japanese             2
Border               1
Quechuan             1
Arawakan             1
Korean               1
Name: output_family, dtype: int64


Yi-6B-Chat
Percentage answered in same language family
Other    0.549207
Same     0.450793
Name: answered_family, dtype: float64

Percentage answered language
hin    0.188398
rus    0.087813
zho    0.086393
ind    0.076104
spa    0.068831
jpn    0.064573
kor    0.040979
tur    0.032464
por    0.027852
ita    0.023417
Name: detected_language, dtype: float64
Number of distinct languages chosen
130
Indo-European    2000
Austronesian      800
Altaic            400
Quechuan          200
Aymaran           200
Tai-Kadai         200
Niger-Congo       200
Name: input_family, dtype: int64
Indo-European        2757
Austronesian          441
Unknown               274
Altaic                145
Sino-Tibetan           92
Japanese               86
Tai-Kadai              81
Korean                 47
Niger-Congo            34
Dravidian               9
Austro-Asiatic          8
Aymaran                 8
Uralic                  6
Afro-Asiatic            5
Quechuan                4
Nakh-Daghestanian       

Mixtral-8x7B-Instruct-v0.1
Percentage answered in same language family
Same     0.68743
Other    0.31257
Name: answered_family, dtype: float64

Percentage answered language
hin    0.173492
ind    0.145140
spa    0.096985
tur    0.051980
tgl    0.047705
deu    0.035779
nld    0.031728
rus    0.030828
mar    0.028128
por    0.027903
Name: detected_language, dtype: float64
Number of distinct languages chosen
130
Indo-European    1800
Austronesian     1000
Altaic            600
Afro-Asiatic      200
Quechuan          200
Aymaran           200
Name: input_family, dtype: int64
Indo-European     2240
Austronesian       896
Altaic             374
Unknown            263
Afro-Asiatic       175
Quechuan            14
Aymaran             12
Algic                6
Niger-Congo          5
Basque               4
other                2
Gumuz                2
Uralic               2
Austro-Asiatic       2
Eskimo-Aleut         1
Korean               1
Japanese             1
Name: output_family, dtype: int

Llama-2-7b-chat-hf
Percentage answered in same language family
Same     0.620172
Other    0.379828
Name: answered_family, dtype: float64

Percentage answered language
dan    0.181333
por    0.153333
ind    0.089333
spa    0.065333
nld    0.057333
ita    0.054667
mlt    0.053333
lug    0.046667
ces    0.037333
rus    0.036000
Name: detected_language, dtype: float64
Number of distinct languages chosen
130
Indo-European    2400
Austronesian      400
Korean            200
Sino-Tibetan      200
Altaic            200
Quechuan          200
Japanese          200
Tupian            200
Name: input_family, dtype: int64
Indo-European     3892
Austronesian        65
Niger-Congo         20
Austro-Asiatic       7
Uralic               4
Afro-Asiatic         3
Unknown              3
Japanese             3
Tupian               2
Altaic               1
Name: output_family, dtype: int64


## Correlation between Input Language and Output Language on Language Level, sorted by Language Family

In [110]:
sorterlist = ['arb', 'gaz', 'hau', 'mlt', 'tir', 'amh', 'heb', 'som', 'tat', 'uig', 'tuk', 'kir', 'uzn', 'tur', 'kaz', 'azb',
              'khm', 'vie', 'sun', 'ind', 'haw', 'ilo', 'zsm', 'plt', 'mri', 'ceb', 'jav', 'tgl', 'smo', 'ayr', 'eus', 'tel', 'tam', 
              'kan', 'mal', 'mww', 'ell', 'pst', 'bul', 'hin', 'snd', 'sqi', 'nor', 'sin', 'slv', 'por', 'ydd', 'ory', 'hye', 'gle', 
              'fra', 'bos', 'dgo', 'eng', 'hbs', 'pan', 'bho', 'gla', 'ltz', 'isl', 'deu', 'dan', 'lav', 'ckb', 'spa', 'ben', 'afr', 
              'npi', 'rus', 'mar', 'fry', 'pes', 'knn', 'ron', 'mkd', 'ces', 'cym', 'tgk', 'bel', 'lit', 'swe', 'ukr', 'cat', 'pol', 
              'guj', 'asm', 'nld', 'glg', 'ita', 'urd', 'div', 'slk', 'mai', 'jpn', 'kat', 'kor', 'bam', 'lug', 'kin', 'aka', 'sot', 
              'xho', 'tso', 'yor', 'sna', 'nso', 'ibo', 'zul', 'ewe', 'nya', 'swh', 'lin', 'quh', 'mya', 'lus', 'mni', 'lao', 'tha', 
              'gug', 'kri', 'zho', 'cos', 'mon', 'san', 'epo', 'lat', 'fin', 'hun', 'ekk', 'hat']

In [111]:
for model in df_dict_v2:
    df_dict_v2[model].iso_639_3 = df_dict_v2[model].iso_639_3.astype("category")
    df_dict_v2[model].iso_639_3 = df_dict_v2[model].iso_639_3.cat.set_categories(sorterlist)
    df_dict_v2[model].detected_language = df_dict_v2[model].detected_language.astype("category")
    df_dict_v2[model].detected_language = df_dict_v2[model].detected_language.cat.set_categories(sorterlist)


In [139]:
for model in df_dict_v2:
    confusion_matrix = pd.crosstab(df_dict_v2[model]['iso_639_3'], df_dict_v2[model]['detected_language'])
    confusion_matrix_percentage = confusion_matrix.div(confusion_matrix.sum(axis=1), axis=0) * 100

    missing_cols = [x for x in sorterlist if not x in confusion_matrix_percentage.columns]

    for lang in missing_cols:
        confusion_matrix_percentage[lang] = 0

    confusion_matrix_percentage = confusion_matrix_percentage.reindex(sorterlist, axis=1)

    if 'Llama' in model:
        model_name = model.replace('-chat-hf', '')
    elif 'Mistral' in model:
        model_name = model.replace('-Instruct', '')
    elif 'Qwen' in model:
        model_name = model.replace('-Chat', '')
    else:
        model_name = model.replace('-beta', '')
        model_name = model_name.replace('z', 'Z')
    model_name = model_name.replace('b', 'B')

    

    color_scale = [[0, '#16193C'], 
                [0.05, '#4c55be'], 
                [0.1, '#4D79C7'], 
                [0.2, '#7FB2F0'], 
                [0.3, '#ABD4F7'], 
                [0.4, '#E6DE00'], 
                [0.5, '#F2E826'], 
                [0.6, '#BDDB39'],
                [0.7, '#B6E656'],
                [0.8, '#68BB6C'],
                [0.9, '#44A248'],
                [1, '#32671D']]
    
    fig = px.imshow(confusion_matrix_percentage, 
                    range_color=(0,100),
                    color_continuous_scale=color_scale)

    fig.update_layout(
        yaxis_title="Input Language",
        #yaxis_title="",
        #xaxis_title="Output Language",
        xaxis_title="",
        font_family="Times New Roman",
        coloraxis_colorbar_x=0.9,
        #coloraxis_showscale=False,
        width=450,
        font=dict(size=20),
        height=400,
        margin=dict(l=25, r=0, t=40, b=70),
        title=model_name,
        title_x=0.5,
        title_y=0.97,
        template=custom_template,
        xaxis={'tickvals':['eng', 'hin', 'ind'], 'ticktext':['eng', 'hin', 'ind']},
        yaxis={'tickvals':['eng'], 'ticktext':['eng'], }
        )

    fig.show()

    fig.write_image("../../img/io_plot_"+model_name+".pdf", format="pdf")

## Correlation between input and output families in total

In [19]:
for model in df_dict_v2:
    confusion_matrix = pd.crosstab(df_dict_v2[model]['input_family'], df_dict_v2[model]['output_family'])
    confusion_matrix_percentage = confusion_matrix.div(confusion_matrix.sum(axis=1), axis=0) * 100

    color_scale = [[0, '#16193C'], 
                   [0.05, '#4c55be'], 
                   [0.1, '#4D79C7'], 
                   [0.2, '#7FB2F0'], 
                   [0.3, '#ABD4F7'], 
                   [0.4, '#E6DE00'], 
                   [0.5, '#F2E826'], 
                   [0.6, '#BDDB39'],
                   [0.7, '#B6E656'],
                   [0.8, '#68BB6C'],
                   [0.9, '#44A248'],
                   [1, '#32671D']]

    fig = px.imshow(confusion_matrix_percentage,
                    range_color=(0,100),
                    color_continuous_scale=color_scale)

    fig.update_layout(
        yaxis_title="Input Family",
        xaxis_title="Output Family",
        font_family="Times New Roman",
        width=700,
        font=dict(size=14),
        height=600,
        title=model,
        template=custom_template,
        #margin=dict(l=130, r=10, t=10, b=80),
        )
    fig.update_layout(coloraxis_colorbar=dict(
        title="(%)",
    ))
    fig.update_yaxes(automargin=True)
    fig.update_xaxes(automargin=True)

    fig.show()