In [2]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import country_converter as coco
import pycountry_convert as pc

# Load the article-level table that every later cell copies and aggregates.
# NOTE(review): unpickling can execute arbitrary code embedded in the file;
# only load a 'data.pkl' that comes from a trusted source.
frame = pd.read_pickle(filepath_or_buffer='data.pkl')
# Preview the first five rows (5 is the default row count of DataFrame.head).
frame.head(n=5)


Unnamed: 0,issn,doi,doi-num,on-crossref,reference,asserted-by-cr,asserted-by-pub,ref-undefined,ref-num,year,type,country,subject
0,1392-1215 2029-5731,10.5755/j01.eie.26.6.25849,1,1,0,0,0,0,0,2020.0,journal-article,LT,STM
1,1392-1215 2029-5731,10.5755/j01.eie.26.6.27484,1,1,0,0,0,0,0,2020.0,journal-article,LT,STM
2,1392-1215 2029-5731,10.5755/j01.eie.26.5.26012,1,1,0,0,0,0,0,2020.0,journal-article,LT,STM
3,1392-1215 2029-5731,10.5755/j01.eie.26.5.26002,1,1,0,0,0,0,0,2020.0,journal-article,LT,STM
4,1392-1215 2029-5731,10.5755/j01.eie.24.4.21482,1,1,0,0,0,0,0,2018.0,journal-article,LT,STM


# 1) Percentage of DOIs registered on Crossref

In [5]:
# Count how many rows of `frame` are flagged as present on Crossref and how
# many are not (assumes 'on-crossref' is a 0/1 flag per row -- TODO confirm).
totdoi = frame.shape[0]
oncross = frame['on-crossref'].sum()
noton = totdoi - oncross

# Two-row helper table consumed by the pie chart: one row per slice.
new = pd.DataFrame({
    'val': [noton, oncross],
    'ref': ['Articles not on Crossref', 'Articles on Crossref'],
})

# Fixed slice colours so the figure matches the palette used in the other plots.
pie_colors = {
    'Articles not on Crossref': '#34B67A',
    'Articles on Crossref': '#471D6C',
}
fig = px.pie(
    new,
    values='val',
    names='ref',
    color='ref',
    color_discrete_map=pie_colors,
    width=800,
    title="DOAJ articles indexed on Crossref",
)

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title=dict(xanchor='center', y=0.9, x=0.45),
)

# Render inline, then export a static copy of the figure.
fig.show()
fig.write_image("images/fig1.png")

In [64]:
# Per-journal (ISSN) percentage of DOIs found on Crossref, shown as a violin plot.
frame3 = frame.copy()

# Coerce each year value to a number; anything unparsable becomes NaN and is
# dropped by the range filter below.
frame3['year'] = frame3['year'].apply(lambda value: pd.to_numeric(value, errors='coerce'))
year_in_range = frame3['year'].ge(1950) & frame3['year'].lt(2022)
frame3 = frame3[year_in_range]

# Sum the counters per journal and derive the percentage on Crossref.
frame3 = frame3.groupby('issn').sum()
frame3['perc_cr'] = frame3['on-crossref'].div(frame3['doi-num']).mul(100)

fig = px.violin(
    frame3,
    y="perc_cr",
    points='all',
    width=800,
    labels={"perc_cr": "Percentage of DOAJ DOIs on Crossref"},
    title="Distribution of DOAJ DOIs registered on Crossref.",
)
fig.update_traces(marker_color='#471D6C')
# Only the 50-100% band is displayed; journals below 50% fall outside the view.
fig.update_yaxes(range=[50, 100])
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title=dict(xanchor='center', y=0.9, x=0.5),
)
fig.show()

In [15]:
# Percentage of articles on Crossref per country, restricted to countries
# strictly between 0% and 80%, coloured by continent.
frame4 = frame.copy().groupby('country').sum()
# Force every aggregated column to numeric; non-numeric results become NaN.
frame4 = frame4.apply(lambda column: pd.to_numeric(column, errors='coerce'))
frame4['perc_cr'] = frame4['on-crossref'].div(frame4['doi-num']).mul(100)
# The index holds the country codes; translate them to display names.
frame4['country-name'] = coco.convert(names=frame4.index, to="name")
frame4 = frame4[frame4['perc_cr'].lt(80) & frame4['perc_cr'].gt(0)]

# Resolve each country code to its continent name (code -> continent code -> name).
continent_name = [
    pc.convert_continent_code_to_continent_name(
        pc.country_alpha2_to_continent_code(alpha2)
    )
    for alpha2 in frame4.index
]
frame4['continent'] = continent_name
frame4 = frame4.sort_values(by=['doi-num'])

continent_colors = {
    'Asia': '#34B67A',
    'Europe': '#471D6C',
    'Africa': '#D4E129',
    'South America': '#25848E',
    'Oceania': '#A6DA35',
    'North America': '#FBE625',
}
axis_labels = {
    "country": "Country",
    "perc_cr": "DOAJ articles on Crossref",
    "continent": "Continent",
    "country-name": "Country",
}
fig = px.bar(
    frame4,
    x="country-name",
    y='perc_cr',
    width=800,
    color="continent",
    color_discrete_map=continent_colors,
    labels=axis_labels,
    title="DOAJ articles indexed on Crossref - by Country <br><sup>The plot is showing Countries with less than 80% of articles on Crossref</sup>",
)

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title=dict(xanchor='center', y=0.9, x=0.5),
)
fig.show()
fig.write_image("images/fig2_boh.png")

In [31]:
# Percentage of articles found on Crossref, aggregated per research field.
frame6 = frame.copy()
frame6 = frame6.groupby('subject').sum()
# Force every aggregated column to numeric; non-numeric results become NaN.
frame6 = frame6.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame6['perc_cr'] = (frame6['on-crossref']/frame6['doi-num'])*100

fig = px.bar(frame6, x=frame6.index, y= 'perc_cr', barmode='stack', width=800, color =frame6.index, color_discrete_sequence= ['#471D6C', '#34B67A'],
 labels={ "subject": "Research field", "perc_cr": "Percentage of articles indexed on Crossref"},
 title="Articles indexed on Crossref - by research field")

# Readable legend entries for the subject codes used as trace names.
# The mapping is defined in this cell (the original assigned it to `lables`
# but read `newnames`, which only worked with state left over from another
# cell). Unknown codes keep their original trace name instead of raising.
subject_labels = {'SSH': "Social Sciences and Humanities (SSH)", 'STM': "Science, Technology and Medicine (STM)"}
fig.for_each_trace(lambda t: t.update(name = subject_labels.get(t.name, t.name),
                                      legendgroup = subject_labels.get(t.name, t.name),
                                      hovertemplate = t.hovertemplate.replace(t.name, subject_labels.get(t.name, t.name))
                                     )
                  )
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title = {'xanchor': 'center', 'y':0.9,'x':0.5})
fig.show()

fig.write_image("images/fig2.png")



# 2) Reference lists available on Crossref

In [35]:
# Share of records (restricted to the listed Crossref work types) that have a
# reference list versus those that do not.
f = frame.copy()
f = f[f['type'].isin(['journal-article', 'book', 'book-chapter', 'proceedings-article', 'dataset', 'posted-content', 'report'])]

# Both slices must come from the same filtered table `f`; the original took
# the "with reference list" count from the unfiltered `frame`, so the two
# slices did not describe the same population.
with_ref = f['reference'].sum()
noton = len(f) - with_ref

new = pd.DataFrame()
new['val'] = [noton, with_ref]
new['ref'] = ['Articles without reference list', 'Articles with reference list']


fig = px.pie(new, values='val', names='ref', color='ref', color_discrete_map={
                                'Articles without reference list':'#34B67A',
                                'Articles with reference list':'#471D6C'}, width=800,
title ="DOAJ articles on Crossref with reference list specified")

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title = {'xanchor': 'center', 'y':0.9,'x':0.45},
)

fig.show()

fig.write_image("images/fig3.png")

In [24]:
# Per-journal (ISSN) percentage of Crossref-registered records that carry a
# reference list, shown as a violin plot.
frame8 = frame.copy()
frame8 = frame8[frame8['type'].isin(['journal-article', 'book', 'book-chapter', 'proceedings-article', 'dataset', 'posted-content', 'report'])]
# Coerce each year value to a number; unparsable values become NaN and are
# dropped by the range filter below.
frame8['year'] = frame8['year'].apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame8 = frame8[(frame8.year >= 1950)&(frame8.year < 2022)]
frame8 = frame8.groupby('issn').sum()
frame8['perc_ref'] = (frame8['reference']/frame8['on-crossref'])*100
# `labels` must be keyed by the plotted column ("perc_ref"); the original key
# "perc_cr" matched nothing, so the axis showed the raw column name. Label and
# title now describe the reference-list percentage that is actually plotted.
fig = px.violin(frame8, y="perc_ref",  points='all', width=800,
labels={ "perc_ref": "Percentage of DOAJ DOIs on Crossref with reference list" },
 title="Distribution of DOAJ articles on Crossref with reference list.")
fig.update_traces(marker_color='#471D6C') 
fig.update_yaxes(range=[0, 100])
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title = {'xanchor': 'center', 'y':0.9,'x':0.5},
)
fig.show()

In [38]:
# Yearly trend of two percentages: records on Crossref, and Crossref records
# that carry a reference list.
# Every column is coerced to numeric first; text columns turn into NaN.
frame9 = frame.copy().apply(lambda column: pd.to_numeric(column, errors='coerce'))
year_in_range = frame9['year'].ge(1950) & frame9['year'].lt(2022)
frame9 = frame9[year_in_range].groupby('year').sum()
frame9['perc_ref'] = frame9['reference'].div(frame9['on-crossref']).mul(100)
frame9['perc_cr'] = frame9['on-crossref'].div(frame9['doi-num']).mul(100)

fig = px.line(
    frame9,
    x=frame9.index,
    y=['perc_ref', 'perc_cr'],
    width=800,
    labels={"year": "Year"},
    color_discrete_sequence=['#471D6C', '#34B67A'],
    title="Comparison between DOAJ articles on Crossref <br> and relative reference list presence over years.",
)

# Swap the raw column names for readable legend / hover labels on each trace.
newnames = {'perc_ref': "Percentage of references", 'perc_cr': "Percentage of DOAJ articles"}
for trace in fig.data:
    readable = newnames[trace.name]
    trace.update(
        name=readable,
        legendgroup=readable,
        hovertemplate=trace.hovertemplate.replace(trace.name, readable),
    )

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title=dict(xanchor='center', y=0.9, x=0.5),
)
fig.show()

fig.write_image("images/fig4.png")

In [39]:
# Per research field: percentage of references with and without a DOI.
frame10 = frame.copy()
frame10 = frame10.groupby('subject').sum()
# Force every aggregated column to numeric; non-numeric results become NaN.
frame10 =frame10.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame10['perc_ref_nodoi'] = (frame10['ref-undefined']/frame10['ref-num'])*100
frame10['perc_ref_doi'] = 100 - frame10['perc_ref_nodoi']
fig = px.histogram(frame10, x=frame10.index, y= ['perc_ref_doi', 'perc_ref_nodoi'], barmode='stack', histfunc='avg', width=800,
color_discrete_sequence= ['#471D6C', '#34B67A'] ,
 labels={ "subject": "Research field", "perc_ref_nodoi": "% reference without DOI", "perc_ref_doi": "% reference with DOI"},
 title="Crossref references list overview - by research field")

# Legend names must describe what the columns hold: ref-undefined / ref-num is
# the share of references without a DOI. The original legend text spoke of
# "articles with/without reference list", which is a different quantity.
newnames = {'perc_ref_doi': "Percentage of references with DOI", 'perc_ref_nodoi': "Percentage of references without DOI"}
fig.for_each_trace(lambda t: t.update(name = newnames[t.name],
                                      legendgroup = newnames[t.name],
                                      hovertemplate = t.hovertemplate.replace(t.name, newnames[t.name])
                                     )
                  )

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title = {'xanchor': 'center', 'y':0.9,'x':0.5},
)
fig.show()

fig.write_image("images/fig5.png")

In [50]:
# Per country: percentage of Crossref records that carry a reference list,
# restricted to countries strictly between 0% and 20%; bars are coloured by
# the total number of references.
frame12 = frame.copy()
frame12 = frame12.groupby('country').sum()
# Force every aggregated column to numeric; non-numeric results become NaN.
frame12 = frame12.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame12['perc_ref'] = (frame12['reference']/frame12['on-crossref'])*100
frame12['country-name'] = coco.convert(names=frame12.index, to="name")
frame12 =  frame12[ (frame12.perc_ref < 20) & (frame12.perc_ref > 0)]
# Resolve each country code (the index) to its continent name.
continent_name = []
for x in frame12.index:
    country_continent_code = pc.country_alpha2_to_continent_code(x)
    country_continent_name = pc.convert_continent_code_to_continent_name(country_continent_code)
    continent_name.append(country_continent_name)
frame12['continent'] = continent_name

# Label typo fixed: "Contintent" -> "Continent".
fig = px.bar(frame12,x='country-name',y='perc_ref', width=800, color="ref-num", color_continuous_scale=[[0,"#440154"],[0.05,"#414084"],[0.2,"#25848E"], [0.5,"#66CA5C"],[1,"#FBE625"]], 
    labels={"country-name": "Country", "perc_ref": "Percentage of references on Crossref", "continent":"Continent", "ref-num": "Number of references" },
    title="Crossref references list overview - by country <br><sup>The plot is showing Countries with less than 20% of reference list on Crossref</sup>")

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title = {'xanchor': 'center', 'y':0.9,'x':0.5},
)
fig.show()

fig.write_image("images/fig6.png")

# 3) References that have a DOI specified

In [41]:
# Overall split of references into those with a DOI and those without,
# computed on records of the listed Crossref work types only.
work_types = ['journal-article', 'book', 'book-chapter', 'proceedings-article', 'dataset', 'posted-content', 'report']
f = frame.copy()
f = f[f['type'].isin(work_types)]

# References with a DOI = all references minus the undefined ones, summed over rows.
ref_defined = f['ref-num'].sub(f['ref-undefined']).sum()
ref_undefined = f['ref-undefined'].sum()

# Two-row helper table consumed by the pie chart: one row per slice.
new = pd.DataFrame({
    'val': [ref_defined, ref_undefined],
    'ref': ['Reference with DOI', 'Reference without DOI'],
})

slice_colors = {
    'Reference with DOI': '#34B67A',
    'Reference without DOI': '#471D6C',
}
fig = px.pie(
    new,
    values='val',
    names='ref',
    color='ref',
    color_discrete_map=slice_colors,
    width=800,
    title="References DOI overview",
)

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title=dict(xanchor='center', y=0.9, x=0.5),
)

fig.show()

In [57]:
# Yearly trend of the percentage of references with and without a DOI,
# computed on records of the listed Crossref work types only.
work_types = ['journal-article', 'book', 'book-chapter', 'proceedings-article', 'dataset', 'posted-content', 'report']
frame14 = frame.copy()
frame14 = frame14[frame14['type'].isin(work_types)]
# Coerce every column to numeric; text columns turn into NaN.
frame14 = frame14.apply(lambda column: pd.to_numeric(column, errors='coerce'))
year_in_range = frame14['year'].ge(1950) & frame14['year'].lt(2022)
frame14 = frame14[year_in_range].groupby('year').sum()
frame14['perc_ref_nodoi'] = frame14['ref-undefined'].div(frame14['ref-num']).mul(100)
frame14['perc_ref_doi'] = 100 - frame14['perc_ref_nodoi']

line_colors = {
    'perc_ref_doi': '#34B67A',
    'perc_ref_nodoi': '#471D6C',
}
fig = px.line(
    frame14,
    x=frame14.index,
    y=['perc_ref_nodoi', 'perc_ref_doi'],
    color_discrete_map=line_colors,
    width=800,
    labels={'year': 'Year'},
    title="References DOI overview over years.",
)

# Swap the raw column names for readable legend / hover labels on each trace.
newnames = {'perc_ref_doi': "Percentage of reference lists with DOI", 'perc_ref_nodoi': "Percentage of reference lists without DOI"}
for trace in fig.data:
    readable = newnames[trace.name]
    trace.update(
        name=readable,
        legendgroup=readable,
        hovertemplate=trace.hovertemplate.replace(trace.name, readable),
    )

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title=dict(xanchor='center', y=0.9, x=0.45),
)
fig.show()
fig.write_image("images/fig8.png")

In [58]:
# Per continent: percentage of references with and without a DOI, computed on
# records of the listed Crossref work types only.
frame15  = frame.copy()
frame15 = frame15[frame15['type'].isin(['journal-article', 'book', 'book-chapter', 'proceedings-article', 'dataset', 'posted-content', 'report'])]
frame15 = frame15.groupby('country').sum()

frame15['country-name'] = coco.convert(names=frame15.index, to='name')
# Resolve each country code (the index) to its continent name.
continent_name = []
for x in frame15.index:
    country_continent_code = pc.country_alpha2_to_continent_code(x)
    country_continent_name = pc.convert_continent_code_to_continent_name(country_continent_code)
    continent_name.append(country_continent_name)
frame15['continent'] = continent_name
# Second aggregation level: country totals -> continent totals.
frame15 = frame15.groupby('continent').sum()
# Force every aggregated column to numeric; non-numeric results become NaN.
frame15 =frame15.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame15['perc_ref_nodoi'] = (frame15['ref-undefined']/frame15['ref-num'])*100
frame15['perc_ref_doi'] = 100 - frame15['perc_ref_nodoi']

# Colours follow the other with/without-DOI charts in this notebook
# (with DOI = green, without DOI = purple); the original had them swapped.
# Title typo fixed: "Refeerences" -> "References".
fig = px.bar(frame15, x =frame15.index,  y=['perc_ref_nodoi', 'perc_ref_doi'], color_discrete_map={
    'perc_ref_doi':'#34B67A',
    'perc_ref_nodoi':'#471D6C'}, width=800,
    labels={ 'continent' : "Continent"},
    title="References DOI overview - by continent")

# Swap the raw column names for readable legend / hover labels on each trace.
newnames = {'perc_ref_doi': "Percentage of reference lists with DOI", 'perc_ref_nodoi': "Percentage of reference lists without DOI"}
fig.for_each_trace(lambda t: t.update(name = newnames[t.name],
                                      legendgroup = newnames[t.name],
                                      hovertemplate = t.hovertemplate.replace(t.name, newnames[t.name])
                                     )
                  )
    
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title = {'xanchor': 'center', 'y':0.9,'x':0.5},
)
fig.show()
fig.write_image("images/fig9.png")

# 4) Reference DOIs asserted by Crossref / asserted by the publisher

In [74]:
# Overall split of references by who asserted their DOI (Crossref or the
# citing publisher) plus references with no DOI, computed on records of the
# listed Crossref work types only.
f = frame.copy()
f = f[f['type'].isin(['journal-article', 'book', 'book-chapter', 'proceedings-article', 'dataset', 'posted-content', 'report'])]

ass_cross = f['asserted-by-cr'].sum()
ass_pub = f['asserted-by-pub'].sum()
und = f['ref-undefined'].sum()
# (A bare `ass_cross, ass_pub` expression used to sit here; mid-cell it
# displayed nothing and has been removed.)

new = pd.DataFrame()
new['val'] = [ass_cross, ass_pub, und]
new['ref'] = ['Asserted by Crossref', 'Asserted by citing publisher', 'Missing/undefined doi']
# The title previously repeated the section-1 caption ("Percentage of DOIs
# registered on Crossref"), which does not describe this chart.
fig = px.pie(new, values='val', names='ref', color='ref', color_discrete_map={
                                'Asserted by Crossref':'#34B67A',
                                'Asserted by citing publisher':'#471D6C',
                                'Missing/undefined doi':'#D4E129'}, width=800,
title ="Reference DOIs assertion overview")
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title = {'xanchor': 'center', 'y':0.9,'x':0.5},
)

fig.show()
fig.write_image("images/fig10.png")

In [61]:
# Per continent: percentage of references whose DOI was asserted by Crossref,
# asserted by the publisher, or missing. Only the listed work types are used.
work_types = ['journal-article', 'book', 'book-chapter', 'proceedings-article', 'dataset', 'posted-content', 'report']
frame17 = frame.copy()
frame17 = frame17[frame17['type'].isin(work_types)].groupby('country').sum()
frame17['country-name'] = coco.convert(names=frame17.index, to='name')

# Resolve each country code (the index) to its continent name.
continent_name = [
    pc.convert_continent_code_to_continent_name(
        pc.country_alpha2_to_continent_code(alpha2)
    )
    for alpha2 in frame17.index
]
frame17['continent'] = continent_name

# Second aggregation level: country totals -> continent totals, then force
# every column to numeric (non-numeric results become NaN).
frame17 = frame17.groupby('continent').sum()
frame17 = frame17.apply(lambda column: pd.to_numeric(column, errors='coerce'))

# Each percentage is the matching counter divided by the total reference count.
for counter, target in (
    ('asserted-by-cr', 'perc_asserted_cr'),
    ('asserted-by-pub', 'perc_asserted_pub'),
    ('ref-undefined', 'perc_ref_nodoi'),
):
    frame17[target] = frame17[counter].div(frame17['ref-num']).mul(100)

bar_colors = {
    'perc_asserted_cr': '#34B67A',
    'perc_ref_nodoi': '#471D6C',
    'perc_asserted_pub': '#D4E129',
}
fig = px.bar(
    frame17,
    x=frame17.index,
    y=['perc_asserted_cr', 'perc_asserted_pub', 'perc_ref_nodoi'],
    color_discrete_map=bar_colors,
    width=800,
    labels={'continent': "Continent"},
    title="Reference assertion overview - by continent.",
)

# Swap the raw column names for readable legend / hover labels on each trace.
newnames = {'perc_asserted_pub': 'Percentage of references DOI asserted by publishers', 'perc_asserted_cr': "Percentage of references DOI asserted by Crossref", 'perc_ref_nodoi': "Percentage of reference without DOI"}
for trace in fig.data:
    readable = newnames[trace.name]
    trace.update(
        name=readable,
        legendgroup=readable,
        hovertemplate=trace.hovertemplate.replace(trace.name, readable),
    )

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title=dict(xanchor='center', y=0.9, x=0.5),
)
fig.show()

fig.write_image("images/fig11.png")

In [62]:
# Per research field: percentage of references whose DOI was asserted by
# Crossref, asserted by the publisher, or missing. Only the listed work types
# are used.
frame18 = frame.copy()
frame18 = frame18[frame18['type'].isin(['journal-article', 'book', 'book-chapter', 'proceedings-article', 'dataset', 'posted-content', 'report'])]
frame18 = frame18.groupby('subject').sum()
# Force every aggregated column to numeric; non-numeric results become NaN.
frame18 =frame18.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame18['perc_asserted_cr'] = (frame18['asserted-by-cr']/frame18['ref-num'])*100
frame18['perc_asserted_pub'] = (frame18['asserted-by-pub']/frame18['ref-num'])*100
frame18['perc_ref_nodoi'] = (frame18['ref-undefined']/frame18['ref-num'])*100
# Title typo fixed: "overviwe" -> "overview".
fig = px.bar(frame18, x=frame18.index, y= ['perc_asserted_cr','perc_asserted_pub','perc_ref_nodoi'],color_discrete_map={
                                'perc_asserted_cr':'#34B67A',
                                'perc_ref_nodoi':'#471D6C',
                                'perc_asserted_pub':'#D4E129'}, width=800,
labels={ "subject": "Research field"},
title = "Reference assertion overview - by research field.")

# Swap the raw column names for readable legend / hover labels on each trace.
newnames = {'perc_asserted_pub': 'Percentage of references DOI asserted by publishers', 'perc_asserted_cr': "Percentage of references DOI asserted by Crossref", 'perc_ref_nodoi': "Percentage of reference without DOI"}
fig.for_each_trace(lambda t: t.update(name = newnames[t.name],
                                      legendgroup = newnames[t.name],
                                      hovertemplate = t.hovertemplate.replace(t.name, newnames[t.name])
                                     )
                  )
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title = {'xanchor': 'center', 'y':0.9,'x':0.5},
)
fig.show()
fig.write_image("images/fig12.png")

In [63]:
# Per year: percentage of references whose DOI was asserted by Crossref,
# asserted by the publisher, or missing. Only the listed work types are used.
frame20 = frame.copy()
frame20 = frame20[frame20['type'].isin(['journal-article', 'book', 'book-chapter', 'proceedings-article', 'dataset', 'posted-content', 'report'])]
# Coerce every column to numeric; text columns turn into NaN.
frame20 = frame20.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame20 = frame20[(frame20.year >= 1950)&(frame20.year < 2022)]
frame20 = frame20.groupby('year').sum()
frame20['perc_asserted_cr'] = (frame20['asserted-by-cr']/frame20['ref-num'])*100
frame20['perc_asserted_pub'] = (frame20['asserted-by-pub']/frame20['ref-num'])*100
frame20['perc_ref_nodoi'] = (frame20['ref-undefined']/frame20['ref-num'])*100
# Title typo fixed: "overviwe" -> "overview".
fig = px.histogram(frame20, x=frame20.index, y= ['perc_asserted_cr','perc_asserted_pub','perc_ref_nodoi'], histfunc='avg', color_discrete_map={
                                'perc_asserted_cr':'#34B67A',
                                'perc_ref_nodoi':'#471D6C',
                                'perc_asserted_pub':'#D4E129'}, width=800,
labels={ "year": "Year"},
title = "Reference assertion overview - by year.")

# Swap the raw column names for readable legend / hover labels on each trace.
newnames = {'perc_asserted_pub': 'Percentage of references DOI asserted by publishers', 'perc_asserted_cr': "Percentage of references DOI asserted by Crossref", 'perc_ref_nodoi': "Percentage of reference without DOI"}
fig.for_each_trace(lambda t: t.update(name = newnames[t.name],
                                      legendgroup = newnames[t.name],
                                      hovertemplate = t.hovertemplate.replace(t.name, newnames[t.name])
                                     )
                  )

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title = {'xanchor': 'center', 'y':0.9,'x':0.5},
)
fig.show()

fig.write_image("images/fig13.png")

# Combined viz

In [55]:
# Side-by-side violins per journal (ISSN): percentage of DOIs on Crossref and
# percentage of Crossref records that carry a reference list.
frameA = frame.copy()
# Coerce each year value to a number; unparsable values become NaN and are
# dropped by the range filter below.
frameA['year'] = frameA['year'].apply(lambda value: pd.to_numeric(value, errors='coerce'))
year_in_range = frameA['year'].ge(1950) & frameA['year'].lt(2022)
frameA = frameA[year_in_range].groupby('issn').sum()
frameA['perc_ref'] = frameA['reference'].div(frameA['on-crossref']).mul(100)
frameA['perc_cr'] = frameA['on-crossref'].div(frameA['doi-num']).mul(100)

fig = go.Figure()
# One violin per metric; trace order determines left-to-right placement.
fig.add_trace(
    go.Violin(
        y=frameA['perc_cr'],
        points="all",
        box_visible=True,
        name="DOAJ articles indexed on Crossref",
        marker_color='#471D6C',
    )
)
fig.add_trace(
    go.Violin(
        y=frameA['perc_ref'],
        points="all",
        box_visible=True,
        name="Crossref reference list",
        marker_color='#34B67A',
    )
)

fig.update_yaxes(range=[0, 100])
fig.update_layout(
    width=800,
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title=dict(
        text='Distribution of DOIs registered on Crossref by journal <br>compared to distribution of references',
        xanchor='center',
        y=0.9,
        x=0.5,
    ),
)
fig.show()

fig.write_image("images/fig7.png")

In [73]:
# Sunburst of references: top level splits references into "no DOI" and
# "with DOI"; the "with DOI" sector is subdivided by who asserted the DOI.
# Only records of the listed Crossref work types are used.
f = frame.copy()
f = f[f['type'].isin(['journal-article', 'book', 'book-chapter', 'proceedings-article', 'dataset', 'posted-content', 'report'])]

new = pd.DataFrame()

ass_cross = f['asserted-by-cr'].sum()
ass_pub = f['asserted-by-pub'].sum()
und = f['ref-undefined'].sum()
# References with a DOI = all references minus the undefined ones.
ref_defined = f['ref-num']-f['ref-undefined']
ref_defined = ref_defined.sum()

new['val'] = [ und, ref_defined, ass_cross, ass_pub ]
new['ref'] = ['Missing/undefined doi', 'Articles with DOI reference list','Asserted by Crossref', 'Asserted by citing publisher']
# An empty parent marks a top-level sector; the two assertion rows hang under
# the "with DOI" sector.
new['parent'] = ['', '','Articles with DOI reference list', 'Articles with DOI reference list']

# Every sector gets an explicit colour: the original map omitted
# 'Asserted by citing publisher', which therefore fell back to a plotly
# default colour outside the notebook palette. '#25848E' is taken from the
# palette already used by the country charts in this notebook.
# Title typo fixed: "assertation" -> "assertion".
fig = px.sunburst(
    new,
    names='ref',
    parents='parent',
    values='val',
    branchvalues= "total",
    title="Percentage of reference lists registered on Crossref and DOIs assertion",
    color='ref', 
    color_discrete_map = {'Missing/undefined doi':'#34B67A','Articles with DOI reference list':'#471D6C', 'Asserted by Crossref':'#D4E129', 'Asserted by citing publisher':'#25848E'} )

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title = {'xanchor': 'center', 'y':0.9,'x':0.5},
)


fig.show()