'''
MIT License (MIT)
Copyright © 2022 Silvio Peroni, Alessandro Bertozzi, Davide Brembilla, Chiara Catizone, Constance Dami, Umut Kuçuk, Chiara Manca, Giulia Venditti

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
'''

In [2]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import country_converter as coco
import pycountry_convert as pc

# Load the table that every chart below is derived from.
# NOTE(review): read_pickle unpickles arbitrary Python objects, so 'data.pkl'
# must come from a trusted source.
frame = pd.read_pickle('data.pkl')
# Preview the first rows to check the available columns.
frame.head()

Unnamed: 0,doi,issn,doi-num,on-crossref,reference,asserted-by-cr,asserted-by-pub,ref-undefined,ref-num,year,type
0,10.1051/ijsmdo:2008025,1779-627X 1779-6288,1,1,1,6,0,6,12,2008,journal-article
1,10.1051/smdo/2019012,1779-627X 1779-6288,1,1,1,22,0,7,29,2019,journal-article
2,10.1051/smdo/2020004,1779-627X 1779-6288,1,1,1,35,0,10,45,2020,journal-article
3,10.1051/smdo/2020001,1779-627X 1779-6288,1,1,1,26,0,9,35,2020,journal-article
4,10.1051/smdo/2016003,1779-627X 1779-6288,1,1,1,41,0,11,52,2016,journal-article


# 1) Percentage of DOIs registered on Crossref

In [3]:
# Split the rows into those flagged as present on Crossref and the remainder.
totdoi = len(frame)
oncross = frame['on-crossref'].sum()
noton = totdoi - oncross

# Two-row table feeding the pie chart: one slice per outcome.
new = pd.DataFrame({
    'val': [noton, oncross],
    'ref': ['Percentage articles not on cross', 'Percentage articles on cross'],
})

fig = px.pie(
    new,
    values='val',
    names='ref',
    color='ref',
    color_discrete_map={
        'Percentage articles not on cross': '#34B67A',
        'Percentage articles on cross': '#471D6C',
    },
    width=800,
    title="Percentage of DOIs registered on Crossref",
)

# Centred title and small sans-serif text.
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)

fig.show()

In [9]:
# Sum the counters per ISSN, then coerce every column to numeric
# (values that cannot be parsed become NaN).
df = frame.copy().groupby('issn').sum().apply(pd.to_numeric, errors='coerce')

# Keep only the ISSN groups with more than 10 DOIs.
df = df[df['doi-num'] > 10]

# Percentage of each group's DOIs that are on Crossref.
# Note (translated from the original author): for the reference statistics it
# makes sense to remove the undefined references from the total, otherwise the
# percentage is not visible.
df['perc_cr'] = 100 * (df['on-crossref'] / df['doi-num'])
print(df.describe())

             doi-num    on-crossref      reference  asserted-by-cr  \
count   12424.000000   12424.000000   12424.000000    1.242400e+04   
mean      469.028010     438.097231     302.791452    3.381850e+03   
std      2914.665291    2911.085581    2736.179607    8.740770e+04   
min        11.000000       0.000000       0.000000    0.000000e+00   
25%        67.000000      50.000000       0.000000    0.000000e+00   
50%       149.000000     123.000000       9.000000    0.000000e+00   
75%       344.000000     306.000000     136.000000    6.750000e+02   
max    265300.000000  265201.000000  247027.000000    9.548610e+06   

       asserted-by-pub  ref-undefined       ref-num       perc_cr  
count     1.242400e+04   1.242400e+04  1.242400e+04  12424.000000  
mean      6.985912e+03   2.732133e+03  1.309990e+04     87.594361  
std       6.599622e+04   2.498471e+04  1.354657e+05     28.612158  
min       0.000000e+00   0.000000e+00  0.000000e+00      0.000000  
25%       0.000000e+00   0.00

In [11]:
# Narrow the table to the rows whose Crossref percentage is below 50.
below_half = df['perc_cr'] < 50
df = df.loc[below_half]
print(df.describe())

            doi-num  on-crossref    reference  asserted-by-cr  \
count   1111.000000  1111.000000  1111.000000     1111.000000   
mean     205.053105     4.293429     0.311431        2.124212   
std      441.914520    10.600966     2.690364       25.907751   
min       11.000000     0.000000     0.000000        0.000000   
25%       52.000000     0.000000     0.000000        0.000000   
50%      111.000000     0.000000     0.000000        0.000000   
75%      214.500000     0.000000     0.000000        0.000000   
max    11449.000000    49.000000    44.000000      585.000000   

       asserted-by-pub  ref-undefined      ref-num      perc_cr  
count      1111.000000    1111.000000  1111.000000  1111.000000  
mean          0.606661       5.915392     8.646265     4.201828  
std          15.954302      63.040356    81.206299    10.538437  
min           0.000000       0.000000     0.000000     0.000000  
25%           0.000000       0.000000     0.000000     0.000000  
50%           0.00

In [12]:
# Coerce every column to numeric so that 'year' can be compared as a number.
frame2 = frame.copy().apply(pd.to_numeric, errors='coerce')

# Keep 1950 <= year < 2022, then total the counters for each year.
from_1950 = frame2['year'] >= 1950
before_2022 = frame2['year'] < 2022
frame2 = frame2[from_1950 & before_2022].groupby('year').sum()
frame2['perc_cr'] = 100 * (frame2['on-crossref'] / frame2['doi-num'])

# Box plot of the yearly percentages.
fig = px.box(
    frame2,
    y="perc_cr",
    width=800,
    labels={"perc_cr": "% DOIs on Crossref"},
    title="Distribution of DOIs registered on Crossref.",
)
fig.update_traces(marker_color='#471D6C')
fig.update_yaxes(range=[50, 100])
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [14]:
# We do not use this chart: only the copy and the preview below are executed,
# the bar chart by research field is kept commented out.
frame1 = frame.copy()
frame1.head()
# frame1 = frame1.groupby('subject').sum()
# frame1 =frame1.apply(lambda x: pd.to_numeric(x, errors='coerce'))
# frame1['perc_cr'] = (frame1['on-crossref']/frame1['doi-num'])*100
# fig = px.bar(frame1,x=frame1.index,y='perc_cr', log_y=True, color="doi-num",
#     color_continuous_scale=[[0,"#440154"],[0.05,"#414084"],[0.2,"#25848E"], [0.5,"#66CA5C"],[1,"#FBE625"]], width=800,
#  labels={ "subject": "Research field", "perc_cr": "% DOIs on Crossref", "doi-num": "Number of DOIs" },
#  title="Percentage of DOIs registered on Crossref - by research field.")

# for idx in range(len(fig.data)):
#     fig.data[idx].x = ['General works','Philosophy, Psychology, Religion','Auxiliary science of History', 'World History', 'History of the Americs', 'Local History of the Americas', 'Geography, Anthropology, Recreation', 'Social Sciences', 'Political Science', 'Law', 'Education', 'Music', 'Fine Arts', 'Language and Literature', 'Science', 'Medicine', 'Agriculture', 'Technology', 'Military Science', 'Naval Science', 'Bibliography, Library Science']

# fig.update_layout(
#     font_family="sans-serif",
#     title_font_size=20,
#     font_size=10,
#     title = {'xanchor': 'center', 'y':0.9,'x':0.5},
# )
# fig.show()
# # fig.write_html('perc_cr_subj.html')

Unnamed: 0,doi,issn,doi-num,on-crossref,reference,asserted-by-cr,asserted-by-pub,ref-undefined,ref-num,year,type
0,10.1051/ijsmdo:2008025,1779-627X 1779-6288,1,1,1,6,0,6,12,2008,journal-article
1,10.1051/smdo/2019012,1779-627X 1779-6288,1,1,1,22,0,7,29,2019,journal-article
2,10.1051/smdo/2020004,1779-627X 1779-6288,1,1,1,35,0,10,45,2020,journal-article
3,10.1051/smdo/2020001,1779-627X 1779-6288,1,1,1,26,0,9,35,2020,journal-article
4,10.1051/smdo/2016003,1779-627X 1779-6288,1,1,1,41,0,11,52,2016,journal-article


In [77]:
# Bubble chart: percentage of DOIs on Crossref for each research field.
# Bubble size is the number of references, colour is the number of DOIs.
frame11a = frame.copy()
frame11a = frame11a.groupby('subject').sum()
# Coerce every aggregated column to numeric; unparsable values become NaN.
frame11a = frame11a.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame11a['perc_cr'] = (frame11a['on-crossref'] / frame11a['doi-num']) * 100
fig = px.scatter(
    frame11a, x='perc_cr', y=frame11a.index, size='ref-num', width=800, color="doi-num",
    color_continuous_scale=[[0, "#440154"], [0.05, "#414084"], [0.2, "#25848E"], [0.5, "#66CA5C"], [1, "#FBE625"]],
    size_max=60,
    labels={"subject": "Research field", "perc_cr": "% DOIs on Crossref", "doi-num": "Number of DOIs"},
    title="Percentage of DOIs registered on Crossref - by research field.")

# Human-readable names that replace the raw 'subject' index values on the axis.
# NOTE(review): this assumes the grouped index holds exactly these 21 subjects
# in this order - TODO confirm against the data.
subject_labels = [
    'General works', 'Philosophy, Psychology, Religion', 'Auxiliary science of History',
    'World History', 'History of the Americas', 'Local History of the Americas',
    'Geography, Anthropology, Recreation', 'Social Sciences', 'Political Science', 'Law',
    'Education', 'Music', 'Fine Arts', 'Language and Literature', 'Science', 'Medicine',
    'Agriculture', 'Technology', 'Military Science', 'Naval Science',
    'Bibliography, Library Science',
]
for trace in fig.data:
    trace.y = subject_labels
fig.update_xaxes(range=[50, 110])
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [78]:
# Scatter chart: percentage of DOIs on Crossref for each country, coloured by continent.
frame3 = frame.copy()
frame3 = frame3.groupby('country').sum()
# Coerce every aggregated column to numeric; unparsable values become NaN.
frame3 = frame3.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame3['perc_cr'] = (frame3['on-crossref'] / frame3['doi-num']) * 100
frame3['country-name'] = coco.convert(names=frame3.index, to="name")
# Resolve each index value (passed to pycountry_convert as an alpha-2 code)
# to its continent name.
continent_name = [
    pc.convert_continent_code_to_continent_name(pc.country_alpha2_to_continent_code(x))
    for x in frame3.index
]
frame3['continent'] = continent_name

# The label key must be the plotted column ("country-name"); the previous
# "country" key matched no column, so the axis title was never replaced.
fig = px.scatter(
    frame3, x="perc_cr", y="country-name", width=800, color="continent",
    color_discrete_map={
        'Asia': '#34B67A',
        'Europe': '#471D6C',
        'Africa': '#D4E129',
        'South America': '#25848E',
        'Oceania': '#A6DA35',
        'North America': '#FBE625',
    },
    labels={"country-name": "Country", "perc_cr": "% DOIs on Crossref", "continent": "Continent"},
    title="Percentage of DOIs registered on Crossref - by country.")

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [79]:
# Bar chart: countries whose percentage of DOIs on Crossref is strictly
# between 0 and 80, coloured by continent and ordered by number of DOIs.
frame4 = frame.copy()
frame4 = frame4.groupby('country').sum()
# Coerce every aggregated column to numeric; unparsable values become NaN.
frame4 = frame4.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame4['perc_cr'] = (frame4['on-crossref'] / frame4['doi-num']) * 100
frame4['country-name'] = coco.convert(names=frame4.index, to="name")
frame4 = frame4[(frame4.perc_cr < 80) & (frame4.perc_cr > 0)]
# Resolve each remaining index value (passed to pycountry_convert as an
# alpha-2 code) to its continent name.
continent_name = [
    pc.convert_continent_code_to_continent_name(pc.country_alpha2_to_continent_code(x))
    for x in frame4.index
]
frame4['continent'] = continent_name
frame4 = frame4.sort_values(by=['doi-num'])

# The label key must be the plotted column ("country-name"); the previous
# "country" key matched no column, so the axis title was never replaced.
fig = px.bar(
    frame4, x="country-name", y='perc_cr', width=800, color="continent",
    color_discrete_map={
        'Asia': '#34B67A',
        'Europe': '#471D6C',
        'Africa': '#D4E129',
        'South America': '#25848E',
        'Oceania': '#A6DA35',
        'North America': '#FBE625',
    },
    labels={"country-name": "Country", "perc_cr": "% DOIs on Crossref", "continent": "Continent"},
    title="Percentage of DOIs registered on Crossref - by country.")

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [80]:
# World map: one marker per country, sized by the percentage of DOIs on
# Crossref and coloured by continent.
frame5 = frame.copy().groupby('country').sum().apply(pd.to_numeric, errors='coerce')
frame5['perc_cr'] = 100 * (frame5['on-crossref'] / frame5['doi-num'])
frame5['iso_alpha'] = coco.convert(names=frame5.index, to='ISO3')

# Resolve each index value to a continent name via its continent code.
continent_name = [
    pc.convert_continent_code_to_continent_name(pc.country_alpha2_to_continent_code(x))
    for x in frame5.index
]
frame5['continent'] = continent_name

continent_colors = {
    'Asia': '#34B67A',
    'Europe': '#471D6C',
    'Africa': '#D4E129',
    'South America': '#25848E',
    'Oceania': '#A6DA35',
    'North America': '#FBE625',
}
fig = px.scatter_geo(
    frame5,
    locations="iso_alpha",
    width=800,
    color="continent",
    color_discrete_map=continent_colors,
    hover_name="doi-num",
    size="perc_cr",
    size_max=10,
    projection="natural earth",
    labels={
        "iso_alpha": "Country code",
        "perc_cr": "% DOIs on Crossref",
        "doi-num": "Number of dois",
        "continent": "Continent",
    },
    title="Percentage of DOIs registered on Crossref - by country.",
)

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)

fig.show()


In [15]:
# Scatter chart: yearly percentage of DOIs on Crossref for 1950 <= year < 2022.
frame6 = frame.copy().apply(pd.to_numeric, errors='coerce')
in_window = (frame6['year'] >= 1950) & (frame6['year'] < 2022)
frame6 = frame6[in_window].groupby('year').sum()
frame6['perc_cr'] = 100 * (frame6['on-crossref'] / frame6['doi-num'])

fig = px.scatter(
    frame6,
    x=frame6.index,
    y='perc_cr',
    width=800,
    labels={"year": "Year", "perc_cr": "% DOIs on Crossref"},
    title="Percentage of DOIs registered on Crossref - by year",
)
fig.update_yaxes(range=[50, 100])
fig.update_traces(marker_color='#471D6C')
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [16]:
# Line chart: yearly percentage of DOIs on Crossref for 1950 <= year < 2022.
frame7 = frame.copy().apply(pd.to_numeric, errors='coerce')
in_window = (frame7['year'] >= 1950) & (frame7['year'] < 2022)
frame7 = frame7[in_window].groupby('year').sum()
frame7['perc_cr'] = 100 * (frame7['on-crossref'] / frame7['doi-num'])

fig = px.line(
    frame7,
    x=frame7.index,
    y='perc_cr',
    width=800,
    labels={"year": "Year", "perc_cr": "% DOIs on Crossref"},
    title="Percentage of DOIs registered on Crossref - by year",
)
fig.update_yaxes(range=[50, 100])
fig.update_traces(line_color='#471D6C', line_width=2)
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [83]:
# Bubble chart: yearly percentage of DOIs on Crossref for 2000 <= year < 2022,
# with each bubble sized by that year's number of DOIs.
frame8 = frame.copy().apply(pd.to_numeric, errors='coerce')
in_window = (frame8['year'] >= 2000) & (frame8['year'] < 2022)
frame8 = frame8[in_window].groupby('year').sum()
frame8['perc_cr'] = 100 * (frame8['on-crossref'] / frame8['doi-num'])

fig = px.scatter(
    frame8,
    x=frame8.index,
    y='perc_cr',
    size='doi-num',
    size_max=60,
    width=800,
    labels={"year": "Year", "perc_cr": "% DOIs on Crossref", "doi-num": "Number of DOIs"},
    title="Percentage of DOIs registered on Crossref - by year",
)
fig.update_yaxes(range=[50, 110])
fig.update_traces(marker_color='#471D6C')
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

Dal grafico possiamo dedurre che gli articoli pubblicati dopo il 2000, anno di fondazione di Crossref, hanno più probabilità di essere presenti sulla piattaforma. Interessante notare il calo avuto tra il 2010 e il 2015.

# 2) References on Crossref DOIs

In [31]:
# Restrict the table to the publication types listed below.
kept_types = [
    'journal-article', 'book', 'book-chapter', 'proceedings-article',
    'dataset', 'posted-content', 'report',
]
f = frame.copy()
f = f.loc[f['type'].isin(kept_types)]

# Sum of the 'reference' column versus the remaining rows.
with_refs = f['reference'].sum()
noton = len(f) - with_refs
new = pd.DataFrame({
    'val': [noton, with_refs],
    'ref': ['Articles without references DOI', 'Articles with references DOI'],
})

fig = px.pie(
    new,
    values='val',
    names='ref',
    color='ref',
    color_discrete_map={
        'Articles without references DOI': '#34B67A',
        'Articles with references DOI': '#471D6C',
    },
    width=800,
    title="Percentage of references for article.",
)

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)

fig.show()

In [58]:
# Violin plot of the per-ISSN percentage of DOIs on Crossref, limited to
# ISSN groups with more than 10 DOIs.
frame2 = frame.copy().groupby('issn').sum()
frame2 = frame2.loc[frame2['doi-num'] > 10]
frame2 = frame2.apply(pd.to_numeric, errors='coerce')
frame2['perc_cr'] = 100 * (frame2['on-crossref'] / frame2['doi-num'])

fig = px.violin(
    frame2,
    y="perc_cr",
    width=800,
    labels={"perc_cr": "% DOIs on Crossref"},
    title="Distribution of DOIs registered on Crossref.",
)
fig.update_traces(marker_color='#471D6C')
fig.update_yaxes(range=[50, 100])
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [62]:
# Side-by-side violins: 'perc_cr' from the existing frame2 table against the
# yearly 'perc_ref' computed here from the type-filtered table f.
# Both frame2 and f are expected to have been built by earlier cells.
frame2a = f.copy()
# Coerce every column to numeric so that 'year' can be compared as a number.
frame2a = frame2a.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame2a = frame2a[(frame2a.year >= 1950) & (frame2a.year < 2022)]
frame2a = frame2a.groupby('year').sum()
frame2a['perc_ref'] = (frame2a['reference'] / frame2a['on-crossref']) * 100

fig = go.Figure()
# Name the traces so the legend does not fall back to "trace 0" / "trace 1".
fig.add_trace(go.Violin(y=frame2['perc_cr'], name="% DOIs on Crossref"))
fig.add_trace(go.Violin(y=frame2a['perc_ref'], name="% of DOIs' references on Crossref"))

fig.update_traces(marker_color='#471D6C')
fig.update_yaxes(range=[0, 100])
fig.update_layout(
    width=800,
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    # The previous title ended with a dangling "on ."; completed and re-cased.
    title={'text': 'Distribution of DOIs registered on Crossref by journal compared to distribution of references on Crossref.', 'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [86]:
# Bar chart: 'reference' total as a percentage of the 'on-crossref' total for
# each research field, coloured by the number of references.
# groupby() returns a new frame, so no defensive copy of `frame` is needed.
frame9 = frame.groupby('subject').sum()
# Coerce every aggregated column to numeric; unparsable values become NaN.
frame9 = frame9.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame9['perc_ref'] = (frame9['reference'] / frame9['on-crossref']) * 100
fig = px.bar(
    frame9, x=frame9.index, y='perc_ref', width=800,
    color="ref-num",
    color_continuous_scale=[[0, "#440154"], [0.05, "#414084"], [0.2, "#25848E"], [0.5, "#66CA5C"], [1, "#FBE625"]],
    labels={"subject": "Research field", "perc_ref": "% of references on Crossref", "ref-num": "Number of references"},
    title="Percentage of references registered on Crossref - by research field.")

# Human-readable names that replace the raw 'subject' index values on the axis.
# NOTE(review): this assumes the grouped index holds exactly these 21 subjects
# in this order - TODO confirm against the data.
subject_labels = [
    'General works', 'Philosophy, Psychology, Religion', 'Auxiliary science of History',
    'World History', 'History of the Americas', 'Local History of the Americas',
    'Geography, Anthropology, Recreation', 'Social Sciences', 'Political Science', 'Law',
    'Education', 'Music', 'Fine Arts', 'Language and Literature', 'Science', 'Medicine',
    'Agriculture', 'Technology', 'Military Science', 'Naval Science',
    'Bibliography, Library Science',
]
for trace in fig.data:
    trace.x = subject_labels

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [87]:
# Scatter chart: 'reference' total as a percentage of the 'on-crossref' total
# for each research field.
# groupby() returns a new frame, so no defensive copy of `frame` is needed.
frame10 = frame.groupby('subject').sum()
# Coerce every aggregated column to numeric; unparsable values become NaN.
frame10 = frame10.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame10['perc_ref'] = (frame10['reference'] / frame10['on-crossref']) * 100
fig = px.scatter(
    frame10, x='perc_ref', y=frame10.index, width=800,
    labels={"subject": "Research field", "perc_ref": "% of references on Crossref"},
    title="Percentage of references registered on Crossref - by research field.")

# Human-readable names that replace the raw 'subject' index values on the axis.
# NOTE(review): this assumes the grouped index holds exactly these 21 subjects
# in this order - TODO confirm against the data.
subject_labels = [
    'General works', 'Philosophy, Psychology, Religion', 'Auxiliary science of History',
    'World History', 'History of the Americas', 'Local History of the Americas',
    'Geography, Anthropology, Recreation', 'Social Sciences', 'Political Science', 'Law',
    'Education', 'Music', 'Fine Arts', 'Language and Literature', 'Science', 'Medicine',
    'Agriculture', 'Technology', 'Military Science', 'Naval Science',
    'Bibliography, Library Science',
]
for trace in fig.data:
    trace.y = subject_labels
fig.update_xaxes(range=[0, 100])
fig.update_traces(marker_color='#471D6C')
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [88]:
# Bubble chart: 'reference' total as a percentage of the 'on-crossref' total
# for each research field. Bubble size is the number of DOIs, colour is the
# number of references.
frame11 = frame.copy()
frame11 = frame11.groupby('subject').sum()
# Coerce every aggregated column to numeric; unparsable values become NaN.
frame11 = frame11.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame11['perc_ref'] = (frame11['reference'] / frame11['on-crossref']) * 100
fig = px.scatter(
    frame11, x='perc_ref', y=frame11.index, size='doi-num', width=800, color="ref-num",
    color_continuous_scale=[[0, "#440154"], [0.05, "#414084"], [0.2, "#25848E"], [0.5, "#66CA5C"], [1, "#FBE625"]],
    size_max=60,
    labels={"subject": "Research field", "perc_ref": "% of references on Crossref", "ref-num": "Number of references"},
    title="Percentage of references registered on Crossref - by research field.")

# Human-readable names that replace the raw 'subject' index values on the axis.
# NOTE(review): this assumes the grouped index holds exactly these 21 subjects
# in this order - TODO confirm against the data.
subject_labels = [
    'General works', 'Philosophy, Psychology, Religion', 'Auxiliary science of History',
    'World History', 'History of the Americas', 'Local History of the Americas',
    'Geography, Anthropology, Recreation', 'Social Sciences', 'Political Science', 'Law',
    'Education', 'Music', 'Fine Arts', 'Language and Literature', 'Science', 'Medicine',
    'Agriculture', 'Technology', 'Military Science', 'Naval Science',
    'Bibliography, Library Science',
]
for trace in fig.data:
    trace.y = subject_labels
fig.update_xaxes(range=[0, 100])
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [89]:
# Scatter chart: 'reference' total as a percentage of the 'on-crossref' total
# for each country, coloured by continent.
frame12 = frame.copy().groupby('country').sum().apply(pd.to_numeric, errors='coerce')
frame12['perc_ref'] = 100 * (frame12['reference'] / frame12['on-crossref'])
frame12['country-name'] = coco.convert(names=frame12.index, to="name")

# Resolve each index value to a continent name via its continent code.
continent_name = [
    pc.convert_continent_code_to_continent_name(pc.country_alpha2_to_continent_code(x))
    for x in frame12.index
]
frame12['continent'] = continent_name

continent_colors = {
    'Asia': '#34B67A',
    'Europe': '#471D6C',
    'Africa': '#D4E129',
    'South America': '#25848E',
    'Oceania': '#A6DA35',
    'North America': '#FBE625',
}
fig = px.scatter(
    frame12,
    y='country-name',
    x='perc_ref',
    width=800,
    color="continent",
    color_discrete_map=continent_colors,
    labels={"country-name": "Country", "perc_ref": "% of references on Crossref", "continent": "Continent"},
    title="Percentage of references registered on Crossref - by country.",
)

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [90]:
# Bar chart: countries whose 'perc_ref' is above 60, coloured by continent.
frame13 = frame.copy()
frame13 = frame13.groupby('country').sum()
# Coerce every aggregated column to numeric; unparsable values become NaN.
frame13 = frame13.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame13['perc_ref'] = (frame13['reference'] / frame13['on-crossref']) * 100
frame13['country-name'] = coco.convert(names=frame13.index, to="name")
frame13 = frame13[frame13.perc_ref > 60]
# Resolve each remaining index value (passed to pycountry_convert as an
# alpha-2 code) to its continent name.
continent_name = [
    pc.convert_continent_code_to_continent_name(pc.country_alpha2_to_continent_code(x))
    for x in frame13.index
]
frame13['continent'] = continent_name

# Legend title corrected from the misspelled "Contintent".
fig = px.bar(
    frame13, x='country-name', y='perc_ref', width=800, color="continent",
    color_discrete_map={
        'Asia': '#34B67A',
        'Europe': '#471D6C',
        'Africa': '#D4E129',
        'South America': '#25848E',
        'Oceania': '#A6DA35',
        'North America': '#FBE625',
    },
    labels={"country-name": "Country", "perc_ref": "% of references on Crossref", "continent": "Continent"},
    title="Percentage of references registered on Crossref - by country.")

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()


In [91]:
# Bar chart: countries whose 'perc_ref' is above 60, coloured by number of DOIs.
frame13a = frame.copy()
frame13a = frame13a.groupby('country').sum()
# Coerce every aggregated column to numeric; unparsable values become NaN.
frame13a = frame13a.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame13a['perc_ref'] = (frame13a['reference'] / frame13a['on-crossref']) * 100
frame13a['country-name'] = coco.convert(names=frame13a.index, to="name")
frame13a = frame13a[frame13a.perc_ref > 60]
# The continent column is not plotted here but stays available on frame13a.
continent_name = [
    pc.convert_continent_code_to_continent_name(pc.country_alpha2_to_continent_code(x))
    for x in frame13a.index
]
frame13a['continent'] = continent_name

# "continent" label corrected from the misspelled "Contintent".
fig = px.bar(
    frame13a, x='country-name', y='perc_ref', width=800, color="doi-num",
    color_continuous_scale=[[0, "#440154"], [0.05, "#414084"], [0.2, "#25848E"], [0.5, "#66CA5C"], [1, "#FBE625"]],
    labels={"country-name": "Country", "perc_ref": "% of references on Crossref", "continent": "Continent", "doi-num": "Number of DOIs"},
    title="Percentage of references registered on Crossref - by country.")

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [92]:
# World map: one marker per country with a positive 'perc_ref', sized by that
# percentage and coloured by continent.
frame14 = frame.copy().groupby('country').sum().apply(pd.to_numeric, errors='coerce')
frame14['perc_ref'] = 100 * (frame14['reference'] / frame14['on-crossref'])
frame14['iso_alpha'] = coco.convert(names=frame14.index, to='ISO3')
frame14['country-name'] = coco.convert(names=frame14.index, to='name')

# Resolve each index value to a continent name via its continent code.
continent_name = [
    pc.convert_continent_code_to_continent_name(pc.country_alpha2_to_continent_code(x))
    for x in frame14.index
]
frame14['continent'] = continent_name

# Drop the rows whose percentage is not greater than zero.
has_refs = frame14['perc_ref'] > 0
frame14 = frame14[has_refs]

continent_colors = {
    'Asia': '#34B67A',
    'Europe': '#471D6C',
    'Africa': '#D4E129',
    'South America': '#25848E',
    'Oceania': '#A6DA35',
    'North America': '#FBE625',
}
fig = px.scatter_geo(
    frame14,
    locations="iso_alpha",
    width=800,
    color="continent",
    color_discrete_map=continent_colors,
    hover_name="ref-num",
    size="perc_ref",
    size_max=15,
    projection="natural earth",
    hover_data=["country-name"],
    labels={
        "iso_alpha": "Country code",
        "perc_ref": "% of references on Crossref",
        "doi-num": "Number of dois",
        "continent": "Continent",
        "country-name": "Country name",
    },
    title="Percentage of references registered on Crossref - by country.",
)

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)

fig.show()

In [93]:
# Scatter chart: yearly 'reference' total as a percentage of the 'on-crossref'
# total, for 1950 <= year < 2022.
frame15 = frame.copy()
# Coerce every column to numeric so that 'year' can be compared as a number.
frame15 = frame15.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame15 = frame15[(frame15.year >= 1950) & (frame15.year < 2022)]
frame15 = frame15.groupby('year').sum()
frame15['perc_ref'] = (frame15['reference'] / frame15['on-crossref']) * 100
# The x axis is the 'year' index, so the label key must be "year"; the previous
# "subject" key matched nothing on this chart.
fig = px.scatter(
    frame15, x=frame15.index, y='perc_ref', width=800,
    labels={"year": "Year", "perc_ref": "% of references on Crossref"},
    title="Percentage of references registered on Crossref - by year.")

fig.update_traces(marker_color='#471D6C')
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [94]:
# Line chart: yearly 'reference' total as a percentage of the 'on-crossref'
# total, for 1950 <= year < 2022.
frame16 = frame.copy()
# Coerce every column to numeric so that 'year' can be compared as a number.
frame16 = frame16.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame16 = frame16[(frame16.year >= 1950) & (frame16.year < 2022)]
frame16 = frame16.groupby('year').sum()
# Compute the ratio from this cell's own table; it previously read frame15,
# which made the cell depend on another cell having been run first.
frame16['perc_ref'] = (frame16['reference'] / frame16['on-crossref']) * 100
fig = px.line(
    frame16, x=frame16.index, y='perc_ref', width=800,
    labels={"year": "Year", "perc_ref": "% of references on Crossref"},
    title="Percentage of references registered on Crossref - by year.")
fig.update_traces(line_color='#471D6C', line_width=2)
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [95]:
# Bubble chart: yearly 'perc_ref' for 2000 <= year < 2022, with bubble size and
# colour both driven by the number of references.
frame17 = frame.copy().apply(pd.to_numeric, errors='coerce')
in_window = (frame17['year'] >= 2000) & (frame17['year'] < 2022)
frame17 = frame17[in_window].groupby('year').sum()
frame17['perc_ref'] = 100 * (frame17['reference'] / frame17['on-crossref'])

fig = px.scatter(
    frame17,
    x=frame17.index,
    y='perc_ref',
    size='ref-num',
    size_max=60,
    width=800,
    color="ref-num",
    color_continuous_scale=[[0, "#440154"], [0.05, "#414084"], [0.2, "#25848E"], [0.5, "#66CA5C"], [1, "#FBE625"]],
    labels={"year": "Year", "perc_ref": "% of references on Crossref", "ref-num": "Number of references"},
    title="Percentage of references registered on Crossref - by year.",
)
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [96]:
# Line chart comparing the two yearly percentages ('perc_ref' and 'perc_cr')
# for 1950 <= year < 2022.
frame18 = frame.copy().apply(pd.to_numeric, errors='coerce')
in_window = (frame18['year'] >= 1950) & (frame18['year'] < 2022)
frame18 = frame18[in_window].groupby('year').sum()
frame18['perc_ref'] = 100 * (frame18['reference'] / frame18['on-crossref'])
frame18['perc_cr'] = 100 * (frame18['on-crossref'] / frame18['doi-num'])

fig = px.line(
    frame18,
    x=frame18.index,
    y=['perc_ref', 'perc_cr'],
    width=800,
    labels={"year": "Year"},
    color_discrete_sequence=['#471D6C', '#34B67A'],
    title="Comparison between DOI and references registered on Crossref - by year.",
)

# Replace the raw column names with readable ones in the legend and hover text.
newnames = {'perc_ref': "% of DOIs' references on Crossref", 'perc_cr': "% of DOIs on Crossref"}


def _relabel(trace):
    """Swap the trace's column name for its display name everywhere it appears."""
    column = trace.name
    display = newnames[column]
    trace.update(
        name=display,
        legendgroup=display,
        hovertemplate=trace.hovertemplate.replace(column, display),
    )


fig.for_each_trace(_relabel)
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

# 3) References that have a DOI specified

In [97]:
# Pie chart: references with a DOI versus references without one.
# totdoi / oncross are not used by this chart; they are still refreshed here
# so the notebook-level variables keep the values this cell used to set.
totdoi = len(frame)
oncross = frame['on-crossref'].sum()
# A describe() of the rows with on-crossref == 0 used to be computed here, but
# its result was discarded (it was not the last expression of the cell), so it
# has been removed.

# References with a DOI = all references minus the undefined ones.
ref_defined = (frame['ref-num'] - frame['ref-undefined']).sum()
ref_undefined = frame['ref-undefined'].sum()

new = pd.DataFrame()
new['val'] = [ref_defined, ref_undefined]
new['ref'] = ['Reference with DOI', 'Reference without DOI']

fig = px.pie(
    new, values='val', names='ref', color='ref',
    color_discrete_map={
        'Reference with DOI': '#34B67A',
        'Reference without DOI': '#471D6C',
    },
    width=800,
    title="References DOI overview")

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)

fig.show()

In [98]:
# Stacked bars: share of references with and without a DOI for each research field.
frame20 = frame.copy()
frame20 = frame20.groupby('subject').sum()
# Coerce every aggregated column to numeric; unparsable values become NaN.
frame20 = frame20.apply(lambda x: pd.to_numeric(x, errors='coerce'))
frame20['perc_ref_nodoi'] = (frame20['ref-undefined'] / frame20['ref-num']) * 100
frame20['perc_ref_doi'] = 100 - frame20['perc_ref_nodoi']
fig = px.histogram(
    frame20, x=frame20.index, y=['perc_ref_doi', 'perc_ref_nodoi'], barmode='stack', histfunc='avg', width=800,
    color_discrete_sequence=['#471D6C', '#34B67A'],
    labels={"subject": "Research field", "perc_ref_nodoi": "% reference without DOI", "perc_ref_doi": "% reference with DOI"},
    title="References DOI overview - by research field")

# Replace the raw column names with readable ones in the legend and hover text.
newnames = {'perc_ref_doi': "% of DOIs' references with DOI", 'perc_ref_nodoi': "% of reference without DOI"}
fig.for_each_trace(lambda t: t.update(name=newnames[t.name],
                                      legendgroup=newnames[t.name],
                                      hovertemplate=t.hovertemplate.replace(t.name, newnames[t.name])
                                      )
                   )

# Human-readable names that replace the raw 'subject' index values on the axis.
# NOTE(review): this assumes the grouped index holds exactly these 21 subjects
# in this order - TODO confirm against the data.
subject_labels = [
    'General works', 'Philosophy, Psychology, Religion', 'Auxiliary science of History',
    'World History', 'History of the Americas', 'Local History of the Americas',
    'Geography, Anthropology, Recreation', 'Social Sciences', 'Political Science', 'Law',
    'Education', 'Music', 'Fine Arts', 'Language and Literature', 'Science', 'Medicine',
    'Agriculture', 'Technology', 'Military Science', 'Naval Science',
    'Bibliography, Library Science',
]
for trace in fig.data:
    trace.x = subject_labels

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [99]:
# Per-country share of references with / without a DOI, drawn as a scatter
# plot with one row per country name.
frame21 = frame.copy().groupby('country').sum()
# Coerce the aggregated columns to numbers; anything unparsable becomes NaN.
frame21 = frame21.apply(lambda column: pd.to_numeric(column, errors='coerce'))
frame21['country-name'] = coco.convert(names=frame21.index, to='name')
frame21['perc_ref_nodoi'] = 100 * (frame21['ref-undefined'] / frame21['ref-num'])
frame21['perc_ref_doi'] = 100 - frame21['perc_ref_nodoi']

fig = px.scatter(
    frame21,
    x=['perc_ref_doi', 'perc_ref_nodoi'],
    y="country-name",
    width=800,
    color_discrete_sequence=['#471D6C', '#34B67A'],
    labels={
        "country-name": "Country",
        'perc_ref_doi': '% References with DOI',
        'perc_ref_nodoi': '% References without DOI',
    },
    title="References DOI overview - by country",
)

# Replace the column-derived trace names with readable legend entries.
newnames = {
    'perc_ref_doi': "% of DOIs' references with DOI",
    'perc_ref_nodoi': "% of reference without DOI",
}
for trace in fig.data:
    old_name = trace.name
    new_name = newnames[old_name]
    trace.update(
        name=new_name,
        legendgroup=new_name,
        hovertemplate=trace.hovertemplate.replace(old_name, new_name),
    )

# Shared look: sans-serif font and a centred title.
fig.update_layout(
    font_family="sans-serif",
    font_size=10,
    title_font_size=20,
    title=dict(xanchor='center', y=0.9, x=0.5),
)

fig.show()

In [106]:
# World map of the share of references WITH a DOI, one bubble per country.
frame22 = frame.copy().groupby('country').sum()
# Coerce the aggregated columns to numbers; anything unparsable becomes NaN.
frame22 = frame22.apply(lambda column: pd.to_numeric(column, errors='coerce'))
frame22['iso_alpha'] = coco.convert(names=frame22.index, to='ISO3')
frame22['country-name'] = coco.convert(names=frame22.index, to='name')
frame22['perc_ref_nodoi'] = 100 * (frame22['ref-undefined'] / frame22['ref-num'])
frame22['perc_ref_doi'] = 100 - frame22['perc_ref_nodoi']

# Continent name for every country code in the index (alpha-2 -> continent code -> name).
continent_name = [
    pc.convert_continent_code_to_continent_name(
        pc.country_alpha2_to_continent_code(alpha2)
    )
    for alpha2 in frame22.index
]
frame22['continent'] = continent_name

# Keep only countries with a strictly positive share to plot.
has_doi_share = frame22['perc_ref_doi'] > 0
frame22 = frame22[has_doi_share]
# Constant column used solely to get a single coloured legend entry.
frame22['color'] = "% of references with doi"

fig1 = px.scatter_geo(
    frame22,
    locations="iso_alpha",
    width=800,
    hover_name='ref-num',
    size='perc_ref_doi',
    color="color",
    color_discrete_map={"% of references with doi": "#471D6C"},
    projection="natural earth",
    labels={
        "iso_alpha": "Country code",
        'perc_ref_doi': "% of references with doi",
        "color": "Legend",
    },
    title="References DOI overview - by country",
)

# Shared look: sans-serif font and a centred title.
fig1.update_layout(
    font_family="sans-serif",
    font_size=10,
    title_font_size=20,
    title=dict(xanchor='center', y=0.9, x=0.5),
)

fig1.show()

In [107]:
# World map of the share of references WITHOUT a DOI, one bubble per country.
frame23 = frame.copy().groupby('country').sum()
# Coerce the aggregated columns to numbers; anything unparsable becomes NaN.
frame23 = frame23.apply(lambda column: pd.to_numeric(column, errors='coerce'))
frame23['iso_alpha'] = coco.convert(names=frame23.index, to='ISO3')
frame23['country-name'] = coco.convert(names=frame23.index, to='name')
frame23['perc_ref_nodoi'] = 100 * (frame23['ref-undefined'] / frame23['ref-num'])

# Continent name for every country code in the index (alpha-2 -> continent code -> name).
continent_name = [
    pc.convert_continent_code_to_continent_name(
        pc.country_alpha2_to_continent_code(alpha2)
    )
    for alpha2 in frame23.index
]
frame23['continent'] = continent_name

# Keep only countries with a strictly positive share to plot.
has_nodoi_share = frame23['perc_ref_nodoi'] > 0
frame23 = frame23[has_nodoi_share]
# Constant column used solely to get a single coloured legend entry.
frame23['color'] = "% of references without doi"
frame23.head()

fig2 = px.scatter_geo(
    frame23,
    locations="iso_alpha",
    width=800,
    color="color",
    color_discrete_map={"% of references without doi": "#34B67A"},
    hover_name='ref-num',
    size='perc_ref_nodoi',
    projection="natural earth",
    labels={
        "iso_alpha": "Country code",
        'perc_ref_nodoi': "% of references without doi",
        "color": "Legend",
    },
    title="References DOI overview - by country",
)

# Shared look: sans-serif font and a centred title.
fig2.update_layout(
    font_family="sans-serif",
    font_size=10,
    title_font_size=20,
    title=dict(xanchor='center', y=0.9, x=0.5),
)

fig2.show()

In [110]:
# Overlay the two country maps (references with / without DOI) in one figure.
fig = px.scatter_geo(title="References DOI overview - by country", width=800)
# Read the traces through the public `data` property instead of the private
# `_data` attribute, which is an implementation detail of plotly figures.
fig.add_traces(fig1.data)
fig.add_traces(fig2.data)

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)

fig.show()

In [3]:
# Yearly share of references with / without a DOI (1950-2021), as bars.
frame24 = frame.copy().apply(lambda column: pd.to_numeric(column, errors='coerce'))
# Restrict to publication years in [1950, 2022) before aggregating.
year_in_range = (frame24['year'] >= 1950) & (frame24['year'] < 2022)
frame24 = frame24[year_in_range].groupby('year').sum()
frame24['perc_ref_nodoi'] = 100 * (frame24['ref-undefined'] / frame24['ref-num'])
frame24['perc_ref_doi'] = 100 - frame24['perc_ref_nodoi']

fig = px.histogram(
    frame24,
    x=frame24.index,
    y=['perc_ref_doi', 'perc_ref_nodoi'],
    histfunc='avg',
    color_discrete_map={
        'perc_ref_doi': '#34B67A',
        'perc_ref_nodoi': '#471D6C',
    },
    width=800,
    labels={'year': 'Year'},
    title="References DOI overview - by year.",
)

# Replace the column-derived trace names with readable legend entries.
newnames = {
    'perc_ref_doi': "% of DOIs' references with DOI",
    'perc_ref_nodoi': "% of reference without DOI",
}
for trace in fig.data:
    old_name = trace.name
    new_name = newnames[old_name]
    trace.update(
        name=new_name,
        legendgroup=new_name,
        hovertemplate=trace.hovertemplate.replace(old_name, new_name),
    )

# Shared look: sans-serif font and a centred title.
fig.update_layout(
    font_family="sans-serif",
    font_size=10,
    title_font_size=20,
    title=dict(xanchor='center', y=0.9, x=0.5),
)

fig.show()

In [104]:
# Yearly share of references with / without a DOI (1950-2021), as lines.
frame25 = frame.copy().apply(lambda column: pd.to_numeric(column, errors='coerce'))
# Restrict to publication years in [1950, 2022) before aggregating.
year_in_range = (frame25['year'] >= 1950) & (frame25['year'] < 2022)
frame25 = frame25[year_in_range].groupby('year').sum()
frame25['perc_ref_nodoi'] = 100 * (frame25['ref-undefined'] / frame25['ref-num'])
frame25['perc_ref_doi'] = 100 - frame25['perc_ref_nodoi']

fig = px.line(
    frame25,
    x=frame25.index,
    y=['perc_ref_nodoi', 'perc_ref_doi'],
    color_discrete_map={
        'perc_ref_doi': '#34B67A',
        'perc_ref_nodoi': '#471D6C',
    },
    width=800,
    labels={'year': 'Year'},
    title="References DOI overview - by year.",
)

# Replace the column-derived trace names with readable legend entries.
newnames = {
    'perc_ref_doi': "% of DOIs' references with DOI",
    'perc_ref_nodoi': "% of reference without DOI",
}
for trace in fig.data:
    old_name = trace.name
    new_name = newnames[old_name]
    trace.update(
        name=new_name,
        legendgroup=new_name,
        hovertemplate=trace.hovertemplate.replace(old_name, new_name),
    )

# Shared look: sans-serif font and a centred title.
fig.update_layout(
    font_family="sans-serif",
    font_size=10,
    title_font_size=20,
    title=dict(xanchor='center', y=0.9, x=0.5),
)
fig.show()

# 4) asserted-by crossref / asserted-by publisher

In [105]:
# Overall split of references: DOI asserted by Crossref, DOI asserted by the
# citing publisher, or missing/undefined DOI.
totdoi = len(frame)
oncross = frame['on-crossref'].sum()
ass_cross = frame['asserted-by-cr'].sum()
ass_pub = frame['asserted-by-pub'].sum()
und = frame['ref-undefined'].sum()

# Two-column helper frame: one row per pie slice.
new = pd.DataFrame()
new['val'] = [ass_cross, ass_pub, und]
new['ref'] = ['Asserted by Crossref', 'Asserted by citing publisher', 'Missing/undefined doi']

fig = px.pie(
    new,
    values='val',
    names='ref',
    color='ref',
    color_discrete_map={
        'Asserted by Crossref': '#34B67A',
        'Asserted by citing publisher': '#471D6C',
        'Missing/undefined doi': '#D4E129',
    },
    width=800,
    # The previous title ("Percentage of DOIs registered on Crossref") was
    # carried over from another chart and did not describe these slices.
    title="Reference assertion overview",
)
fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)

fig.show()

In [116]:
# Per research field: share of references whose DOI was asserted by Crossref,
# asserted by the publisher, or is missing - drawn as bars.
# groupby() already returns a new frame, so no defensive copy of `frame` is needed.
frame26 = frame.groupby('subject').sum()
# Force every aggregated column to a numeric dtype; unparsable values become NaN.
frame26 = frame26.apply(lambda col: pd.to_numeric(col, errors='coerce'))
frame26['perc_asserted_cr'] = (frame26['asserted-by-cr'] / frame26['ref-num']) * 100
frame26['perc_asserted_pub'] = (frame26['asserted-by-pub'] / frame26['ref-num']) * 100
frame26['perc_ref_nodoi'] = (frame26['ref-undefined'] / frame26['ref-num']) * 100

fig = px.bar(
    frame26,
    x=frame26.index,
    y=['perc_asserted_cr', 'perc_asserted_pub', 'perc_ref_nodoi'],
    color_discrete_map={
        'perc_asserted_cr': '#34B67A',
        'perc_ref_nodoi': '#471D6C',
        'perc_asserted_pub': '#D4E129',
    },
    width=800,
    labels={"subject": "Research field"},
    title="Reference assertion overview - by research field.",
)

# Traces are named after the plotted columns; swap in readable legend names.
newnames = {'perc_asserted_pub': '% references DOI asserted by publishers', 'perc_asserted_cr': "% of references DOI asserted by Crossref", 'perc_ref_nodoi': "% of reference without DOI"}
for trace in fig.data:
    column = trace.name
    trace.update(
        name=newnames[column],
        legendgroup=newnames[column],
        hovertemplate=trace.hovertemplate.replace(column, newnames[column]),
    )

# Human-readable names that replace the raw 'subject' index values on the x axis.
# NOTE(review): assigned positionally - assumes the grouped index holds these
# 21 subjects in exactly this order; confirm against the data.
subject_labels = [
    'General works',
    'Philosophy, Psychology, Religion',
    'Auxiliary science of History',
    'World History',
    'History of the Americas',
    'Local History of the Americas',
    'Geography, Anthropology, Recreation',
    'Social Sciences',
    'Political Science',
    'Law',
    'Education',
    'Music',
    'Fine Arts',
    'Language and Literature',
    'Science',
    'Medicine',
    'Agriculture',
    'Technology',
    'Military Science',
    'Naval Science',
    'Bibliography, Library Science',
]
for trace in fig.data:
    trace.x = subject_labels

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [None]:
# Per research field: share of references whose DOI was asserted by Crossref,
# asserted by the publisher, or is missing - drawn as a scatter plot.
# The aggregate is stored under its own name: rebinding the shared `frame`
# here would leave every later cell without the raw per-DOI rows.
frame26a = frame.groupby('subject').sum()
# Force every aggregated column to a numeric dtype; unparsable values become NaN.
frame26a = frame26a.apply(lambda col: pd.to_numeric(col, errors='coerce'))
frame26a['perc_asserted_cr'] = (frame26a['asserted-by-cr'] / frame26a['ref-num']) * 100
frame26a['perc_asserted_pub'] = (frame26a['asserted-by-pub'] / frame26a['ref-num']) * 100
frame26a['perc_ref_nodoi'] = (frame26a['ref-undefined'] / frame26a['ref-num']) * 100

fig = px.scatter(
    frame26a,
    x=['perc_asserted_cr', 'perc_asserted_pub', 'perc_ref_nodoi'],
    y=frame26a.index,
    labels={"subject": "Research field"},
    title="Reference assertion overview - by research field.",
)

# Human-readable names that replace the raw 'subject' index values on the y axis.
# NOTE(review): assigned positionally - assumes the grouped index holds these
# 21 subjects in exactly this order; confirm against the data.
subject_labels = [
    'General works',
    'Philosophy, Psychology, Religion',
    'Auxiliary science of History',
    'World History',
    'History of the Americas',
    'Local History of the Americas',
    'Geography, Anthropology, Recreation',
    'Social Sciences',
    'Political Science',
    'Law',
    'Education',
    'Music',
    'Fine Arts',
    'Language and Literature',
    'Science',
    'Medicine',
    'Agriculture',
    'Technology',
    'Military Science',
    'Naval Science',
    'Bibliography, Library Science',
]
for trace in fig.data:
    trace.y = subject_labels

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [117]:
# Per country: share of references whose DOI was asserted by Crossref,
# asserted by the publisher, or is missing - drawn as a scatter plot.
# The aggregate is stored under its own name: rebinding the shared `frame`
# here would leave every later cell without the raw per-DOI rows.
frame21a = frame.groupby('country').sum()
# Force every aggregated column to a numeric dtype; unparsable values become NaN.
frame21a = frame21a.apply(lambda col: pd.to_numeric(col, errors='coerce'))
frame21a['perc_asserted_cr'] = (frame21a['asserted-by-cr'] / frame21a['ref-num']) * 100
frame21a['perc_asserted_pub'] = (frame21a['asserted-by-pub'] / frame21a['ref-num']) * 100
frame21a['perc_ref_nodoi'] = (frame21a['ref-undefined'] / frame21a['ref-num']) * 100
frame21a['country-name'] = coco.convert(names=frame21a.index, to='name')

# Continent name for every country code in the index (alpha-2 -> continent code -> name).
continent_name = [
    pc.convert_continent_code_to_continent_name(
        pc.country_alpha2_to_continent_code(alpha2)
    )
    for alpha2 in frame21a.index
]
frame21a['continent'] = continent_name

fig = px.scatter(
    frame21a,
    x=['perc_asserted_cr', 'perc_asserted_pub', 'perc_ref_nodoi'],
    y="country-name",
    # The former "perc_cr" label entry matched no plotted column and was dropped.
    labels={"country-name": "Country"},
    # The previous title ("Percentage of DOIs registered on Crossref - by
    # country.") was carried over from another chart.
    title="Reference assertion overview - by country.",
)

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

In [6]:
# World map of the share of references whose DOI was asserted by Crossref,
# one bubble per country.
# groupby() already returns a new frame, so no defensive copy of `frame` is needed.
frame22a = frame.groupby('country').sum()
# Force every aggregated column to a numeric dtype; unparsable values become NaN.
frame22a = frame22a.apply(lambda col: pd.to_numeric(col, errors='coerce'))
frame22a['iso_alpha'] = coco.convert(names=frame22a.index, to='ISO3')
frame22a['country-name'] = coco.convert(names=frame22a.index, to='name')
frame22a['perc_asserted_cr'] = (frame22a['asserted-by-cr'] / frame22a['ref-num']) * 100

# Continent name for every country code in the index (alpha-2 -> continent code -> name).
continent_name = [
    pc.convert_continent_code_to_continent_name(
        pc.country_alpha2_to_continent_code(alpha2)
    )
    for alpha2 in frame22a.index
]
frame22a['continent'] = continent_name

# Keep only countries with a strictly positive share to plot.
frame22a = frame22a[frame22a['perc_asserted_cr'] > 0]
# Constant column used solely to get a single coloured legend entry.
frame22a['color'] = "% of references dois asserted by Crossref"

fig1 = px.scatter_geo(
    frame22a,
    locations="iso_alpha",
    width=800,
    hover_name='ref-num',
    size='perc_asserted_cr',
    color="color",
    color_discrete_map={"% of references dois asserted by Crossref": "#471D6C"},
    projection="natural earth",
    labels={
        "iso_alpha": "Country code",
        # Keyed on the column actually plotted; the former 'perc_ref_doi' key
        # matched nothing in this figure, so the label was never applied.
        'perc_asserted_cr': "% of references dois asserted by Crossref",
        "color": "Legend",
    },
    title="References DOI assertion overview - by country",
)

fig1.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)

fig1.show()


In [7]:
# World map of the share of references whose DOI was asserted by the
# publisher, followed by an overlay of this map and the Crossref one (`fig1`).
# groupby() already returns a new frame, so no defensive copy of `frame` is needed.
frame23a = frame.groupby('country').sum()
# Force every aggregated column to a numeric dtype; unparsable values become NaN.
frame23a = frame23a.apply(lambda col: pd.to_numeric(col, errors='coerce'))
frame23a['iso_alpha'] = coco.convert(names=frame23a.index, to='ISO3')
frame23a['country-name'] = coco.convert(names=frame23a.index, to='name')
frame23a['perc_asserted_pub'] = (frame23a['asserted-by-pub'] / frame23a['ref-num']) * 100

# Continent name for every country code in the index (alpha-2 -> continent code -> name).
continent_name = [
    pc.convert_continent_code_to_continent_name(
        pc.country_alpha2_to_continent_code(alpha2)
    )
    for alpha2 in frame23a.index
]
frame23a['continent'] = continent_name

# Keep only countries with a strictly positive share to plot.
frame23a = frame23a[frame23a['perc_asserted_pub'] > 0]
# Constant column used solely to get a single coloured legend entry.
frame23a['color'] = "% of references doi asserted by publisher"

fig2 = px.scatter_geo(
    frame23a,
    locations="iso_alpha",
    width=800,
    color="color",
    color_discrete_map={"% of references doi asserted by publisher": "#34B67A"},
    hover_name='ref-num',
    size='perc_asserted_pub',
    projection="natural earth",
    labels={
        "iso_alpha": "Country code",
        'perc_asserted_pub': "% of references doi asserted by publisher",
        "color": "Legend",
    },
    title="References DOI assertion overview - by country",
)

fig2.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)

fig2.show()

# Overlay both assertion maps in a single figure.
fig = px.scatter_geo(title="References DOI assertion overview - by country", width=800)
# Read the traces through the public `data` property instead of the private
# `_data` attribute, which is an implementation detail of plotly figures.
fig.add_traces(fig1.data)
fig.add_traces(fig2.data)

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)

fig.show()

In [5]:
import os

# Per continent: share of references whose DOI was asserted by Crossref,
# asserted by the publisher, or is missing - drawn as bars and saved as PNG.
# groupby() already returns a new frame, so no defensive copy of `frame` is needed.
frame27 = frame.groupby('country').sum()
frame27['country-name'] = coco.convert(names=frame27.index, to='name')

# Continent name for every country code in the index (alpha-2 -> continent code -> name).
continent_name = [
    pc.convert_continent_code_to_continent_name(
        pc.country_alpha2_to_continent_code(alpha2)
    )
    for alpha2 in frame27.index
]
frame27['continent'] = continent_name

# Second aggregation level: countries -> continents.
frame27 = frame27.groupby('continent').sum()
# Force every aggregated column to a numeric dtype; unparsable values become NaN.
frame27 = frame27.apply(lambda col: pd.to_numeric(col, errors='coerce'))
frame27['perc_asserted_cr'] = (frame27['asserted-by-cr'] / frame27['ref-num']) * 100
frame27['perc_asserted_pub'] = (frame27['asserted-by-pub'] / frame27['ref-num']) * 100
frame27['perc_ref_nodoi'] = (frame27['ref-undefined'] / frame27['ref-num']) * 100

fig = px.bar(
    frame27,
    x=frame27.index,
    y=['perc_asserted_cr', 'perc_asserted_pub', 'perc_ref_nodoi'],
    color_discrete_map={
        'perc_asserted_cr': '#34B67A',
        'perc_ref_nodoi': '#471D6C',
        'perc_asserted_pub': '#D4E129',
    },
    width=800,
    labels={'continent': "Continent"},
    title="Reference assertion overview - by continent.",
)

# Traces are named after the plotted columns; swap in readable legend names.
newnames = {'perc_asserted_pub': '% references DOI asserted by publishers', 'perc_asserted_cr': "% of references DOI asserted by Crossref", 'perc_ref_nodoi': "% of reference without DOI"}
for trace in fig.data:
    column = trace.name
    trace.update(
        name=newnames[column],
        legendgroup=newnames[column],
        hovertemplate=trace.hovertemplate.replace(column, newnames[column]),
    )

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()

# Create the output directory if needed; exist_ok avoids the separate
# existence check and the gap between checking and creating.
os.makedirs("images", exist_ok=True)

fig.write_image("images/pec_ass_continent.png")

In [None]:
# Per year (1950-2021): share of references whose DOI was asserted by
# Crossref, asserted by the publisher, or is missing - drawn as bars.
# apply() returns a new frame, so no defensive copy of `frame` is needed.
frame28 = frame.apply(lambda col: pd.to_numeric(col, errors='coerce'))
# Restrict to publication years in [1950, 2022) before aggregating.
frame28 = frame28[(frame28.year >= 1950) & (frame28.year < 2022)]
frame28 = frame28.groupby('year').sum()
frame28['perc_asserted_cr'] = (frame28['asserted-by-cr'] / frame28['ref-num']) * 100
frame28['perc_asserted_pub'] = (frame28['asserted-by-pub'] / frame28['ref-num']) * 100
frame28['perc_ref_nodoi'] = (frame28['ref-undefined'] / frame28['ref-num']) * 100

fig = px.histogram(
    frame28,
    x=frame28.index,
    y=['perc_asserted_cr', 'perc_asserted_pub', 'perc_ref_nodoi'],
    histfunc='avg',
    color_discrete_map={
        'perc_asserted_cr': '#34B67A',
        'perc_ref_nodoi': '#471D6C',
        'perc_asserted_pub': '#D4E129',
    },
    width=800,
    labels={"year": "Year"},
    title="Reference assertion overview - by year.",
)

# Traces are named after the plotted columns; swap in readable legend names.
newnames = {'perc_asserted_pub': '% references DOI asserted by publishers', 'perc_asserted_cr': "% of references DOI asserted by Crossref", 'perc_ref_nodoi': "% of reference without DOI"}
for trace in fig.data:
    column = trace.name
    trace.update(
        name=newnames[column],
        legendgroup=newnames[column],
        hovertemplate=trace.hovertemplate.replace(column, newnames[column]),
    )

fig.update_layout(
    font_family="sans-serif",
    title_font_size=20,
    font_size=10,
    title={'xanchor': 'center', 'y': 0.9, 'x': 0.5},
)
fig.show()