In [73]:
import zipfile
import io
import requests
import re
from difflib import SequenceMatcher
import string
from habanero import Crossref
cr = Crossref()

import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import wget

import pmagpy.ipmag as ipmag

In [74]:
from scholarly import scholarly
from scholarly import ProxyGenerator

Code that will input a doi and return the magic code.

Apply to PINT references

## Function to find the earth ref id associated with a given doi
adapted from pmagpy's "ipmag.download_magic_from_doi()"

In [75]:
def magic_link_from_doi(doi):
    """
    This uses the earthref MagIC api to search for a magic contribution using a doi. 
    
    Input: 
        doi: str beginning with '10.'
        
    Output: 
        earthref_doi_link: http link if found
        0: if not found in MagIC database
            try using magic_link_from_title to search the title in MagIC
    """
    api = 'https://api.earthref.org/v1/MagIC/{}'
    response = requests.get(api.format('download'), params={'doi': doi, 'n_max_contributions': 1})
    if (response.status_code == 200):
        contribution_zip = zipfile.ZipFile(io.BytesIO(response.content))
        for filename in contribution_zip.namelist():
            if (re.match(r'^\d+\/magic_contribution_\d+\.txt', filename)):
                contribution_text = io.TextIOWrapper(contribution_zip.open(filename)).read()
                file = filename
                with open('magic_contribution.txt', 'wt') as fh:
                    fh.write(contribution_text)
            magic_id = file.split('/')[0]
            #print(file)
            earthref_doi_link = 'http://dx.doi.org/10.7288/V4/MAGIC/' + magic_id
            return earthref_doi_link    
    else:
        earthref_doi_link = 0
    return earthref_doi_link

Test cell

In [105]:
doi = '10.1029/2021GC009990'
#doi = '10.1029/2019GC008728'

magic_link_from_doi(doi)

#magic_link_from_doi('10.1029/231')

'http://dx.doi.org/10.7288/V4/MAGIC/17452'

## Function to find the earth ref id associated with a paper title

In [134]:
def magic_link_from_title(title):
    """
    *if it returns 0 it could mean several issues: 
    """
    api = 'https://api.earthref.org/v1/MagIC/{}'
    response = requests.get(api.format('download'), params={'reference_title': title, 'n_max_contributions': 1}) #'only_latest':True})
    
    #print(response.url)
    #print(response.status_code)
    
    if (response.status_code == 200):
        contribution_zip = zipfile.ZipFile(io.BytesIO(response.content))
        for filename in contribution_zip.namelist():
            
            #print(contribution_zip.namelist()) # check contributions found
            
            if (re.match(r'^\d+\/magic_contribution_\d+\.txt', filename)):
                contribution_text = io.TextIOWrapper(contribution_zip.open(filename)).read()
                file = filename
                with open('magic_contribution.txt', 'wt') as fh:
                    fh.write(contribution_text)
            magic_id = file.split('/')[0]
            earthref_doi_link = 'http://dx.doi.org/10.7288/V4/MAGIC/' + magic_id
            return earthref_doi_link
        
    elif (response.status_code == 204): 
        new_title = title.translate(str.maketrans('', '', string.punctuation)) 
        response = requests.get(api.format('download'), params={'reference_title': new_title, 'n_max_contributions': 1})
        #print(new_title)
        #print(response.url)
        if (response.status_code == 200):
            contribution_zip = zipfile.ZipFile(io.BytesIO(response.content))
            for filename in contribution_zip.namelist():            
                if (re.match(r'^\d+\/magic_contribution_\d+\.txt', filename)):
                    contribution_text = io.TextIOWrapper(contribution_zip.open(filename)).read()
                    file = filename
                    with open('magic_contribution.txt', 'wt') as fh:
                        fh.write(contribution_text)
                magic_id = file.split('/')[0]
                earthref_doi_link = 'http://dx.doi.org/10.7288/V4/MAGIC/' + magic_id
                return earthref_doi_link
        else: 
            print("Earthref Data DOI or spelling Error: Try searching this title directly at 'https://www2.earthref.org/MagIC/search'")
    #return 0

In [135]:
title = "Geomagnetic field intensity between 70 000 and 130 000 years B.P. from a volcanic sequence on La Réunion, Indian Ocean"
#id = '19405'
title1 = 'Magnetic and thermal effects of dike intrusions in Iceland.'
#id = '19847'
#doi = '10.1016/0012-821X(96)00024-6'

magic_link_from_title(title)

'http://dx.doi.org/10.7288/V4/MAGIC/19405'

In [136]:
title2 = 'Determination of the intensity of the ancient geomagnetic field from the magnetization of effusive rocks of the Armenian SSR.'
magic_link_from_title(title2)   # no content found bc it does not have an Earth ref Data DOI

title3 = 'Paleointensity of the geomagnetic field in the early Permian'
magic_link_from_title(title3)   # no content found

Earthref Data DOI or spelling Error: Try searching this title directly at 'https://www2.earthref.org/MagIC/search'
Earthref Data DOI or spelling Error: Try searching this title directly at 'https://www2.earthref.org/MagIC/search'


In [115]:
magic_id = '17273'
ipmag.download_magic_from_id(magic_id)

magic_id = '17907'
ipmag.download_magic_from_id(magic_id)

magic_id = '17624'
ipmag.download_magic_from_id(magic_id)


(False,
 "Couldn't connect to MagIC site, please check your internet connection")

Test on PINT

In [53]:
pint_refs = pd.read_csv('..\docs\PINT_References_magiclinks.csv', encoding = "ISO-8859-1")
titles = pint_refs['TITLE'][0:10]

# send doi number to be searched in magic
magic_links = []
for i in range(len(titles)): magic_links.append(magic_link_from_title(titles[i]))

https://api.earthref.org/v1/MagIC/download?reference_title=Evidence+of+anomalously+weak+geomagnetic+field+during+Matuyama+reversed+epoch&n_max_contributions=1
204
Evidence of anomalously weak geomagnetic field during Matuyama reversed epoch
https://api.earthref.org/v1/MagIC/download?reference_title=Evidence+of+anomalously+weak+geomagnetic+field+during+Matuyama+reversed+epoch&n_max_contributions=1
https://api.earthref.org/v1/MagIC/download?reference_title=Determination+of+the+intensity+of+the+ancient+geomagnetic+field+from+the+magnetization+of+effusive+rocks+of+the+Armenian+SSR&n_max_contributions=1
204
Determination of the intensity of the ancient geomagnetic field from the magnetization of effusive rocks of the Armenian SSR
https://api.earthref.org/v1/MagIC/download?reference_title=Determination+of+the+intensity+of+the+ancient+geomagnetic+field+from+the+magnetization+of+effusive+rocks+of+the+Armenian+SSR&n_max_contributions=1
https://api.earthref.org/v1/MagIC/download?reference_title=

In [54]:
magic_links_10 = magic_links
#pint_refs['MagIC_found'] = magic_links_413
magic_links_10

[None,
 None,
 None,
 'http://dx.doi.org/10.7288/V4/MAGIC/18753',
 None,
 None,
 None,
 None,
 None,
 None]

## Functions to use the crossref API to search for a paper from a given title and collect its doi

In [68]:
def is_string_similar(s1, s2, threshold: float = 0.90):
    return SequenceMatcher(a=s1, b=s2).ratio() > threshold

def get_doi(title, cursor_max = 3):
    """
    This function uses the crossref.org API to search for a journal publication based on its title and
    returns its doi. 
    Specifically, it pulls a query of the top 3 results for the title and checks the similarity of the title
    using 'is_string_similar' to verify that the similarity level between the given title and the found title 
    is above the threshold, which is currently set to 90%. It returns the doi of the title that fits this criteria
    and it not it returns 0.
    
    Input: 
        title: str
    
    Ouput: 
        doi_result: doi that corresponds to the input title
                    0 if a matching title and doi was not found
    """
    result = cr.works(query = title, cursor = '*', cursor_max = 3, limit = 3, select = ['title', 'DOI','author'])['message']['items']
    result0 = result[0]
    result1 = result[1]
    result2 = result[2]
    title_result = result0['title'][0]
    doi_result = result0['DOI']
    
    #print(result)
    #print(title,title_result)
    
    if is_string_similar(title,title_result) == False:
        doi_result = 0
        title_result1 = result1['title'][0]
        doi_result1 = result1['DOI']
        if is_string_similar(title,title_result1) == False:
            doi_result = 0
            title_result2 = result2['title'][0]
            doi_result2 = result2['DOI']
            if is_string_similar(title,title_result2) == False:
                doi_result = 0
            else: 
                doi_result = doi_result2
        else: 
            doi_result = doi_result1
    else: 
        doi_result = doi_result
    print(doi_result)  
    return doi_result

Test cell

In [69]:
get_doi('Geomagnetic field intensity between 70 000 and 130 000 years B.P. from a volcanic sequence on La Reunion, Indian Ocean.')

10.1016/0012-821x(96)00024-6


'10.1016/0012-821x(96)00024-6'

## Function to search for a paper from its given title and author

In [70]:
def is_string_similar(s1, s2, threshold: float = 0.90):
    return SequenceMatcher(a=s1, b=s2).ratio() > threshold

def get_doi_from_title_auth(title, author, num_results = 3):
    """
    This function uses the crossref.org API to search for a journal publication based on its title and author then
    returns its doi. 
    Specifically, it pulls a query of the top results (default 3) for the title and checks the similarity of the title then author
    using 'is_string_similar' to verify that the similarity level between the given title and the found title 
    is above the threshold, which is currently set to 90. It returns the doi of the title that fits this criteria
    and if not, then it returns 0. If the author does not match it will return 1. 
    
    Input: 
        title: str
        author: str
    
    Ouput: 
        doi_result: doi that corresponds to the input title
                    0 if a matching title and doi was not found
                    1 if a matching title was found but the authors do not match
    """
    first_author_last = (author.split(',')[0]).lower()
    result = cr.works(query = title, cursor = '*', cursor_max = num_results, limit = num_results , select = ['title', 'DOI','author'])['message']['items']
 
    for i in range(num_results): 
        result_n  = result[i]
        title_result = result_n['title'][0]
        doi_result = result_n['DOI']
    
        # if found title does not match input title by 90%
        if not is_string_similar(title,title_result): 
            doi_result = 0
        break
    
        if not 'author' in result_n.keys():
             doi_result = 1
        break
        
        first_author_last_result = (result_n['author'][0]['family']).lower()

        #if found author does not match input first author
        if not is_string_similar(first_author_last, first_author_last_result):
            doi_result = 1
        break
    
    return doi_result

In [71]:
title = 'Evidence of anomalously weak geomagnetic field during Matuyama reversed epoch'
author = 'Aoki, Y., Kase, H., Ishibashi, K., Kinoshita, H.'

# title = 'Paleomagnetism of the Stillwater Complex, Montana'
# author = 'Bergh, H.W.'
get_doi_from_title_auth(title, author)

'10.5636/jgg.23.129'

In [72]:
get_doi(title)

10.5636/jgg.23.129


'10.5636/jgg.23.129'

## Importing PINT references table

In [64]:
pint_refs = pd.read_csv('..\docs\PINT_References_magiclinks.csv', encoding = "ISO-8859-1")
pint_refs_dois = pint_refs.loc[pint_refs['DOI'].isna() == False]
#pint_refs_dois
len(pint_refs)
len(pint_refs_dois)

36

# Applying get_doi then magic_link_from_doi to the whole initial sheet

In [65]:
pint_refs

Unnamed: 0,REFNO,AUTHORS,YEAR,TITLE,JOURNAL,VOL,PAGES,DOI,MagIC
0,1,"Aoki, Y., Kase, H., Ishibashi, K., Kinoshita, H.",1971,Evidence of anomalously weak geomagnetic field...,J. Geomag. Geoelect.,23,129-132,,
1,2,"Bagina, O.L., Minasyan, D.O., Petrova, G.N.",1976,Determination of the intensity of the ancient ...,Izv. Akad. Nauk. (in Russian),2,81-86,,
2,3,"Bergh, H.W.",1970,"Paleomagnetism of the Stillwater Complex, Montana","Paleogeophysics (ed. S.K. Runcorn, Academic P...",17,143-158,,
3,4,"Bogue, S.W., Coe, R.S.",1984,"Transitional paleointensities from Kauai, Hawa...",J. Geophys. Res.,89,10341-10354,,
4,5,"Bol'shakov, A.S., Solodovnikov, G.M., Vechfins...",1978,Determination of the geomagnetic field strengt...,"Izv., Earth Phys. (Eng. Trans.)",14,904-910,,
...,...,...,...,...,...,...,...,...,...
408,779,"Lloyd, S. J., Biggin, A. J., Paterson, G. A., ...",2022,Extremely weak early Cambrian dipole moment si...,Earth and Planetary Science Letters,595,117757,https://doi.org/10.1016/j.epsl.2022.117757,
409,780,"Thallner, D., Shcherbakova, V. V., Bakhmutov, ...",2022,New palaeodirections and palaeointensity data ...,Geophys. J. Int.,231 (1),474-492,https://doi.org/10.1093/gji/ggac186,
410,781,"Biasi, J., Kirschvink, J., Fu, R.",2021,Characterizing the Geomagnetic Field at High S...,J. Geophys. Res.,126,e2021JB023273,https://doi.org/10.1029/2021JB023273,
411,782,"Tauxe, L., Asefaw, H., Behar, N., Koppers, A. ...",2022,Paleointensity Estimates From the Pleistocene ...,"Geochemistry, Geophysics, Geosystems",23,e2022GC010473,https://doi.org/10.1029/2022GC010473,


In [60]:
pint_refs_titles = pint_refs['TITLE'][0:10]
pint_refs_authors = pint_refs['AUTHORS'][0:10]

dois = []
for i in range(len(pint_refs_titles)): 
    dois.append(get_doi_from_title_auth(pint_refs_titles[i], pint_refs_authors[i]))
    print(i,'/10')

In [61]:
dois

['10.5636/jgg.23.129',
 0,
 0,
 '10.1029/jb089ib12p10341',
 0,
 0,
 0,
 0,
 '10.1016/0198-0254(88)92556-3',
 0]

In [73]:
dois_413 = dois
dois_413[0]

'10.5636/jgg.23.129'

In [74]:
pint_refs['DOI_found'] = dois_413

In [63]:
# send doi number to be searched in magic
magic_links = []
for i in range(len(dois_413)): magic_links.append(magic_link_from_doi(dois_413[i]))

In [75]:
magic_links_413 = magic_links
pint_refs['MagIC_found'] = magic_links_413

In [97]:
pint_refs
# need to convert the new dois to a str so the doi number id can be made into a link
dois_str = list(map(str, dois_413))
#dois_str

doi_str_links = []
for i in range(len(pint_refs)): 
    if (dois_str[i] != '0'):
        doi_str_links.append('https://doi.org/' + dois_str[i])
    else: 
        doi_str_links.append(0)

In [100]:
doi_str_links_413 = doi_str_links
pint_refs['DOI_found'] = doi_str_links_413

In [102]:
#pint_refs.to_excel('../docs/pint_refs_updated_ordered.xlsx')

In [115]:
(pint_refs.loc[pint_refs['DOI_found']==0])     # 56
(pint_refs.loc[pint_refs['MagIC_found']==0])   # 193

Unnamed: 0,REFNO,AUTHORS,YEAR,TITLE,JOURNAL,VOL,PAGES,DOI,MagIC,DOI_found,MagIC_found
1,2,"Bagina, O.L., Minasyan, D.O., Petrova, G.N.",1976,Determination of the intensity of the ancient ...,Izv. Akad. Nauk. (in Russian),2,81-86,,,0,0
2,3,"Bergh, H.W.",1970,"Paleomagnetism of the Stillwater Complex, Montana","Paleogeophysics (ed. S.K. Runcorn, Academic P...",17,143-158,,,0,0
4,5,"Bol'shakov, A.S., Solodovnikov, G.M., Vechfins...",1978,Determination of the geomagnetic field strengt...,"Izv., Earth Phys. (Eng. Trans.)",14,904-910,,,0,0
5,6,"Bol'shakov, A.S., Gapeyev, A.K., Tkhoa, N.T.K....",1981,Determination of paleointensity of the geomagn...,"Izv., Earth Phys. (Eng. Trans.)",17,306-310,,,0,0
6,7,"Bol'shakov, A.S., Solodovnikov, G.M., Vinograd...",1987,Paleointensity of the geomagnetic field in the...,"Izv., Earth Phys. (Eng. Trans.)",23,324-333,,,0,0
...,...,...,...,...,...,...,...,...,...,...,...
403,774,"Mahgoub A. N., Garcia-Amador B. I., Alva-Valdi...",2021,Comprehensive palaeomagnetic study of San Borj...,Geophys. J. Int.,225,1897-1919,https://doi.org/10.1093/gji/ggab064,,https://doi.org/10.5194/egusphere-egu21-3409,0
404,775,"Schnepp E., Arneitz P., Ganerod M., Scholger R...",2021,Intermediate field directions recorded in Plio...,Earth Planets Space,73,182,https://doi.org/10.1186/s40623-021-01518-w,,https://doi.org/10.21203/rs.3.rs-408695/v1,0
405,776,"Bobrovnikova E. M., Lhuillier F., Shcherbakov ...",2022,High-Latitude Paleointensities During the Cret...,J. Geophys. Res.,127,e2021JB023551,https://doi.org/10.1029/2021JB023551,,https://doi.org/10.1029/2021jb023551,0
407,778,"Zhou, T., Tarduno, J., Nimmo, F., Cottrell, R....",2022,Early Cambrian renewal of the geodynamo and th...,Nature Communications,13,4161,https://doi.org/10.1038/s41467-022-31677-7,,https://doi.org/10.5194/egusphere-egu22-10532,0


# Applying to ICEPMAG references as a check 

In [67]:
icepmag_refs = pd.read_csv('..\docs\icepmag_table_refs.csv', encoding = "ISO-8859-1")
icepmag_refs

Unnamed: 0,ID,AUTHORS,TITLE,YEAR,PUBLICATION_NAME,VOLUME,PAGES,DOI,MAGIC_LINK,SHORT_NAME
0,1,"Kristj&aacute;nsson, L.",Paleomagnetic observations at three locations ...,2010,J&ouml;kull,60,149-164,,,Kristjansson 2010
1,2,"Doell, R.R.",Palaeomagnetic Studies of Icelandic Lava Flows,1972,Geophys. J. Royal Astron. Soc.,26,459-479,10.1111/j.1365-246X.1972.tb05763.x,9265.0,Doell 1972
2,3,"D&oslash;ssing, A., Muxworthy, A.R., Supakulop...",High northern geomagnetic field behavior and n...,2016,Earth Planet. Sci. Lett.,456,98-111,10.1016/j.epsl.2016.09.022,,Dossing et al 2016
3,4,"Kristj&aacute;nsson, L., Fridleifsson, I.B., W...","Stratigraphy and paleomagnetism of the Esja, E...",1980,J. Geophys.,47,31-42,10.7288/V4/MagIC/9121,8511.0,Kristjansson et al 1980
4,5,"Udagawa, S., Kitagawa, H., Gudmundsson, A., Hi...",Age and magnetism of lavas in J&ouml;kuldalur ...,1999,Phys. Earth Planet. Inter.,115,147-171,10.1016/S0031-9201(99)00073-4,9325.0,Udagawa et al 1999
...,...,...,...,...,...,...,...,...,...,...
74,75,"Levi S., Audunsson, H., Duncan, R.A., Kristj&a...",Late Pleistocene geomagnetic excursion in Icel...,1990,Earth Planet. Sci. Lett.,96,443-457,10.1016/0012-821X(90)90019-T,,Levi et al 1990
75,76,"Kristj&aacute;nsson, L.",Extension of the Middle Miocene Kleifakot geom...,2016,J&ouml;kull,66,83-94,,,Kristjansson 2016
76,77,"Pinton, A., Giordano, G., Speranza, F., &THORN...",Paleomagnetism of Holocene lava flows from the...,2018,Bulletin of Volcanology,80,1-19,10.1007/s00445-017-1187-8,,Pinton et al 2018
77,78,"Horst, A.J., Karson, J.A., Varga, R.J.",Large Rotations of Crustal Blocks in the Tj&ou...,2018,Tectonics,37,1607-1625,10.1002/2016TC004371,,Horst et al 2018


In [70]:
icepmag_refs_titles = icepmag_refs['TITLE']
icepmag_refs_authors = icepmag_refs['AUTHORS']

dois = []
for i in range(len(icepmag_refs_titles)): 
    dois.append(get_doi_from_title_auth(icepmag_refs_titles[i],icepmag_refs_authors[i]))
    print(i,'/78')

0 /78
1 /78
2 /78
3 /78
4 /78
5 /78
6 /78
7 /78
8 /78
9 /78
10 /78
11 /78
12 /78
13 /78
14 /78
15 /78
16 /78
17 /78
18 /78
19 /78
20 /78
21 /78
22 /78
23 /78
24 /78
25 /78
26 /78
27 /78


KeyError: 'author'

In [71]:
dois

[1,
 '10.1111/j.1365-246x.1972.tb05763.x',
 1,
 1,
 '10.1016/s0031-9201(99)00073-4',
 '10.5636/jgg.47.89',
 1,
 '10.1029/jb089ib08p07029',
 '10.1111/j.1365-246x.2006.03034.x',
 '10.1016/j.pepi.2009.07.013',
 1,
 '10.1130/0091-7613(2001)029<0179:gihots>2.0.co;2',
 1,
 1,
 1,
 '10.1016/s0012-821x(99)00010-2',
 1,
 '10.1029/jb085ib07p03628',
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1]

In [87]:
dois_79 = dois
dois_79

In [88]:
icepmag_refs['DOI_found'] = dois_79

In [91]:
# would be nice to search doi in magic by the title ...

# send doi number to be searched in magic
magic_links = []
for i in range(len(dois_79)): magic_links.append(magic_link_from_doi(dois_79[i]))

In [93]:
magic_links_79 = magic_links
#magic_links_79
icepmag_refs['MagIC_found'] = magic_links_79

In [105]:
icepmag_refs
dois_str = list(map(str, dois_79))
#dois_str

doi_str_links = []
for i in range(len(icepmag_refs)): 
    if (dois_str[i] != '0'):
        doi_str_links.append('https://doi.org/' + dois_str[i])
    else: 
        doi_str_links.append(0)

In [107]:
doi_str_links_79 = doi_str_links
icepmag_refs['DOI_found'] = doi_str_links_79

In [123]:
#icepmag_refs.to_excel('../docs/icepmag_refs_updated.xlsx')
icepmag_refs.loc[icepmag_refs['DOI_found'] ==0]  #9
len(icepmag_refs.loc[icepmag_refs['MagIC_found'] ==0])  #51
icepmag_refs

Unnamed: 0,ID,AUTHORS,TITLE,YEAR,PUBLICATION,VOL,PAGES,DOI,MAGIC,DOI_found,MagIC_found
0,1,"Kristjánsson, L.",Paleomagnetic observations at three locations ...,2010,Jökull,60,149-164,,,https://doi.org/10.33799/jokull2010.60.149,0
1,2,"Doell, R.R.",Palaeomagnetic Studies of Icelandic Lava Flows,1972,Geophys. J. Royal Astron. Soc.,26,459-479,10.1111/j.1365-,9265.0,https://doi.org/10.1111/j.1365-246x.1972.tb057...,http://dx.doi.org/10.7288/V4/MAGIC/12840
2,3,"Døssing, A., Muxworthy, A. R., Supakulopas, R....",High northern geomagnetic field behavior and n...,2016,Earth Planet. Sci. Lett.,456,98-111,10.1016/j.epsl.2016.09.022,,https://doi.org/10.1016/j.epsl.2016.09.022,0
3,4,"Kristjánsson, L., Fridleifsson, I.B., Watkins,...","Stratigraphy and paleomagnetism of the Esja, E...",1980,J. Geophys.,47,31-42,10.7288/V4/MagIC/9121,8511.0,0,0
4,5,"Udagawa, S., Kitagawa, H.,","Age and magnetism of lavas in Jökuldalur area,...",1999,Phys. Earth,115,147-171,10.1016/S0031-9201(99)00073-4,9325.0,https://doi.org/10.1016/s0031-9201(99)00073-4,http://dx.doi.org/10.7288/V4/MAGIC/16400
...,...,...,...,...,...,...,...,...,...,...,...
74,75,"Levi S., Audunsson, H., Duncan, R.A., Kristján...",Late Pleistocene geomagnetic excursion in Icel...,1990,Earth Planet. Sci. Lett.,96,443-457,10.1016/0012-821X(90)90019-T,,https://doi.org/10.1016/0012-821x(90)90019-t,http://dx.doi.org/10.7288/V4/MAGIC/12960
75,76,"Kristjánsson, L.",Extension of the Middle Miocene Kleifakot geom...,2016,Jökull,66,83-94,,,0,0
76,77,"Pinton, A., Giordano, G., Speranza, F., Þórðar...",Paleomagnetism of Holocene lava flows from the...,2018,Bulletin of,80,19-Jan,10.1007/s00445-017-1187-8,,https://doi.org/10.1007/s00445-017-1187-8,0
77,78,"Horst, A.J., Karson, J.A., Varga, R.J.",Large Rotations of Crustal Blocks in the Tjörn...,2018,Tectonics,37,1607-,10.1002/2016TC004371,,https://doi.org/10.1002/2016tc004371,0


## For the 36 PINT references with a listed doi, the doi number id is seperated from the link then its ran through my function to try and find a corresponding earth ref id link 

In [142]:
# seclude doi number as a str
doi_vals = []
for i in pint_refs_dois['DOI']: doi_vals.append(i.split('.org/')[1])

# send doi number to be searched in magic
magic_links = []
for i in range(len(doi_vals)): magic_links.append(magic_link_from_doi(doi_vals[i]))
pint_refs_dois['MagIC'] = magic_links

"pint_refs_dois" is the df of references with given dois

In [231]:
pd.set_option('display.max_rows', 10)
len(pint_refs)

413

# Applying get_doi to the pint references without dois - takes ~25 mins to run

In [None]:
pint_refs_no_dois = pint_refs.loc[pint_refs['DOI'].isna()]
pint_refs_no_dois.reset_index(inplace=True, drop=True) # reset index 
no_doi_test = pint_refs_no_dois['TITLE']

dois = []
for i in range(len(no_doi_test)): dois.append(get_doi(no_doi_test[i]))

Saving the list of dois found to the new name and to the df

In [None]:
dois_0 = dois
pint_refs_no_dois['DOI'] = dois_0

In [104]:
pd.set_option('display.max_rows',10)
pint_refs_no_dois.loc[pint_refs_no_dois['DOI'] != 0]

With the found dois, the doi id number is secluded then the search for earth ref id links is ran

In [130]:
# seclude doi number as a str
doi_vals0 = []
for i in range(len(pint_refs_no_dois['DOI'])): doi_vals0.append(pint_refs_no_dois['DOI'][i])

# send doi number to be searched in magic
magic_links = []
for i in range(len(doi_vals0)): magic_links.append(magic_link_from_doi(doi_vals0[i]))
pint_refs_no_dois['MagIC'] = magic_links

In [164]:
pint_refs_no_dois['DOI'] = dois_0

In [211]:
# need to convert the new dois to a str so the doi number id can be made into a link
dois_str = list(map(str, dois_0))
#dois_str

In [217]:
doi_str_links = []
for i in range(len(pint_refs_no_dois)): 
    if (dois_str[i] != '0'):
        doi_str_links.append('https://doi.org/' + dois_str[i])
    else: 
        doi_str_links.append(0)
   

In [221]:
pint_refs_no_dois['DOI'] = doi_str_links

## Updated df of references without initial dois

In [368]:
pd.set_option('display.max_rows', 10)
pint_refs_no_dois_1st = pint_refs_no_dois.loc[pint_refs_no_dois['DOI'] != 0]
pint_refs_no_dois_1st

Unnamed: 0,REFNO,AUTHORS,YEAR,TITLE,JOURNAL,VOL,PAGES,DOI,MagIC
0,1,"Aoki, Y., Kase, H., Ishibashi, K., Kinoshita, H.",1971,Evidence of anomalously weak geomagnetic field...,J. Geomag. Geoelect.,23,129-132,https://doi.org/10.5636/jgg.23.129,http://dx.doi.org/10.7288/V4/MAGIC/14873
3,4,"Bogue, S.W., Coe, R.S.",1984,"Transitional paleointensities from Kauai, Hawa...",J. Geophys. Res.,89,10341-10354,https://doi.org/10.1029/jb089ib12p10341,http://dx.doi.org/10.7288/V4/MAGIC/14181
8,9,"Borisova, G.P.",1986,Estimation of the paleointensity of the geomag...,"Izv., Earth Phys. (Eng. Trans.)",22,840-845,https://doi.org/10.1016/0198-0254(88)92556-3,0
17,18,"Briden, J.C.",1966,Variation of intensity of the palaeomagnetic f...,Nature,212,246-247,https://doi.org/10.1038/212246a0,http://dx.doi.org/10.7288/V4/MAGIC/13464
18,19,"Briden, J.C.",1966,Estimates of direction and intensity of the pa...,Geophys. J. Roy. Astron. Soc.,11,267-278,https://doi.org/10.1111/j.1365-246x.1966.tb030...,0
...,...,...,...,...,...,...,...,...,...
367,737,"Hawkins, L.M.A., Anwar, T., Shcherbakova, V.V....",2019,An exceptionally weak Devonian geomagnetic fie...,Earth and Planetary Sceince Letters,506,134-145,https://doi.org/10.1016/j.epsl.2018.10.035,0
370,740,"Shcherbakova, V.V., Biggin, A.J., Veselovskiy,...",2017,Was the Devonian geomagnetic field dipolar or ...,Geophys. J. Int.,209,1265-1286,https://doi.org/10.1093/gji/ggx085,0
371,741,"Kenneth P. Kodama , Lorraine K. Carnes, John A...",2019,Palaeointensity of the 1.3 billion-yr-old Gard...,Geophys. J. Int.,217,1974-1987,https://doi.org/10.1093/gji/ggz126,0
374,744,"Gee, J.S., Yu, Y., Bowles, J.",2010,Paleointensity estimates from ignimbrites: An ...,"Geochemistry, Geophysics, Geosystems",11,Q03010,https://doi.org/10.1029/2009gc002834,0


In [227]:
#pint_refs_doi_magic_filled = pd.concat([pint_refs_no_dois, pint_refs_dois])


In [233]:
# pint_refs_doi_magic_filled.reset_index(inplace=True, drop=True) # reset index 
# pint_refs_doi_magic_filled
# pint_refs_filled.to_excel('../docs/pint_refs_updated.xlsx')

In [349]:
pint_refs_doi_2nd_pass = pint_refs_doi_magic_filled.loc[pint_refs_doi_magic_filled['DOI'] == 0]

In [353]:
pint_refs_doi_2nd_pass.reset_index(inplace=True, drop=True) # reset index 
no_doi_2nd = pint_refs_doi_2nd_pass['TITLE']

dois_2nd = []
for i in range(len(no_doi_2nd)): 
    dois_2nd.append(get_doi(no_doi_2nd[i]))
    print(i,'/190')

In [381]:
dois_2nd0 = dois_2nd
dois_2nd_str = list(map(str, dois_2nd0))

In [382]:
doi_str_links_2nd = []
for i in range(len(pint_refs_doi_2nd_pass)): 
    if (dois_2nd_str[i] != '0'):
        doi_str_links_2nd.append('https://doi.org/' + dois_2nd_str[i])
    else: 
        doi_str_links_2nd.append(0)

In [404]:
pint_refs_doi_2nd_pass['DOI'] = doi_str_links_2nd
pd.set_option('display.max_rows', 10)
len(pint_refs_doi_2nd_pass) + len(pint_refs_no_dois_1st) + len(pint_refs_dois)
pint_refs_doi_magic_filled2 = pd.concat([pint_refs_no_dois_1st, pint_refs_doi_2nd_pass, pint_refs_dois])

In [408]:
pint_refs_doi_magic_filled2.to_excel('../docs/pint_refs_updated.xlsx')

In [385]:
# seclude doi number as a str
doi_vals2 = []
for i in range(len(pint_refs_doi_2nd_pass['DOI'])): doi_vals2.append(pint_refs_doi_2nd_pass['DOI'][i])

# send doi number to be searched in magic
magic_links = []
for i in range(len(doi_vals2)): magic_links.append(magic_link_from_doi(doi_vals2[i]))

In [393]:
#magic_links

In [394]:
pint_refs_doi_2nd_pass['MagIC'] = magic_links

In [395]:
pd.set_option('display.max_rows', None)
pint_refs_doi_2nd_pass

Unnamed: 0,REFNO,AUTHORS,YEAR,TITLE,JOURNAL,VOL,PAGES,DOI,MagIC
0,2,"Bagina, O.L., Minasyan, D.O., Petrova, G.N.",1976,Determination of the intensity of the ancient ...,Izv. Akad. Nauk. (in Russian),2.0,81-86,0,0
1,3,"Bergh, H.W.",1970,"Paleomagnetism of the Stillwater Complex, Montana","Paleogeophysics (ed. S.K. Runcorn, Academic P...",17.0,143-158,0,0
2,5,"Bol'shakov, A.S., Solodovnikov, G.M., Vechfins...",1978,Determination of the geomagnetic field strengt...,"Izv., Earth Phys. (Eng. Trans.)",14.0,904-910,0,0
3,6,"Bol'shakov, A.S., Gapeyev, A.K., Tkhoa, N.T.K....",1981,Determination of paleointensity of the geomagn...,"Izv., Earth Phys. (Eng. Trans.)",17.0,306-310,0,0
4,7,"Bol'shakov, A.S., Solodovnikov, G.M., Vinograd...",1987,Paleointensity of the geomagnetic field in the...,"Izv., Earth Phys. (Eng. Trans.)",23.0,324-333,0,0
5,8,"Bol'shakov, A.S., Solodovnikov, G.M., Vinograd...",1989,Paleointensity of the geomagnetic field in the...,"Izv., Earth Phys. (Eng. Trans.)",25.0,70-78,0,0
6,10,"Bol'shakov, A.S., Solodovnikov, G.M.",1966,Magnitude of the geomagnetic field in the Lowe...,Geomag. Aeron. (Eng. Edition),6.0,574-577,0,0
7,11,"Bol'shakov, A.S., Solodovnikov, G.M.",1969,The field strength of the ancient magnetic fie...,"Izv., Earth Phys. (Eng. Trans.)",5.0,325-328,0,0
8,12,"Bol'shakov, A.S., Solodovnikov, G.M.",1973,Intensity of the geomagnetic field in the Tria...,"Izv., Earth Phys. (Eng. Trans.)",9.0,315-319,0,0
9,13,"Bol'shakov, A.S., Solodovnikov, G.M.",1975,Intensity of the geomagnetic field in the Earl...,Dokl. Akad. Nauk (Eng. Trans.),221.0,828-831,0,0


In [263]:
a = 'Paleomagnetic studies and estimation of the geomagnetic paleointensity at the early/middle Riphean boundary in rocks of the Salmi Formation (North Ladoga Area)'
b = 'Paleomagnetic studies and estimation of geomagnetic paleointensity at the early/middle Riphean boundary in rocks of the Salmi Formation (North Ladoga area)'
SequenceMatcher(a=a, b=b).ratio()

0.9808917197452229

In [337]:
a = "The intensity of the Tertiary geomagnetic field"
b = "The Intensity of the Tertiary Geomagnetic Field"
SequenceMatcher(a=a, b=b).ratio()
#is_string_similar(a,b)

0.9361702127659575