In [227]:
import re
import numpy as np
from collections import Counter


In [262]:
def GetReplaceDict(data):
    """Map each citation key to keep onto the duplicate keys it supersedes.

    Two entries of the BibTeX text ``data`` are treated as duplicates when
    the first URL that ``GetUrls`` finds in each of them is identical.
    Entries in which no URL is found never take part in a group.

    Parameters
    ----------
    data : str
        Full text of a BibTeX file.

    Returns
    -------
    dict
        ``{kept_key: [duplicate_key, ...]}``. ``kept_key`` is the first key
        (in the order produced by ``bib2dict``) carrying a given URL; the
        list holds every later key with that same first URL. URLs used by a
        single entry produce no item. The dict is ordered by first
        appearance of each URL, so the result is deterministic.
    """
    urlbib = GetUrls(bib2dict(data))

    # Group the keys by their first URL in one pass. Entries with an empty
    # URL list are skipped explicitly instead of relying on a bare `except`
    # around an IndexError.
    keys_by_url = {}
    for key, urls in urlbib.items():
        if urls:
            keys_by_url.setdefault(urls[0], []).append(key)

    # Only URLs shared by two or more entries describe duplicates.
    return {
        keys[0]: keys[1:]
        for keys in keys_by_url.values()
        if len(keys) > 1
    }

def bib2dict(data):
    """Split BibTeX text into a ``{citation_key: entry_text}`` dictionary.

    Keys and entry texts are extracted by two independent regular
    expressions and paired positionally. Entry texts start at an ``@`` and
    run up to (not including) the next ``@`` or the end of the text. When a
    key occurs more than once, the text paired with its last occurrence
    is the one kept.
    """
    key_pattern = re.compile(r"@.+?\{(.+?),", flags=re.DOTALL)
    entry_pattern = re.compile(r"(@.+?)(?=@|$)", flags=re.DOTALL)

    # zip() stops at the shorter list, exactly like the pairwise loop would.
    return dict(zip(key_pattern.findall(data), entry_pattern.findall(data)))

def GetUrls(bib):
    """Collect the URL-like values found in every bibliography entry.

    Parameters
    ----------
    bib : dict
        ``{citation_key: entry_text}`` as produced by ``bib2dict``.

    Returns
    -------
    dict
        ``{citation_key: [url, ...]}``. A value is captured after each
        occurrence of ``url``/``Url`` that is followed by spaces, ``=`` or
        an opening brace, and ends before the next closing brace or comma.
        The list is empty when the entry contains no such occurrence.
    """
    # Raw string: the pattern contains backslash-brace sequences, which are
    # invalid escape sequences in a normal string literal and trigger a
    # warning on current Python versions. The regex itself is unchanged.
    url_pattern = re.compile(r"[Uu]rl[ =\{]+(.+?)[\},]", flags=re.DOTALL)
    return {key: url_pattern.findall(entry) for key, entry in bib.items()}


def _entry_key(entry):
    """Return the citation key of one BibTeX entry, or None if none is found."""
    found = re.search(r"@.+?\{(.+?),", entry, flags=re.DOTALL)
    return found.group(1) if found else None


def RemoveDuplicatesInBibfile(bibin, bibout):
    """Write a copy of a BibTeX file with duplicate entries removed.

    Two kinds of duplicates are dropped:

    1. entries whose citation key already occurred earlier in the file
       (the first occurrence is kept), and
    2. entries that ``GetReplaceDict`` reports as sharing their first URL
       with an earlier entry.

    Parameters
    ----------
    bibin : str
        Path of the BibTeX file to read.
    bibout : str
        Path the de-duplicated entries are written to.

    Progress and a final count are printed; nothing is returned.
    """
    counter = 0
    with open(bibin, 'r') as file:
        data = file.read()
    replace_dict = GetReplaceDict(data)
    entries = re.findall(r"(@.+?)(?=@|$)", data, flags=re.DOTALL)
    # Count keys per entry so the counts always agree with the entries that
    # are actually inspected below.
    frequency = Counter(_entry_key(entry) for entry in entries)

    print("Removing items with the same key...")
    # Walk backwards so deleting never shifts an index still to be visited;
    # this also means the first occurrence of a key is the one that survives.
    for i in range(len(entries) - 1, -1, -1):
        entry_key = _entry_key(entries[i])
        if entry_key is None:
            # Fragment without a recognisable key (e.g. a stray '@'): keep it
            # untouched rather than failing with an IndexError.
            continue
        if frequency[entry_key] > 1:
            del entries[i]
            frequency[entry_key] -= 1
            print("Removed one duplicate entry: {key}".format(key=entry_key))
            counter += 1

    print("Removing items with same url-tag...")
    for key, duplicate_keys in replace_dict.items():
        for replacekey in duplicate_keys:
            for i in range(len(entries) - 1, -1, -1):
                # Compare the extracted key for equality. Interpolating the
                # key into a regex could match "{key," inside a field value
                # of an unrelated entry and misreads regex metacharacters.
                if _entry_key(entries[i]) == replacekey:
                    del entries[i]
                    print("Removed the following item: {key} (duplicate of {key2})".format(
                        key=replacekey, key2=key))
                    counter += 1
                    break

    with open(bibout, 'w') as file:
        file.write(''.join(entries))
    print("Removed a total of {counter} items from the bibfile.\n".format(counter=counter))
    

def ReplaceCiteKeys(bib, filein, fileout):
    r"""Rewrite duplicate citation keys inside ``\cite{...}`` commands.

    The duplicate-to-kept key mapping is derived from the BibTeX file
    ``bib`` via ``GetReplaceDict``. Every duplicate key that appears as a
    complete key inside a ``\cite{...}`` argument of ``filein`` is replaced
    by the key that is kept, and the result is written to ``fileout``.

    Parameters
    ----------
    bib : str
        Path of the BibTeX file used to detect duplicates.
    filein : str
        Path of the LaTeX source to read.
    fileout : str
        Path the updated LaTeX source is written to.

    Progress and the number of replacements are printed; nothing is
    returned.
    """
    counter = 0
    with open(bib, 'r') as file:
        data = file.read()
    replace_dict = GetReplaceDict(data)

    with open(filein, 'r') as file:
        data = file.read()
    print("Updating {file}...".format(file=filein))
    for key in replace_dict.keys():
        for replacekey in replace_dict[key]:
            # - re.escape: keys may contain regex metacharacters.
            # - [^{}]*? keeps the match inside a single \cite{...} argument.
            # - the optional prefix must end in ',' or whitespace, and the
            #   lookahead requires ',' or '}' next, so only a whole key
            #   matches, never a fragment of a longer key.
            pattern = (r"(\\cite\{(?:[^{}]*?[,\s])?)"
                       + re.escape(replacekey)
                       + r"(?=\s*[,}])")

            # A function replacement avoids template parsing, so a new key
            # starting with a digit cannot be misread as a group reference.
            data, n = re.subn(pattern,
                              lambda found, new_key=key: found.group(1) + new_key,
                              data)
            counter = counter + n
            if n != 0:
                print(r"""Replaced the tag "{key1}" with "{key2}" {n} times in {file}""".format(
                    key1 = replacekey, key2=key, file=fileout, n=n))

    with open(fileout, 'w') as file:
        file.write(data)
    print(r"Updated a total of {counter} citation keys".format(counter=counter))
    
def MakeBackup(filename):
    """Copy ``filename`` to ``filename + '.backup'``.

    The copy is byte-for-byte: no text decoding or newline translation takes
    place, so the backup is identical to the source whatever its encoding.
    An existing backup file is overwritten.

    Parameters
    ----------
    filename : str
        Path of the file to back up.
    """
    # Imported locally so the function does not depend on edits to the
    # notebook's import cell.
    import shutil

    shutil.copyfile(filename, filename + '.backup')


In [22]:
# LaTeX chapter sources whose citation keys are rewritten further down.
files = [
    './Chapters/Introduction.tex',
    './Chapters/Chapter01.tex',
    './Chapters/Chapter02.tex',
    './Chapters/Chapter03.tex',
    './Chapters/Chapter0A.tex',
    './Chapters/Chapter0B.tex',
    './Chapters/Chapter0C.tex',
    './Chapters/Chapter0D.tex',
]
# Not read by any cell visible here; kept so the name stays defined.
filesbackup = []

# Snapshot every chapter and the bibliography as "<name>.backup" before
# anything is modified; the later cells read from these backups.
for file in files:
    MakeBackup(file)
MakeBackup('Bibliography.bib')


In [261]:
# Demonstration of the citation-key substitution on a small LaTeX snippet.
# Raw string: the backslash sequences "\p" and "\c" are invalid escape
# sequences in a normal string literal and trigger a warning on current
# Python versions. The value of the string is unchanged.
example = r"\protect\cite{ivanEPL}.}\protect\cite{ivanEPL}.}"
pattern = r"\\cite{{(.*?)({key})([, }}])".format(key="ivanEPL")

# Group 1 is whatever precedes the key inside the braces and group 3 the
# delimiter after it; both are put back around the new key.
example, n = re.subn(pattern, r"\\cite{{\1{key}\3".format(key="ivan"), example, flags=re.DOTALL)
print(example, n)

\protect\cite{ivan}.}\protect\cite{ivan}.} 2


In [None]:
# Read each chapter from its backup and write the updated text back to the
# original path, using the backed-up bibliography to detect duplicates.
for file in files:
    ReplaceCiteKeys(
        bib="Bibliography.bib.backup",
        filein=file+".backup",
        fileout=file,
    )

Updating ./Chapters/Introduction.tex.backup...
Updated a total of 0 citation keys
Updating ./Chapters/Chapter01.tex.backup...
Replaced the tag "burkov_spin_2010" with "burkov_spin_2010-1" 1 times in ./Chapters/Chapter01.tex
Replaced the tag "ivanEPL" with "ivan" 6 times in ./Chapters/Chapter01.tex
Updated a total of 7 citation keys
Updating ./Chapters/Chapter02.tex.backup...
Replaced the tag "Hoffman2018" with "jungfleisch_perspectives_2018" 1 times in ./Chapters/Chapter02.tex
Replaced the tag "hals_phenomenology_2013" with "Hals2013" 1 times in ./Chapters/Chapter02.tex
Replaced the tag "van_der_bijl_current-induced_2012" with "vanderBijl2012" 1 times in ./Chapters/Chapter02.tex
Replaced the tag "Roschewsky2017" with "roschewsky_spin-orbit_2017" 2 times in ./Chapters/Chapter02.tex
Updated a total of 5 citation keys
Updating ./Chapters/Chapter03.tex.backup...
Replaced the tag "Tshitoyan2015" with "tshitoyan_electrical_2015" 1 times in ./Chapters/Chapter03.tex


In [251]:
# Rebuild the bibliography from its backup with the duplicates removed.
RemoveDuplicatesInBibfile(
    bibin='Bibliography.bib.backup',
    bibout='Bibliography.bib',
)

Removing items with the same key...
Removed one duplicate entry: pandey_doping_2017-1
Removed one duplicate entry: jourdan_epitaxial_2015
Removed one duplicate entry: sato_two-terminal_2018
Removed one duplicate entry: bader_spintronics_2010
Removed one duplicate entry: sinova_new_2012
Removed one duplicate entry: bhatti_spintronics_2017
Removed one duplicate entry: kent_new_2015
Removed one duplicate entry: noauthor_devices_2018
Removed one duplicate entry: macdonald_antiferromagnetic_2011
Removed one duplicate entry: gomonay_spintronics_2014
Removed one duplicate entry: wadley_electrical_2016
Removed one duplicate entry: jungwirth_antiferromagnetic_2016
Removed one duplicate entry: baltz_antiferromagnetic_2018-1
Removed one duplicate entry: jungwirth_multiple_2018-1
Removed one duplicate entry: jungfleisch_perspectives_2018-1
Removed one duplicate entry: gomonay_high_2016-1
Removed one duplicate entry: baltz_antiferromagnetic_2018
Removed one duplicate entry: jungwirth_multiple_2018
