In [None]:
'''
Project: Periodic Table

Description: 
  Read in the relative abundance of the isotopes of each element from a PDF
  For each element, create a pie chart with the abundance of each isotope
    Label with the isotope name
  If no stable isotopes, put rectangle with "no stable isotopes"
  Print the pie charts in a periodic-table-like layout
'''
import pymupdf

In [None]:
# The working directory is asked for interactively, so that no machine-specific
# path ends up stored with the rest of the notebook code.
pth = input('please designate a path:')
print(f'path is: {pth}')


In [None]:
'''
Collect the words of every PDF page into rows.

A row ends when the next word starts further left than the previous word
(x0 < prev x0, i.e. a new line) or when it belongs to a different block.
Each finished row is stored in adict:
  key   = (page #, block #, line #, word #) taken from the last word of the row
  value = list of all the words in that row
If the word is on the same line and in the same block, it is appended to the
row currently being built.
'''

# temp dictionary
adict = {}

# note pth set outside this code, for example, pth = r'C:\'
filename = 'atomic_mass_abund.pdf'
fullpth = pth + '\\' + filename

# the context manager closes the PDF again, even if parsing raises
with pymupdf.open(fullpth) as doc:
    #  Iterates thru Pages
    for p in range(len(doc)):

        # position of the previously seen word; reset at the top of each page
        prevX0 = 0
        prevBlock = 0
        prevLine = 0
        prevWordNum = 0

        # get page P
        page = doc[p]
        # each word in words has value: (x0, y0, x1, y1, "word", block_no, line_no, word_no)
        words = page.get_text("words", sort=True)

        # words of the row currently being built
        tmpList = []

        for w in words:
            # x0 is rounded to make the "starts further left" test less jittery
            x0 = round(w[0])
            word = w[4]
            block = w[5]
            line = w[6]
            wordNum = w[7]

            # new line or new block -> output prev row to the dictionary
            if ((x0 < prevX0) or (block != prevBlock)):
                # Skip the store when nothing has been collected yet (first word
                # of a page that is not in block 0): an empty row would make the
                # later cells fail on v[0].
                if tmpList:
                    adict[(p,prevBlock,prevLine,prevWordNum)] = tmpList

                # start the next row with the current word
                tmpList = [word]
            else:
                # otherwise add the word to the current row
                tmpList.append(word)

            # remember this word for the comparison with the next one
            prevX0 = x0
            prevBlock = block
            prevLine = line
            prevWordNum = wordNum

        # end of page: output the last row (pages without any words add nothing)
        if tmpList:
            adict[(p,prevBlock,prevLine,prevWordNum)] = tmpList

In [None]:
# Sanity check: running total of the last field of every 3- or 5-word row whose
# first field is '26' or one of the listed iron isotopes.
ironIsotopes = ('56Fe','57Fe','58Fe')

cumul = 0
for key, row in adict.items():
    if len(row) not in (3,5):
        continue

    firstField = row[0].strip()
    if firstField == '26' or firstField in ironIsotopes:
        cumul += float(row[-1])
        print(key,row,'cumul pct:',cumul)

In [None]:
'''
Build isotopeDict: key = isotope label (e.g. '3He'), value = abundance string.

Special cases where len = 4, but should be INCLUDED:
  ['Deuterium', '2H', '2.014102', '0.0115']
  ['Tritium', '3H', '3.016049', '*']

Special case where len = 3 but should be EXCLUDED:
  ['Z', 'Name', 'Symbol']

Special case where len = 5 but should be EXCLUDED:
  ['Z', 'Name', 'Symbol', '(u)', 'Abundance']

Examples of the regular cases:
  ['2', 'Helium', '3He', '3.016029', '0.000137']
  ['36S', '35.967081', '0.02']
'''

isotopeDict = {}

for k,v in adict.items():
    # a row without words cannot be classified, and v[0] would raise on it
    if not v:
        continue

    firstField = v[0].strip().lower()

    if len(v) == 3:
        # isotope-only row: label first, abundance last; skip the header row
        if firstField != 'z':
            isotopeDict[v[0]] = v[2]

    elif len(v) == 5:
        # first row of an element: Z, name, label, mass, abundance; skip the header row
        if firstField != 'z':
            isotopeDict[v[2]] = v[4]

    # every other length is excluded except the two named hydrogen isotopes;
    # the length check keeps a truncated row from raising on v[3]
    elif len(v) >= 4 and firstField in ('deuterium','tritium'):
        isotopeDict[v[1]] = v[3]

In [None]:
elempth = pth + '\\' + 'elements_from_wikipedia.txt'

# puts elements and their numerical order into a dictionary: key = element symbol, value = element number
# (the number is kept as the string read from the file)
elemDict = {}

# 'utf-8-sig' reads plain UTF-8 as well, but drops a leading byte-order mark
# that would otherwise stick to the first element number
with open(elempth, encoding='utf-8-sig') as f:
    for line in f:
        fields = line.strip().split('\t')

        # blank lines (e.g. a trailing newline) or lines without a tab carry no
        # number/symbol pair; skip them instead of failing on fields[1]
        if len(fields) < 2:
            continue

        elemNumber = fields[0].strip()
        element = fields[1].strip()
        elemDict[element] = elemNumber
print('number of elements:',len(elemDict.keys()))

In [None]:
# the ten decimal digits as one-character strings
strNum = [str(digit) for digit in range(10)]

# Relevant data per isotope:
#   key   = (element number as int, isotope number as string)
#   value = {'symbol': element symbol, 'pct_abundance': abundance string}
eDict = {}

for isoLabel, abundance in isotopeDict.items():

    # split a label such as '3He' into its digits ('3') and the remainder ('He')
    isotope = ''.join(ch for ch in isoLabel if ch in strNum)
    element = ''.join(ch for ch in isoLabel if ch not in strNum)

    # labels whose non-digit part is not a known element symbol are dropped
    if element not in elemDict:
        continue

    elemOrd = int(elemDict[element])

    # an abundance of '*' is stored as '0'
    pctAbundance = '0' if abundance.strip() == '*' else abundance

    eDict[(elemOrd,isotope)] = {'symbol': element, 'pct_abundance': pctAbundance}

In [None]:
'''
Check that the isotope percentages of each element add up to 100 (within 0.1).

Elements in unstableSymbols, and every element above atomic number 92, are
counted as radioactive and are not checked against 100%.
'''

unstableSymbols = ('Tc','Pm','Po','At','Rn','Fr','Ra','Ac')

# Total pct per element, key = (atomic number, symbol).
# Summing into a dict first means every element is tallied, including the last
# one in sorted order.  The previous version only tallied an element when the
# NEXT element's first row showed up, so the final element was never counted.
totals = {}
for k,v in sorted(eDict.items()):
    elemKey = (k[0], v['symbol'])
    if elemKey in totals:
        totals[elemKey] += float(v['pct_abundance'])
    else:
        totals[elemKey] = float(v['pct_abundance'])

# number of elements that are within +/-0.1 of 100%
hundcntr = 0
# number of elements treated as radioactive/unstable
radioactive = 0

# totals was filled in sorted order, so this walks the elements by atomic number
for (elemOrd, symbol), totpct in totals.items():

    # radioactive elements
    if (symbol.strip() in unstableSymbols) or (elemOrd > 92):
        radioactive += 1

    # total pct not within 0.1 of 100%: report it (counted in neither total)
    elif abs(100 - totpct) > 0.1:
        print(elemDict[symbol],symbol,'total pct:',totpct)

    # all others within tolerance of 100%
    else:
        hundcntr += 1

print('NOTE: elements 110 - 118 missing or unnamed (e.g. Uuu for element 111), so omitted in count.')
print('elements checked:',len(totals))
print('')
print('total within 0.1% of 100.0%:',hundcntr)
print('total that have 0% (radioactive/unstable):',radioactive)
print('total:',radioactive + hundcntr)

# Code to create pie charts

In [None]:
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# keys = atomic number, values = [symbol, list of isotope numbers, list of percents]
mainDict = {}

# eDict: keys = (atomicNumber,isotope), value = dict with keys symbol, pct_abundance
# The isotope part of the key is a string, so a plain sort would put '100'
# before '96'.  Sorting on its integer value keeps the isotopes of an element
# in mass-number order.
orderedItems = sorted(eDict.items(), key=lambda item: (item[0][0], int(item[0][1])))

for k,v in orderedItems:

    atomic_number = int(k[0])
    isotope = int(k[1])

    symbol = v['symbol']
    pct = float(v['pct_abundance'])

    # Create the entry on the first isotope of an element and append afterwards.
    # Every element is stored this way, including the last one, which the
    # earlier "store when the symbol changes" approach never wrote out.
    if atomic_number not in mainDict:
        mainDict[atomic_number] = [symbol, [], []]

    mainDict[atomic_number][1].append(isotope)
    mainDict[atomic_number][2].append(pct)

# spot check: show what was collected for the first two elements
for atomic_number in sorted(mainDict):
    if atomic_number < 3:
        symbol, isotopeList, pctList = mainDict[atomic_number]
        print(atomic_number,'symbol:',symbol)
        print('isotopes:',isotopeList)
        print('percents:',pctList)
        print('---x---')

In [None]:
# Grid of pie charts, one per element, filled row by row in atomic-number order.
nRows, nCols = 10, 9

# elements drawn in gray (also everything above atomic number 92)
unstableSymbols = ('Tc','Pm','Po','At','Rn','Fr','Ra','Ac')
# d3d3d3 is hex for light gray
unstableColors = ['#d3d3d3']

fig, ax = plt.subplots(nRows, nCols, sharey=True, layout='constrained')

for i in range(nRows * nCols):
    # row-major position of the i-th chart
    row, col = divmod(i, nCols)
    cell = ax[row,col]

    atomic_number = i + 1

    # leave the cell blank instead of raising KeyError if an element is missing
    if atomic_number not in mainDict:
        print('no isotope data for element', atomic_number)
        cell.axis('off')
        continue

    symbol, isoList, pctList = mainDict[atomic_number]

    isRadioactive = (symbol.strip() in unstableSymbols) or (atomic_number > 92)

    # A pie cannot be normalised when all values are 0, so draw one full wedge.
    # This also covers elements with SEVERAL isotopes that are all 0, not only
    # the single-isotope case.
    if not any(pctList):
        pctList = [100.0]

    # startangle = 90 starts the first wedge at the top of the circle
    if isRadioactive:
        # color radioactive elements gray
        cell.pie(pctList, startangle = 90, colors=unstableColors)
    else:
        cell.pie(pctList, startangle = 90)

    # Put label of the pie plot in the middle of the circle using ax.transAxes, va and ha
    cell.text(0.5, 0.5, symbol, transform = cell.transAxes, va = 'center', ha = 'center')

outpth = pth + '\\' + 'isotopes.png'
fig.savefig(outpth)