In [1]:
import json
import re

with open('./tags.json', 'r') as reader:
    tags = {k: v for d in json.load(reader) for k, v in d.items()}

with open('./detailed-lcc.json', 'r') as reader:
    detail = json.load(reader)

with open('./top-level-lcc.json', 'r') as reader:
    basic = json.load(reader)

In [None]:
ex1 = 'HV-1568.00000000.B376 2016' #The Minority Body by Elizabeth Barnes
ex2 = 'PS-3614.00000000.G83O97 2022' #Our Missing Hearts by Celest Ng
ex3 = 'DAW1008.00000000.B37 1987' #A guide to Central Europe by Bassett, Richard
ex4 = 'E185.86.00000000.H739 2001' #Salvation: Black People and Love by bell hooks

In [49]:
def split_lcc(code): #takes in an lcc code from OL API
    l = []
    match_lst = []

    #clean input
    key_words = re.split(r'\.0+| ', code) #returns list
    key_words = [w for w in key_words if not w.isdigit()] #get rid of years and buffer between labels
    key_words = [re.sub(r'(?<=\w)0+', '', w.replace('-', '')) for w in key_words] #remove leading 0s, dashes
    key_words = [re.sub(r'\.', '', w) for w in key_words] #remove leading .

    for k in key_words: #for example 2
        if re.match(r'[A-Z]\d+[A-Z]\d+', k):
            # Split the item and add to the result list
            matches = re.findall(r'[A-Z]\d+', k)
            key_words.remove(k)
            key_words.extend(matches)

    #split into dict k,v pair
    for k in key_words:
        match = re.match(r'([A-Z]+)(\d+\.*\d+)', k)
        if match:
            l.append({match.group(1): float(match.group(2))})
    return l #returns a list of standardized codes

def lookup_meaning(codes): #takes in a k,v pair (could change to a tuple if preferred)
    l = []

    for c in codes:
        for k, v in c.items():
            l.append(basic[k[0]][0]['subject']) #get overarching subject

            try:
                d = detail[k]
                for i in d:
                    if v >= i['start'] and v <= i['stop']:
                        l.append(i['subject'])
            except:
                continue

    return l
    #returns a list of definitions for the code

In [48]:
print(lookup_meaning(split_lcc(ex1)))
print(lookup_meaning(split_lcc(ex2)))
print(lookup_meaning(split_lcc(ex3)))
print(lookup_meaning(split_lcc(ex4)))

['HV1568', 'B376']
['Social Sciences', 'Special classes', 'People with disabilities', 'Protection, assistance and relief', 'Social pathology.  Social and public welfare.', 'Philosophy, Psychology, Religion', 'By period', 'Philosophy (General)', 'Ancient']
['PS3614', 'G8397']
['Language and Literature', 'Geography, Anthropology, and Recreation', 'Eastern Hemisphere.  Eurasia, Africa, etc.', 'Africa', 'By region or country', 'Maps']
['DAW18', 'B37']
['World History and History of Europe, Asia, Africa, Australia, New Zealand, etc.', 'Philosophy, Psychology, Religion', 'Philosophy (General)']
['E18586', 'H739']
['History of America', 'Social Sciences']
