In [6]:
import pandas
from lxml import etree
from pymarc import map_records, map_xml, XMLWriter, Field

### Input and output file control
The directory `input/cuc-export` is actually a simlink to another dir. Note that on macOS, you can't use Finder
aliases for such links, you have to create actual symlinks with `ln -s` for this to work.

#### File preparation
The `cuclookup` file on the list below needs to be a csv (e.g. generated in MarcEdit) containing a list of 
`020$a` and `035\$a` exported from the CUC file. Column must be named that way

In [173]:
chathamxml = '../stclair-duplicates/input/chatham-records-20210928-fixed.xml'
windsorxml = '../stclair-duplicates/input/windsor-records-20210928-fixed.xml'

cucfiles = ['input/cuc-export/CUC_bib_01_final.xml', \
            'input/cuc-export/CUC_bib_02_final.xml', \
            'input/cuc-export/CUC_bib_03_final.xml', \
            'input/cuc-export/CUC_bib_04_final.xml', \
            'input/cuc-export/CUC_bib_05_final.xml']

cuclookup = 'input/CUC-020-035-lookup.csv'

chathamoutxml = 'output/chatham-records-cuc035-20220319.xml'
windsoroutxml = 'output/windsor-records-cuc035-20220319.xml'

### Build CUC lookup dictionary
This will be used to look up any 020s found in the St. Clair files and get the corresponding 035s if found.
If more than one 020 exists for a particular record, an additional dictionary entry is created. Records where 035 is `nan` are excluded.

035s can be multiple values split by `;` too, they will be handled individually in the mapping function.

In [200]:
cuc_df = pandas.read_csv(cuclookup)
cuc_dict = {}
def construct_dict(line):
    ohtwooh = str(line['020$a'])
    ohthreefive = str(line['035$a'])
    if ohthreefive != 'nan':
        isbns = ohtwooh.split(';')
        if len(isbns) > 0:
            for isbn in isbns:
                cuc_dict[isbn]=ohthreefive

cuc_df.apply(construct_dict, axis=1)

0         None
1         None
2         None
3         None
4         None
          ... 
395315    None
395316    None
395317    None
395318    None
395319    None
Length: 395320, dtype: object

### ISBN Helper functions
Adapted from [ISBN-13 converter (Python recipe)](https://code.activestate.com/recipes/498104-isbn-13-converter/)

In [120]:
def check_digit_10(isbn):
    assert len(isbn) == 9
    sum = 0
    for i in range(len(isbn)):
        c = int(isbn[i])
        w = i + 1
        sum += w * c
    r = sum % 11
    if r == 10: return 'X'
    else: return str(r)

def check_digit_13(isbn):
    assert len(isbn) == 12
    sum = 0
    for i in range(len(isbn)):
        c = int(isbn[i])
        if i % 2: w = 3
        else: w = 1
        sum += w * c
    r = 10 - (sum % 10)
    if r == 10: return '0'
    else: return str(r)

def convert_10_to_13(isbn):
    assert len(isbn) == 10
    prefix = '978' + isbn[:-1]
    check = check_digit_13(prefix)
    return prefix + check

def convert_13_to_10(isbn):
    assert len(isbn) == 13
    prefix = isbn[3:-1]
    check = check_digit_10(prefix)
    return prefix + check

### Define mapping function
This function will be run on each record in the St.Clair dataset.

In [215]:
def stclaircuc_map(record):
    if record:
        bibid = int(record['001'].value())
        ohtwooh = record['020']
        if (ohtwooh != None):
            # Extract only the first part of the 020 string (before first space separator)
            isbnraw = record['020'].value().split( )[0]
            # Convert it into other ISBN variants
            isbn_nodash = isbnraw.replace('-','')
            if len(isbn_nodash) == 10:
                isbn_13 = convert_10_to_13(isbn_nodash)
                isbn_10 = isbn_nodash
            elif len(isbn_nodash) == 13:
                isbn_13 = isbn_nodash
                isbn_10 = convert_13_to_10(isbn_nodash)
            else:
                # If the 020 string is neither 10 nor 13 characters long, it's likely not an ISBN, 
                # keep this record as-is and go to the next.
                writer.write(record)
                return
            isbn_13_dashed = "-".join([isbn_13[0:3], isbn_13[3], isbn_13[4:7], isbn_13[7:12], isbn_13[12]])
            isbn_10_dashed = "-".join([isbn_10[0], isbn_10[1:4], isbn_10[4:9], isbn_10[9]])
            
            isbn_variants = [isbn_13, isbn_10, isbn_13_dashed, isbn_10_dashed]
            #print(bibid, ': ', isbn_variants)
            
            # Try to find a CUC record with one of those ISBN variants
            for isbn in isbn_variants:
                find035 = cuc_dict.get(isbn)
                if find035 != None:
                    for ohthreefive in find035.split(';'):
                        # Check first if that 035 doesn't exist in the record already
                        control = False
                        existing_fields = record.get_fields('035')
                        for existing in existing_fields:
                            subfields = existing.get_subfields('a')
                            if (len(subfields) > 0) and (subfields[0] == ohthreefive):
                                control = True
                        
                        if control == False:
                            record.add_field(
                                Field(
                                    tag = '035',
                                    indicators = ['',''],
                                    subfields = [
                                        'a', ohthreefive
                                    ]
                                )
                            )
                            print(bibid, ': Added =035 ', ohthreefive)
                    break
                    
        writer.write(record)
    else:
        print('Could not read record')

### Run the mapping function
This is where the magic happen. The mapping function is run on the Chatham and Windsor files.

In [216]:
writer = XMLWriter(open(chathamoutxml,'wb'))
map_xml(stclaircuc_map, chathamxml)
writer.close()

73 : Added =035  (Sirsi) AEJ-1110
343 : Added =035  (Sirsi) AEE-3382
649 : Added =035  (Sirsi) ABY-5118
669 : Added =035  (Sirsi) ACH-9008
688 : Added =035  (Sirsi) AEJ-2778
730 : Added =035  (Sirsi) AEJ-2867
733 : Added =035  (VGER)31049
733 : Added =035  (CA-OTSC)31049-senlcdb-Voyager
837 : Added =035  (Sirsi) AEE-4345
933 : Added =035  (Sirsi) AEE-4950
941 : Added =035  (Sirsi) AEJ-4200
963 : Added =035  (Sirsi) AEE-5119
966 : Added =035  (Sirsi) AEJ-4609
981 : Added =035  (Sirsi) AEJ-5627
1001 : Added =035  (Sirsi) ADU-7976
1008 : Added =035  (Sirsi) AEJ-6356
1010 : Added =035  (Sirsi) AEE-5882
1037 : Added =035  (Sirsi) AEJ-7096
1040 : Added =035  (Sirsi) AEJ-7103
1070 : Added =035  (Sirsi) AEJ-7454
1071 : Added =035  (Sirsi) AEJ-7471
1080 : Added =035  (Sirsi) AEJ-7668
1101 : Added =035  (Sirsi) ACI-4199
1110 : Added =035  (Sirsi) AEE-6596
1116 : Added =035  (Sirsi) AEJ-8023
1117 : Added =035  (Sirsi) AEE-6653
1125 : Added =035  (Sirsi) AAA-6053
1134 : Added =035  (Sirsi) AEE-684

15673 : Added =035  (Sirsi) AER-7947
15683 : Added =035  (Sirsi) AER-0713
16232 : Added =035  (Sirsi) AEG-6053
16368 : Added =035  (Sirsi) ABU-0432
16410 : Added =035  (Sirsi) AET-7317
16659 : Added =035  (Sirsi) AET-6880
17286 : Added =035  (Sirsi) ACI-2681
17329 : Added =035  (Sirsi) ABH-6001
17401 : Added =035  (Sirsi) AEA-0914
17412 : Added =035  (Sirsi) AEU-6152
17524 : Added =035  (Sirsi) AES-0939
17528 : Added =035  (Sirsi) AEQ-3871
17578 : Added =035  (Sirsi) AFF-8856
17581 : Added =035  (Sirsi) AEQ-1412
17617 : Added =035  (Sirsi) AEY-9011
17664 : Added =035  (Sirsi) ACP-8047
17684 : Added =035  (Sirsi) AES-5525
17698 : Added =035  AEV-6300
17711 : Added =035  (Sirsi) AEV-9304
17714 : Added =035  (Sirsi) AEX-5123
17719 : Added =035  (Sirsi) AGY-2148
17721 : Added =035  (Sirsi) AEX-4760
17741 : Added =035  (Sirsi) AEY-0071
17764 : Added =035  (Sirsi) AES-2986
17794 : Added =035  (Sirsi) AAC-6976
17819 : Added =035  (Sirsi) ADZ-8020
17845 : Added =035  (Sirsi) AET-9733
17850 : A

20931 : Added =035  (Sirsi) AGZ-5081
20934 : Added =035  (Sirsi) AGZ-4538
20935 : Added =035  (Sirsi) AHA-3730
20942 : Added =035  (Sirsi) AGX-9914
20947 : Added =035  (Sirsi) AHC-0546
20950 : Added =035  (Sirsi) AGZ-9093
20951 : Added =035  (Sirsi) AHA-3757
20954 : Added =035  (Sirsi) AHA-3505
20957 : Added =035  (Sirsi) AGZ-9341
20960 : Added =035  (Sirsi) AGZ-5537
20966 : Added =035  (Sirsi) AGZ-4942
20978 : Added =035  (Sirsi) AFK-5957
20986 : Added =035  (Sirsi) AHA-6336
20996 : Added =035  (Sirsi) AHA-1921
20997 : Added =035  (Sirsi) AHA-1921
21004 : Added =035  (Sirsi) AGY-6872
21009 : Added =035  (Sirsi) AGZ-6703
21010 : Added =035  (Sirsi) AGZ-6028
21013 : Added =035  (Sirsi) AFL-2722
21014 : Added =035  (Sirsi) AGZ-4013
21016 : Added =035  (Sirsi) AHC-1769
21018 : Added =035  (Sirsi) AGZ-9787
21020 : Added =035  (Sirsi) AHC-1729
21022 : Added =035  (Sirsi) AHA-0868
21023 : Added =035  (Sirsi) AHA-0871
21025 : Added =035  (Sirsi) AFI-9964
21028 : Added =035  (Sirsi) AGZ-9226
2

23034 : Added =035  (Sirsi) AHL-0783
23042 : Added =035  (Sirsi) AHJ-8605
23043 : Added =035  (Sirsi) AHG-2850
23044 : Added =035  (Sirsi) AHG-2850
23065 : Added =035  (Sirsi) AHI-5478
23070 : Added =035  (Sirsi) AHJ-1512
23071 : Added =035  (Sirsi) AHJ-2345
23075 : Added =035  (Sirsi) AHJ-8610
23118 : Added =035  (Sirsi) AHD-7879
23125 : Added =035  (Sirsi) AHK-7836
23132 : Added =035  (Sirsi) AHK-5576
23139 : Added =035  (Sirsi) AHL-4386
23142 : Added =035  (OBEL)0000050143-01loyalist_inst
23142 : Added =035  0000050143
23142 : Added =035  (OCoLC)AHJ-9434
23150 : Added =035  (Sirsi) AHN-5229
23165 : Added =035  (Sirsi) AHF-9298
23167 : Added =035  (Sirsi) AHK-5772
23171 : Added =035  (Sirsi) AHG-6407
23174 : Added =035  (Sirsi) AHA-3662
23178 : Added =035  (Sirsi) AHK-4078
23182 : Added =035  (Sirsi) AHI-8816
23184 : Added =035  (Sirsi) AHI-4599
23186 : Added =035  (Sirsi) AHJ-9439
23204 : Added =035  (Sirsi) AHH-7864
23207 : Added =035  (Sirsi) AHL-1740
23216 : Added =035  (Sirsi) A

25750 : Added =035  (Sirsi) AGZ-5578
25755 : Added =035  (Sirsi) AJH-7064
25765 : Added =035  (Sirsi) BAE-7899
25766 : Added =035  (OBEL)0000117771-01loyalist_inst
25766 : Added =035  0000117771
25768 : Added =035  (OBEL)0000117770-01loyalist_inst
25768 : Added =035  0000117770
25768 : Added =035  (OCoLC)319858871
25769 : Added =035  (OBEL)0000117773-01loyalist_inst
25769 : Added =035  0000117773
25771 : Added =035  (OBEL)0000117775-01loyalist_inst
25771 : Added =035  0000117775
25772 : Added =035  (OBEL)0000117769-01loyalist_inst
25772 : Added =035  0000117769
25773 : Added =035  (OBEL)0000117776-01loyalist_inst
25773 : Added =035  0000117776
25778 : Added =035  (Sirsi) AJJ-6539
25784 : Added =035  (Sirsi) BAD-3888
25796 : Added =035  (Sirsi) BAC-6525
25800 : Added =035  (Sirsi) BAE-2884
25805 : Added =035  (Sirsi) AJO-1928
25809 : Added =035  (Sirsi) AJL-7784
25812 : Added =035  (Sirsi) AJN-8305
25816 : Added =035  (Sirsi) BAA-1979
25825 : Added =035  (OCoLC)ocn419876488
25825 : Adde

In [217]:
writer = XMLWriter(open(windsoroutxml,'wb'))
map_xml(stclaircuc_map, windsorxml)
writer.close()

1912 : Added =035  (Sirsi) AEE-4457
3911 : Added =035  (Sirsi) AEJ-7944
6396 : Added =035  (Sirsi) BAI-1721
6958 : Added =035  (Sirsi) ACI-9768
11217 : Added =035  (Sirsi) AEF-4730
13663 : Added =035  (Sirsi) ACK-1114
17286 : Added =035  (Sirsi) AEG-1431
19290 : Added =035  (Sirsi) AEB-8866
19645 : Added =035  (Sirsi) AEG-3978
19649 : Added =035  (Sirsi) AEG-3982
19677 : Added =035  (Sirsi) AEG-4010
20585 : Added =035  (Sirsi) AEG-4955
20883 : Added =035  (Sirsi) AEG-5257
20888 : Added =035  (Sirsi) AEG-5262
20898 : Added =035  (Sirsi) AEG-5272
20914 : Added =035  (Sirsi) AEM-5197
20917 : Added =035  (Sirsi) AEG-5291
20920 : Added =035  (Sirsi) AEG-5294
21051 : Added =035  (Sirsi) AEG-5435
21089 : Added =035  (Sirsi) AEG-5480
21106 : Added =035  (Sirsi) AAD-3776
21107 : Added =035  (Sirsi) AEG-5500
21124 : Added =035  (Sirsi) AEM-5482
21130 : Added =035  (Sirsi) AEM-5491
21166 : Added =035  (Sirsi) AEG-5560
21201 : Added =035  (Sirsi) AEM-5621
21233 : Added =035  (Sirsi) AEG-5629
21241

23712 : Added =035  (Sirsi) AEN-1008
23735 : Added =035  (Sirsi) AEG-8255
23765 : Added =035  (Sirsi) AAD-8045
23776 : Added =035  (Sirsi) AEN-1280
23790 : Added =035  (Sirsi) AEN-1308
23800 : Added =035  (Sirsi) ACM-1299
23801 : Added =035  (Sirsi) AEN-1357
23826 : Added =035  (Sirsi) AEN-1450
23854 : Added =035  (Sirsi) AEG-8375
23909 : Added =035  (Sirsi) AEG-8433
23916 : Added =035  (Sirsi) AEG-8440
23970 : Added =035  (Sirsi) AAC-9046
24024 : Added =035  (Sirsi) AEG-8548
24048 : Added =035  (Sirsi) AEG-8572
24061 : Added =035  (Sirsi) AEN-2176
24150 : Added =035  (Sirsi) AEG-8676
24172 : Added =035  (Sirsi) AEG-8698
24254 : Added =035  (Sirsi) AEN-2813
24287 : Added =035  (Sirsi) AEN-3035
24293 : Added =035  (Sirsi) AEN-3054
24294 : Added =035  (Sirsi) ABB-0518
24305 : Added =035  (Sirsi) AEN-3089
24307 : Added =035  (Sirsi) AEN-3104
24315 : Added =035  (Sirsi) AEN-3126
24319 : Added =035  (Sirsi) ADX-9524
24321 : Added =035  (Sirsi) AEG-8853
24322 : Added =035  (Sirsi) AEG-8855
2

27653 : Added =035  (Sirsi) AEH-2543
27654 : Added =035  (Sirsi) AEH-2544
27660 : Added =035  (Sirsi) ACM-8672
27662 : Added =035  (Sirsi) AEH-2553
27666 : Added =035  (Sirsi) AEO-0664
27673 : Added =035  (Sirsi) AEH-2565
27678 : Added =035  (Sirsi) AEH-2570
27680 : Added =035  (Sirsi) AEO-0678
27683 : Added =035  (Sirsi) AEH-2575
27696 : Added =035  (Sirsi) AEH-2588
27699 : Added =035  (Sirsi) AEO-0706
27703 : Added =035  (Sirsi) AEH-2595
27710 : Added =035  (Sirsi) AEH-2602
27714 : Added =035  (Sirsi) AEH-2606
27725 : Added =035  (Sirsi) AEO-0762
27733 : Added =035  (Sirsi) AEH-2624
27734 : Added =035  (Sirsi) ACM-8843
27739 : Added =035  (Sirsi) AEH-2632
27740 : Added =035  (Sirsi) AEH-2633
27744 : Added =035  (Sirsi) ADY-5014
27748 : Added =035  (Sirsi) AEH-2641
27760 : Added =035  (Sirsi) AEH-2654
27769 : Added =035  (Sirsi) AEO-0890
27779 : Added =035  (Sirsi) AEO-0915
27783 : Added =035  (Sirsi) AEH-2677
27791 : Added =035  (Sirsi) AEH-2685
27799 : Added =035  (Sirsi) AEH-2693
2

41389 : Added =035  (Sirsi) AEP-6540
41398 : Added =035  (Sirsi) AEI-7527
41411 : Added =035  (Sirsi) AEI-7542
41413 : Added =035  (Sirsi) AEI-7544
41417 : Added =035  (Sirsi) AEI-7548
41429 : Added =035  (Sirsi) AEP-6567
41434 : Added =035  (Sirsi) AEI-7568
41435 : Added =035  (Sirsi) AEI-7570
41475 : Added =035  (Sirsi) AEI-7613
41480 : Added =035  (Sirsi) AEI-7618
41481 : Added =035  (Sirsi) ACC-8258
41485 : Added =035  (Sirsi) AEI-7623
41493 : Added =035  (Sirsi) ACC-8264
41494 : Added =035  (Sirsi) AED-9027
41495 : Added =035  (Sirsi) ACC-8265
41517 : Added =035  (Sirsi) AEI-7657
41549 : Added =035  (Sirsi) ACO-7220
41557 : Added =035  (Sirsi) ACC-8320
41562 : Added =035  (Sirsi) AAE-9281
41563 : Added =035  (Sirsi) AEI-7704
41565 : Added =035  (Sirsi) AAG-7835
41577 : Added =035  (Sirsi) ACO-7361
41583 : Added =035  (Sirsi) AEI-7726
41584 : Added =035  (Sirsi) AED-9349
41588 : Added =035  (Sirsi) ACO-7392
41589 : Added =035  (Sirsi) AEP-6868
41594 : Added =035  (Sirsi) AEI-7739
4

50785 : Added =035  (Sirsi) AEL-5884
50792 : Added =035  (Sirsi) AEJ-6095
50797 : Added =035  (Sirsi) AAC-4161
50823 : Added =035  (Sirsi) AEK-4555
50847 : Added =035  (Sirsi) AEN-9277
50848 : Added =035  (Sirsi) AEM-2875
51031 : Added =035  (Sirsi) 1383716
51041 : Added =035  (OCoLC)18990261
51042 : Added =035  (OCoLC)1006903680
51045 : Added =035  (OCoLC)18990292
51083 : Added =035  (Sirsi) AEF-0825
51123 : Added =035  (Sirsi) ADX-4756
52692 : Added =035  (Sirsi) AEL-2987
52844 : Added =035  (Sirsi) AEF-9621
52886 : Added =035  (Sirsi) AEL-0524
52901 : Added =035  (Sirsi) AAB-0604
53076 : Added =035  (Sirsi) AEF-5648
53119 : Added =035  (Sirsi) AEO-2557
53120 : Added =035  (Sirsi) AEI-3842
53122 : Added =035  (Sirsi) AAD-4292
53274 : Added =035  (Sirsi) AEF-8709
53281 : Added =035  (Sirsi) AEO-5290
53651 : Added =035  (Sirsi) AEF-9495
53654 : Added =035  (Sirsi) AEK-3413
53655 : Added =035  (VGER)21520
53655 : Added =035  (CA-OTSC)21520-senlcdb-Voyager
53667 : Added =035  (Sirsi) AEM

3767285 : Added =035  (Sirsi) AFG-5053
3767319 : Added =035  (Sirsi) AFE-9401
3767326 : Added =035  (Sirsi) AHN-0091
3767854 : Added =035  (Sirsi) AHK-2692
3767966 : Added =035  (Sirsi) AFF-8895
3768033 : Added =035  (Sirsi) AJM-7458
3768532 : Added =035  (Sirsi) AHK-1745
3768572 : Added =035  (Sirsi) AHG-8286
3769275 : Added =035  (Sirsi) AFF-7081
3769542 : Added =035  (Sirsi) AJE-5573
3769544 : Added =035  (Sirsi) AJE-5574
3769554 : Added =035  (Sirsi) AJE-5571
3771564 : Added =035  (Sirsi) AHL-7888
3771565 : Added =035  (Sirsi) AHL-7992
3771567 : Added =035  (Sirsi) AJG-8046
3771569 : Added =035  (Sirsi) AJG-8052
3771570 : Added =035  (Sirsi) AJO-2989
3771571 : Added =035  (Sirsi) AJG-5969
3772168 : Added =035  (Sirsi) AHL-7961
3772404 : Added =035  (Sirsi) AHK-6368
3772835 : Added =035  (Sirsi) AJD-3364
3773352 : Added =035  (Sirsi) AFF-0179
3773354 : Added =035  (Sirsi) AFG-5049
3773356 : Added =035  (Sirsi) AFA-5755
3773906 : Added =035  (Sirsi) AGZ-7407
3774055 : Added =035  (Si

3783723 : Added =035  (DNLM)101526751
3783745 : Added =035  (Sirsi) BAF-2146
3783836 : Added =035  (Sirsi) AJN-0182
3783837 : Added =035  (Sirsi) BAE-7964
3783914 : Added =035  (Sirsi) BAA-8189
3783922 : Added =035  (Sirsi) AJD-7350
3783924 : Added =035  (Sirsi) BAB-5741
3783996 : Added =035  (Sirsi) AJH-6913
3783997 : Added =035  (Sirsi) BAA-4106
3783998 : Added =035  (Sirsi) AJC-6588
3783999 : Added =035  (Sirsi) BAA-4569
3784013 : Added =035  (Sirsi) AJL-5614
3784015 : Added =035  (Sirsi) AJL-1976
3784017 : Added =035  (Sirsi) BAL-6020
3784018 : Added =035  (Sirsi) AJJ-7164
3784025 : Added =035  (Sirsi) AJF-7565
3784036 : Added =035  (OCoLC)ocn535495566
3784036 : Added =035  (OCoLC)535495566
3784036 : Added =035  (VGER)287009
3784036 : Added =035  (CA-OTSC)287009-senlcdb-Voyager
3784064 : Added =035  (Sirsi) AHA-4175
3784071 : Added =035  (OCoLC)AJF-5506
3784072 : Added =035  (Sirsi) AHN-0629
3784073 : Added =035  (Sirsi) AJG-9765
3784083 : Added =035  (Sirsi) ACI-7897
3784089 : Add

3787707 : Added =035  (Sirsi) BAC-4581
3787747 : Added =035  (Sirsi) BAA-3851
3787753 : Added =035  (OCoLC)264018218
3787753 : Added =035  (VGER)285089
3787753 : Added =035  (CA-OTSC)285089-senlcdb-Voyager
3787828 : Added =035  (Sirsi) BAB-0328
3787831 : Added =035  (Sirsi) AJG-9310
3787835 : Added =035  (Sirsi) AJI-5976
3787900 : Added =035  (Sirsi) AAM-5162
3787966 : Added =035  (Sirsi) BAA-1161
3787981 : Added =035  (Sirsi) BAB-4025
3787990 : Added =035  (Sirsi) AEE-2501
3787993 : Added =035  (Sirsi) AFA-0385
3787996 : Added =035  (Sirsi) AHK-2612
3788044 : Added =035  (OCoLC)ocn189666751
3788044 : Added =035  (OCoLC)189666751
3788044 : Added =035  (VGER)192941
3788044 : Added =035  (CA-OTSC)192941-senlcdb-Voyager
3788115 : Added =035  (Sirsi) AGZ-7590
3788117 : Added =035  (Sirsi) AAA-1082
3788118 : Added =035  (Sirsi) AHE-3773
3788119 : Added =035  (Sirsi) AHF-7899
3788129 : Added =035  (Sirsi) AES-1574
3788236 : Added =035  (OCoLC)174114094
3788241 : Added =035  (OCoLC)AJF-4553
3

3792178 : Added =035  (CaONFJC)ct0221587
3792214 : Added =035  (Sirsi) AJN-2453
3792790 : Added =035  (OCoLC)ocm42049250
3792790 : Added =035  (OCoLC)42049250
3792790 : Added =035  (VGER)72212
3792790 : Added =035  (CA-OTSC)72212-senlcdb-Voyager
3792806 : Added =035  (Sirsi) AES-3516
3792824 : Added =035  (OBEL)0000050874-01loyalist_inst
3792824 : Added =035  0000050874
3792824 : Added =035  (OCoLC)AHN-5509
3792846 : Added =035  (Sirsi) BAC-4618
3792848 : Added =035  (Sirsi) AJE-2599
3792869 : Added =035  (Sirsi) AAL-6316
3792869 : Added =035  1071520
3792882 : Added =035  (OCoLC)ocn191889764
3792882 : Added =035  (VGER)206869
3792882 : Added =035  (CA-OTSC)206869-senlcdb-Voyager
3792901 : Added =035  (OCoLC)ocn505279718
3792901 : Added =035  (OCoLC)505279718
3792901 : Added =035  (VGER)274890
3792901 : Added =035  (CA-OTSC)274890-senlcdb-Voyager
3792929 : Added =035  AGY-3728
3792929 : Added =035  (VGER)74162
3792929 : Added =035  (CA-OTSC)74162-senlcdb-Voyager
3792942 : Added =035  (

3803418 : Added =035  (OCoLC)432401469
3803490 : Added =035  (Sirsi) 3520065
3803500 : Added =035  (Sirsi) 3515905
3803528 : Added =035  (Sirsi) BAH-5400
3803555 : Added =035  (Sirsi) AGZ-3148
3803584 : Added =035  (Sirsi) AJG-1167
3803601 : Added =035  (DNLM)101151469
3803672 : Added =035  AHC-5225
3803672 : Added =035  (VGER)75598
3803672 : Added =035  (CA-OTSC)75598-senlcdb-Voyager
3803853 : Added =035  (Sirsi) AJK-2981
3803865 : Added =035  (Sirsi) AJD-7042
3803867 : Added =035  (Sirsi) AHH-0023
3803870 : Added =035  (Sirsi) AHC-5832
3803891 : Added =035  (Sirsi) AHI-6821
3803895 : Added =035  3803895
3803931 : Added =035  (OCoLC)ocm56918568
3803931 : Added =035  (OCoLC)56918568
3803931 : Added =035  (CA-OTSC)58025-senlcdb-Voyager
3804138 : Added =035  (VGER)53538
3804138 : Added =035  (CA-OTSC)53538-senlcdb-Voyager
3804168 : Added =035  (Sirsi) AJF-9387
3804358 : Added =035  (OCoLC)ocm48513927
3804401 : Added =035  (Sirsi) AFA-8796
3804418 : Added =035  (OCoLC)ocm63229231
3804418 

3808105 : Added =035  (Sirsi) BAD-4644
3808107 : Added =035  (Sirsi) BAF-1221
3808114 : Added =035  (Sirsi) BAD-3925
3808122 : Added =035  (Sirsi) BAD-8805
3808136 : Added =035  (OCoLC)226356880
3808162 : Added =035  (Sirsi) BAD-3543
3808167 : Added =035  (Sirsi) BAF-2791
3808169 : Added =035  (Sirsi) BAE-1430
3808197 : Added =035  (Sirsi) BAD-8291
3808223 : Added =035  (Sirsi) AAM-6447
3808279 : Added =035  (Sirsi) ACH-7306
3808301 : Added =035  (Sirsi) ADN-5988
3808315 : Added =035  (OCoLC)ocn601128182
3808315 : Added =035  (VGER)274264
3808315 : Added =035  (CA-OTSC)274264-senlcdb-Voyager
3808316 : Added =035  (Sirsi) BAH-7334
3808320 : Added =035  (Sirsi) BAE-6065
3808329 : Added =035  (Sirsi) BAK-7865
3808357 : Added =035  (Sirsi) BAF-0648
3808390 : Added =035  (Sirsi) BAE-5365
3808391 : Added =035  (Sirsi) BAD-6292
3808392 : Added =035  (OCoLC)ocn729990621
3808392 : Added =035  (OCoLC)729990621
3808423 : Added =035  (Sirsi) BAF-0919
3808444 : Added =035  (Sirsi) BAD-4165
3808502 

3813759 : Added =035  (Sirsi) AAC-2406
3813759 : Added =035  0309091
3813768 : Added =035  (Sirsi) AAD-1515
3813784 : Added =035  (Sirsi) AAC-2494
3813784 : Added =035  0613876
3813844 : Added =035  (Sirsi) AAD-1708
3813845 : Added =035  (Sirsi) ACL-5123
3813851 : Added =035  (Sirsi) AEG-4263
3813853 : Added =035  (Sirsi) AAC-2537
3813853 : Added =035  0823916
3813918 : Added =035  (Sirsi) AEG-4505
3813938 : Added =035  (Sirsi) AEM-3930
3813943 : Added =035  (Sirsi) ACF-6404
3813975 : Added =035  (Sirsi) AEC-0873
3814017 : Added =035  (Sirsi) AEM-4101
3814020 : Added =035  (Sirsi) AAO-4709
3814041 : Added =035  (Sirsi) AEG-4418
3814042 : Added =035  (Sirsi) AAD-1945
3814047 : Added =035  (Sirsi) ACL-5815
3814092 : Added =035  (Sirsi) AEM-4406
3814163 : Added =035  (Sirsi) AEM-4649
3814184 : Added =035  (VGER)32312
3814184 : Added =035  (CA-OTSC)32312-senlcdb-Voyager
3814206 : Added =035  (Sirsi) AAC-3680
3814206 : Added =035  0921882
3814253 : Added =035  (Sirsi) AAC-2831
3814253 : Add

3821365 : Added =035  (Sirsi) AEQ-1584
3821368 : Added =035  (Sirsi) AJM-7656
3821377 : Added =035  (Sirsi) BAA-6256
3821379 : Added =035  16443102
3821382 : Added =035  (OCoLC)ocn665030054
3821393 : Added =035  (OCoLC)AHK-7661
3821419 : Added =035  (Sirsi) BAF-1432
3821430 : Added =035  (Sirsi) BAB-8863
3821431 : Added =035  1565484
3821432 : Added =035  (VGER)206048
3821432 : Added =035  (CA-OTSC)206048-senlcdb-Voyager
3821437 : Added =035  (OCoLC)ocn653388603
3821438 : Added =035  (Sirsi) AFG-3580
3821461 : Added =035  (Sirsi) BAF-6886
3821481 : Added =035  (OCoLC)ocn809249877
3821482 : Added =035  (DLC) 2010920636
3821483 : Added =035  (OCoLC)ocn758394048
3821485 : Added =035  (OCoLC)1069710172
3821485 : Added =035  (OCoLC)on1069710172
3821488 : Added =035  (Sirsi) BAF-5498
3821502 : Added =035  (Sirsi) BAF-8119
3821503 : Added =035  (Sirsi) BAF-8799
3821538 : Added =035  17621124
3821547 : Added =035  (OCoLC)758061690
3821554 : Added =035  (Sirsi) BAD-4587
3821577 : Added =035  (S

3854838 : Added =035  ocn894750869
3854849 : Added =035  (Sirsi) BAJ-1072
3854868 : Added =035  (OCoLC)ocn849798434
3854868 : Added =035  (OCoLC)849798434
3854868 : Added =035  684463
3854868 : Added =035  (CA-OTSC)684463-senlcdb-Voyager
3854884 : Added =035  (Sirsi) BAK-8375
3854906 : Added =035  (VGER)592131
3854906 : Added =035  (CA-OTSC)592131-senlcdb-Voyager
3854910 : Added =035  (CaOOAMICUS)000034543145
3854910 : Added =035  (OCoLC)429484435
3854924 : Added =035  (Sirsi) AEM-8801
3854928 : Added =035  (Sirsi) AAA-6148
3854928 : Added =035  1002133
3854931 : Added =035  (Sirsi) ACD-7023
3854938 : Added =035  (Sirsi) AEJ-4668
3854941 : Added =035  (Sirsi) AEA-2465
3854943 : Added =035  (Sirsi) AEP-9074
3854949 : Added =035  (VGER)498893
3854949 : Added =035  (CA-OTSC)498893-senlcdb-Voyager
3854954 : Added =035  (Sirsi) BAE-3130
3854961 : Added =035  (OCoLC)897352758
3854961 : Added =035  (VGER)629847
3854961 : Added =035  (CA-OTSC)629847-senlcdb-Voyager
3854987 : Added =035  (OCoLC

3857990 : Added =035  (OCoLC)ocn754994994
3857990 : Added =035  (OCoLC)754994994
3857990 : Added =035  (VGER)344309
3857990 : Added =035  (CA-OTSC)344309-senlcdb-Voyager
3857993 : Added =035  (OCoLC)898198842
3857997 : Added =035  (OCoLC)992440931
3857997 : Added =035  (OCoLC)ocn992440931
3857998 : Added =035  (OCoLC)966410386
3857998 : Added =035  (OCoLC)ocn966410386
3858005 : Added =035  (Sirsi) BAE-9541
3858023 : Added =035  (Sirsi) AAE-0497
3858037 : Added =035  (OCoLC)ocn984839743
3858038 : Added =035  (OCoLC)949823398
3858038 : Added =035  (coutts)cts20330060
3858075 : Added =035  (Sirsi) BAL-0565
3858076 : Added =035  (Sirsi) BAL-0259
3858083 : Added =035  (Sirsi) BAG-0265
3858087 : Added =035  (Sirsi) BAK-8575
3858088 : Added =035  (Sirsi) BAK-9034
3858121 : Added =035  (Sirsi) BAB-1991
3858134 : Added =035  (Sirsi) BAE-0710
3858135 : Added =035  (Sirsi) BAK-4428
3858138 : Added =035  (OCoLC)1000150623
3858138 : Added =035  (OCoLC)on1000150623
3858139 : Added =035  (OCoLC)ocn96

3859242 : Added =035  (OCoLC)1053851898
3859242 : Added =035  (OCoLC)on1053851898
3859245 : Added =035  (Sirsi) BAK-2219
3859249 : Added =035  (OCoLC)1029631713
3859249 : Added =035  (OCoLC)on1029631713
3859250 : Added =035  (OCoLC)1044641679
3859250 : Added =035  (OCoLC)on1044641679
3859267 : Added =035  (Sirsi) BAL-8661
3859296 : Added =035  (OCoLC)993623721
3859296 : Added =035  (OCoLC)ocn993623721
3859309 : Added =035  (OCoLC)ocn824645144
3859321 : Added =035  (OCoLC)961008185
3859321 : Added =035  (OCoLC)ocn961008185
3859323 : Added =035  (OCoLC)58526508
3859325 : Added =035  (OCoLC)935757481
3859325 : Added =035  (OCoLC)ocn935757481
3859326 : Added =035  (Sirsi) AJH-1856
3859327 : Added =035  (OCoLC)976416453
3859327 : Added =035  (OCoLC)ocn976416453
3859328 : Added =035  (OCoLC)1035219395
3859328 : Added =035  (OCoLC)on1035219395
3859335 : Added =035  (OCoLC)1088926617
3859335 : Added =035  (OCoLC)on1088926617
3859343 : Added =035  (OCoLC)1057451854
3859346 : Added =035  (Sirsi)

### Old, inefficient way
This was a terrible way to solve the problem. Xpath lookups are very slow. Stored here for posterity.

In [None]:
cucparser = etree.XMLParser(encoding='utf-8',resolve_entities=False)

# This probably takes a good deal of memory...
cuctrees = list(map(lambda infile: etree.parse(infile, cucparser), cucfiles))

# For reference, this is the old function that used a live xpath lookup in the CUC.
def stclaircuc_map_old(record):
    if record:
        bibid = int(record['001'].value())
        ohtwooh = record['020']
        if (ohtwooh != None):
            # Extract only the first part of the 020 string (before first space separator)
            isbnraw = record['020'].value().split( )[0]
            # Convert it into other ISBN variants
            isbn_nodash = isbnraw.replace('-','')
            if len(isbn_nodash) == 10:
                isbn_13 = convert_10_to_13(isbn_nodash)
                isbn_10 = isbn_nodash
            elif len(isbn_nodash) == 13:
                isbn_13 = isbn_nodash
                isbn_10 = convert_13_to_10(isbn_nodash)
            else:
                # If the 020 string is neither 10 nor 13 characters long, it's likely not an ISBN, 
                # keep this record as-is and go to the next.
                writer.write(record)
                return
            isbn_13_dashed = "-".join([isbn_13[0:3], isbn_13[3], isbn_13[4:7], isbn_13[7:12], isbn_13[12]])
            isbn_10_dashed = "-".join([isbn_10[0], isbn_10[1:4], isbn_10[4:9], isbn_10[9]])
            
            isbn_variants = [isbn_13, isbn_10, isbn_13_dashed, isbn_10_dashed]
            #print(bibid, ': ', isbn_variants)
            
            # Try to find a CUC record with one of those ISBN variants
            xquery = "//marc:datafield[@tag='020'][marc:subfield='" \
                     + "' or marc:subfield='".join(isbn_variants) \
                     +"']/../marc:datafield[@tag='035']//text()"
            #print(bibid, ': ', xquery)
            for cuctree in cuctrees:
                find035 = cuctree.xpath(xquery,namespaces = {"marc": "http://www.loc.gov/MARC21/slim"})
                if len(find035) > 0:
                    print(bibid, ': ', isbn_variants)
                    print(bibid, ': ', find035)
                    break
            
            for ohthreefive in find035:
                record.add_field(
                    Field(
                        tag = '035',
                        indicators = ['',''],
                        subfields = [
                            'a', ohthreefive
                        ]
                    )
                )
        writer.write(record)
    else:
        print('Could not read record')