In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import spacy
import os
import regex as re
import json
from pymarc import MARCReader
import requests
print("Done")

Done


In [2]:
# Load the spaCy pipeline "pl_core_news_lg" once; lemmatize() below calls this global.
nlp = spacy.load("pl_core_news_lg")

In [3]:
#open json file

def open_data(path):
    """Load and return the JSON document stored at `path`.

    The file is decoded explicitly as UTF-8 so that non-ASCII text is read
    the same way on every platform instead of depending on the locale's
    default encoding.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

#get data from data.bn.org.pl API (used in get_subj function)

def get_data(url: str) -> list:
    """Download every page of a paginated JSON API response.

    Starting at `url`, each page is fetched with `requests.get`, decoded as
    JSON and appended to the result; the next address is read from the
    page's "nextPage" entry. Paging stops when that entry is missing or
    empty, or as soon as a response has a status code other than 200.

    Returns the list of decoded pages collected so far (possibly empty).
    """
    responses = []
    while url:
        response = requests.get(url)
        if response.status_code != 200:
            print("Error while accessing API")
            # Stop explicitly instead of looping on a Response object.
            break
        page = response.json()
        responses.append(page)
        # .get() so a page without a "nextPage" entry ends the loop cleanly.
        url = page.get("nextPage")
        print(f"Downloading: {url}")
    print("Download complete")
    return responses

    
#get all subject headings - specify: query string - 1. argument; header (subject, subjectCategory etc.) - 2. argument; marc21 field numbers - 3. argument

def get_subj(sub: str, header: str, field_numbers: list) -> dict:
    """Collect values from selected MARC fields of the authorities matching a query.

    Queries the authorities endpoint with `{header}={sub}` through get_data().
    For every field dict that contains one of `field_numbers`, the first value
    of the LAST subfield of each entry in that dict is collected.

    Returns a one-item dict mapping `sub` to the list of collected values.
    """
    query_url = f"http://data.bn.org.pl/api/authorities.json?{header}={sub}"
    collected = []
    for page in get_data(query_url):
        for authority in page["authorities"]:
            for marc_field in authority["marc"]["fields"]:
                for wanted in field_numbers:
                    if wanted not in marc_field:
                        continue
                    for tag in marc_field:
                        last_subfield = marc_field[tag]["subfields"][-1]
                        collected.append(list(last_subfield.values())[0])
    return {sub: collected}


#read fbc files from .csv

def prepare_fbc_subjects(path: str) -> list:
    """Return the distinct values of column "0" of the CSV at `path`.

    Values whose string form is "nan" are left out. The order of the
    returned list is the iteration order of a set, i.e. not meaningful.
    """
    frame = pd.read_csv(path)
    distinct = set(frame["0"].values.tolist())
    kept = []
    for value in distinct:
        if str(value) == "nan":
            continue
        kept.append(value)
    return kept

    
#tbc

def subject_matcher(path: str, subjects: dict) -> list:
    """Return the FBC subjects that also occur among the given subject headings.

    Parameters:
        path: CSV file read by prepare_fbc_subjects().
        subjects: dict whose FIRST value is the list of headings to match
            against (the shape returned by get_subj()).

    Each FBC subject has the substring "DBN" removed and surrounding
    whitespace stripped before it is compared. The result keeps the order
    of the cleaned FBC subjects.
    """
    subjects_fbc = prepare_fbc_subjects(path)
    subjects_fbc_with_dbn = [x.replace("DBN", "").strip() for x in subjects_fbc]
    # A set makes each membership test O(1) instead of scanning a list.
    subjects_dbn = set(list(subjects.values())[0])
    return [x for x in tqdm(subjects_fbc_with_dbn) if x in subjects_dbn]


#return lemmatized sentence

def lemmatize(term):
    """Return the lemmas of `term`, as produced by the global `nlp` pipeline, joined by single spaces."""
    pieces = []
    for token in nlp(term):
        pieces.append(token.lemma_)
    return " ".join(pieces)

def get_fields_of_subj(subjects: list, fields: list) -> list:
    """Call get_subj(subject, "subject", fields) for every subject and return the resulting dicts in order."""
    return [get_subj(name, "subject", fields) for name in subjects]

#conversion from data in mrc format into python dict (json friendly)
def marc_to_dict(path:str, fields: list) -> list:
    """Read a binary MARC file and return the matching records as dicts.

    Parameters:
        path: path of the .mrc/.marc file, opened in binary mode.
        fields: MARC tags; a record is kept when it has at least one of them.

    Returns a list with `record.as_dict()` for every kept record. Each record
    appears at most once, even when it contains several of the requested tags.
    """
    records = []
    with open(path, "rb") as f:
        for record in MARCReader(f):
            if record is None:
                # MARCReader yields None for a record it could not parse.
                continue
            # any() stops at the first hit, so a record is never added twice.
            if any(record[field] is not None for field in fields):
                records.append(record.as_dict())
    return records


#TODO
def get_authorities_based_on_150(sub: str, header: str, field_numbers: list) -> dict:
    """Collect values from selected MARC fields of authorities found by a `marc=` query.

    Queries the authorities endpoint with `marc={header}+{sub}` through
    get_data(). For every field dict that contains one of `field_numbers`,
    the first value of the FIRST subfield of each entry in that dict is
    collected.

    Returns a one-item dict mapping `sub` to the list of collected values.
    """
    endpoint = f"http://data.bn.org.pl/api/authorities.json?marc={header}+{sub}"
    values = []
    for page in get_data(endpoint):
        for authority in page["authorities"]:
            for marc_field in authority["marc"]["fields"]:
                for wanted in field_numbers:
                    if wanted not in marc_field:
                        continue
                    for tag in marc_field:
                        first_subfield = marc_field[tag]["subfields"][0]
                        values.append(list(first_subfield.values())[0])
    return {sub: values}


def concat_dicts_with_subjects(subjects: list, field_in: str, fields_out: list) -> dict:
    """Run get_authorities_based_on_150() for every subject and merge the results.

    Parameters:
        subjects: query strings, one API query per entry.
        field_in: MARC field/subfield used in the `marc=` query (e.g. "368a").
        fields_out: MARC tags whose values are collected from each authority.

    Returns one dict mapping each subject to its list of collected values;
    a subject listed twice keeps the result of its last query.
    """
    final_dictionary = {}
    for subject in subjects:
        # Reuse the module-level helper: the nested copy that used to live here
        # also ran print(field["subfields"]), which raises KeyError because a
        # matching field dict is keyed by its tag, not by "subfields".
        final_dictionary.update(get_authorities_based_on_150(subject, field_in, fields_out))
    return final_dictionary


def concat_dicts_with_subjects_full(subjects: list, field_in: str, fields_out: list) -> dict:
    """Map every subject to the names of the authorities returned for it.

    For each subject the authorities endpoint is queried with
    `marc={field_in}+{subject}` through get_data(), and the "name" of every
    returned authority is collected. `fields_out` is accepted but not used.

    Returns a dict of subject -> list of authority names.
    """
    merged = {}
    for heading in subjects:
        pages = get_data(f"http://data.bn.org.pl/api/authorities.json?marc={field_in}+{heading}")
        names = []
        for page in pages:
            names.extend(authority["name"] for authority in page["authorities"])
        merged[heading] = names
    return merged

In [None]:
# Read the CSV, rename its 'Unnamed: 0' column to '150a', and keep that column as a plain list.
literaturoznawstwo = pd.read_csv("Kategorie tematyczne/df_literaturoznawstwo.csv").rename(columns = {'Unnamed: 0': '150a'})
lit_terms = list(literaturoznawstwo["150a"])

In [None]:
# Preview the first rows of the frame loaded from the CSV.
literaturoznawstwo.head()

In [None]:
# Query authorities with marc=368a+Wydawnictwa and collect values from their 110/111 fields.
testowy_final = concat_dicts_with_subjects(["Wydawnictwa"], "368a", ["110", "111"])

In [None]:
# Display the raw subject -> values dict returned by the query.
testowy_final

In [None]:
# Keep only the subjects for which at least one value was collected.
testowy_final_ok = {k: v for k, v in testowy_final.items() if len(v) > 0}

In [None]:
# Display the filtered dict.
testowy_final_ok

In [56]:
# Build a frame with one row per subject key, then transpose so every subject becomes a column.
df_110_111_names = pd.DataFrame.from_dict(testowy_final_ok, orient='index').transpose()

In [None]:
# Preview the frame built in the previous cell. (`df` is only assigned further
# down in the notebook, so `df.head()` fails here on a fresh kernel.)
df_110_111_names.head()

In [None]:
# One column per subject key of testowy_final_ok.
df_test = pd.DataFrame(testowy_final_ok)

In [None]:
# Split each name at ")": the text up to and including the first ")" goes to
# "Główny rekord", the remaining pieces (a list) go to "Pozostałe".
# NOTE(review): the query cell above only requests the subject "Wydawnictwa", so
# df_test presumably has no 'Konkursy i festiwale literackie' column on a fresh
# run and this cell would raise KeyError — confirm which subject is intended.
df_test["Główny rekord"] = df_test['Konkursy i festiwale literackie'].apply(lambda x: x.split(")")[0] + ")")
df_test["Pozostałe"] = df_test['Konkursy i festiwale literackie'].apply(lambda x: x.split(")")[1:])

In [None]:
# Display the source column used for the split above.
df_test['Konkursy i festiwale literackie']

In [57]:
# Save the 110/111 names table to "Wydawnictwa.csv" in the working directory.
df_110_111_names.to_csv("Wydawnictwa.csv")

In [None]:
# Collect subfield $a of field 155 (as a string) from every record in the MARC file.
records_155 = []
with open("authorities-all.marc", "rb") as f:
    for record in MARCReader(f):
        # MARCReader yields None for a record it could not parse; skip those
        # instead of failing on None["155"].
        if record is not None and record["155"] is not None:
            records_155.append(str(record["155"]["a"]))

In [None]:
# Store the collected 155 headings as a single-column CSV.
df_155 = pd.DataFrame(records_155)
df_155.to_csv("df_155.csv")
            

In [None]:
def _collect_related_headings(record, related_tags=("450", "455", "550", "555")):
    """Group the $a values of a record's 450/455/550/555 fields.

    `record` is a dict as produced by pymarc's `Record.as_dict()`. For every
    field whose tag is in `related_tags`:
      * exactly one subfield            -> stored under "<tag>a"
      * first subfield == {"w": "h"}    -> second subfield's $a under "<tag> 'w': 'h'"
      * first subfield == {"w": "g"}    -> second subfield's $a under "<tag> 'w': 'g'"
    Fields of any other shape, or without the expected $a, are skipped.

    Returns a dict mapping those labels to lists of values.
    """
    grouped = {}
    for field in record["fields"]:
        for tag, content in field.items():
            # Control fields map a tag to a plain string; only data fields are dicts.
            if tag not in related_tags or not isinstance(content, dict):
                continue
            subfields = content.get("subfields") or []
            if len(subfields) == 1:
                label, source = f"{tag}a", subfields[0]
            elif len(subfields) > 1 and subfields[0] in ({"w": "h"}, {"w": "g"}):
                label, source = f"{tag} 'w': '{subfields[0]['w']}'", subfields[1]
            else:
                continue
            if "a" in source:
                # setdefault creates the list on first use and appends afterwards.
                grouped.setdefault(label, []).append(source["a"])
    return grouped


def _related_headings_by_term(terms, records, heading_tag):
    """Map each term to the related headings of the record whose heading equals it.

    A record's heading is the $a of the FIRST subfield of its `heading_tag`
    field. When several records share a heading the last one wins. Terms
    without a matching record are left out; result order follows `terms`.
    """
    # Index the dump once instead of rescanning every record for every term.
    records_by_heading = {}
    for record in records:
        for field in record["fields"]:
            content = field.get(heading_tag)
            if not isinstance(content, dict):
                continue
            subfields = content.get("subfields") or []
            if subfields and "a" in subfields[0]:
                records_by_heading[subfields[0]["a"]] = record
    return {
        term: _collect_related_headings(records_by_heading[term])
        for term in terms
        if term in records_by_heading
    }


# Headings of the chosen subject category (from the API) and all records with a 150 field (from the dump).
records_from_data_api = get_subj("zarządzanie i marketing", "subjectCategory", ["150"])
records_from_data_api = list(records_from_data_api.values())[0]
records = marc_to_dict("authorities-all.marc", ["150"])

records_dictionary = _related_headings_by_term(records_from_data_api, records, "150")

# One row per heading, one column per relation label.
df = pd.DataFrame.from_dict(records_dictionary).transpose()
df.to_csv("df_zarządzanie i marketing.csv")

In [None]:

def _collect_related_headings(record, related_tags=("450", "455", "550", "555")):
    """Group the $a values of a record's 450/455/550/555 fields.

    `record` is a dict as produced by pymarc's `Record.as_dict()`. For every
    field whose tag is in `related_tags`:
      * exactly one subfield            -> stored under "<tag>a"
      * first subfield == {"w": "h"}    -> second subfield's $a under "<tag> 'w': 'h'"
      * first subfield == {"w": "g"}    -> second subfield's $a under "<tag> 'w': 'g'"
    Fields of any other shape, or without the expected $a, are skipped
    explicitly (no blanket try/except is needed).

    Returns a dict mapping those labels to lists of values.
    """
    grouped = {}
    for field in record["fields"]:
        for tag, content in field.items():
            # Control fields map a tag to a plain string; only data fields are dicts.
            if tag not in related_tags or not isinstance(content, dict):
                continue
            subfields = content.get("subfields") or []
            if len(subfields) == 1:
                label, source = f"{tag}a", subfields[0]
            elif len(subfields) > 1 and subfields[0] in ({"w": "h"}, {"w": "g"}):
                label, source = f"{tag} 'w': '{subfields[0]['w']}'", subfields[1]
            else:
                continue
            if "a" in source:
                # setdefault creates the list on first use and appends afterwards.
                grouped.setdefault(label, []).append(source["a"])
    return grouped


def _related_headings_by_term(terms, records, heading_tag):
    """Map each term to the related headings of the record whose heading equals it.

    A record's heading is the $a of the FIRST subfield of its `heading_tag`
    field. When several records share a heading the last one wins. Terms
    without a matching record are left out; result order follows `terms`.
    """
    # Index the dump once instead of rescanning every record for every term.
    records_by_heading = {}
    for record in records:
        for field in record["fields"]:
            content = field.get(heading_tag)
            if not isinstance(content, dict):
                continue
            subfields = content.get("subfields") or []
            if subfields and "a" in subfields[0]:
                records_by_heading[subfields[0]["a"]] = record
    return {
        term: _collect_related_headings(records_by_heading[term])
        for term in terms
        if term in records_by_heading
    }


# Terms are the 155 $a headings collected earlier; records are all dump records with a 155 field.
records_from_data_api = records_155
records = marc_to_dict("authorities-all.marc", ["155"])

records_dictionary = _related_headings_by_term(records_from_data_api, records, "155")

# One row per heading, one column per relation label.
df = pd.DataFrame.from_dict(records_dictionary).transpose()
df.to_csv("df_155_skos.csv")

In [None]:

# Debug dump: print every record of the MARC file that has a 155 field.
with open("authorities-all.marc", "rb") as f:
    for record in MARCReader(f):
        # MARCReader yields None for a record it could not parse; skip those.
        if record is not None and record["155"] is not None:
            print(record)

In [None]:

# Terms are the 155 $a headings collected earlier; records are all dump records with a 155 field.
records_from_data_api = records_155
records = marc_to_dict("authorities-all.marc", ["155"])

def authority_to_table(field, records_from_api=None, records_from_dump=None,
                       dump_path="authorities-all.marc", output_path=None):
    """Build and save a table of related headings for a set of authority headings.

    Parameters:
        field: MARC tag holding the main heading (e.g. "155"). A record's
            heading is the $a of the FIRST subfield of that field.
        records_from_api: headings (strings) to look up. When None, every
            heading found in the dump is used.
        records_from_dump: records as dicts (pymarc `Record.as_dict()` shape).
            When None, they are loaded with marc_to_dict(dump_path, [field]).
        dump_path: MARC file read only when `records_from_dump` is None.
        output_path: CSV destination; defaults to f"df_{field}_skos.csv".

    For each heading with a matching record, the $a values of that record's
    450/455/550/555 fields are grouped under these labels:
      * exactly one subfield            -> "<tag>a"
      * first subfield == {"w": "h"}    -> second subfield's $a under "<tag> 'w': 'h'"
      * first subfield == {"w": "g"}    -> second subfield's $a under "<tag> 'w': 'g'"
    Fields of any other shape are skipped. If several records share a
    heading, the last one wins.

    Returns the DataFrame (one row per heading, one column per label) that
    was written to the CSV.
    """
    related_tags = ("450", "455", "550", "555")

    def headings_of(record):
        """Yield the $a of the first subfield of every `field` entry in the record."""
        for entry in record["fields"]:
            content = entry.get(field)
            # Control fields map a tag to a plain string; only data fields are dicts.
            if not isinstance(content, dict):
                continue
            subfields = content.get("subfields") or []
            if subfields and "a" in subfields[0]:
                yield subfields[0]["a"]

    def collect_related(record):
        """Group the $a values of the record's related fields by label."""
        grouped = {}
        for entry in record["fields"]:
            for tag, content in entry.items():
                if tag not in related_tags or not isinstance(content, dict):
                    continue
                subfields = content.get("subfields") or []
                if len(subfields) == 1:
                    label, source = f"{tag}a", subfields[0]
                elif len(subfields) > 1 and subfields[0] in ({"w": "h"}, {"w": "g"}):
                    label, source = f"{tag} 'w': '{subfields[0]['w']}'", subfields[1]
                else:
                    continue
                if "a" in source:
                    # setdefault creates the list on first use and appends afterwards.
                    grouped.setdefault(label, []).append(source["a"])
        return grouped

    if records_from_dump is None:
        records_from_dump = marc_to_dict(dump_path, [field])

    # Index the dump once instead of rescanning every record for every term.
    records_by_heading = {}
    for record in records_from_dump:
        for heading in headings_of(record):
            records_by_heading[heading] = record

    if records_from_api is None:
        records_from_api = list(records_by_heading)

    records_dictionary = {
        term: collect_related(records_by_heading[term])
        for term in records_from_api
        if term in records_by_heading
    }

    df = pd.DataFrame.from_dict(records_dictionary).transpose()
    df.to_csv(output_path if output_path is not None else f"df_{field}_skos.csv")
    return df

In [None]:

# Thesaurus export for the "Biologia" subject category.
# Headings (MARC field 150) come from get_subj(); for each heading the matching
# record returned by marc_to_dict() is scanned for the tracing fields
# 450/455/550/555 and the result is written to df_biologia.csv, one row per
# heading.
records_from_data_api = get_subj("Biologia", "subjectCategory", ["150"])
records_from_data_api = list(records_from_data_api.values())[0]
records = marc_to_dict("authorities-all.marc", ["150"])

# Index the records by heading in a single pass instead of rescanning every
# record for every term. A later record overwrites an earlier one, so when
# several records share a heading the last one is used.
# NOTE(review): assumes every field is a one-key dict {tag: content} and that
# the first subfield of each 150 field is "a" - confirm against marc_to_dict().
wanted_headings = set(records_from_data_api)
record_by_heading = {}
for record in records:
    for field in record["fields"]:
        if "150" in field:
            heading = field["150"]["subfields"][0]["a"]
            if heading in wanted_headings:
                record_by_heading[heading] = record

tracing_tags = ("450", "455", "550", "555")
# A leading subfield w with one of these codes qualifies the term that follows.
# In MARC 21 authority data "g" marks a broader and "h" a narrower term
# (TODO confirm this matches the cataloguing practice of the source data).
relation_markers = ({"w": "h"}, {"w": "g"})

records_dictionary = {}
for term in records_from_data_api:
    if term not in record_by_heading:
        # No record carries this heading, so the term gets no row.
        continue
    temp_record = {}
    for field in record_by_heading[term]["fields"]:
        for tag, content in field.items():
            if tag not in tracing_tags or not isinstance(content, dict):
                continue
            subfields = content.get("subfields") or []
            if len(subfields) == 1:
                # Plain tracing: the only subfield holds the term.
                column, holder = f"{tag}a", subfields[0]
            elif len(subfields) > 1 and subfields[0] in relation_markers:
                # Qualified tracing: the term is in the subfield after w.
                relation = subfields[0]["w"]
                column, holder = f"{tag} 'w': '{relation}'", subfields[1]
            else:
                continue
            # Fields without the expected "a" subfield are skipped explicitly
            # rather than through a blanket except. setdefault creates the list
            # on first use; the previous inverted membership test dropped every
            # "450 'w': 'h'" value.
            if isinstance(holder, dict) and "a" in holder:
                temp_record.setdefault(column, []).append(holder["a"])
    records_dictionary[term] = temp_record

df = pd.DataFrame.from_dict(records_dictionary).transpose()
df.to_csv("df_biologia.csv")

In [None]:
# Subject category names; each one is passed to get_subj() as the value of the
# "subjectCategory" header and is also used in the output file names.
subject_names = [
    "Archeologia",
    "Architektura i budownictwo",
    "Bezpieczeństwo i wojskowość",
    "Bibliotekarstwo, archiwistyka, muzealnictwo",
    "Biologia",
    "Chemia",
    "Edukacja i pedagogika",
    "Etnologia i antropologia kulturowa",
    "Filozofia i etyka",
    "Fizyka i astronomia",
    "Geografia i nauki o Ziemi",
    "Gospodarka, ekonomia, finanse",
    "Historia",
    "Informatyka i technologie informacyjne",
    "Inżynieria i technika",
    "Językoznawstwo",
    "Kultura fizyczna i sport",
    "Kultura i sztuka",
    "Literaturoznawstwo",
    "Matematyka",
    "Media i komunikacja społeczna",
    "Medycyna i zdrowie",
    "Nauka i badania",
    "Ochrona środowiska",
    "Opieka nad zwierzętami i weterynaria",
    "Polityka, politologia, administracja publiczna",
    "Prawo i wymiar sprawiedliwości",
    "Psychologia",
    "Religia i duchowość",
    "Rolnictwo i leśnictwo",
    "Socjologia i społeczeństwo",
    "Transport i logistyka",
    "Zarządzanie i marketing",
]

In [None]:

# Thesaurus export for every subject category in subject_names.
# For each category the headings (MARC field 150) come from get_subj(); the
# matching record returned by marc_to_dict() is scanned for the tracing fields
# 450/455/550/555 and the result is written to "<category>.json" and
# "<category>_dataframe.csv".

# The record set does not depend on the category, so it is read once here
# rather than once per loop iteration.
records = marc_to_dict("authorities-all.marc", ["150"])

# Index the records by heading in a single pass instead of rescanning every
# record for every term. A later record overwrites an earlier one, so when
# several records share a heading the last one is used.
# NOTE(review): assumes every field is a one-key dict {tag: content} and that
# the first subfield of each 150 field is "a" - confirm against marc_to_dict().
record_by_heading = {}
for record in records:
    for field in record["fields"]:
        if "150" in field:
            record_by_heading[field["150"]["subfields"][0]["a"]] = record

tracing_tags = ("450", "455", "550", "555")
# A leading subfield w with one of these codes qualifies the term that follows.
# In MARC 21 authority data "g" marks a broader and "h" a narrower term
# (TODO confirm this matches the cataloguing practice of the source data).
relation_markers = ({"w": "h"}, {"w": "g"})

for subject_name in subject_names:
    records_from_data_api = get_subj(subject_name, "subjectCategory", ["150"])
    records_from_data_api = list(records_from_data_api.values())[0]

    records_dictionary = {}
    for term in records_from_data_api:
        if term not in record_by_heading:
            # No record carries this heading, so the term gets no entry.
            continue
        temp_record = {}
        for field in record_by_heading[term]["fields"]:
            for tag, content in field.items():
                if tag not in tracing_tags or not isinstance(content, dict):
                    continue
                subfields = content.get("subfields") or []
                if len(subfields) == 1:
                    # Plain tracing: the only subfield holds the term.
                    column, holder = f"{tag}a", subfields[0]
                elif len(subfields) > 1 and subfields[0] in relation_markers:
                    # Qualified tracing: the term is in the subfield after w.
                    relation = subfields[0]["w"]
                    column, holder = f"{tag} 'w': '{relation}'", subfields[1]
                else:
                    continue
                # Fields without the expected "a" subfield are skipped
                # explicitly rather than through a blanket except. setdefault
                # creates the list on first use; the previous inverted
                # membership test dropped every "450 'w': 'h'" value.
                if isinstance(holder, dict) and "a" in holder:
                    temp_record.setdefault(column, []).append(holder["a"])
        records_dictionary[term] = temp_record

    with open(f"{subject_name}.json", "w") as outfile:
        json.dump(records_dictionary, outfile)

    df = pd.DataFrame.from_dict(records_dictionary).transpose()
    df.to_csv(f"{subject_name}_dataframe.csv")