In [1]:
# load csv file and make it into a graph with rdflib.
import rdflib
# from rdflib import Graph, Literal
from rdflib.namespace import RDF, RDFS, Namespace
from rdflib import BNode
from rdflib import URIRef
import requests_cache
from datetime import timedelta

import csv

# Endpoint of the ROR affiliation lookup; the affiliation string to search
# for is appended directly after the "affiliation=" query parameter.
ROR_API_URL = "https://api.ror.org/organizations?affiliation="

# RDF namespace for schema.org terms.
SCHEMA = Namespace("https://schema.org/")

# The graph that collects all organization triples, with the "schema"
# prefix bound to the schema.org namespace.
g = rdflib.Graph()
g.bind("schema", SCHEMA)

# Custom cache duration per url, 0 means "don't cache".
# No per-url overrides are configured at the moment.
urls_expire_after = {}

# Session used for the ROR requests: responses are cached so repeated
# look-ups of the same affiliation do not hit the API again.
session = requests_cache.CachedSession(
    cache_name=".cache/requests",
    allowable_codes=[200, 404],
    expire_after=timedelta(days=30),
    urls_expire_after=urls_expire_after,
)



def get_ror_id_from_api(affiliation_string):
    """Return the ROR id that the ROR API chooses for an affiliation name.

    The name is sent to the ROR affiliation endpoint (``ROR_API_URL``) via
    the cached ``session``. Among the returned items, the first one flagged
    as "chosen" is used.

    Args:
        affiliation_string: Name of the institution to look up.

    Returns:
        The value of ``item["organization"]["id"]`` of the chosen item, or
        ``None`` when the name is empty, the request does not return
        status 200, or no item is marked as chosen.
    """
    from urllib.parse import quote

    # Nothing to look up: avoid a pointless request for empty/blank names.
    if not affiliation_string or not affiliation_string.strip():
        return None

    # Percent-encode the name so characters such as "&", "#", "+" or "?"
    # stay part of the affiliation value instead of breaking the query.
    ror_api_url = ROR_API_URL + quote(affiliation_string, safe="")

    # make request to api with caching:
    ror_api_request = session.get(ror_api_url, timeout=20)
    if ror_api_request.status_code != 200:
        return None

    # Return the id of the item the API marked as its chosen match.
    for item in ror_api_request.json().get("items", []):
        if item.get("chosen"):
            return item["organization"]["id"]

    # There were no hits, or none of them was marked as chosen.
    return None

# Read the institute list and add one schema:Organization node per row.
# newline='' is what the csv module asks for so that line breaks inside
# quoted fields are handled by the csv reader itself.
# NOTE(review): the file is read with the platform default encoding, as
# before - confirm the encoding of institute_lux.csv and set it explicitly.
with open('institute_lux.csv', 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        # create a node and use "uuid" as the id
        node = rdflib.URIRef("https://w3id.org/zpid/resources/orgs/" + row['uuid'])
        # add a type
        g.add((node, rdflib.RDF.type, SCHEMA.Organization))
        # add a name
        g.add((node, SCHEMA.name, rdflib.Literal(row['prefname'])))

        # look up the preferred name on ror and link the match, if any:
        ror_id = get_ror_id_from_api(row['prefname'])
        if ror_id is not None:
            g.add((node, SCHEMA.sameAs, rdflib.URIRef(ror_id)))

        # known_names holds several names separated by " ## "; add each
        # non-empty one (an empty field would otherwise yield an empty
        # literal as alternate name):
        for name in (row['known_names'] or "").split(" ## "):
            if name:
                g.add((node, SCHEMA.alternateName, rdflib.Literal(name)))

        # add country to the graph, unless the field is empty
        country = row['country']
        if country:
            g.add((node, SCHEMA.location, rdflib.Literal(country)))

        # TODO: use ror_id to get city and country from the ror api.
        # (The unfinished, body-less "if ror_id is not None:" that stood
        # here made the script fail to parse.)

# write the graph to a turtle file
g.serialize(destination='norminstitute_lux.ttl', format='turtle')



<Graph identifier=Nc05bb2e3c69243b084da8dad1268c563 (<class 'rdflib.graph.Graph'>)>