In [1]:
import requests
from bs4 import BeautifulSoup

In [2]:
purl = "https://w3id.org/ontouml-models"
github_url = "/".join([requests.get(purl).url, "tree", "master", "models"])
result = requests.get(github_url)
soup = BeautifulSoup(result.content, 'html.parser')

In [3]:
links = []
for link in soup.findAll('a'):
    link_ref = link.get('href')
    if link_ref.startswith(github_url[18:]):
        link_ref = "https://raw.githubusercontent.com/" + link_ref.replace("/tree", "") + "/metadata.ttl"
        links.append(link_ref)

In [4]:
links

['https://raw.githubusercontent.com//OntoUML/ontouml-models/master/models/abel2015petroleum-system/metadata.ttl',
 'https://raw.githubusercontent.com//OntoUML/ontouml-models/master/models/abrahao2018agriculture-operations/metadata.ttl',
 'https://raw.githubusercontent.com//OntoUML/ontouml-models/master/models/aguiar2018rdbs-o/metadata.ttl',
 'https://raw.githubusercontent.com//OntoUML/ontouml-models/master/models/aguiar2019ooco/metadata.ttl',
 'https://raw.githubusercontent.com//OntoUML/ontouml-models/master/models/ahmad2018aviation/metadata.ttl',
 'https://raw.githubusercontent.com//OntoUML/ontouml-models/master/models/aires2022valuenetworks-geo/metadata.ttl',
 'https://raw.githubusercontent.com//OntoUML/ontouml-models/master/models/albuquerque2011ontobio/metadata.ttl',
 'https://raw.githubusercontent.com//OntoUML/ontouml-models/master/models/alpinebits2022/metadata.ttl',
 'https://raw.githubusercontent.com//OntoUML/ontouml-models/master/models/amaral2019rot/metadata.ttl',
 'https://r

In [1]:
import requests
import json

from typing import List, Optional, Dict
from github import Github
from timeit import default_timer as timer

In [208]:
username = "mozzherina" # "ontouml"
reponame = "ontouml-models"
token = "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"
bearer = '1qTEODxmOeVh8PP7yPSc5V8bC6BzbV3Be0RK1V5MWYg1UFpOKykBWt38osIaWapVjbQWgK1bkdCfMxd4EjkingamQh6MKGmj5S1SKfk62fKdMPSbkdx8B6MnFVHfOPj2'
catalogId = "5f487ccc-6625-4bfd-a6f8-c700d4606fdc"
basicUrl = 'https://scs-ontouml.eemcs.utwente.nl/dataset'

In [234]:
metadataStr = " a dcat:"
catalogStr = f"dct:isPartOf <https://w3id.org/ontouml-models/catalog/{catalogId}>;\n"
licenseStr = "dct:license <https://creativecommons.org/licenses/by/4.0/>;\n"
publishDict = { "current": "PUBLISHED" }

postHeaders = {
    'Authorization': 'Bearer ' + bearer, 
    'Content-Type': 'text/turtle'
}

publishHeaders = {
    'Authorization': 'Bearer ' + bearer, 
    'Content-Type': 'application/json'
}

basicHeaders = {
    'Authorization': 'Bearer ' + bearer
}

In [210]:
g = Github(token)

user = g.get_user(username) # get_organization
repos = list(filter(lambda repo: repo.full_name.endswith(reponame), user.get_repos()))

repo = repos[0] if repos else None
print(repo)

Repository(full_name="mozzherina/ontouml-models")


In [211]:
def prepare_add(content: str) -> str:
    metadata_idx = content.find(";\n", content.find(metadataStr)) + 3
    
    # remove tail which is not needed for the request
    tail = content.find('<https://w3id.org/', metadata_idx)
    if tail > -1:
        content = content[:tail]
    
    # add license if not given
    if "dct:license" not in content:
        content = content[:metadata_idx] + licenseStr + content[metadata_idx:] 
    
    # add reference to the catalog
    if "dct:isPartOf" not in content:
        content = content[:metadata_idx] + catalogStr + content[metadata_idx:] 
        
    return content

In [212]:
def get_id(s: str) -> str:
    start = s[:s.find(metadataStr)].rfind('<')
    end = s[:s.find(metadataStr)].rfind('>') 
    return s[start+1:end]

In [213]:
def add_request(data, headers = postHeaders) -> Optional[Dict]:
    response = requests.post(basicUrl, data=data, headers=headers)
    
    if response.ok:
        response = response.content.decode()
        return {
            "old_id": get_id(data), 
            "new_id": get_id(response), 
            "data": response
        }
    else:
        print(response.content.decode()) # this is an error
        return None

In [214]:
def publish_request(full_id: str, headers = publishHeaders) -> bool:
    url = f"{basicUrl}/{full_id.split('/')[-1]}/meta/state"
    response = requests.put(url, data=json.dumps(publishDict), headers=headers)
    return response.ok

In [235]:
def delete_request(full_id: str, headers = basicHeaders) -> bool:
    url = f"{basicUrl}/{full_id.split('/')[-1]}"
    response = requests.delete(url, headers=headers)
    return response.ok

In [231]:
delete_request(dpo_content[0]["new_id"])

True

In [236]:
def get_request(full_id: str, headers = basicHeaders) -> Optional[str]:
    url = f"{basicUrl}/{full_id.split('/')[-1]}"
    response = requests.get(url, headers=headers)
    if response.ok:
        return response.content.decode()
    else:
        print(response.content.decode()) # this is an error
        return None

In [259]:
def load (repository, publish: bool, *model_names) -> List:
    contents = []
    if model_names:
        contents = [repository.get_contents(f"models/{name}/metadata.ttl") for name in model_names]
    else:
        all_models = repository.get_contents("models")
        contents = [repository.get_contents(model.path + "/metadata.ttl") for model in all_models]

    results = []    
    for content in contents:
        response = add_request(prepare_add(content.decoded_content.decode()))
        if response: # if request was successful
            if publish:
                publish_request(response["new_id"])
            
            repository.update_file(content.path, "update from FDP:{}", response["data"], content.sha) # branch="test"
            results.append({"old_id": response["old_id"], "new_id": response["new_id"], "path": content.path})
    return results

In [260]:
results = load(repo, True, "amaral2019rot")

In [263]:
results

[{'old_id': 'https://w3id.org/ontouml-models/model/amaral2019rot/',
  'new_id': 'https://w3id.org/ontouml-models/dataset/84f52356-98eb-467f-bf7c-624f1b5236cf',
  'path': 'models/amaral2019rot/metadata.ttl'}]

In [264]:
print(get_request(results[0]["new_id"]))

@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix dcat: <http://www.w3.org/ns/dcat#> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix ldp: <http://www.w3.org/ns/ldp#> .

<https://w3id.org/ontouml-models/dataset/84f52356-98eb-467f-bf7c-624f1b5236cf> a dcat:Resource,
    dcat:Dataset, <https://w3id.org/mod#SemanticArtefact>;
  dcterms:accessRights <https://w3id.org/ontouml-models/dataset/84f52356-98eb-467f-bf7c-624f1b5236cf#accessRights>;
  dcterms:language "en";
  dcterms:license <https://creativecommons.org/licenses/by/4.0/>;
  <https://w3id.org/fdp/fdp-o#metadataIdentifier> <https://w3id.org/ontouml-models/dataset/84f52356-98eb-467f-bf7c-624f1b5236cf#identifier>;
  <https://w3id.org/fdp/fdp-o#metadataIssued> "2023-03-22T11:13:03.073Z"^^xsd:dateTime;
  <https://w3id.org/fdp/fdp-o#metadataModified> "2023-03-22T11:13:03.073Z"^^xsd:dateTime;
  dcterms:contributor <https://dblp.org/pid/11/78>, <https://dblp.org/pid/134/4947>,
  

In [255]:
def reload(repository, file, new_content):
     repository.update_file(file.path, "update from FDP", new_content, file.sha) # branch="test"

In [256]:
reload(repo, file_content[0], dpo_content[0]["data"])

In [8]:
load(repo, "dpo2017", "amaral2019rot")

[ContentFile(path="models/dpo2017/metadata.ttl"),
 ContentFile(path="models/amaral2019rot/metadata.ttl")]

In [11]:
start = timer()
print(load(repo, True))
end = timer()
print(end - start)

37.402872600010596


In [13]:
dpo_content

ContentFile(path="models/dpo2017/metadata.ttl")