In [1]:
import os
import sys

# Root of the sem-covid checkout. The notebook imports the `sem_covid` package
# from here and also runs with this directory as its working directory.
PROJECT_ROOT = "/home/jovyan/work/sem-covid/"

# Make the project importable. Guarding the append keeps the cell idempotent
# when it is re-run in the same kernel.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Remove duplicate entries while KEEPING the search order. Round-tripping
# through set() would shuffle sys.path and could change which copy of a module
# is imported.
sys.path[:] = list(dict.fromkeys(sys.path))

# Run the rest of the notebook from the project root.
os.chdir(PROJECT_ROOT)
from sem_covid import config
from sem_covid.services.store_registry import store_registry
import pandas as pd
import requests as req
import json
from typing import List, Optional

In [2]:
# Name of the MinIO bucket passed to the object store used by the RML cells below.
MINIO_RML_BUCKET = 'rdf-transformer'
# Object-name prefixes ("directories") inside that bucket.
MINIO_RML_RULES_DIR = 'rml_rules'  # RML rule files are read from here
MINIO_RML_SOURCES_DIR = 'sources'  # source documents referenced by the rules are read from here
MINIO_RML_RESULTS_DIR = 'results'  # transformation results are written here

In [23]:
from abc import ABC, abstractmethod
from sem_covid.adapters.abstract_store import ObjectStoreABC

class RMLMapperABC(ABC):
    """Interface of an RML mapper: turns an RML rule plus its sources into a result string."""

    @abstractmethod
    def transform(self, rml_rule: str, sources: dict) -> str:
        """Apply ``rml_rule`` to ``sources`` and return the mapping result.

        :param rml_rule: the RML rule document as text
        :param sources: the source documents the rule refers to, as a dict
        :return: the result of the mapping as text
        """

class RMLMapper(RMLMapperABC):
    """RML mapper that delegates the transformation to an HTTP service.

    The rule and sources are POSTed as JSON to ``rml_mapper_url`` and the
    ``'output'`` field of the JSON response is returned.
    """

    def __init__(self,
                 rml_mapper_url: str,
                 timeout: Optional[float] = None
                 ):
        """
        :param rml_mapper_url: URL of the RML mapper endpoint to POST to
        :param timeout: seconds to wait for the service, forwarded to
            ``requests``; ``None`` (the default) waits indefinitely, which is
            the previous behaviour
        """
        self.rml_mapper_url = rml_mapper_url
        self.timeout = timeout

    def transform(self, rml_rule: str, sources: dict) -> Optional[str]:
        """Send the rule and sources to the mapper service and return its output.

        :param rml_rule: the RML rule document as text
        :param sources: source documents keyed by the name the rule refers to them by
        :return: the ``'output'`` field of the service response, or ``None``
            when the service answers with a non-OK status
        """
        rml_mapper_query = {"rml": rml_rule, "sources": sources}
        rml_mapper_result = req.post(self.rml_mapper_url,
                                     json=rml_mapper_query,
                                     timeout=self.timeout)
        if not rml_mapper_result.ok:
            # Keep the best-effort contract (None on failure) but say what went
            # wrong: the bare Response repr only shows the status code.
            print(f"RML mapper request to {self.rml_mapper_url} failed: "
                  f"{rml_mapper_result.status_code} {rml_mapper_result.reason}\n"
                  f"{rml_mapper_result.text}")
            return None
        return json.loads(rml_mapper_result.text)['output']

class RMLTransformPipeline:
    """Extract / transform / load pipeline around an RML mapper.

    ``extract`` reads the RML rule and the source files from object storage,
    ``transform`` hands them to the mapper, and ``load`` writes the mapper's
    result back to object storage. ``execute`` runs the three stages in order.
    """

    def __init__(self,
                 rml_rules_file_name: str,
                 source_file_names: List[str],
                 rdf_result_file_name: str,
                 rml_mapper: RMLMapperABC,
                 object_storage: ObjectStoreABC
                 ):
        """
        :param rml_rules_file_name: rule file name, looked up under ``MINIO_RML_RULES_DIR``
        :param source_file_names: source file names, looked up under ``MINIO_RML_SOURCES_DIR``
        :param rdf_result_file_name: result file name, written under ``MINIO_RML_RESULTS_DIR``
        :param rml_mapper: mapper used to perform the transformation
        :param object_storage: store the files are read from and the result is written to
        """
        self.rml_rules_file_name = rml_rules_file_name
        self.source_file_names = source_file_names
        self.rdf_result_file_name = rdf_result_file_name
        self.rml_mapper = rml_mapper
        self.object_storage = object_storage
        # Intermediate state, filled in stage by stage.
        self.rml_rule = None    # set by extract()
        self.sources = None     # set by extract()
        self.rdf_result = None  # set by transform()

    def _read_text(self, object_name: str) -> str:
        """Fetch one object from the store and decode it as UTF-8 text."""
        return self.object_storage.get_object(object_name=object_name).decode('utf8')

    def extract(self):
        """Load the RML rule and every source file from object storage as text."""
        self.rml_rule = self._read_text(f'{MINIO_RML_RULES_DIR}/{self.rml_rules_file_name}')
        # Sources are keyed by their bare file name (without the directory prefix).
        self.sources = {file_name: self._read_text(f'{MINIO_RML_SOURCES_DIR}/{file_name}')
                        for file_name in self.source_file_names}

    def transform(self):
        """Run the mapper on the extracted rule and sources.

        :raises AssertionError: if ``extract()`` has not populated the inputs
        """
        assert self.rml_rule is not None, \
            "RML rule is not loaded; call extract() before transform()"
        assert self.sources is not None, \
            "sources are not loaded; call extract() before transform()"
        self.rdf_result = self.rml_mapper.transform(rml_rule=self.rml_rule, sources=self.sources)

    def load(self):
        """Write the transformation result to object storage, UTF-8 encoded.

        :raises AssertionError: if there is no result to store, i.e.
            ``transform()`` was not called or the mapper returned ``None``
        """
        assert self.rdf_result is not None, \
            "no RDF result to store; call transform() first and check that the RML mapper succeeded"
        self.object_storage.put_object(object_name=f'{MINIO_RML_RESULTS_DIR}/{self.rdf_result_file_name}',
                                       content=self.rdf_result.encode('utf8'))

    def execute(self):
        """Run extract, transform and load in sequence."""
        self.extract()
        self.transform()
        self.load()

In [24]:
# Run the pipeline once on the test fixtures stored in the RML bucket.
rml_mapper = RMLMapper(rml_mapper_url="http://srv.meaningfy.ws:4000/execute")
rml_transform_pipeline = RMLTransformPipeline(
    rml_rules_file_name='test_rules.rml',
    source_file_names=['test_source.json'],
    rdf_result_file_name='test_result.ttl',
    rml_mapper=rml_mapper,
    object_storage=store_registry.minio_object_store(minio_bucket=MINIO_RML_BUCKET),
)
rml_transform_pipeline.execute()

In [21]:
def rml_mapper(rml_rules_file_name: str,
               source_file_names: List[str],
               rml_mapper_url: str = "http://srv.meaningfy.ws:4000/execute") -> Optional[str]:
    """Read an RML rule and its sources from MinIO and run them through the mapper service.

    NOTE: another cell in this notebook binds the name ``rml_mapper`` to an
    ``RMLMapper`` instance; whichever cell ran last wins, so re-run this cell
    before calling the function.

    :param rml_rules_file_name: rule file name under ``MINIO_RML_RULES_DIR``
    :param source_file_names: source file names under ``MINIO_RML_SOURCES_DIR``
    :param rml_mapper_url: endpoint of the RML mapper service
    :return: the ``'output'`` field of the service response, or ``None``
        when the service answers with a non-OK status
    """
    minio = store_registry.minio_object_store(minio_bucket=MINIO_RML_BUCKET)
    # Use the shared directory constants rather than repeating the literals.
    rml_rule = minio.get_object(object_name=f'{MINIO_RML_RULES_DIR}/{rml_rules_file_name}').decode('utf8')
    sources = {file_name: minio.get_object(object_name=f'{MINIO_RML_SOURCES_DIR}/{file_name}').decode('utf8')
               for file_name in source_file_names}
    rml_mapper_query = {"rml": rml_rule, "sources": sources}
    rml_mapper_result = req.post(rml_mapper_url, json=rml_mapper_query)
    if not rml_mapper_result.ok:
        # Still best-effort (None on failure), but report what the service said.
        print(f"Error: RML mapper request to {rml_mapper_url} failed: "
              f"{rml_mapper_result.status_code} {rml_mapper_result.reason}\n"
              f"{rml_mapper_result.text}")
        return None
    return json.loads(rml_mapper_result.text)['output']


In [22]:
# Try the function on the test fixtures; the returned text is shown as the cell output.
rml_mapper(
    rml_rules_file_name='test_rules.rml',
    source_file_names=['test_source.json'],
)

'<http://example.com/John> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person>.\n<http://example.com/John> <http://example.com/name> "John".\n<http://example.com/Jane> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person>.\n<http://example.com/Jane> <http://example.com/name> "Jane".\n<http://example.com/Jane> <http://example.com/name> "Sam".\n<http://example.com/Sarah> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person>.\n<http://example.com/Sarah> <http://example.com/name> "Sarah".\n'

In [36]:
# Request payload for the RML mapper service: the rule text under "rml" and the
# source documents, keyed by file name, under "sources".
# NOTE(review): `rml_rule` and `source` are not assigned at notebook level in any
# cell visible here - presumably left over from an earlier kernel session.
# Confirm, and define them before this cell so Restart & Run All works.
data = {
  "rml": rml_rule,
  "sources": {
    "test_source.json": source}
}


In [27]:
# Endpoint of the RML mapper service used for the manual request below.
url = "http://srv.meaningfy.ws:4000/execute"

In [28]:
# POST the payload as JSON to the mapper endpoint; `x` is the HTTP response.
x = req.post(url, json = data)

In [30]:
# Display whether the mapper request succeeded.
x.ok

True

In [29]:
# Parse the JSON response body and display it; the mapper's result is under 'output'.
tmp = json.loads(x.text)
tmp

{'output': '<http://example.com/John> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person>.\n<http://example.com/John> <http://example.com/name> "John".\n<http://example.com/Jane> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person>.\n<http://example.com/Jane> <http://example.com/name> "Jane".\n<http://example.com/Jane> <http://example.com/name> "Sam".\n<http://example.com/Sarah> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://xmlns.com/foaf/0.1/Person>.\n<http://example.com/Sarah> <http://example.com/name> "Sarah".\n'}

In [3]:

# Download a PDF from arXiv.
# NOTE(review): this rebinds `url`, which another cell sets to the RML mapper
# endpoint - running cells out of order will send the mapper payload here.
url = 'https://arxiv.org/pdf/1912.04141.pdf'
response = req.get(url)

In [None]:
# Base64-encode the raw bytes of the downloaded response body.
# NOTE(review): this import sits mid-notebook and the cell has no execution
# count; consider moving `import base64` to the imports cell at the top.
import base64
encoded = base64.b64encode(response.content)