## Config

In [139]:
from glob import glob
from os import path

# Working directories, all relative to the directory the notebook runs from.
VENDOR_PATH = 'vendor'            # third-party tools (the rmlmapper jar)
TMP_PATH = 'tmp'                  # generated, throw-away files
DATA_PATH = 'data'                # pre-processed CSV files
DATA_ORIGIN_PATH = 'data_originals'  # untouched source CSV files
OUTPUT_DIR = 'output'             # final mapping output

# RML rule fragments and the mapper executable.
RML_PATH = 'rml'
RML_RULES_PATH = path.join(RML_PATH, 'rules')
RML_MAPPER_JAR = path.join(VENDOR_PATH, 'rmlmapper-7.0.0-r374-all.jar')

# Bundled mapping template, its backup copy, and the instantiated mapping
# that is handed to the mapper.
MAPPING_TEMPLATE = path.join(RML_PATH, 'mapping.template.ttl')
MAPPING_TEMPLATE_BAK = path.join(RML_PATH, 'mapping.template.ttl-bak')
MAPPING_FILE = path.join(TMP_PATH, 'mapping.rml.ttl')

# ANSI escape sequences used to colorize progress messages.
RED = '\033[0;31m'
GREEN = '\033[0;32m'
NC = '\033[0m'  # No Color (reset)


## Preparing structure

In [140]:
from os import mkdir

from os import makedirs

# Create every working directory the notebook reads from or writes to.
# makedirs(..., exist_ok=True) replaces the separate "exists?" check followed
# by mkdir: it tolerates a directory that is already there, creates missing
# parent directories, and raises FileExistsError when the path is occupied by
# a regular file instead of silently skipping it.
for directory in (DATA_PATH, DATA_ORIGIN_PATH, VENDOR_PATH, TMP_PATH, OUTPUT_DIR):
  makedirs(directory, exist_ok=True)


## CSV Preprocessing

- Generate a CID for every item name
- Delete all "Okuma" occurrences from set2 (rows whose `itm_name2` starts with "OKUMA")

In [141]:
import csv
from os import path
from ipfs_cid import cid_sha256_hash

# Source CSV; its file name is reused to derive the output file name.
LB300_FILE = path.join(DATA_ORIGIN_PATH, 'okuma_lb3000.csv')
_, csv_filename = path.split(LB300_FILE)

# The csv module requires files to be opened with newline='': otherwise
# newlines embedded in quoted fields are misread and, on platforms that write
# '\r\n' line endings, the writer emits an extra '\r' per row.
# 'utf-8-sig' strips a leading byte-order mark from the input if present.
with open(LB300_FILE, 'r', encoding='utf-8-sig', newline='') as csvinput:
  csvreader = csv.reader(csvinput)
  csvheader = next(csvreader)

  # 'okuma_lb3000.csv' -> 'okuma_lb3000.complete.csv'
  csv_output_filename = csv_filename.replace('.', '.complete.')
  csv_output_file = path.join(DATA_PATH, csv_output_filename)
  # Write UTF-8 explicitly so the result does not depend on the platform's
  # default encoding (the input is read as UTF-8 as well).
  with open(csv_output_file, 'w', encoding='utf-8', newline='') as csvoutput:
    csvwriter = csv.writer(csvoutput)
    csvwriter.writerow(csvheader + ['CID1', 'CID2'])

    for row in csvreader:
      # Drop rows whose second item name starts with 'OKUMA'.
      if not row[9].startswith('OKUMA'):
        cid1 = cid_sha256_hash(str.encode(row[2])) # itm_name1
        cid2 = cid_sha256_hash(str.encode(row[9])) # itm_name2
        csvwriter.writerow(row + [cid1, cid2])


* Split "machinetool" and "components" rows into two distinct files

In [142]:
import csv
from os import path
from ipfs_cid import cid_sha256_hash

# Enriched CSV (original columns plus CID1/CID2) produced by the previous step.
LB300_FILE = path.join(DATA_PATH, 'okuma_lb3000.complete.csv')
_, csv_filename = path.split(LB300_FILE)

# 'okuma_lb3000.complete.csv' -> 'okuma_lb3000.mtools.csv'
mtools_filename = csv_filename.replace('.complete.', '.mtools.')
mtools_file = path.join(DATA_PATH, mtools_filename)

# 'okuma_lb3000.complete.csv' -> 'okuma_lb3000.components.csv'
components_filename = csv_filename.replace('.complete.', '.components.')
components_file = path.join(DATA_PATH, components_filename)

# The csv module requires files to be opened with newline='': otherwise
# newlines embedded in quoted fields are misread and, on platforms that write
# '\r\n' line endings, the writer emits an extra '\r' per row.
with open(LB300_FILE, 'r', encoding='utf-8-sig', newline='') as csvinput:
  csvreader = csv.reader(csvinput)
  csvheader = next(csvreader)

  # Write UTF-8 explicitly so the output does not depend on the platform's
  # default encoding.
  with open(mtools_file, 'w', encoding='utf-8', newline='') as mtools_out, \
       open(components_file, 'w', encoding='utf-8', newline='') as components_out:
    # Both output files carry the same header as the input.
    mtools_writer = csv.writer(mtools_out)
    mtools_writer.writerow(csvheader)

    components_writer = csv.writer(components_out)
    components_writer.writerow(csvheader)

    for row in csvreader:
      # Rows whose first item name starts with 'OKUMA' go to the machine-tool
      # file; every other row goes to the components file.
      if row[2].startswith('OKUMA'): # itm_name1
        mtools_writer.writerow(row)
      else:
        components_writer.writerow(row)

## RML Bundle

In [143]:
import shutil
from os import path

print("Generating RML mapping bundle")

# Move the current bundle aside before writing a fresh one.
if path.isfile(MAPPING_TEMPLATE):
  print(" * Creating backup of existing mapping file ", end='')
  shutil.move(MAPPING_TEMPLATE, MAPPING_TEMPLATE_BAK)
  print(f"{GREEN}DONE{NC}")

# Every '*.part.ttl' fragment below the rules directory, in sorted path order.
RULE_FILE_PATH = path.join(RML_RULES_PATH, '**', '*.part.ttl')
rule_files = sorted(glob(RULE_FILE_PATH, recursive=True))

print(" * Creating new mapping file ", end='')
with open(MAPPING_TEMPLATE, 'w') as bundle_out:
  for fragment_path in rule_files:
    # One dot per fragment, flushed right away as progress feedback.
    print('.', end='', flush=True)
    with open(fragment_path, 'r') as fragment:
      bundle_out.write(fragment.read())
    # Separate consecutive fragments with a newline.
    bundle_out.write('\n')
  print(' ', end='', flush=True)
print(f"{GREEN}DONE{NC}")

Generating RML mapping bundle
 * Creating backup of existing mapping file [0;32mDONE[0m
 * Creating new mapping file ...... [0;32mDONE[0m


## Mapping

### Requirements
Download the [rmlmapper](https://github.com/RMLio/rmlmapper-java/releases/tag/v7.0.0) v7.0.0 jar and place it at `vendor/rmlmapper-7.0.0-r374-all.jar`.

In [144]:
import subprocess

# CSV inputs whose paths are substituted into the mapping template.
CLASSES_CSV = path.join(DATA_ORIGIN_PATH, 'classes.csv')
LB300_MTOOLS_CSV = path.join(DATA_PATH, 'okuma_lb3000.mtools.csv')
LB300_COMPONENTS_CSV = path.join(DATA_PATH, 'okuma_lb3000.components.csv')

# The output file is named after the source CSV, with '.csv' swapped for '.nt'.
LB300_CSV = path.join(DATA_ORIGIN_PATH, 'okuma_lb3000.csv')
_, csv_filename = path.split(LB300_CSV)
KG_FILE = path.join(OUTPUT_DIR, csv_filename.replace('.csv', '.nt'))

# Placeholder token -> path to insert, applied in this order.
placeholders = {
  '__CLASSES_CSV__': CLASSES_CSV,
  '__MTOOLS_SOURCE__': LB300_MTOOLS_CSV,
  '__COMPONENT_SOURCE__': LB300_COMPONENTS_CSV,
}

with open(MAPPING_TEMPLATE, 'r') as template_in:
  content = template_in.read()

for token, csv_path in placeholders.items():
  content = content.replace(token, csv_path)

with open(MAPPING_FILE, 'w') as mapping_out:
  mapping_out.write(content)

# Run the mapper; the call's return value (the process exit code) is the
# cell's displayed result.
rmlmapper_command = ['java', '-jar', RML_MAPPER_JAR, '-m', MAPPING_FILE, '-o', KG_FILE]
subprocess.call(rmlmapper_command)


0