## Config

In [194]:
from glob import glob
from os import path

# Working directories, all relative to the notebook's working directory.
VENDOR_PATH = 'vendor'                # holds the RML mapper jar
TMP_PATH = 'tmp'                      # generated intermediate files
DATA_PATH = 'data'                    # preprocessed CSV files
DATA_ORIGIN_PATH = 'data_originals'   # source CSV files
OUTPUT_DIR = 'output'                 # mapping results

# RML mapping sources and the mapper executable.
RML_PATH = 'rml'
RML_INCLUDES_PATH = path.join(RML_PATH, 'includes')
RML_RULES_PATH = path.join(RML_PATH, 'rules')
RML_MAPPER_JAR = path.join(VENDOR_PATH, 'rmlmapper-7.0.0-r374-all.jar')

# ANSI escape sequences used to colour the status messages.
RED = '\033[0;31m'
GREEN = '\033[0;32m'
NC = '\033[0m'  # No Color

## Preparing structure

In [195]:
from os import mkdir

from os import makedirs

# Create every working directory the notebook reads from or writes to.
# makedirs(..., exist_ok=True) does nothing for a directory that already
# exists, so the cell can be re-run safely and avoids the check-then-create
# race of path.exists() followed by mkdir(). If one of the paths exists but
# is a regular file, this raises FileExistsError instead of silently going on.
for directory in (DATA_PATH, DATA_ORIGIN_PATH, VENDOR_PATH, TMP_PATH, OUTPUT_DIR):
  makedirs(directory, exist_ok=True)


## CSV Preprocessing

In [196]:
import csv
from os import path
import pandas as pd
from ipfs_cid import cid_sha256_hash

# Source file: every derived file name below is based on its base name.
# path.basename is used instead of unpacking path.split into `_`, because
# assigning to `_` overrides IPython's "last output" variable.
LB3000_CSV = path.join(DATA_ORIGIN_PATH, 'okuma_lb3000.csv')
lb3000_filename = path.basename(LB3000_CSV)

# Derived names insert a tag right before the extension
# (okuma_lb3000.csv -> okuma_lb3000.<tag>.csv). The name is split with
# path.splitext so only the extension separator is touched; the previous
# str.replace('.', ...) would have rewritten every dot in the file name.
_lb3000_stem, _lb3000_ext = path.splitext(lb3000_filename)

# Item files: one for the machine tools, one for the components.
mtools_filename = f'{_lb3000_stem}.mtools{_lb3000_ext}'
LB3000_MTOOLS_CSV = path.join(DATA_PATH, mtools_filename)

components_filename = f'{_lb3000_stem}.components{_lb3000_ext}'
LB3000_COMPONENTS_CSV = path.join(DATA_PATH, components_filename)

# Relation files: one per relation kind found in the source file.
required_filename = f'{_lb3000_stem}.required{_lb3000_ext}'
LB3000_REQUIRED_CSV = path.join(DATA_PATH, required_filename)

recommended_filename = f'{_lb3000_stem}.recommended{_lb3000_ext}'
LB3000_RECOMMENDED_CSV = path.join(DATA_PATH, recommended_filename)

incompatible_filename = f'{_lb3000_stem}.incompatible{_lb3000_ext}'
LB3000_INCOMPATIBLES_CSV = path.join(DATA_PATH, incompatible_filename)

In [197]:
# Column layout of the two item files; 'CID' is appended to the six source
# columns copied from each row.
ITEMFILE_HEADERS = [
  'pic_itemId', 'pic_itemCode',
  'itm_name', 'itm_description', 'CEL_INVENTCLASSAID', 'FULLPATH',
  'CID'
]

lb3000csv = pd.read_csv(LB3000_CSV)

# newline='' is required by the csv module (otherwise every row is followed
# by a blank line on Windows). encoding='utf-8' matches the default that
# pd.read_csv uses when these files are read back in the next cell.
with open(LB3000_MTOOLS_CSV, 'w', newline='', encoding='utf-8') as metools_out, \
     open(LB3000_COMPONENTS_CSV, 'w', newline='', encoding='utf-8') as components_out:

  metools_writer = csv.writer(metools_out)
  metools_writer.writerow(ITEMFILE_HEADERS)

  components_writer = csv.writer(components_out)
  components_writer.writerow(ITEMFILE_HEADERS)

  for index, row in lb3000csv.iterrows():

    # First item of the row: its content identifier is the hash of its name.
    # Assumes columns 0-5 hold item 1 in ITEMFILE_HEADERS order - TODO confirm
    # against the source CSV.
    name1 = row['itm_name1']
    cid1 = cid_sha256_hash(str.encode(name1))
    item1 = pd.concat([row.iloc[0:6], pd.Series([cid1])], ignore_index=True)
    # Names starting with 'OKUMA' go to the machine-tool file, all others
    # to the component file.
    if name1.startswith('OKUMA'):
      metools_writer.writerow(item1)
    else:
      components_writer.writerow(item1)

    # Second item of the row is always written to the component file.
    # Assumes columns 7-12 hold item 2 in the same order - TODO confirm.
    cid2 = cid_sha256_hash(str.encode(row['itm_name2']))
    item2 = pd.concat([row.iloc[7:13], pd.Series([cid2])], ignore_index=True)
    components_writer.writerow(item2)


In [198]:
# Final column order of the item files: the content identifier comes first.
ITEMFILE_HEADERS_SORTED = [
  'CID',
  'itm_name', 'itm_description', 'CEL_INVENTCLASSAID', 'FULLPATH',
  'pic_itemId', 'pic_itemCode'
]

# Rewrite each item file in place: keep only the first row per CID and
# reorder the columns. The machine-tool file is processed first, then the
# component file.
for item_csv in (LB3000_MTOOLS_CSV, LB3000_COMPONENTS_CSV):
  df = pd.read_csv(item_csv).drop_duplicates(subset=['CID'], keep='first')
  df.reindex(columns=ITEMFILE_HEADERS_SORTED).to_csv(item_csv, index=False)


In [199]:
MTOOL_CLASS = 'MachineTool'
COMPONENT_CLASS = 'Component'
# Each relation row names both ends of the relation by class name and CID.
RELFILE_HEADERS = ['CLASSNAME1', 'CID1', 'CLASSNAME2', 'CID2']

lb3000csv = pd.read_csv(LB3000_CSV)

# newline='' is required by the csv module (otherwise every row is followed
# by a blank line on Windows); the encoding is pinned so the files do not
# depend on the platform's locale.
with open(LB3000_REQUIRED_CSV, 'w', newline='', encoding='utf-8') as required_out, \
     open(LB3000_RECOMMENDED_CSV, 'w', newline='', encoding='utf-8') as recommended_out, \
     open(LB3000_INCOMPATIBLES_CSV, 'w', newline='', encoding='utf-8') as incompatible_out:

  # One writer per RELATION value found in the source file.
  relation_writers = {
    'Required': csv.writer(required_out),
    'Recommended': csv.writer(recommended_out),
    'Incompatible': csv.writer(incompatible_out),
  }
  for relation_writer in relation_writers.values():
    relation_writer.writerow(RELFILE_HEADERS)

  for index, row in lb3000csv.iterrows():
    # Items whose name starts with 'OKUMA' are machine tools, all others
    # are components; the CID is the hash of the item name.
    cid1 = cid_sha256_hash(str.encode(row['itm_name1']))
    class1 = MTOOL_CLASS if row['itm_name1'].startswith('OKUMA') else COMPONENT_CLASS

    cid2 = cid_sha256_hash(str.encode(row['itm_name2']))
    class2 = MTOOL_CLASS if row['itm_name2'].startswith('OKUMA') else COMPONENT_CLASS

    relation_writer = relation_writers.get(row['RELATION'])
    if relation_writer is None:
      print(f'{RED}Unknown relation: {row["RELATION"]}{NC}')
    else:
      # Write one value per header column. Previously two combined
      # 'class/cid' strings were written under the four-column header, which
      # left CLASSNAME2 and CID2 empty in every row.
      # NOTE(review): confirm the RML rules read the four separate columns.
      relation_writer.writerow([class1, cid1, class2, cid2])


## RML Bundle

In [200]:
import os
import shutil
from pathlib import Path

# Files produced while bundling the RML mapping: two intermediate stages and
# the resulting template inside the RML directory, plus the rendered mapping
# that is written to the temporary directory.
MAPPING_STAGE1 = path.join(RML_PATH, 'mapping.stage1.ttl')
MAPPING_STAGE2 = path.join(RML_PATH, 'mapping.stage2.ttl')
MAPPING_TEMPLATE = path.join(RML_PATH, 'mapping.template.ttl')
MAPPING_FILE = path.join(TMP_PATH, 'mapping.rml.ttl')

In [201]:
# Stage 1: concatenate every *.rule.ttl file found below the rules directory
# (searched recursively, in sorted path order) into a single bundle, with a
# newline after each file.
RULE_FILE_PATH = path.join(RML_RULES_PATH, '**', '*.rule.ttl')
rule_files = sorted(glob(RULE_FILE_PATH, recursive=True))

print(" * Collecting all rule files ", end='')
with open(MAPPING_STAGE1, 'w') as bundle:
  for rule_file in rule_files:
    # One progress dot per rule file.
    print('.', end='', flush=True)
    bundle.write(Path(rule_file).read_text())
    bundle.write('\n')
  print(' ', end='', flush=True)
print(f"{GREEN}DONE{NC}")

 * Collecting all rule files ..... [0;32mDONE[0m


In [202]:
# Stage 2: replace each include placeholder in the stage-1 bundle with the
# content of the matching *.inc.ttl file.
INC_FILES_PATH = path.join(RML_INCLUDES_PATH, '**', '*.inc.ttl')
# Sorted so the substitution order does not depend on the filesystem's
# listing order.
incfiles = sorted(glob(INC_FILES_PATH, recursive=True))

print(" * Incorporate include files ", end='')
# Read and substitute before opening the output file, so a failure while
# reading does not leave an empty stage-2 file behind.
content = Path(MAPPING_STAGE1).read_text()
for incfile in incfiles:
  print('.', sep='', end='', flush=True)
  incfile = Path(incfile)
  # Path.stem strips only the last suffix, so 'name.inc.ttl' is matched by
  # the placeholder '__name.inc__'.
  placeholder = f'__{incfile.stem}__'
  content = content.replace(placeholder, incfile.read_text())
with open(MAPPING_STAGE2, 'w') as bundle:
  bundle.write(content)
print(' ', sep='', end='', flush=True)
print(f"{GREEN}DONE{NC}")

 * Cncorporate include files . [0;32mDONE[0m


In [203]:
# Stage 1 is only an intermediate: delete it, then promote stage 2 to the
# mapping template. os.replace overwrites an existing template on every
# platform, whereas os.rename raises FileExistsError on Windows when the
# template is still there from an earlier run of the notebook.
os.remove(MAPPING_STAGE1)
os.replace(MAPPING_STAGE2, MAPPING_TEMPLATE)

## Mapping

### Requirements
Java must be available on the `PATH`. Download the [rmlmapper](https://github.com/RMLio/rmlmapper-java/releases/tag/v7.0.0) jar and save it as `vendor/rmlmapper-7.0.0-r374-all.jar`.

In [204]:
# Class list that is substituted for the __CLASSES_CSV__ placeholder in the
# mapping template.
CLASSES_CSV = path.join(DATA_ORIGIN_PATH, 'classes.csv')

# Mapper output: named after the LB3000 source file, with '.csv' swapped
# for '.nt'.
kg_filename = lb3000_filename.replace('.csv', '.nt')
KG_FILE = path.join(OUTPUT_DIR, kg_filename)

In [205]:
import subprocess

# Placeholder in the mapping template -> path of the CSV file it stands for.
# Replacements are applied in this order.
SOURCE_PLACEHOLDERS = {
  '__CLASSES_CSV__': CLASSES_CSV,
  '__MTOOLS_SOURCE__': LB3000_MTOOLS_CSV,
  '__COMPONENT_SOURCE__': LB3000_COMPONENTS_CSV,
  '__REQUIRED_SOURCE__': LB3000_REQUIRED_CSV,
  '__RECOMMENDED_SOURCE__': LB3000_RECOMMENDED_CSV,
  '__INCOMPATIBLES_SOURCE__': LB3000_INCOMPATIBLES_CSV,
}

# Render the template into the mapping file that is handed to the mapper.
with open(MAPPING_TEMPLATE, 'r') as template_file:
  content = template_file.read()

for placeholder, source_path in SOURCE_PLACEHOLDERS.items():
  content = content.replace(placeholder, source_path)

with open(MAPPING_FILE, 'w') as mapping_file:
  mapping_file.write(content)

# Run the RML mapper. check_call returns 0 on success (still shown as the
# cell output) and raises CalledProcessError on a non-zero exit status, so a
# failed mapping stops "Run All" instead of passing as a bare number.
subprocess.check_call([
  'java', '-jar', RML_MAPPER_JAR,
    '-m', MAPPING_FILE,
    '-o', KG_FILE
])


0