In [1]:
import pandas as pd 
import numpy as np
import os
import json

In [2]:
# Newline character; presumably the separator between records in the
# plain-text artifact files — TODO confirm.
# NOTE(review): no visible cell references this constant;
# create_artifact_set_txt splits on a literal "\n" instead.
LINE_DELIMITER = "\n"

In [3]:
def split_on_id(artifact):
    """Split a raw artifact line into its identifier and its body.

    The identifier is everything up to and including the first run of numeric
    characters (e.g. ``"ST-185"``). The single character directly after that
    run is treated as the separator and dropped; the remainder is the body.

    Args:
        artifact: One line of text such as ``"ST-185 Load the default"``.

    Returns:
        A two-element list ``[identifier, body]``. If no separator follows a
        numeric run (the line ends with the identifier, has no numeric
        character, or is empty), the whole line is returned as the identifier
        and the body is ``""``.
    """
    identifier_started = False
    for char_index, character in enumerate(artifact):
        if character.isnumeric():
            identifier_started = True
        elif identifier_started:
            # First non-numeric character after the digits: this is the
            # separator, so cut around it.
            return [artifact[:char_index], artifact[char_index + 1:]]

    # No separator was found. Slicing on the last loop index here would chop
    # the final character off the identifier (and fail outright on an empty
    # string), so return the line untouched instead.
    return [artifact, ""]


assert split_on_id("ST-185 Load the default") == ["ST-185", "Load the default"], split_on_id("ST-185 Load the default")
assert split_on_id("ST-185") == ["ST-185", ""], split_on_id("ST-185")
assert split_on_id("") == ["", ""], split_on_id("")

In [4]:
def create_artifact_set_csv(path_to_artifact_file, artifact_type):
    """Load artifacts from a CSV file as a list of JSON-ready records.

    Each row is unpacked positionally into exactly two values, the artifact id
    followed by the artifact body, so the CSV must have exactly two columns.

    Args:
        path_to_artifact_file: Path of the CSV file handed to ``pd.read_csv``.
        artifact_type: Label for the artifacts; stored title-cased under the
            ``"type"`` key of every record.

    Returns:
        A list with one ``{"id", "body", "type"}`` dict per CSV row, in file
        order.
    """
    table = pd.read_csv(path_to_artifact_file)

    def to_record(position):
        # Positional unpack: first column is the id, second is the body.
        identifier, body = table.iloc[position]
        return {"id": identifier, "body": body, "type": artifact_type.title()}

    return [to_record(position) for position in range(len(table))]

In [5]:
def create_artifact_set_txt(path_to_artifact_file, artifact_type):
    """Load artifacts from a plain-text file as a list of JSON-ready records.

    The file is expected to hold one artifact per line. Each non-blank line is
    passed to ``split_on_id`` to separate the identifier from the body.

    Args:
        path_to_artifact_file: Path of the text file to read.
        artifact_type: Label for the artifacts; stored title-cased under the
            ``"type"`` key of every record.

    Returns:
        A list with one ``{"id", "body", "type"}`` dict per non-blank line, in
        file order.
    """
    # 1. Open and Read File
    # The context manager closes the handle even if read() raises.
    with open(path_to_artifact_file) as artifact_file:
        artifact_file_content = artifact_file.read()

    # 2. Read Artifacts
    type_label = artifact_type.title()
    artifacts_json = []
    for artifact in artifact_file_content.split("\n"):
        # A trailing newline (or a blank separator line) produces an empty
        # entry that carries no artifact; skip it rather than trying to parse.
        if not artifact.strip():
            continue
        artifact_id, artifact_body = split_on_id(artifact)
        artifacts_json.append({"id": artifact_id, "body": artifact_body, "type": type_label})
    return artifacts_json

In [6]:
def create_artifact_set(path_to_artifact_file, artifact_name, export_folder=None):
    """Convert one artifact file into a JSON artifact set on disk.

    Files whose extension is ``.txt`` (case-insensitive) are parsed with
    ``create_artifact_set_txt``; every other file is parsed as CSV with
    ``create_artifact_set_csv``.

    Args:
        path_to_artifact_file: Path of the raw artifact file.
        artifact_name: Name of the artifact set. It is also passed to the
            parser as the artifact type and used as the output file stem.
        export_folder: Directory that receives ``<artifact_name>.json``. When
            omitted, the module-level ``path_to_export_folder`` is used, which
            must then be defined before this function is called.

    Returns:
        The path of the JSON file that was written.
    """
    # 1-2. Parse the file according to its real extension. A substring test
    # such as `".txt" in path` would also fire on a ".txt" that appears in a
    # directory name or in the middle of the file name.
    extension = os.path.splitext(path_to_artifact_file)[1].lower()
    if extension == ".txt":
        artifacts_json = create_artifact_set_txt(path_to_artifact_file, artifact_name)
    else:
        artifacts_json = create_artifact_set_csv(path_to_artifact_file, artifact_name)

    # 3. Setup Datastructure
    artifact_set = {
        "name": artifact_name,
        "artifacts": artifacts_json
    }

    # 4. Export
    if export_folder is None:
        # Backward-compatible default: the notebook-level global.
        export_folder = path_to_export_folder
    export_path = os.path.join(export_folder, artifact_name + ".json")
    with open(export_path, 'w') as fp:
        json.dump(artifact_set, fp)
    return export_path

In [7]:
def create_artifact_sets(path_to_artifact_folder):
    """Export a JSON artifact set for every artifact file in a folder.

    Entries whose name starts with ``"."`` and entries that are not regular
    files are ignored. The artifact name of each remaining file is the part of
    its file name before the first ``"."``.

    Args:
        path_to_artifact_folder: Directory containing the raw artifact files.

    Returns:
        None. Each file is handed to ``create_artifact_set``, which writes the
        JSON output.
    """
    # List the folder that was passed in, not a notebook-level global, so the
    # function works for any folder it is called with.
    for artifact_file_name in os.listdir(path_to_artifact_folder):
        if artifact_file_name.startswith("."):
            continue

        path_to_artifact_file = os.path.join(path_to_artifact_folder, artifact_file_name)
        if not os.path.isfile(path_to_artifact_file):
            # Sub-directories cannot be parsed as artifact files.
            continue

        # partition() keeps the text before the first "." and, unlike
        # str.index, does not raise when the name has no "." at all.
        artifact_name = artifact_file_name.partition(".")[0]
        create_artifact_set(path_to_artifact_file, artifact_name)

In [8]:
# Driver cell: convert every raw artifact file of one dataset to JSON.
dataset = "Drone"
folder = "Raw"

# os.path.join with a single argument returns it unchanged, so the export
# folder is the dataset folder itself. create_artifact_set reads
# path_to_export_folder as a module-level global, so it must be assigned
# before the call below.
path_to_export_folder = os.path.join(dataset)
# Folder holding the raw artifact files: <dataset>/<folder>.
path_to_raw_folder = os.path.join(dataset, folder)
create_artifact_sets(path_to_raw_folder)
print("Done!")

Done!
