In [None]:
import yaml
import os
import pandas
from pandas import read_csv
import json

## Dump CSV as JSON (or a list of JSON records) using pandas
-- Note: the output does not include the validation section, because generating the validation is a more involved process.

In [None]:
# Disabled alternative selection of class files. It is kept as a bare string
# literal that is evaluated and discarded, so it has no effect on the run.
"""
classes_file_list = ['Instrument(schema_Product)',
                     'MonetaryGrant(schema_MonetaryGrant)',
                     'Organization(schema_Organization)',
                     'Person(schema_Person)',
                     'Product(schema_Product)',
                     'Analysis(schema_CreativeWork)',
                     'Protocol(schema_HowTo)',
                     'Dataset(schema_Dataset)',
                     'DataDownload(schema_DataDownload)',
                     'Publication(schema_ScholarlyArticle)']
"""

# (class name, parent class) pairs for the active selection. The parent is
# written as "prefix_Name"; the export loop turns the "_" into ":".
_ACTIVE_CLASS_PARENTS = (
    ("Intervention", "schema_Thing"),
    ("ArmGroup", "schema_Thing"),
    ("Eligibility", "schema_Thing"),
    ("Outcome", "schema_Thing"),
    ("StudyDesign", "schema_Thing"),
    ("StudyStatus", "schema_Thing"),
    ("StudyEvent", "schema_Thing"),
    ("ClinicalTrial", "schema_MedicalStudy"),
)

# Each entry is the stem of one CSV file under base_path, spelled
# "Name(prefix_Parent)".
classes_file_list = [
    name + "(" + parent + ")" for name, parent in _ACTIVE_CLASS_PARENTS
]

# Directory prefix (including the trailing slash) of the per-class CSV files.
base_path = "classes/"

def _as_id_refs(cell):
    """Turn one CSV cell into JSON-LD node reference(s).

    Args:
        cell: Raw cell value taken from the properties CSV.

    Returns:
        A list of ``{"@id": ...}`` dicts when the text contains ``[`` (the
        items are comma separated), a single ``{"@id": ...}`` dict for any
        other text, or ``None`` when the cell is not text (pandas reads an
        empty cell as a float NaN).
    """
    if not isinstance(cell, str):
        return None
    if "[" in cell:
        return [{"@id": item.strip(" ")}
                for item in cell.strip("[").strip("]").split(",")]
    return {"@id": cell}


def _property_record(row):
    """Build the ``rdf:Property`` graph entry for one CSV row.

    Args:
        row: Dict for a single CSV row, keyed by the CSV column names
            ("Property", "Description", "cardinality", "marginality",
            "sameAs", "domainIncludes", "expected type").

    Returns:
        Dict with the JSON-LD keys in the order they are written to the
        JSON file. Reference keys whose cell is empty are left out.

    Raises:
        KeyError: If one of the columns listed above is missing.
    """
    # str() lets an empty cardinality cell (NaN) fall through to "many",
    # the same default used for any text that does not mention "one".
    cardinality = "one" if "one" in str(row["cardinality"]).lower() else "many"
    record = {
        "@id": "outbreak:" + str(row["Property"]),
        "@type": "rdf:Property",
        "rdfs:comment": row["Description"],
        "rdfs:label": row["Property"],
        "owl:cardinality": cardinality,
        "marginality": row["marginality"],
    }
    reference_columns = (
        ("sameAs", "rdfs:sameAs"),
        ("domainIncludes", "schema:domainIncludes"),
        ("expected type", "schema:rangeIncludes"),
    )
    for column, key in reference_columns:
        refs = _as_id_refs(row[column])
        if refs is not None:
            record[key] = refs
    return record


# For every class CSV, write <ClassName>.json and <ClassName>.yml holding a
# JSON-LD document: one rdfs:Class node followed by one node per property.
for eachclass in classes_file_list:
    filepath = base_path+eachclass+".csv"
    # "Name(prefix_Parent)" -> ["Name", "prefix_Parent"]
    file_subclass = eachclass.strip(")").split("(")
    ## Create the subclass item
    itemclassdict = {"@id": "outbreak:" + file_subclass[0],
            "@type": "rdfs:Class",
            "rdfs:label": file_subclass[0],
            "rdfs:comment": "This is the schema for describing the "+file_subclass[0]+" schema used for outbreak.info.",
            "rdfs:subClassOf": {"@id": file_subclass[1].strip(" ").replace("_",":")},
            "schema:isPartOf": {"@id": "https://discovery.biothings.io/view/outbreak/"}
            }
    ## Create the properties
    properties_db = read_csv(filepath, delimiter=',', header=0)
    # Each record is built straight from its own row. The earlier version
    # re-attached the parsed values with an outer merge on the free-text
    # description, which multiplies rows whenever two properties share a
    # description and leaves the row order up to the merge. Records are now
    # emitted once each, in CSV order. Columns that are not used (such as
    # "allowed values") are simply ignored.
    json_record_list = [_property_record(row)
                        for row in properties_db.to_dict(orient="records")]
    graph = [itemclassdict] + json_record_list
    # NOTE(review): the owl/rdf/rdfs IRIs below differ from the namespaces
    # published by the W3C (e.g. rdfs is http://www.w3.org/2000/01/rdf-schema#).
    # They are kept unchanged here because existing outputs use them; confirm
    # with the consumers of these files before correcting.
    export_dict = {
          "@context": {
            "outbreak": "https://discovery.biothings.io/view/outbreak/",
            "owl": "http://www.w3.org/2002/07/owl/",
            "rdf": "http://www.w3.org/1999/02/22/-rdf-syntax-ns/",
            "rdfs": "http://www.w3.org/2000/01/rdf/-schema/",
            "schema": "http://schema.org/"
          },
          "@id": "https://discovery.biothings.io/view/outbreak/",
          "@graph":graph
        }
    with open(file_subclass[0]+'.json', 'w') as out_f:
        json.dump(export_dict, out_f, indent=2)
    with open(file_subclass[0]+'.yml', 'w') as out_file:
        yaml.dump(export_dict, out_file, default_flow_style=False)
    
