# PyUnify v1 Release Notebook

The following notebook shows an example of creating a PyUnify object from a Yahoo Finance .csv export.

It walks you through automatically generating meta, concept, and data tables.

You can use the .quality() function to get a 'report card' on the quality of your data enrichment.

Please contact ron@intelligence.ai for any questions or if you would like to contribute.

Everything is open source and we only ask for attribution. Thanks! :)


In [56]:
# commented out to remove accidental re-running of cell, initialize only once

#from google.colab import drive
#drive.mount('/content/drive')

#import os
#os.chdir('drive/MyDrive')
#os.getcwd()

In [57]:
# manages the 'concepts' object
class PyUnify_Concepts:
  def __init__(self, concepts, pd, table_render, data):    
    #init
    self.concepts = {
        'headers':{
          'name': [],
          'type': [],
          'description':[],
          'wikidata':[]
        }, 
        'values':{
            
        }
    }
    self.data = data
    
    
    # concepts already exists, assign    
    if concepts != {}:            
      self.concepts = concepts

    # check if concepts is empty
    if self.concepts['headers']['name'] == [] and data != None:      
      self._init_concepts_from_data()    

    # concepts doesn't exist, create from data if exists
    elif concepts == {} and data != None:      
      self._init_concepts_from_data()

    

    self.pd = pd
    self.table_render = table_render

  def headers(self):
    return self.concepts['headers']
  
  def values(self):
    return self.concepts['values']

  def features(self):
    return self.concetps['features']

  def _init_concepts_from_data(self):
    orientation = self.data.get_orientation()
    if orientation == "rows":
      if self.data.to_list() != []:      
        columns = self.data.to_list()[0]
        for col in columns:      
          self.headers()['name'].append(col)
          self.headers()['type'].append(None)
          self.headers()['description'].append(None)
          #self.headers()['wikidata'].append(None)
    elif orientation == "columns":      
      keys = self.data.get_dict().keys()
      for key in keys:
        self.headers()['name'].append(key)
        self.headers()['type'].append(None)
        self.headers()['description'].append(None)
        #self.headers()['wikidata'].append(None)

  def dict(self):
    return self.concepts
  
  def table_headers(self, render=True, flag=False):         
    h_frame = self.pd.DataFrame(self.concepts['headers'])
    if render:  
      self.table_render.get("CONCEPTS - HEADERS", h_frame)   
    return h_frame

  def table_values(self, render=True, flag=False):         
    df = self.pd.DataFrame(self.concepts['values'])
    if render:  
      self.table_render.get("CONCEPTS - VALUES", df)   
    return df

  def table_features(self, render=True, flag=False):         
    df = self.pd.DataFrame(self.concepts['features'])
    if render:  
      self.table_render.get("CONCEPTS - FEATURES", df)   
    return df

  def table(self, render=True, flag=False):         
    h_frame = self.pd.DataFrame(self.concepts['headers'])
    v_frame = self.pd.DataFrame(self.concepts['values']) 
    f_frame = self.pd.DataFrame(self.concepts['features'])    
    if render:  
      self.table_render.get("CONCEPTS - HEADERS", h_frame)   
      self.table_render.get("CONCEPTS - VALUES", v_frame)   
      self.table_render.get("CONCEPTS - FEATURES", f_frame)   
    return { "headers": h_frame, "values": v_frame, 'features':f_frame }
  
  def df_values(self, render=True, flag=False):             
    return self.pd.DataFrame(self.concepts['values'])        

  def df_headers(self, render=True, flag=False):         
    return self.pd.DataFrame(self.concepts['headers'])    

  def df_features(self, render=True, flag=False):         
    return self.pd.DataFrame(self.concepts['features'])    
    
  



In [58]:
class PyUnify_Contract:
  def __init__(self, params, json_unify, to_json_pretty, md):

    self.filename = None
    self.to_json_pretty = to_json_pretty
    self.json_unify = json_unify
    self.md = md
    self.get()

    

  def get(self):  
    if self.filename != None:
      if self.filename.endswith('.json'):
        with open(self.filename, "w") as outfile:
          outfile.write(self.to_json_pretty(self.json_unify))        
    else:
      return self.json_unify


In [59]:
# manages the 'data' object of the PyUnify object
class PyUnify_Data:
  def __init__(self, data, pd=None, table_render=None, params=None):
    self.pd = pd    
    self.data = data
    self.table_render = table_render
    self.data_orientation = None
    self.params = params
    self.set_orientation()
    

  def set_orientation(self):        
    if type(self.data) == type(list()):
      self.data_orientation = "rows"
    elif type(self.data) == type(dict()) and self.params == None:            
      self.data_orientation = "columns"
    elif type(self.data) == type(dict()) and self.params != None:      
      if 'orient' not in self.params or self.params['orient'] != 'index':        
        self.data_orientation = "columns"        
      elif self.params['orient'] == 'index':                
        self.data_orientation = "index"
    else:      
      print('set_orientation FAILURE')
    
      

  def get_orientation(self):
    return self.data_orientation  

  def df(self):
    df = self.table(False, False)    
    return df

  #instructions how to render this object as a table
  def table(self, render=True, flag=False):  
    df = self.pd.DataFrame()    
    if self.get_orientation() == "rows":        
      if self.data != []:                
        df = self.pd.DataFrame(self.data)    
        df.columns = df.iloc[0] 
        df = df[1:]           
    elif self.get_orientation() == "columns":
      df = self.pd.DataFrame(self.data)    
    elif self.get_orientation() == "index":
      df = self.pd.DataFrame.from_dict(self.data, orient='index')
      df.columns = df.iloc[0] 
      df = df[1:]           
    else:
      print("AN ERROR HAS OCCURED IN data.table()")    
    if render:        
      self.table_render.get("DATA", df)         
    return df

  #converts list to dict format
  def to_dict(self, data=None):    
    retVal = {}
    if data == None:
      data = self.data
    if type(data) == type(list()):
        headers = self.data[0]        
        #{"col1":[]}
        #[['col1', 'col2']['col_1_val1', 'col_2_val1], ['col1_val2', 'col2_val2]]
        for col_index, header in enumerate(headers):          
          retVal[header] = []          
          for row_index, row in enumerate(data):
            if row_index > 0:
              retVal[header].append(data[row_index][col_index])
    else:
      print('An error has occurred in data.to_dict(), only lists can be converted')

    return retVal        
    


  #converts dict to list format
  def to_list(self):
    orientation = self.get_orientation()
    
    if orientation == "rows":
      return list(self.data)
    # need to convert from dict to list
    else:
      #print('data: ',self.data)
      retVal = []
      keys = list(self.data.keys())
      #print('headers', keys)
      header_arr = []
      retVal.append(header_arr)
      # add headers as first row to array
      #[['name']]
      for header_index, key in enumerate(keys): 
        #print('header_index: ', header_index, 'header: ', key)       
        header_arr.append(key)                
      
        #[['name', 'description'], ['hello'], ['world']]
        # for each header, get the row # of an iterator. and append it to the rows of retval, in the column
        for row_num, val in enumerate(self.data[key]):          
          # just headers exist
          #print('key : ', key, 'row_num: ', row_num, 'val to add: ',val, retVal)  
          if len(retVal) == 1:
            
            val_arr = []
            val_arr.append(val)
            retVal.append(val_arr)
            
          else:            

            #check if array already exists, if not, add new one
            if len(retVal) == row_num +1:
              retVal.append([val])
            
            else:
              row = retVal[row_num+1]
              row.append(val)

      return retVal
      

  def get_dict(self):
    return dict(self.data)

  def __str__(self):
    return str(self.data)

In [60]:
class PyUnify_Format:
  def __init__(self):
    pass
  
  # self.format(['italic', 'red']) and outputs the string
  def style(self, styles=[]):
    retVal = ""
      
    if len(styles) == 0:
      styles = ['default']
    
    for style in styles:
      if style == 'italic':
        retVal += '\033[3m'
      elif style == 'default':
        retVal += '\033[0m'
      elif style == 'bold':
        retVal += '\033[1m'
      elif style == 'red':
        retVal += '\033[91m'
      elif style == 'orange':
        retVal += '\033[38;5;208m'
      elif style == 'blue':
        retVal += '\033[38;5;40m'
      elif style == 'green':
        retVal += '\033[38;5;20m'
    return retVal

In [61]:
class PyUnify_Governance:
  def __init__(self, governance, pd, table_render):
    
    
    
    # governance already exists, assign    
    if governance != {}:            
      self.governance = governance    

    self.pd = pd
    self.table_render = table_render

  def sla(self):
    return self.governance['sla']
  
  def roles(self):
    return self.governance['roles']

  def requirements(self):
    return self.governance['requirements']
  
  def dict(self):
    return self._init_blank_governance
  
  def getDataFrame(self, data):
    df = self.pd.DataFrame(data)    
    return df

  def df_sla(self):
    return self.getDataFrame(self.governance['sla'])            
  
  def df_requirements(self):
    return self.getDataFrame(self.governance['requirements'])

  def table_sla(self, render=True, flag=False): 
    s_frame = self.getDataFrame(self.governance['sla'])
    if render:        
      self.table_render.get("GOVERNANCE - SLA", s_frame)                   
    return s_frame

  def table_requirements(self, render=True, flag=False): 
    s_frame = self.getDataFrame(self.governance['requirements'])
    if render:        
      self.table_render.get("GOVERNANCE - REQUIREMENTS", s_frame)                   
    return s_frame

  

  def table(self, render=True, flag=False):             
    s_frame = self.getDataFrame(self.governance['sla']) 
    r_frame = self.getDataFrame(self.governance['requirements'])
    if render:        
      self.table_render.get("GOVERNANCE - SLA", s_frame)
      self.table_render.get("GOVERNANCE - REQUIREMENTS", r_frame)
    return {'sla':s_frame, 'requirements':r_frame }
  

In [62]:
class pyunify_help:
  def __init__(self, params=None):
    self.f = PyUnify_Format()

    self.section_about = "{0}About Us{1}".format(self.f.style(['bold']), self.f.style())
    self.section_specification = "{0}The JSON-Unify Specification{1}".format(self.f.style(['bold']), self.f.style())
    self.section_pyunify = "{0}The PyUnify Python Package{1}".format(self.f.style(['bold']), self.f.style())
    self.section_files = "{0}File Imports/Exports With PyUnify{1}".format(self.f.style(['bold']), self.f.style())
    
    self.about = "{0}Who we are and why{1} we started the project:\n  https://github.com/pyunify/documentation/blob/main/ABOUT.md".format(self.f.style(['bold']), self.f.style())
    self.support = "How to get {0}involved or support{1} PyUnify & JSON-Unify:\n  [GitHub Sponsors link coming soon]".format(self.f.style(['bold']), self.f.style())
    self.spec = "{0}JSON-Unify{1} is the specification that PyUnify implements (Apache 2.0 License):\n  https://github.com/JSON-UNIFY".format(self.f.style(['bold']), self.f.style())
    self.source = "The {0}PyUnify{1} source code and documentation (Apache 2.0 License):\n  https://github.com/pyunify".format(self.f.style(['bold']), self.f.style())
    self.explore = "Exploring your {0}data{1} with PyUnify:\n  https://github.com/pyunify/documentation/blob/main/EXPLORE.md".format(self.f.style(['bold']), self.f.style())
    self.queries = "{0}Queries{1} with PyUnify - update, select, delete, and insert:\n  https://github.com/pyunify/documentation/blob/main/QUERIES.md".format(self.f.style(['bold']), self.f.style())
    #self.quality = "How to check the {0}quality{1} of your data, metadata, and concepts:\n  https://github.com/pyunify/package/blob/main/QUALITY.md".format(self.f.style(['bold']), self.f.style())
    #self.md = "How to generate {0}.MD{1} files:\n  https://github.com/pyunify/package/blob/main/MD.md".format(self.f.style(['bold']), self.f.style())
    #self.schema = "How to generate {0}JSON-schema{1} files:\n  https://github.com/pyunify/package/blob/main/JSON-SCHEMA.md".format(self.f.style(['bold']), self.f.style())
    #self.ld = "How to generate {0}JSON-ld{1} files:\n  https://github.com/pyunify/package/blob/main/JSON-LD.md".format(self.f.style(['bold']), self.f.style())
    #self.file_csv = "How to import/export {0}.csv{1} files:\n  https://github.com/pyunify/package/blob/main/FILE_CSV.md".format(self.f.style(['bold']), self.f.style())
    #self.file_sql = "How to import/export {0}.sql{1} files:\n  https://github.com/pyunify/package/blob/main/FILE_SQL.md".format(self.f.style(['bold']), self.f.style())
    #self.file_json = "How to import/export {0}.json{1} files:\n  https://github.com/pyunify/package/blob/main/FILE_JSON.md".format(self.f.style(['bold']), self.f.style())
    #self.file_md = "How to import/export {0}.md{1} files:\n  https://github.com/pyunify/package/blob/main/FILE_MD.md".format(self.f.style(['bold']), self.f.style())
    self.files = "How to import, export, and work with {0}.md, .csv, .json, and .sql{1} files:\n  https://github.com/pyunify/documentation/blob/main/FILES.md".format(self.f.style(['bold']), self.f.style())

    
    if params == None:
      self.get()
    elif params == "md" or params == "MD":
      self.to_md()


  def get(self):
    about = "{0}\n\n- {1}\n\n {2}".format(self.section_about, self.about, self.support)
    spec = "\n\n\n{0}\n\n- {1}".format(self.section_specification, self.spec)
    py = "\n\n\n{0}\n\n- {1}\n\n- {2}\n\n- {3}".format(self.section_pyunify, self.source, self.explore, self.queries)
    files = "\n\n\n{0}\n\n- {1}".format(
        self.section_files, self.files)
    print(about, spec, py, files)
    
  def to_md(self):
    about = "# {0}\n\n- {1}\n\n- {2}".format(self.section_about, self.about, self.support)
    spec = "\n\n# {0}\n\n- {1}\n\n- {2}".format(self.section_specification, self.spec, self.source)
    py = "\n\n# {0}\n\n- {1}\n\n- {2}".format(self.section_pyunify, self.explore, self.queries)
    files = "\n\n# {0}\n\n- {1}".format(self.section_files, self.files)
    print(about, spec, py, files)
    
 
  

In [63]:
class pyunify_json_ld:
  def __init__(self, jsonld, data, pd, table_render):
    self.data = data
    self.pd = pd
    self.table_render = table_render

    if jsonld != [] and jsonld != None:
      self.json_ld = jsonld
    else:
      self.json_ld = self._init_jsonld()
    
    
  def __str__(self):
    return self.data
  
  def get(self):
    return self.json_ld

  def new_row(self):
    
    return     {
       "@context": "https://schema.org",
       "@type":None,
       "@id":None
    }

  def _init_jsonld(self): 
    arr = []
    data = self.data.to_list()
    
    for row_index, row in enumerate(data):
    
      if row_index > 0:
       obj = self.new_row() 
    
       for col_index, key in enumerate(data[0]):
         obj[key] = row[col_index]
         arr.append(obj)
    self.json_ld = arr
    return arr


In [64]:
class PyUnify_Json_Schema:
  def __init__(self, headers, pretty):    
    self.pretty = pretty
    self.json_schema = self._init_schema()    
    if headers != {}:      
      self.add_properties(headers)

  def add_properties(self, headers):
    for index, n in enumerate(headers['name']):
        props = self.json_schema['properties']
        props[n] = {
                'description': headers['description'][index],
                'type': headers['type'][index]
            }    

  def get(self):
    #print(self.json_schema)
    self.pretty(self.json_schema)

  def _init_schema(self): 
    
    return {
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "$id": None,
        "title": None,
        "description": None,
        "type": "object",
        "properties": {
        }
    }

In [65]:
# manages internal 
class pyunify_json_unify:
  def __init__(self, _json_unify=None, json=None):
    
    self.json_unify = {}
    self.json = json

    # is it a string?
    if type(_json_unify) == type('string'):            
      #is it a file import?                                   
      _is_json = self.is_json(_json_unify)

      #if it is not valid JSON      
      if _is_json == False:        
        self.json_unify = self._init_json_unify()  
        
      #if it is valid JSON
      else:        
        self.json_unify = self.json.loads(_json_unify)      
    
    #if it is a dict make sure all spec objects are there ignore all else
    elif type(_json_unify) == type({"type":"dict"}):     
      
      # check if data is the only key
      keys = list(_json_unify.keys())
      if len(keys) == 1 and keys[0] == 'data' and type(_json_unify['data']) == type(list()):
        self.instantiate_data_as_list(_json_unify['data'])
          
      else:
        self.json_unify = self._init_json_unify()      
        keys = _json_unify.keys()
        for key in keys:
          if key not in self.json_unify:
            print('an error has occurred, there is no matching key to the specification')
          else:
            self.json_unify[key] = _json_unify[key]

    #ex pyunify([['col1'], ['val1'])
    elif type(_json_unify) == type(list()):            
      self.instantiate_data_as_list(_json_unify)
      #print(self.json_unify)

    #if it is blank, create an empty JSON-Unify spec object
    elif _json_unify == None:                      
      self.json_unify = self._init_json_unify()
    
    #is it of type PyUnify?
    else:
      print('An unknown error has occurred with pyunify_json_unify module')

  

  def instantiate_data_as_list(self, data):
    data = PyUnify_Data(data).to_dict()  
    self.json_unify = self._init_json_unify() 
    self.json_unify['data'] = data

  def get(self):
    return self.json_unify

  def to_json(self):
    print(self.json_unify)
    return self.json.dumps(self.json_unify, indent = 4)

  # checks if arg is valid json and returns boolean 
  def is_json(self, json_string):
    try:
      self.json.loads(json_string)
    except ValueError as e:
      return False
    return True

  def _init_governance(self):
    gov =  PyUnify_Governance()
    return gov.get()
    

  # returns an empty dict using the JSON-Unify specification
  def _init_json_unify(self):
    
    json_unify = {}
    if 'concepts' not in self.json_unify:
      json_unify['concepts'] = {
        'headers':{
          'name': [],
          'type': [],
          'description':[]
        }, 
        'values':{
          'entity': [],
          'header name': [],
          'type': [],
          'description':[]
        },
        'features':{
            'row':[],
            'column':[],  
            'table':[],          
            'relation':[],
            'description':[]
        }
    }
    #if 'compute' not in self.json_unify:
      #json_unify['compute'] = {
        #"function": [            
            
        #],
        #"event listener description": [
            
        #]        
      #}
    #if 'custom' not in self.json_unify:
      #json_unify['custom'] = {}
    if 'data' not in self.json_unify:
      json_unify['data'] = []
    if 'governance' not in self.json_unify:
      json_unify['governance'] = {
        "sla": {
          "category":[],
          "provider": [],          
          "customer": [],
          "requirement":[]   
        },
        "requirements": {
            "header":[],
            "minimum":[],
            "exclusiveMinimum":[],
            "maximum":[],
            "exclusiveMaximum":[],
            "options":[],
            "options_default_selected":[]
        }
    }
    if 'lineage' not in self.json_unify:
      json_unify['lineage'] = {         
        "command":[],
        "params":[],
        'date':[]
      }
    if 'json-schema' not in self.json_unify:
      json_unify['schema'] = {
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "$id": None,
        "title": None,
        "description": None,
        "type": "object",
        "properties": {
        }
    }
    if 'json-ld' not in self.json_unify:
      json_unify['json-ld'] = []    

    

    if 'meta' not in self.json_unify:
      json_unify['meta'] = {
        "key":["contract", "specification", "description", "source", "table", "query", "tags", "authors", "id", "contact", "name", "markdown"],
        'value':[None, "https://github.com/JSON-UNIFY", None, None, None, None, None, None, None, None, None, None]        
      }
      
    else:
      pass
    #if 'project' not in self.json_unify:
      #json_unify['project'] = {
       # "description": [            
            
        #],
        #"status": [
            
        #],
        #"date": [
            
        #]        
    #}
    
    return json_unify                


In [66]:
# manages the 'lineage' object
class pyunify_lineage:
  def __init__(self, lineage, pd, table_render):
    if lineage != {}:
      self.lineage = lineage    
    elif lineage == {} or lineage == None:
      self.lineage = self._init_lineage()
    self.pd = pd
    self.table_render = table_render
  

  def df(self):
    return self.table(False, False)

  def table(self, render=True, flag=False): 
    df = self.pd.DataFrame(self.lineage)
    if render:  
      self.table_render.get("LINEAGE", df)   
    return df
  
  def _init_lineage(self):
    return {
        "command":[],
        'params':[],
        'date':[]
    }

  

In [67]:
class PyUnify_MD:
  def __init__(self, json, _json_unify, to_json_pretty, to_json_string):        
    self.json = json       
    self._json_unify = _json_unify
    self.to_json_pretty = to_json_pretty
    self.to_json_string = to_json_string

  
  #def pretty_json(self):    
    #return self.json.dumps(self._json_unify, indent=4)
  
  def benefits(self):
    title = "\n### BENEFITS\n"
    benefits = "\nNo license needed, no technology to budget, buy, or implement. Anyone can use it, an obvious and simple user experience that will help entire teams."
    goal = "\nThe purpose and benefit of using JSON-Unify is to improve the user experience of those who create, consume, and share data by reducing ambiguitiy and saving time.\n\n"
    return title+benefits+goal

  def wrapper(self):
    title = "\n### PyUnify IS A WRAPPER TO MAKE WORKING WITH JSON-UNIFY EASY\n"
    rec1 = "\n- PyUnify is a Python library that auto-generates and adds recommended JSON-Unify properties, as well as helper functions like exporting .MD files."
    rec2 = "\n- View the **specification** and **recommendations** to further enrich your data in the specification at: https://github.com/JSON-UNIFY."  
    rec3 = "\n- For example, a few 'meta' recommended fields are: a description of the data, the source of the data, the author of the data, the data contract URL."    
    return title+rec1+rec2+rec3
    

  def specification(self):
    specification = "\n### THE JSON-Unify SPECIFICATION\n"
    requirements = "\n\nJSON-Unify is a <em>minimal & simple specification</em> requiring **metadata**, **concepts**, and **data** are in one self-contained JSON object."
    example = "\n```\nJSON-Unify = {\n\tconcepts: {},\n\tdata: {},\n\tmeta: {}\n}\n```\n"  
    #compatibility = "\n<em>You can have a pointer (URL reference) to the data if the data is too large/inefficient to include in a JSON object or if the data is not in JSON format.</em> This still provides a single object in JSON format that can be used to communicate data"
    return specification + requirements + example

  def instantiate(self):    
    specification = "\n### USING PyUnify\n"
    step1 = "\nTo instantiate this JSON-Unify data contract with PyUnify:\n"
    step2 = "\n- Copy the JSON code in this MD file below.\n- In your **Python Notebook**:\n```\n!pip install PyUnify\nimport PyUnify\nunify = PyUnify(paste_json_from_this_MD_file)\n```\n"    
    return step1+step2

  def generate(self):
    line1 = "\nThis markdown file was automatically generated with \n```\nunify.md()\n```\n"
    line2 = "\nTo edit the title of the documentation use: \n```\nunify.md({'title':'My title here'})\n```\n"
    line3 = "\nOtherwise, the title will be auto-generated from the meta name property\n"
    return line1+line2+line3
    

  def instructions(self):
    instructions = ""
    instructions += self.specification()
    instructions += self.benefits()
    instructions += self.wrapper()
    instructions += self.instantiate()
    instructions += self.generate()    
    return instructions

  def title(self, title=None):
    if title == None:
      return "# Auto-generate JSON-Unify documentation with PyUnify\n\n"
    else:
      return "# {0}".format(title)

  def get(self, data_frames, params, print_md=True):    
    
    markdown = ""    
    if params['about']:
      markdown += self.title()
      markdown += self.instructions()
      markdown += "\n## Example PyUnify .MD output when the only input is a data set\n"
      markdown += "\n### PyUnify can guide you step-by-step to fill out empty tables and fields in order to enrich the discoverability and quality of the data experience you are creating for those that will use your data.\n"
    
    if params['title']:
      title = "\n# {0}\n\n".format(params['title'])
      markdown += title

    for df in data_frames:      
      header=df['header']
      table = df['table'].to_markdown()
      
      description = df['description']
      markdown += "## {0}\n{1}\n```\n{2}\n```\n".format(header, description, table)

    markdown += "\n## {0}\n{1}\n".format("JSON-Unify Object (file)", "This is the JavaScript Object Notation representation of your PyUnify Object. You could save this to a file and import it, use it in Javascript, etc.")
    markdown += '\n\n```\n{0}\n```'.format(self.to_json_pretty())

    markdown += "\n## {0}\n{1}\n".format("JSON-Unify Object (string)", "Use this version to copy and paste into Jupyter Notebook to create a PyUnify Object")
    markdown += '\n\n```\n{0}\n```'.format(repr(self.to_json_string()))
    if print_md:
      print(markdown)
    else:
      return markdown
    

      

    
    
    
    


In [68]:
# manages the 'meta' object
class pyunify_meta:
  def __init__(self, meta, pd, table_render):
                  
    if meta == {} or meta == None or meta == {'key': [], 'value': []}:
      self.meta = self._init_meta()
    else:
      self.meta = meta
    self.pd = pd
    self.table_render = table_render
  

  def df(self):
    return self.table(False, False)

  def table(self, render=True, flag=False): 
    df = self.pd.DataFrame(self.meta)                             
    if render:  
      self.table_render.get("META", df)   
    return df
  
  def _init_meta(self):
    return {
        "key":["contract", "specification", "description", "source", "table", "query", "tags", "authors", "id", "contact", "name", "markdown"],
        'value':[None, "https://github.com/JSON-UNIFY", None, None, None, None, None, None, None, None, None, None]        
    }

  

In [69]:
class PyUnify_Query:
  def __init__(self, json_unify):
    self.json_unify = json_unify
    
  def get_table(self, key):
    if key == 'meta'or key == 'data':
      table = self.json_unify[key]
    elif key == 'headers' or key == 'values' or key == 'features':
      table = self.json_unify['concepts'][key]
    elif key == 'sla' or key == 'requirements':
      table = self.json_unify['governance'][key]
    return table


  # key is the table, cmd is the the action to take like update, insert, select, params are the parameters of the query
  #unify.query('meta', 'update', [{'row':9, 'col':'value', 'set':"Yahoo Finance Download"}, {'row':6, 'col':'value', 'set':'yahoo.com'}])
  def query(self, key, cmd, params):        
    table = self.get_table(key)
    if cmd == 'update':
      return self.query_update(table, params)
    elif cmd == 'select':      
      return self.query_select(table, params)
    elif cmd == 'insert':      
      return self.query_insert(table, params)
    elif cmd == 'delete':      
      return self.query_delete(table, params)

  # unify.query('meta', 'select', {'col':'key', 'target':'source'})
  def query_select(self, table, params):            
    col_name = params['col']    
    column = table[col_name]
    unify_row_index = None 
    retVal = {}   
    for index, item in enumerate(column):
      if item == params['target']:
        retVal['row'] = index
        retVal['column'] = col_name        
        retVal['cell'] = params['target']
    
    return retVal

  # unify.query('meta', 'delete', [{"rows":[4, 5, 6, 8, 9]}])
  def query_delete(self, table, params):    
    
    for param in params:
      if 'rows' in param:
        rows = param['rows']
        for index, row in enumerate(rows):
          
          #for each header delete the list element
          keys = list(table.keys())
                    
          for key in keys:
            
            
            
            del table[key][row-index]
          new_index_length = len(table[keys[0]]) - index        
          
      if 'cols' in param:
        cols = param['cols']
      
    
    
    
  #unify.query('meta', 'update', [{'row':9, 'col':'value', 'set':"Yahoo Finance Download"}, {'row':6, 'col':'value', 'set':'yahoo.com'}])
  def query_update(self, table, params):            
    
    for param in params:
      row = param['row']
      col = param['col']
      set_val = param['set']
      table[col][row] = set_val

  #unify.query('data', 'insert', [{'Animal':'Unicorn', 'Size':'Very Large', 'Gender':None, 'Safe As Pet':True, 'Weight In Pounds':1040.2}])
  def query_insert(self, table, params):                
    length = None
    for param in params:
      keys = list(param.keys())
      for key in keys:
        val = param[key]
        col = table[key]
        length = len(col)
        col.append(val)
      #print("Row number {} added".format(length))


    
    


In [70]:
class PyUnify_SQL:
  def __init__(self, json_unify, data_list):    
    self.json_unify = json_unify
    self.data = json_unify['data']
    self.data_list = data_list
    self.filename = None
    self.tableName = None
    self.query = ""
    
  def create(self, name):
    retVal = ""
    drop = "DROP TABLE IF EXISTS " + name + ";\n"
    
    self.query += drop + '\n'
    create = "CREATE TABLE "+name+" ("
    
    self.query += create + '\n'
    

  def values(self, name):
    
    queries = ""
    
    # get data as rows / array
    
    # for each row, create query
    for row_index, row in enumerate(self.data_list[1:]):
      
      query = "INSERT INTO "+name+"("+self.get_column_names()+") VALUES ("      
      vals = ""
      for index, item in enumerate(row):        
        vals += "'"+str(item)+"'"
        if index < len(row)-1:
          vals += ', '
      
      query += vals + ');'
      
      self.query += query + '\n'
    # add to queries
    
  def set_schema(self, schema):
    keys = list(schema.keys())
    
    for key in keys:
      
      row = '\t'+key + ' ' + schema[key] +'\n'
      self.query += row

  def howTo(self, name):
    query = "--To view the column comments, use \\d+ "+name
    query += "\n--To view the table comments, use \\d+ \n\n"
    self.query += query

  def get(self, params):
    
    name = params['table']  
    self.tableName = name
    if 'file' in params:
      self.filename = params['file']
    
    retVal = ""
    self.howTo(name)
    self.create(name)
    

    if 'schema' in params:
      self.set_schema(params['schema'])
    else:
      self.get_column_names_and_types()
    end_parenthesis = ');\n'
    
    self.query += end_parenthesis + '\n'
    self.values(name)
    self.add_table_comments()
    self.add_column_comments()
    
    print(self.query)
    if self.filename != None:
      with open(self.filename, "w") as outfile:
        outfile.write(self.query) 
    #return retVal

  def add_column_comments(self):
    self.query += '\n'
    headers = self.json_unify['concepts']['headers']
    desc = headers['description']
    for index, header in enumerate(headers['name']):    
      
      query = 'COMMENT ON COLUMN '+str(self.tableName)+'."'+header+'" IS \''+str(desc[index])+'\';\n'
      self.query += query
    
  # format in PSQL is: COMMENT ON TABLE animals IS 'test';
  def add_table_comments(self):
    self.query += '\n'
    meta = self.json_unify['meta']
    value = meta['value']
    comments = ""
    for index, key in enumerate(meta['key']):    
      comments += key+': '+str(value[index])+'\n'
      query = 'COMMENT ON TABLE '+str(self.tableName)+' IS \''+comments+'\';\n'
    self.query += query


  def get_column_names(self):
    keys = list(self.data.keys())
    rows = ""
    for index, key in enumerate(keys):
      if index < len(keys)-1:
        line = '"'+key+'", '
      else:
        line = '"'+key+'"'+' '
      rows += line
    return rows

  def get_types(self):
    types = self.json_unify['concepts']['headers']['type']
    
    retVal = []
    for t in types:
      if t == "string" or t == "text":
        retVal.append('text')
      elif t == None or t == 'None':
        retVal.append('text')
      elif t == "bool":
        retVal.append('boolean')
      elif t == "float":
        retVal.append('double precision')
      elif t == "int":
        retVal.append('bigint')
      elif t == "json":
        retVal.append('jsonb')
    return retVal
      

  def get_column_names_and_types(self):
    keys = list(self.data.keys())
    types = self.get_types()
    
    rows = ""
    for index, key in enumerate(keys):
      if index < len(keys)-1:
        data_type = types[index]+','
      else:
        data_type = types[index]
      key_name = '"'+key+'"'+' '
      line = "\t"+key_name+' '+data_type+" "
      rows += line
      
      self.query += line + '\n'
      
    

  

  

In [71]:
class pyunify_table_render:

  def __init__(self):
    pass

  def print(self, header, df):
    from IPython.display import display, HTML
    h3 = "<h3 style='margin-bottom:20px; padding-bottom:0px'>{0}</h3>".format(header)        
    spacer = '<div style="margin-bottom:0px">&nbsp;</div>'
    display(HTML(h3), df, HTML(spacer))

  def get(self, header, df=None):    
    from IPython.display import display, HTML
    h3 = "<h3 style='margin-bottom:20px; padding-bottom:0px'>{0}</h3>".format(header)        
    empty = '<i style="margin-bottom:0px;">No {0} to display.</i>'.format(header.lower())
    if not df.empty:      
      spacer = '<div style="margin-bottom:0px">&nbsp;</div>'
      display(HTML(h3), df, HTML(spacer))  
    else:      
      spacer = '<div style="margin-bottom:0px">&nbsp;</div>'
      display(HTML(h3), df, HTML(spacer))

In [72]:
# the PyUnify object
class PyUnify:
  def __init__(self, json_unify=None, params=None):   
    
    self._init_dependencies()
    self.table_render = pyunify_table_render()

    #creates an initialized json_unify wrapper object
    self._params = params
    self._json_unify = pyunify_json_unify(json_unify, self.json).get()      
    
    self._md = PyUnify_MD(self.json, self._json_unify, self.to_json_pretty, self.to_json_string)  
    self._meta = pyunify_meta(self._json_unify['meta'], self.pd, self.table_render)    
    self._lineage = pyunify_lineage(self._json_unify['lineage'], self.pd, self.table_render)    
    self._data = PyUnify_Data(self._json_unify['data'], self.pd, self.table_render, params=self._params)   
    orientation = self._data.get_orientation()
    self._governance = PyUnify_Governance(self._json_unify['governance'], self.pd, self.table_render)        
    self._concepts = PyUnify_Concepts(self._json_unify['concepts'], self.pd, self.table_render, data = self._data)
    self._json_filepath = None
    
    
    if params:
      if 'json_filepath' in params:
        self._json_filepath = params['json_filepath']     
      if 'init_command' in params:        
        event = {'command':params['init_command'], 'params':None, 'date':self.now()}
        self.lineage(event)
    else:
      event = {'command':'pyunify object initilization', 'params':None, 'date':self.now()}
      self.lineage(event)
    self._check_if_instantiated_with_file(json_unify)

  def _check_if_instantiated_with_file(self, name):
    
    if type(name) == type(' '):      
      if self._is_filename_valid_for_import(name) == True:               
        pass
        self.load(name)        
        source_row = self.query('meta', 'select', {'col':'key', 'target':'source'})        
        row = source_row['row']        
        self.query('meta', 'update', [{'row':row, 'col':'value', 'set':name}])
        
        

  
  def _is_filename_valid_for_import(self, name):
    if name.endswith(".csv") or name.endswith(".json"):
        return True        
    else:
        return False
  
  def _is_filename_from_url(self, name):
    if name.startswith("https://") or name.startwith("http://"):      
      return True
    else:
      return False
  
  def data(self):
    return self._json_unify['data']

  def meta(self):
    return self._json_unify['meta']

  def pretty(self, to_print=None):
    if to_print == None:
      self.pp.pprint(self._json_unify)
    else:      
      self.pp.pprint(to_print)
      

  def update(self, dfs={}):    
    for key in dfs:         
      df = dfs[key].to_dict()
      if key == 'data' or key == 'meta':        
        self._json_unify[key] = df
      if key == 'sla' or key == 'requirements':
        self._json_unify['governance'][key] = df
      if key == 'lineage':        
        print(df)
        self._json_unify[key] = df
        print(self.pretty())
    self.__init__(self._json_unify, {'json_filepath':self._json_filepath, 'init_command':'JSON-Unify Object Updated'})

  def save(self, dfs={}, path=None):
      if dfs != {}:
        self.update(dfs)      
      self.write(path)

  def check_file_type(self,filename):    
    if (filename.endswith(".csv")):
        return '.csv'
    elif (filename.endswith(".json")):
        return '.json'
    else:
        print("Test File is not in correct format")

  def load(self, path, to_table=False):
    
    self._json_filepath = path    
    ft = self.check_file_type(path)  
    url = self._is_filename_from_url(path)           
    if ft == '.json':      
      with open(path) as f: 
        j = self.json.load(f)            
    elif ft == '.csv' and url == True:    
      
      df = self.pd.read_csv(path)      
      
      split = df.to_dict(orient='split')
      
      j = self.load_from_dict_split(split)
      
    elif ft == '.csv':
      
      with open(path) as csvfile:        
        df = self.pd.read_csv(path)
        split = df.to_dict(orient='split')
        # need { "column":[]
        j = self.load_from_dict_split(split)
    self.__init__(j, params={'json_filepath':path, 'init_command':'Source file loaded'})      
    if to_table:
      self.table()
    
  

  def update_filepath(self, path):
    if path == None and self._json_filepath != None:
      path= self._json_filepath    
    elif self._json_filepath == None and path==None:      
      print("file path must be sent as argument")
    elif path != None:
      self._json_filepath = path
    return self._json_filepath

  def write(self, path=None):    
    _path = self.update_filepath(path)
    json_object = self.to_json_pretty()        
    with open(_path, "w") as outfile:
        outfile.write(json_object)    

  def md_write(self, path, md):

    with open(path, "w") as outfile:
      outfile.write(md)    
  
  def json_write(self, path, content):
    with open(path, "w") as outfile:
      outfile.write(content)    

  def _init_dependencies(self):
    import pandas as pd
    self.pd = pd
    self.pd.set_option('max_colwidth', 1024)    

    import json as json
    self.json = json

    import pprint as pprint
    self.pp = pprint.PrettyPrinter(indent=4)
  
    # using datetime module
    import datetime as datetime;  
    self.datetime = datetime

  def now(self):
     return str(self.datetime.datetime.now())
    

  def sample(self, keys=['concepts', 'data', 'meta', 'lineage'], render=True):
    # if user sends one param of False or True
    if type(keys) == type(True):
      render = keys      
      keys=['concepts', 'data', 'meta', 'lineage']      
      
    obj = {}
    for key in keys:
      if key == 'data':
        data = {
          "Animal":['Mouse', 'Cat', 'Dog', 'Horse', 'Lion'],
          "Size":['Tiny', 'Small', 'Medium', 'Very Large', 'Large'],
          "Gender":['Female', 'Female', 'Male', 'Female', 'Male'],
          "Safe As Pet":[True, True, True, True, False],
          "Weight In Pounds":[0.5, 9, 100, 930, 420]
        }
        obj['data'] = data
      elif key == 'meta':
        
        meta = {
            "key": ['contract', 'description', 'authors', 'contact', 'specification', 'id', 'name', 'markdown'],
            "value": [
                        None,
                        'This is a sample dataset of Pets to test PyUnify functionality',
                        'Ron Itelman, Cameron Prybol, Stephanie Bankes',
                        'ron@intelligence.ai',
                        'https://github.com/JSON-UNIFY',
                        '1',
                        None,
                        'https://github.com/JSON-UNIFY/Data-Contracts/blob/main/1_Pet_PyUnify_Sample_Data.MD'
                      ]
        }
        obj['meta'] = meta
      elif key == 'concepts':
        
        concepts = {
          "headers": {
              "name":['Animal', 'Size', 'Gender', 'Safe As Pet', 'Weight In Pounds'],
              'type':['string', 'string', None, 'bool', 'float'],
              'description':['The name of the animal', None, 'The gender of the animal used in the weight and size fields', 'Whether the animal is safe to have as a pet', 'The average weight of the animal']              
          },
          "values":{
              'entity': [],
              'header name': [],
              'type': [],
              'description':[]
          },
          "features":{
              "row":[],
              "column":[],
              "table":[],
              "relation":[],
              'description':[]
          }
        }
        obj['concepts'] = concepts
      elif key == 'lineage':
        lineage = {
            "command":[],
            "params":[],
            "date":[]
        }
        obj['lineage'] = lineage
    
    self.__init__(obj) 
    if render:
      self.table()

  # takes in a JSON string argument, and returns a Python dictionary
  def dict_from_json(self, json_string):    
    return dict(self.json.loads(json_string))
    
  #prints pretty JSON and then returns theJSON
  def to_json_pretty(self, keys=[]):
    if keys == []:
      retVal= self.json.dumps(self._json_unify, indent=4)
    else:
      for key in keys:
        retVal= ''
        retVal += self.json.dumps(self._json_unify[key], indent=4)
    return retVal

  #returns unformatted JSON
  def to_json_string(self, keys=[]):
    if keys == []:
      retVal =  str(self.json.dumps(self._json_unify))    
      return retVal
    else:
      for key in keys:
        retVal = ''
        retVal +=  str(self.json.dumps(self._json_unify[key]))  
      return retVal  

  def flag(self, keys=[]):
    self.table(keys, params={'render':True, 'flag':True})

  def to_dict(self, keys=[]):
    s = self.to_json_string(keys)
    return self.dict_from_json(s)

  #returns a data frame
  def df(self, key):
    
    if key == 'data':           
      df = self._data.df()
    if key == 'meta':
      df = self._meta.df()
    if key == 'sla':
      df = self._governance.df_sla()
    if key == 'headers':
      df = self._concepts.df_headers()
    if key == 'values':
      df = self._concepts.df_values()
    if key == 'features':
      df = self._concepts.df_features()
    if key == 'lineage':
      df = self._lineage.df()
    if key == 'concepts':      
        print("Please user 'headers' for Concepts - Headers, 'values' for Concepts - Values, and 'features' for Concepts - Features")        
        df = None
    if key == 'governance':
        print("Please user 'sla' for the Governance - SLA")    
        df = None
    return df

  def style_cells_flag(self, val, params={'background-color':'green', "color":'white'}):
    if self.pd.isnull(val): 
      return 'background-color: {0}; color:{1}'.format(params['background-color'], params['color'])
    else:
      return ''

 
  
  
    

  #visualizes a table
  def table(self, keys=[], params={'render':True, 'flag':False}):
    if params:
      if 'render' in params:
        render = params['render']
      else:
        render = True
      if 'flag' in params:
        flag = params['flag']
      else:
        flag = False
    if keys == []:
      keys = ['concepts', 'data', 'meta']
      #keys = ['concepts', 'data', 'meta', 'governance', 'compute', 'custom', 'lineage', 'project']
    for key in keys:
      if key == 'data':           
        df = self._data.table(render, flag)        
      if key == 'meta':
        self._meta.table(render, flag)
      if key == 'concepts':
        self._concepts.table(render, flag)
      if key == 'headers':
        self._concepts.table_headers(render, flag)
      if key == 'features':
        self._concepts.table_features(render, flag)
      if key == 'values':
        self._concepts.table_values(render, flag)
      if key == 'compute':
        self._compute.table(render, flag)
      if key == 'custom':
        self._custom.table(render, flag)
      if key == 'governance':
        self._governance.table(render, flag)
      if key == 'sla':        
        self._governance.table_sla(render, flag)
      if key == 'lineage':
        self._lineage.table(render, flag)
      if key == 'project':
        self._project.table(render, flag)

  def all(self, keys=[], params={'render':True, 'flag':False}):
    if params:
      if 'render' in params:
        render = params['render']
      else:
        render = True
      if 'flag' in params:
        flag = params['flag']
      else:
        flag = False
    if keys == []:
      keys = ['concepts', 'data', 'meta', 'governance', 'lineage']
      #keys = ['concepts', 'data', 'meta', 'governance', 'compute', 'custom', 'lineage', 'project']
    for key in keys:
      if key == 'data':           
        df = self._data.table(render, flag)        
      if key == 'meta':
        self._meta.table(render, flag)
      if key == 'concepts':
        self._concepts.table(render, flag)
      if key == 'compute':
        self._compute.table(render, flag)
      if key == 'custom':
        self._custom.table(render, flag)
      if key == 'governance':
        self._governance.table(render, flag)
      if key == 'sla':
        self._governance.table_sla(render, flag)
      if key == 'lineage':
        self._lineage.table(render, flag)
      if key == 'project':
        self._project.table(render, flag)
  

  #about is a flag whether to include documentation about PyUnify
  def md(self, params={'about':False, 'title':None, 'keys':[], 'file':None}, print_md=False):    
    
    data_frames = [        
        {
            'header': 'Concepts - Headers',
            'description': 'Any documentation or the concepts used in the headers of your data table.',
            'table':self._concepts.table(False)['headers']
        },
        {
            'header': 'Concepts - Values',
            'description': 'Any documentation or the concepts used in the values of a column in your data table.',
            'table':self._concepts.table(False)['values']
        }, 
        {
            'header': 'Concepts - Features',
            'description': 'Any documentation or the concepts used in the features of a column in your data table.',
            'table':self._concepts.table(False)['values']
        },        
        {
            'header': 'Meta',
            'description': 'Any documentation of the metadata used to describe and discover your data.',
            'table':self._meta.table(False)
        },        
        {
            'header': 'Governance - SLA',
            'description': 'Any documentation of any SLA information.',
            'table':self._governance.table(False)['sla']
        },
        {
            'header': 'Lineage',
            'description': 'Data lineage information',
            'table':self._lineage.table(False)
        },
        {
            'header': 'Data',
            'description': 'The data of your JSON-Unify object.',
            'table':self._data.table(False)
        }        
        
                
    ]
    if 'about' not in params:
      params['about'] = False
    if 'title' not in params:
      params['title'] = None
    if 'keys' not in params:
      params['keys'] = []
    
    event = {'command':'Markdown file created', 'params':None, 'date':self.now()}
    self.lineage(event)
    if params['file'] == None:
      return self._md.get(data_frames, params)
    elif type(params['file']) == type(True):      
      if self._json_filepath != None:
        self.md_write(path=self._json_filepath+'.md', md=self._md.get(data_frames, params, False))
    elif type(params['file']) == type(' '):      
      self.md_write(path=params['file'], md=self._md.get(data_frames, params, False))
    if print_md == True:
      print('true wtf')
      return self._md.get(data_frames, params)
    

  

  def __str__(self):    
    return str(self._json_unify)

  def __repr__(self):    
    return str(self._json_unify) 

  def lineage(self, event):
    if self._json_unify['lineage'] != {}:
      orig = self._json_unify['lineage']    
      orig['command'].append(event['command'])
      orig['params'].append(event['params'])
      orig['date'].append(event['date'])    

  def load_from_dict_split(self, split):
    #print(split)
    columns = split['columns']
    rows = split['data']
    #print(columns)
    #print(rows))
    orig = {
        'columns':columns,
        'rows':rows
    }
    #converted_data = {'data': {}}
    
    # Get the input data in the appropriate format
    data = {'orig':{ 'columns': columns, 'rows': rows}}

    # Create an empty dictionary for the output data
    output = {'data':{}}

    # Loop through the columns and rows in the input data and populate the output data
    for column in data['orig']['columns']:
      output['data'][column] = []

    for row in data['orig']['rows']:
      for i, column in enumerate(data['orig']['columns']):
        output['data'][column].append(row[i])

    # Print the output data
    return output

  
  # unify.query('meta', {'insert':['a key', 'a column']})
  def query_insert(self, key, params):
    cmd = list(params.keys())[0]
    exec = params[cmd]    
    if key == 'data' or key == 'meta' or key == 'lineage':
      obj = self._json_unify[key]      
      if type(exec) == type(dict()):
        for header in exec:          
          obj[header].append(exec[header])
      elif type(exec) == type(list()):          
        for index, executable in enumerate(exec):            
          col = list(obj.keys())[index]              
          arr = obj[col]
          arr.append(executable)            
                      
    elif key == 'sla':
      obj = self._json_unify['governance'][key]             
      if type(exec) == type(dict()):
        for header in exec:          
          obj[header].append(exec[header])
      elif type(exec) == type(list()):          
        for index, executable in enumerate(exec):            
          col = list(obj.keys())[index]              
          arr = obj[col]
          arr.append(executable)            
        
    elif key == 'headers' or key == 'values' or key == 'features':
      obj = self._json_unify['concepts'][key]             
      if type(exec) == type(dict()):
        for header in exec:          
          obj[header].append(exec[header])
      elif type(exec) == type(list()):          
        for index, executable in enumerate(exec):            
          col = list(obj.keys())[index]              
          arr = obj[col]
          arr.append(executable)


    
  


  def query(self, key, cmd, params):
    event = {'command':'unify.query()', 'params':key+' '+cmd+' '+str(params), 'date':self.now()}
    self.lineage(event) 
    return PyUnify_Query(self._json_unify).query(key, cmd, params)
  
          
  

  
           

  def quality(self, keys=['meta', 'headers']):        
    for key in keys:
      df = unify.df(key)    
      df = df.style.applymap(self.quality_style)      
      self.table_render.print('Data Quality Check For {0}'.format(key), df)
  
  def quality_style(self, val):            
    if val == None or val == 'None':
      return 'background-color: {0}; color:{1}; border:1px solid #fff; font-weight:bold'.format('rgba(220,0,0,0.80)', 'white')    
    else:      
      return ''

  def json_schema(self, params={'file':None}):
    
    
    schema = PyUnify_Json_Schema(self._json_unify['concepts']['headers'], self.pretty)
    
    
    if 'file' not in params or params['file'] == None:      
      
      #return self.json.dumps(schema.get(), indent=4)
      schema.get()

    #check if filename was passed
    
    elif type(params['file']) == type(True) and params['file'] == True:            
      
      if self._json_filepath != None:    
        
        path=self._json_filepath+'.json-schema.json'
        content = self.json.dumps(schema.get(), indent=4)
        self.json_write(path, content)
    elif type(params['file']) == type(' '):          
      
      content = self.json.dumps(schema.get(), indent=4)
      self.json_write(params['file'], content)
    else:
      print('An unknown error has occured in json_schema()')
        
    
    
  def json_ld(self, json_format=True):
    ld = pyunify_json_ld(self._json_unify['json-ld'],self._data, self.pd, self.table_render)
    return ld.get()
        

  

  def help(self, params=None):
    pyunify_help(params)
    

  def sql(self, params):    
    return PyUnify_SQL(self._json_unify, self._data.to_list()).get(params)


  def contract(self, params=None, print_md=True):
    
    if params != None:      
      if 'catalog' in params:       
        if 'catalog' not in self._json_unify['meta']['key']: 
          self._json_unify['meta']['key'].append('catalog')
          self._json_unify['meta']['value'].append(params['catalog'])
        else:
          i = self._json_unify['meta']['key'].index('catalog')
          self._json_unify['meta']['key'][i] = params['catalog']
      if 'contract' in params: 
        if 'contract' not in self._json_unify['meta']['key']:        
          self._json_unify['meta']['key'].append('contract')
          self._json_unify['meta']['value'].append(params['contract'])
        else:
          i = self._json_unify['meta']['key'].index('contract')
          self._json_unify['meta']['value'][i] = params['contract']
      if 'description' in params: 
        if 'description' not in self._json_unify['meta']['key']:        
          self._json_unify['meta']['key'].append('description')
          self._json_unify['meta']['value'].append(params['description'])
        else:
          i = self._json_unify['meta']['key'].index('description')
          self._json_unify['meta']['value'][i] = params['description']
      if 'data' in params:
        if params['data'] == False:
          
          self._json_unify['data'] = {}
          
      if 'lineage' in params:        
        if params['lineage'] == False:
          
          self._json_unify['lineage']={}
      if 'filename' in params:
        filename = params['filename']
        if 'filename' in params:
          filename = params['filename']
          if filename.endswith('.md'):
            #self._md = pyunify_md(self.json, self._json_unify, self.to_json_pretty, self.to_json_string)
            if filename.endswith('.md') == False:
              filename += '.md'
            self.__init__(self._json_unify)
            self._md = pyunify_md(self.json, self._json_unify, self.to_json_pretty, self.to_json_string)  
            self.md({'file':filename}, print_md)                      
          elif filename.endswith('.json'):
            with open(filename, "w") as outfile:
              outfile.write(self.to_json_pretty(self._json_unify))        
      elif 'md' in params:
        if params['md'] == True:
          self.__init__(self._json_unify)
          self._md = pyunify_md(self.json, self._json_unify, self.to_json_pretty, self.to_json_string)  
          self.md(print_md=True)
      

    

# Create sample PyUnify object with sample data


In [73]:
unify = PyUnify('https://raw.githubusercontent.com/JSON-UNIFY/Data-Contracts/main/AAPL.csv')

In [74]:
unify.all()

Unnamed: 0,name,type,description
0,Date,,
1,Open,,
2,High,,
3,Low,,
4,Close,,
5,Adj Close,,
6,Volume,,


Unnamed: 0,entity,header name,type,description


Unnamed: 0,row,column,table,relation,description


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-11-26,159.570007,160.449997,156.360001,156.809998,155.905167,76959800
1,2021-11-29,159.369995,161.190002,158.789993,160.240005,159.315369,88748200
2,2021-11-30,159.990005,165.520004,159.919998,165.300003,164.346176,174048100
3,2021-12-01,167.479996,170.300003,164.529999,164.770004,163.819244,152052500
4,2021-12-02,158.740005,164.199997,157.800003,163.759995,162.815063,136739200
...,...,...,...,...,...,...,...
247,2022-11-18,152.309998,152.699997,149.970001,151.289993,151.289993,74794600
248,2022-11-21,150.160004,150.369995,147.720001,148.009995,148.009995,58724100
249,2022-11-22,148.130005,150.419998,146.929993,150.179993,150.179993,51804100
250,2022-11-23,149.449997,151.830002,149.339996,151.070007,151.070007,58301400


Unnamed: 0,key,value
0,contract,
1,specification,https://github.com/JSON-UNIFY
2,description,
3,source,https://raw.githubusercontent.com/JSON-UNIFY/Data-Contracts/main/AAPL.csv
4,table,
5,query,
6,tags,
7,authors,
8,id,
9,contact,


Unnamed: 0,category,provider,customer,requirement


Unnamed: 0,header,minimum,exclusiveMinimum,maximum,exclusiveMaximum,options,options_default_selected


Unnamed: 0,command,params,date
0,Source file loaded,,2022-12-13 21:58:08.166549
1,unify.query(),"meta select {'col': 'key', 'target': 'source'}",2022-12-13 21:58:08.166652
2,unify.query(),"meta update [{'row': 3, 'col': 'value', 'set': 'https://raw.githubusercontent.com/JSON-UNIFY/Data-Contracts/main/AAPL.csv'}]",2022-12-13 21:58:08.166685


In [75]:
unify.help()

[1mAbout Us[0m

- [1mWho we are and why[0m we started the project:
  https://github.com/pyunify/documentation/blob/main/ABOUT.md

 How to get [1minvolved or support[0m PyUnify & JSON-Unify:
  [GitHub Sponsors link coming soon] 


[1mThe JSON-Unify Specification[0m

- [1mJSON-Unify[0m is the specification that PyUnify implements (Apache 2.0 License):
  https://github.com/JSON-UNIFY 


[1mThe PyUnify Python Package[0m

- The [1mPyUnify[0m source code and documentation (Apache 2.0 License):
  https://github.com/pyunify

- Exploring your [1mdata[0m with PyUnify:
  https://github.com/pyunify/documentation/blob/main/EXPLORE.md

- [1mQueries[0m with PyUnify - update, select, delete, and insert:
  https://github.com/pyunify/documentation/blob/main/QUERIES.md 


[1mFile Imports/Exports With PyUnify[0m

- How to import, export, and work with [1m.md, .csv, .json, and .sql[0m files:
  https://github.com/pyunify/documentation/blob/main/FILES.md


In [76]:
unify.query('headers', 'update', [{"row":0, "col":"type", "set":"string"}, {"row":0, "col":"description", "set":"The data of the transactions"}])
unify.query('headers', 'update', [{"row":1, "col":"type", "set":"float"}, {"row":1, "col":"description", "set":"The price for a share when the market opens"}])
unify.query('headers', 'update', [{"row":2, "col":"type", "set":"float"}, {"row":2, "col":"description", "set":"The highest price for a share during the trading day"}])
unify.query('headers', 'update', [{"row":3, "col":"type", "set":"float"}, {"row":3, "col":"description", "set":"The lowest price for a share during the trading day"}])
unify.query('headers', 'update', [{"row":4, "col":"type", "set":"float"}, {"row":4, "col":"description", "set":"The closing price for a share at the end of the trading day"}])
unify.query('headers', 'update', [{"row":5, "col":"type", "set":"float"}, {"row":5, "col":"description", "set":"The adjusted closing price for a share at the end of the trading day"}])
unify.query('headers', 'update', [{"row":6, "col":"type", "set":"int"}, {"row":6, "col":"description", "set":"The amount of shares that were exchanged on the day"}])

unify.query('meta', 'update', [{"row":7, "col":"value", "set":"Yahoo Finance"}, {"row":10, "col":"value", "set":"Trading data for Apple"}])
unify.query('meta', 'delete', [{"rows":[4,5,6,8,9,11]}])

unify.query('meta', 'insert', [{'key':'documentation', 'value':'https://my_site.com/xyz...'}])

unify.query('sla', 'insert', [{'category':'service response time',	'provider':True,	'customer':False,	'requirement':'All service requests answered within 24 hours'}])
unify.query('features', 'insert', [{'column':'Open', 'row':'3', 'table':'data', 'relation':"Share Price", 'description':'Highest ever'}])
unify.query('features', 'insert', [{'column':'Open', 'row':'3', 'table':'data', 'relation':"News Events", 'description':"Apple announces iPhone X"}])

In [77]:
unify.table(['features'])

Unnamed: 0,row,column,table,relation,description
0,3,Open,data,Share Price,Highest ever
1,3,Open,data,News Events,Apple announces iPhone X


In [78]:
unify.quality()

Unnamed: 0,key,value
0,contract,
1,specification,https://github.com/JSON-UNIFY
2,description,
3,source,https://raw.githubusercontent.com/JSON-UNIFY/Data-Contracts/main/AAPL.csv
4,authors,Yahoo Finance
5,name,Trading data for Apple
6,documentation,https://my_site.com/xyz...


Unnamed: 0,name,type,description
0,Date,string,The data of the transactions
1,Open,float,The price for a share when the market opens
2,High,float,The highest price for a share during the trading day
3,Low,float,The lowest price for a share during the trading day
4,Close,float,The closing price for a share at the end of the trading day
5,Adj Close,float,The adjusted closing price for a share at the end of the trading day
6,Volume,int,The amount of shares that were exchanged on the day


In [79]:
unify.md()

## Concepts - Headers
Any documentation or the concepts used in the headers of your data table.
```
|    | name      | type   | description                                                          |
|---:|:----------|:-------|:---------------------------------------------------------------------|
|  0 | Date      | string | The data of the transactions                                         |
|  1 | Open      | float  | The price for a share when the market opens                          |
|  2 | High      | float  | The highest price for a share during the trading day                 |
|  3 | Low       | float  | The lowest price for a share during the trading day                  |
|  4 | Close     | float  | The closing price for a share at the end of the trading day          |
|  5 | Adj Close | float  | The adjusted closing price for a share at the end of the trading day |
|  6 | Volume    | int    | The amount of shares that were exchanged on the day                  |
```
## Co

In [80]:
unify.sql({'table':'aapl'})

--To view the column comments, use \d+ aapl
--To view the table comments, use \d+ 

DROP TABLE IF EXISTS aapl;

CREATE TABLE aapl (
	"Date"  text, 
	"Open"  double precision, 
	"High"  double precision, 
	"Low"  double precision, 
	"Close"  double precision, 
	"Adj Close"  double precision, 
	"Volume"  bigint 
);

INSERT INTO aapl("Date", "Open", "High", "Low", "Close", "Adj Close", "Volume" ) VALUES ('2021-11-26', '159.570007', '160.449997', '156.360001', '156.809998', '155.905167', '76959800');
INSERT INTO aapl("Date", "Open", "High", "Low", "Close", "Adj Close", "Volume" ) VALUES ('2021-11-29', '159.369995', '161.190002', '158.789993', '160.240005', '159.315369', '88748200');
INSERT INTO aapl("Date", "Open", "High", "Low", "Close", "Adj Close", "Volume" ) VALUES ('2021-11-30', '159.990005', '165.520004', '159.919998', '165.300003', '164.346176', '174048100');
INSERT INTO aapl("Date", "Open", "High", "Low", "Close", "Adj Close", "Volume" ) VALUES ('2021-12-01', '167.479996', '170.300

In [81]:
unify.json_schema()

{   '$id': None,
    '$schema': 'https://json-schema.org/draft/2020-12/schema',
    'description': None,
    'properties': {   'Adj Close': {   'description': 'The adjusted closing '
                                                      'price for a share at '
                                                      'the end of the trading '
                                                      'day',
                                       'type': 'float'},
                      'Close': {   'description': 'The closing price for a '
                                                  'share at the end of the '
                                                  'trading day',
                                   'type': 'float'},
                      'Date': {   'description': 'The data of the transactions',
                                  'type': 'string'},
                      'High': {   'description': 'The highest price for a '
                                                 'share dur

In [82]:
unify.json_ld()

[{'@context': 'https://schema.org',
  '@type': None,
  '@id': None,
  'Date': '2021-11-26',
  'Open': 159.570007,
  'High': 160.449997,
  'Low': 156.360001,
  'Close': 156.809998,
  'Adj Close': 155.905167,
  'Volume': 76959800},
 {'@context': 'https://schema.org',
  '@type': None,
  '@id': None,
  'Date': '2021-11-26',
  'Open': 159.570007,
  'High': 160.449997,
  'Low': 156.360001,
  'Close': 156.809998,
  'Adj Close': 155.905167,
  'Volume': 76959800},
 {'@context': 'https://schema.org',
  '@type': None,
  '@id': None,
  'Date': '2021-11-26',
  'Open': 159.570007,
  'High': 160.449997,
  'Low': 156.360001,
  'Close': 156.809998,
  'Adj Close': 155.905167,
  'Volume': 76959800},
 {'@context': 'https://schema.org',
  '@type': None,
  '@id': None,
  'Date': '2021-11-26',
  'Open': 159.570007,
  'High': 160.449997,
  'Low': 156.360001,
  'Close': 156.809998,
  'Adj Close': 155.905167,
  'Volume': 76959800},
 {'@context': 'https://schema.org',
  '@type': None,
  '@id': None,
  'Date': '2