## Read CSV file and return list of shapes (as dicts)

Source: [csvreader.py](https://github.com/tombaker/csv2shex/blob/master/csv2shex/csvreader.py)

Shape elements
- shapeID (URI)
- shapeClosed (later)
- shapeLabel (later: annotation)
- start (later)

Triple constraint elements
- propertyID (URI)
- propertyLabel (later: annotation)
- mandatory
- repeatable
- valueNodeType
- valueDataType (URI)
- valueConstraint (URI - maybe)
- valueConstraintType
- valueShape (URI)
- note (later: annotation)

```
def csvreader(csvfile):
    """Return the list of CSV shapes found in *csvfile*, one entry per shape.

    The work is done in three stages: read the raw rows, correct them,
    then group the corrected rows into shapes.
    """
    raw_rows = _get_csvrow_dicts_list(csvfile)
    corrected_rows = _get_corrected_csvrows_list(raw_rows)
    return _get_csvshape_dicts_list(corrected_rows)
```

In [3]:
from csv2shex.csvreader import (
    csvreader, 
    _get_csvrow_dicts_list,
    _get_corrected_csvrows_list, 
    _get_csvshape_dicts_list, 
)
from csv2shex.csvrow import CSVRow
from csv2shex.utils import pprint_df
import pandas as pd

In [5]:
# Reads CSV file, returns list of dicts - one per row.
# Values are still the raw CSV strings at this stage ('Y', 'N', ...);
# empty cells come back as ''.
csvrow_dicts_list = _get_csvrow_dicts_list('book_ap.csv')
csvrow_dicts_list

[{'shapeID': 'http://example.org/book',
  'propertyID': 'http://purl.org/dc/terms/creator',
  'mandatory': 'Y',
  'repeatable': 'N',
  'valueNodeType': '',
  'valueDataType': '',
  'valueConstraint': '',
  'valueConstraintType': '',
  'valueShape': 'http://example.org/author'},
 {'shapeID': '',
  'propertyID': 'http://purl.org/dc/terms/subject',
  'mandatory': '',
  'repeatable': '',
  'valueNodeType': '',
  'valueDataType': '',
  'valueConstraint': 'http://id.loc.gov/',
  'valueConstraintType': 'IriStem',
  'valueShape': ''},
 {'shapeID': 'http://example.org/author',
  'propertyID': 'http://xmlns.com/foaf/0.1/name',
  'mandatory': '',
  'repeatable': '',
  'valueNodeType': 'literal',
  'valueDataType': 'http://www.w3.org/2001/XMLSchema#string',
  'valueConstraint': '',
  'valueConstraintType': '',
  'valueShape': ''}]

For each row encountered:
- if the row has a shapeID but no propertyID
  - remembers the shapeID in shapeids_list
- if the row has a propertyID
  - instantiates a CSVRow
    - if the CSVRow also has a shapeID, uses it
    - otherwise gets the shapeID from shapeids_list
    - otherwise uses the default shapeID
- normalizes and validates the row

In [9]:
# Corrects the raw row dicts. Judging by the output shown for this cell:
# a blank shapeID is filled in with the shape of the preceding row,
# the 'Y'/'N'/'' flags (mandatory, repeatable) become booleans, and the
# shapeLabel, shapeClosed, propertyLabel and note keys are added.
corrected_csvrows_list = _get_corrected_csvrows_list(csvrow_dicts_list)
corrected_csvrows_list

[{'shapeID': 'http://example.org/book',
  'shapeLabel': None,
  'shapeClosed': False,
  'propertyID': 'http://purl.org/dc/terms/creator',
  'propertyLabel': None,
  'mandatory': True,
  'repeatable': False,
  'valueNodeType': '',
  'valueDataType': '',
  'valueConstraint': '',
  'valueConstraintType': '',
  'valueShape': 'http://example.org/author',
  'note': None},
 {'shapeID': 'http://example.org/book',
  'shapeLabel': None,
  'shapeClosed': False,
  'propertyID': 'http://purl.org/dc/terms/subject',
  'propertyLabel': None,
  'mandatory': False,
  'repeatable': False,
  'valueNodeType': '',
  'valueDataType': '',
  'valueConstraint': 'http://id.loc.gov/',
  'valueConstraintType': 'IriStem',
  'valueShape': '',
  'note': None},
 {'shapeID': 'http://example.org/author',
  'shapeLabel': None,
  'shapeClosed': False,
  'propertyID': 'http://xmlns.com/foaf/0.1/name',
  'propertyLabel': None,
  'mandatory': False,
  'repeatable': False,
  'valueNodeType': 'literal',
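  'valueDataType': 'http://www.w3.org/2001/XMLSchema#string',
  'valueConstraint': '',
  'valueConstraintType': '',
  'valueShape': '',
  'note': None}]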

In [8]:
# Groups the corrected rows into one CSVShape per shapeID; each shape carries
# its rows (minus the shape-level keys) in tripleconstraints_list.
# In the output shown for this cell only the first shape has start=True.
csvshapes_list = _get_csvshape_dicts_list(corrected_csvrows_list)
csvshapes_list

[CSVShape(shapeID='http://example.org/book', shapeLabel=None, shapeClosed=None, start=True, tripleconstraints_list=[{'propertyID': 'http://purl.org/dc/terms/creator', 'propertyLabel': None, 'mandatory': True, 'repeatable': False, 'valueNodeType': '', 'valueDataType': '', 'valueConstraint': '', 'valueConstraintType': '', 'valueShape': 'http://example.org/author', 'note': None}, {'propertyID': 'http://purl.org/dc/terms/subject', 'propertyLabel': None, 'mandatory': False, 'repeatable': False, 'valueNodeType': '', 'valueDataType': '', 'valueConstraint': 'http://id.loc.gov/', 'valueConstraintType': 'IriStem', 'valueShape': '', 'note': None}]),
 CSVShape(shapeID='http://example.org/author', shapeLabel=None, shapeClosed=None, start=False, tripleconstraints_list=[{'propertyID': 'http://xmlns.com/foaf/0.1/name', 'propertyLabel': None, 'mandatory': False, 'repeatable': False, 'valueNodeType': 'literal', 'valueDataType': 'http://www.w3.org/2001/XMLSchema#string', 'valueConstraint': '', 'valueConstraintType': '', 'valueShape': '', 'note': None}])]

In [8]:
from ShExJSG.SchemaWithContext import Schema
from pyjsg.jsglib import loader
from pyjsg.jsglib.jsg_array import JSGArray
from pyjsg.jsglib.loader import is_valid, StringIO
from pyshex.utils.schema_loader import SchemaLoader
from typing import cast, Union, List, Optional
from ShExJSG import ShExC, ShExJ
from ShExJSG.ShExJ import (
    EachOf,
    IRIREF,
    IriStem,
    NodeConstraint,
    Shape,
    TripleConstraint,
    shapeExpr,
)
from csv2shex.mkshex import generate_node_constraint, add_statement, shape_to_shex

NameError: name 'CSVShape' is not defined

In [5]:
# Load the same CSV file into a pandas DataFrame for a tabular view.
# skip_blank_lines=True is already the pandas default; it is spelled out here.
df = pd.read_csv("book_ap.csv", skip_blank_lines=True)

In [6]:
# Display the DataFrame using the pprint_df helper imported from csv2shex.utils.
pprint_df(df)

Unnamed: 0,shapeID,propertyID,mandatory,repeatable,valueNodeType,valueDataType,valueConstraint,valueConstraintType,valueShape
0,http://example.org/book,http://purl.org/dc/terms/creator,Y,N,,,,,http://example.org/author
1,,http://purl.org/dc/terms/subject,,,,,http://id.loc.gov/,IriStem,
2,http://example.org/author,http://xmlns.com/foaf/0.1/name,,,literal,http://www.w3.org/2001/XMLSchema#string,,,
