In [None]:
from cow_csvw.csvw_tool import COW
import os
import json
import re
import datetime
import pandas as pd

import ssl

# Replace the ssl module's default HTTPS context factory with the
# "unverified" one when this Python build provides it; otherwise leave the
# default untouched.
# NOTE(review): this switches off HTTPS certificate checking for everything
# that relies on ssl._create_default_https_context in this kernel. Confirm it
# is still needed and remove it if the remote resources can be verified.
if hasattr(ssl, '_create_unverified_context'):
    _create_unverified_https_context = ssl._create_unverified_context
    ssl._create_default_https_context = _create_unverified_https_context


In [None]:
# Load the CSV into a DataFrame and print its column labels, so the names
# can be compared with the keys of the `columns` mapping configured below.
# NOTE(review): `csv_file` is assigned again (same value) in the
# configuration cell; keep both assignments in sync when changing the file.
csv_file = 'books.csv'
df = pd.read_csv(csv_file)
print(df.columns)

In [None]:

# Add or remove namespaces if necessary.
# Every prefix used in a property URL in `columns` must be declared here.
namespaces = {
    "aat": "http://vocab.getty.edu/aat/",
    "bibo": "http://purl.org/ontology/bibo/",
    "cidoc": "http://www.cidoc-crm.org/cidoc-crm/",
    "csvw": "http://www.w3.org/ns/csvw#",
    "dc": "http://purl.org/dc/terms/",
    "dc11": "http://purl.org/dc/elements/1.1/",
    "dcterms": "http://purl.org/dc/terms/",
    "geonames": "http://www.geonames.org/ontology#",
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "schema": "http://schema.org/",
    "skos": "http://www.w3.org/2004/02/skos/core#",
    "xml": "http://www.w3.org/XML/1998/namespace/",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
}

# CSV file that is described by the metadata and converted afterwards.
csv_file = 'books.csv'

# Subjects will be created by appending the identifier to the base_uri
base_uri = 'https://bookandbyte.universiteitleiden.nl/'
# NOTE(review): '{_row}' is presumably COW's row-number placeholder; confirm
# against the COW documentation before changing it.
identifier = '{_row}'

# Columns: CSV column name -> (property URL, datatype).
# A datatype without the 'xsd:' prefix gets that prefix when the metadata is
# built. The order matters: the first key is used as the schema's primaryKey.
columns = {
    'author': ('schema:creator', 'string'),
    'title': ('dcterms:title', 'string'),
    # The year of publication is a date of issue, not a publisher, so it is
    # mapped to dcterms:issued (it was previously dcterms:publisher).
    # Use 'gYear' instead of 'string' if every value is a plain four-digit year.
    'year_of_publication': ('dcterms:issued', 'string'),
    'language': ('schema:inLanguage', 'string'),
}

In [None]:
# Build the CSVW metadata description for `csv_file` and write it to
# '<csv_file>-metadata.json'.
# Uses csv_file, base_uri, identifier, namespaces and columns from the
# configuration cell.

# Make sure the base URI ends with a slash so names can be appended directly.
if not base_uri.endswith('/'):
    base_uri += '/'

# JSON-LD context: the COW context document, the language/base settings and
# the prefix-to-namespace map.
context_dict = {'@language': 'en', '@base': base_uri}
context = [
    "https://raw.githubusercontent.com/CLARIAH/COW/master/csvw.json",
    context_dict,
    namespaces,
]

# Today's date as YYYY-MM-DD, recorded as the modification date.
date = datetime.date.today().isoformat()

# One column description per entry in `columns`.
columns_list = []
for column, spec in columns.items():
    property_url, datatype = spec[0], spec[1]
    # Datatypes may be given with or without the 'xsd:' prefix.
    if not datatype.startswith('xsd:'):
        datatype = 'xsd:' + datatype
    columns_list.append({
        '@id': f'{base_uri}{column}',
        'propertyUrl': property_url,
        'datatype': datatype,
        'name': column,
        'dc:description': column,
        'titles': [column],
    })

# NOTE(review): the first configured column is used as primaryKey; confirm
# that its values are unique, or that COW does not enforce the key.
tableschema = {
    "columns": columns_list,
    "aboutUrl": str(identifier),
    'primaryKey': list(columns.keys())[0],
}

json_dict = {
    '@id': csv_file,
    '@context': context,
    # The metadata must point at the CSV that is actually converted; these
    # two entries used to be hard-coded to a different file name.
    'url': csv_file,
    'dialect': {
        "delimiter": ",",
        "encoding": "utf-8",
        "quoteChar": "\"",
    },
    'dc:title': csv_file,
    'dcat:keyword': [],
    'dc:publisher': {
        "schema:name": "CLARIAH Structured Data Hub - Datalegend",
        "schema:url": {
            "@id": "http://datalegend.net"
        },
    },
    'dc:license': {
        "@id": "http://opendefinition.org/licenses/cc-by/"
    },
    'dc:modified': {
        "@value": date,
        "@type": "xsd:date",
    },
    'tableSchema': tableschema,
}

json_string = json.dumps(json_dict, indent=3)

# Write the metadata next to the CSV as '<csv_file>-metadata.json'.
out_file = f'{csv_file}-metadata.json'

with open(out_file, 'w', encoding='utf-8') as file:
    file.write(json_string)
    

In [None]:
# Run COW in 'convert' mode on the CSV file, labelling the result with the
# dataset name 'My dataset'.
# NOTE(review): presumably COW picks up the '<csv_file>-metadata.json' file
# written by the previous cell and writes the RDF output next to the CSV;
# confirm against the COW documentation.
COW(mode='convert', 
    files= [csv_file] ,
    dataset='My dataset' )
