## Tutorial for CSVtoRDF

https://stackoverflow.com/questions/43524943/creating-rdf-file-using-csv-file-as-input

In [1]:
from rdflib import Graph, Literal, RDF, URIRef, Namespace #basic RDF handling
from rdflib.namespace import FOAF, XSD #most common namespaces
import urllib.parse #for parsing strings to URI's
import pandas as pd

In [5]:
url='https://raw.githubusercontent.com/KRontheWeb/csv2rdf-tutorial/master/example.csv'
df=pd.read_csv(url,sep=";",quotechar='"')

In [6]:
df

Unnamed: 0,Name,Address,Place,Country,Age,Hobby,Favourite Colour
0,John,Dam 52,Amsterdam,The Netherlands,32,Fishing,Blue
1,Jenny,Leidseplein 2,Amsterdam,The Netherlands,12,Dancing,Mauve
2,Jill,52W Street 5,Amsterdam,United States of America,28,Carpentry,Cyan
3,Jake,12E Street 98,Amsterdam,United States of America,42,Ballet,Purple


In [7]:
g = Graph()
ppl = Namespace('http://example.com/people/')
loc = Namespace('http://mylocations.org/addresses/')
schema = Namespace('http://schema.org/')

In [8]:
for index, row in df.iterrows():
    g.add((URIRef(ppl+row['Name']), RDF.type, FOAF.Person))
    g.add((URIRef(ppl+row['Name']), URIRef(schema+'name'), Literal(row['Name'], datatype=XSD.string) ))
    g.add((URIRef(ppl+row['Name']), FOAF.age, Literal(row['Age'], datatype=XSD.integer) ))
    g.add((URIRef(ppl+row['Name']), URIRef(schema+'address'), Literal(row['Address'], datatype=XSD.string) ))
    g.add((URIRef(loc+urllib.parse.quote(row['Address'])), URIRef(schema+'name'), Literal(row['Address'], datatype=XSD.string) ))

In [10]:
print(g.serialize(format='turtle'))
g.serialize('example.ttl',format='turtle')

@prefix ns1: <http://xmlns.com/foaf/0.1/> .
@prefix ns2: <http://schema.org/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<http://example.com/people/Jake> a ns1:Person ;
    ns2:address "12E Street 98"^^xsd:string ;
    ns2:name "Jake"^^xsd:string ;
    ns1:age 42 .

<http://example.com/people/Jenny> a ns1:Person ;
    ns2:address "Leidseplein 2"^^xsd:string ;
    ns2:name "Jenny"^^xsd:string ;
    ns1:age 12 .

<http://example.com/people/Jill> a ns1:Person ;
    ns2:address "52W Street 5"^^xsd:string ;
    ns2:name "Jill"^^xsd:string ;
    ns1:age 28 .

<http://example.com/people/John> a ns1:Person ;
    ns2:address "Dam 52"^^xsd:string ;
    ns2:name "John"^^xsd:string ;
    ns1:age 32 .

<http://mylocations.org/addresses/12E%20Street%2098> ns2:name "12E Street 98"^^xsd:string .

<http://mylocations.org/addresses/52W%20Street%205> ns2:name "52W Street 5"^^xsd:string .

<http://mylocations.org/addresses/Dam%2052> ns2:name "Dam 52"^^xsd:string .

<http://mylocations.org/addre

<Graph identifier=N42150f30038f4cdaa20c991a4360946a (<class 'rdflib.graph.Graph'>)>

In [1]:
from rdflib import Graph, Literal, RDF, URIRef, Namespace #basic RDF handling
from rdflib.namespace import CSVW, DC, DCAT, DCTERMS, DOAP, FOAF, ODRL2, ORG, OWL, \
                           PROF, PROV, RDF, RDFS, SDO, SH, SKOS, SOSA, SSN, TIME, \
                           VOID, XMLNS, XSD
import urllib.parse #for parsing strings to URI's
import pandas as pd
from pathlib import Path

main_path = Path().absolute().parent
data_path = main_path / 'data'

In [35]:
# full
df = pd.read_csv(data_path / 'AccountRDF.csv', encoding='utf-8')
ns_acc = Namespace('http://fsqa.com/acc#')
namespace_dict = {
    'acc': ns_acc, 'time': TIME, 'rdf': RDF, 'rdfs': RDFS, None: ''
}
g = Graph()
g.bind('rdf', RDF)
g.bind('time', TIME)
g.bind('acc', ns_acc, override=True)
for index, row in df.iterrows():
    s_ns, s = row['subject'].split(':')
    p_ns, p = row['predicate'].split(':')
    if len(row['object'].split(':')) < 2:
        o_ns, o = None, row['object']
    else:
        o_ns, o = row['object'].split(':')
    
    if o_ns is None:
        g.add( (URIRef(namespace_dict[s_ns]+s), URIRef(namespace_dict[p_ns]+p), Literal(o, datatype=XSD.string)) )
    else:
        g.add( (URIRef(namespace_dict[s_ns]+s), URIRef(namespace_dict[p_ns]+p), URIRef(namespace_dict[o_ns]+o)) )

In [36]:
g.serialize(data_path / 'AccountRDF.ttl', format='turtle')

<Graph identifier=N35b60fcf28484699b962e16d48526170 (<class 'rdflib.graph.Graph'>)>

In [37]:
g.serialize(data_path / 'AccountRDF.xml', format='xml', encoding='utf-8')

<Graph identifier=N35b60fcf28484699b962e16d48526170 (<class 'rdflib.graph.Graph'>)>

In [2]:
g = Graph()
g.load(data_path / 'AccountRDF.ttl', format='ttl')

<Graph identifier=Nd839b58febd244e0925a654be0c5ffb8 (<class 'rdflib.graph.Graph'>)>

In [6]:
# show only BalanceSheet
query_string = """
SELECT DISTINCT ?acc
WHERE {
    ?acc acc:Account_Belonging acc:BalanceSheet .
}
"""
qres = g.query(query_string)
for x in qres:
    print(f'{x}')

(rdflib.term.URIRef('http://fsqa.com/acc#AdvancesCustomers'),)
(rdflib.term.URIRef('http://fsqa.com/acc#BondsIssued'),)
(rdflib.term.URIRef('http://fsqa.com/acc#CashAndCashEquivalents'),)
(rdflib.term.URIRef('http://fsqa.com/acc#IntangibleAssets'),)
(rdflib.term.URIRef('http://fsqa.com/acc#Inventories'),)
(rdflib.term.URIRef('http://fsqa.com/acc#LongTermBorrowings'),)
(rdflib.term.URIRef('http://fsqa.com/acc#PrepaidExpenses'),)
(rdflib.term.URIRef('http://fsqa.com/acc#PropertyPlantAndEquipment'),)
(rdflib.term.URIRef('http://fsqa.com/acc#ShorttermBorrowings'),)
(rdflib.term.URIRef('http://fsqa.com/acc#TotalEquity'),)
(rdflib.term.URIRef('http://fsqa.com/acc#TradeAndOtherCurrentPayables'),)
(rdflib.term.URIRef('http://fsqa.com/acc#TradeAndOtherCurrentReceivables'),)
(rdflib.term.URIRef('http://fsqa.com/acc#AssetsAbstract'),)
(rdflib.term.URIRef('http://fsqa.com/acc#LiabilitiesAbstract'),)
(rdflib.term.URIRef('http://fsqa.com/acc#LiabilitiesAndEquities'),)
(rdflib.term.URIRef('http://fsq

In [23]:
# relationship BalanceSheet
query_string = """
CONSTRUCT {
    ?s acc:partOf ?o ;
}
WHERE {
    ?s acc:Account_Belonging acc:BalanceSheet ;
       rdfs:label ?literal .
    ?o acc:Account_Belonging acc:BalanceSheet ;
       rdfs:label ?literal .
}
"""
qres = g.query(query_string)
list(qres)[-1]

(rdflib.term.URIRef('http://fsqa.com/acc#NoncurrentAssets'),
 rdflib.term.URIRef('http://fsqa.com/acc#partOf'),
 rdflib.term.URIRef('http://fsqa.com/acc#NoncurrentAssets'))

In [16]:
# relationship BalanceSheet 
query_string = """
SELECT {
    ?s ?o
}
WHERE {
    ?s acc:Account_Belonging acc:BalanceSheet .
    ?o acc:Account_Belonging acc:BalanceSheet .
    ?acc rdfs:label ?s .
}
"""
qres = g.query(query_string)
list(qres)[0]

ParseException: Expected {SelectQuery | ConstructQuery | DescribeQuery | AskQuery}, found '{'  (at char 8), (line:2, col:8)

In [218]:
qres.graph.serialize(data_path / 'temp.ttl', format='ttl')

<Graph identifier=Neb25ac0f08c746e3b699c94f0361a1a1 (<class 'rdflib.graph.Graph'>)>

In [15]:
from streamlit_agraph import TripleStore, agraph, Config
results = g.query(query_string)
store = TripleStore()

for subj, pred, obj in results.graph:
    store.add_triple(subj, pred, obj, "")

In [16]:
store

<streamlit_agraph.TripleStore at 0x15697ea7520>

In [28]:
x

(rdflib.term.URIRef('http://fsqa.com/acc#LongTermBorrowings'),
 rdflib.term.URIRef('http://fsqa.com/acc#partOf'),
 rdflib.term.URIRef('http://fsqa.com/acc#LongTermBorrowings'))