In [None]:
from SPARQLWrapper import SPARQLWrapper, JSON
import os
import pandas as pd

%matplotlib inline

In [None]:
# Shared query client, bound to the "Regellinks" namespace of a Blazegraph server on localhost.
sparql = SPARQLWrapper(
    endpoint="http://localhost:8080/blazegraph-2.1.4/namespace/Regellinks/sparql"
)

In [None]:
def sparql_result_to_df(result):
    """Convert a parsed SPARQL JSON result into a pandas DataFrame.

    Parameters
    ----------
    result : dict
        Parsed SPARQL JSON response. ``result['head']['vars']`` lists the
        selected variable names and ``result['results']['bindings']`` holds
        one dict per solution, mapping a variable name to a dict that has a
        ``'value'`` key.

    Returns
    -------
    pandas.DataFrame
        One row per binding and one column per variable, in the order given
        by ``head.vars``. A variable that is unbound in a solution yields
        ``None`` in that row. An empty result set yields an empty frame that
        still has all the columns.

    Raises
    ------
    KeyError
        If ``result`` lacks the ``head``/``vars`` or ``results``/``bindings``
        entries.
    """
    cols = result['head']['vars']
    bindings = result['results']['bindings']
    rows = [
        {col: binding.get(col, {'value': None})['value'] for col in cols}
        for binding in bindings
    ]
    # Passing `columns=` keeps the frame well-formed when there are no rows;
    # selecting `[cols]` from an empty frame would raise KeyError instead.
    return pd.DataFrame(rows, columns=cols)

In [None]:
# How many triples do we have?
# The pattern `?s ?p ?o` is unconstrained, so the count covers every triple the endpoint returns.
queryString = """
    select (count(?s) as ?count)
    {
        ?s ?p ?o.
    }
"""

# Hand the query to the shared `sparql` wrapper and request a JSON response.
sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
# Decode the response and display it as a DataFrame (last expression of the cell).
result = ret.convert()
sparql_result_to_df(result)

In [None]:
# Count the rdf:type statements per type, most frequent type first.
queryString = """
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
    select ?type (count(?s) as ?count)
    {
        ?s rdf:type ?type.
    }
    group by ?type
    order by desc(?count)
"""

# Run the query on the shared `sparql` wrapper with JSON output, then show it as a table.
sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
sparql_result_to_df(result)

In [None]:
# Same breakdown as for rdf:type, but over the Dublin Core `dcterm:type` predicate,
# again ordered by descending count.
queryString = """
    prefix dcterm: <http://purl.org/dc/terms/> 
    select ?type (count(?s) as ?count)
    {
        ?s dcterm:type ?type.
    }
    group by ?type
    order by desc(?count)
"""

# Run the query on the shared `sparql` wrapper with JSON output, then show it as a table.
sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
sparql_result_to_df(result)

In [None]:
# Which attributes (predicates) do Jurisprudentie resources have?
# Counts the objects per predicate; the `having` clause keeps only predicates
# whose total count is greater than 1. Results are ordered by predicate.
queryString = """
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
    select ?p (count(?o) as ?count) 
    {
        ?s rdf:type <http://linkeddata.overheid.nl/terms/Jurisprudentie>.
        ?s ?p ?o.
    }
    group by ?p
    having(count(?o)>1)
    order by ?p
"""

# Run the query on the shared `sparql` wrapper with JSON output, then show it as a table.
sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
sparql_result_to_df(result)

## Legislation

In [None]:
# Legislation titles
# Fetches (resource, dcterm:title) pairs for resources typed as `Wet`, capped at
# 100 rows by the `limit` clause. The variable is named ?article in the query.
queryString = """
    prefix dcterm: <http://purl.org/dc/terms/> 
    prefix overheidrl: <http://linkeddata.overheid.nl/terms/>
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
    select ?article ?title 
    {
        ?article rdf:type <http://linkeddata.overheid.nl/terms/Wet>.
        ?article dcterm:title ?title
    }
    limit 100
"""

# Run the query on the shared `sparql` wrapper with JSON output and keep the table.
sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
law_titles = sparql_result_to_df(result)
# Displayed as returned: the query selects only ?article and ?title, so there is no 'cnt' column to sort on.
law_titles

## Links

In [None]:
# Link types
# Counts the `overheidrl:heeftLinktype` statements per link type, most frequent first.
queryString = """
    prefix dcterm: <http://purl.org/dc/terms/> 
    prefix overheidrl: <http://linkeddata.overheid.nl/terms/>
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
    select ?link_type (count(*) as ?cnt)
    {
        ?link_id overheidrl:heeftLinktype ?link_type.
    }
    group by ?link_type
    order by desc(?cnt)
"""

# Run the query on the shared `sparql` wrapper with JSON output and keep the table.
sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
# NOTE: despite its name, `link_titles` holds link *types* and their counts.
link_titles = sparql_result_to_df(result)
link_titles.head(30)

In [None]:
# Print the first 20 link type URIs, one per line (the query ordered them by descending count).
for l in link_titles['link_type'].head(20):
    print(l)

In [None]:
# Source type / target type combinations of lx-referentie links
# For every link with link type `lx-referentie`, look up the rdf:type of its
# source (`linktVan`) and of its target (`linktNaar`), then count the links per
# (sourceType, targetType) pair, most frequent pair first.
queryString = """
    prefix dcterm: <http://purl.org/dc/terms/> 
    prefix overheidrl: <http://linkeddata.overheid.nl/terms/>
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
    select ?sourceType ?targetType (count(*) as ?cnt)
    {
        ?target rdf:type ?targetType.
        ?source rdf:type ?sourceType.
        ?link_id overheidrl:heeftLinktype <http://linkeddata.overheid.nl/terms/linktype/id/lx-referentie>.
        ?link_id overheidrl:linktNaar  ?target.
        ?link_id overheidrl:linktVan ?source
    }
    group by ?sourceType ?targetType
    order by desc(?cnt)
"""

# Run the query on the shared `sparql` wrapper with JSON output and keep the table.
sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
link_types = sparql_result_to_df(result)
link_types

In [None]:
# Save the source/target type counts to the working directory.
# NOTE(review): this export keeps the DataFrame index as the first CSV column,
# whereas the other link exports in this notebook pass index=False. Confirm
# whether any reader of link_types.csv relies on that column before changing it.
link_types.to_csv(path_or_buf='link_types.csv', index=True)

In [None]:
# Links from cases to cases
# Selects every link whose source (`linktVan`) and target (`linktNaar`) are both
# typed as Jurisprudentie, together with the link's type.
queryString = """
    prefix dcterm: <http://purl.org/dc/terms/> 
    prefix overheidrl: <http://linkeddata.overheid.nl/terms/>
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
    select ?link_id ?source ?target ?linktype
    {
        ?target rdf:type overheidrl:Jurisprudentie.
        ?source rdf:type overheidrl:Jurisprudentie.
        ?link_id overheidrl:heeftLinktype ?linktype.
        ?link_id overheidrl:linktNaar  ?target.
        ?link_id overheidrl:linktVan ?source
    }
"""

# Run the query on the shared `sparql` wrapper with JSON output and keep the table.
sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
case_to_case_links = sparql_result_to_df(result)
# Print the (rows, columns) shape, then preview the first rows.
print(case_to_case_links.shape)
case_to_case_links.head()

In [None]:
# Save all case-to-case links (every link type) without the DataFrame index.
case_to_case_links.to_csv(path_or_buf='case_to_case_links.csv', index=False)

In [None]:
# Keep only the lx-referentie links and drop the now-constant linktype column,
# doing the row filter and the column selection in a single .loc step.
case_to_case_links_lx = case_to_case_links.loc[
    case_to_case_links['linktype'] == 'http://linkeddata.overheid.nl/terms/linktype/id/lx-referentie',
    ['link_id', 'source', 'target']
]
# Compare the shape before and after de-duplication to reveal duplicate rows.
print(case_to_case_links_lx.shape, case_to_case_links_lx.drop_duplicates().shape)

In [None]:
# Save the lx-referentie subset of the case-to-case links without the DataFrame index.
case_to_case_links_lx.to_csv(path_or_buf='case_to_case_lx_links.csv', index=False)

In [None]:
# Number of case-to-case links per link type, largest group first.
case_to_case_links.groupby('linktype')['link_id'].count().sort_values(ascending=False)

In [None]:
# Case - Legislation network
# Selects every link whose source (`linktVan`) is typed as Jurisprudentie and
# whose target (`linktNaar`) is typed as Artikel, together with the link's type.
queryString = """
    prefix dcterm: <http://purl.org/dc/terms/> 
    prefix overheidrl: <http://linkeddata.overheid.nl/terms/>
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
    select ?link_id ?source ?target ?linktype
    {
        ?target rdf:type overheidrl:Artikel.
        ?source rdf:type overheidrl:Jurisprudentie.
        ?link_id overheidrl:heeftLinktype ?linktype.
        ?link_id overheidrl:linktNaar  ?target.
        ?link_id overheidrl:linktVan ?source
    }
"""
# Run the query on the shared `sparql` wrapper with JSON output and keep the table.
sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
case_article_network = sparql_result_to_df(result)
# Display the (rows, columns) shape of the link table.
case_article_network.shape

In [None]:
# Save the case-to-article links to the shared "derived" folder without the DataFrame index.
case_article_network.to_csv(
    path_or_buf='/media/sf_VBox_Shared/CaseLaw/2018-01-29-lido/derived/case-to-article-links.csv',
    index=False,
)

In [None]:
# Number of case-to-article links per link type.
case_article_network.groupby('linktype')['link_id'].count()

## Nodes

In [None]:
# Get all articles
# Selects every resource typed as Artikel. Title, authority and preferred label
# are each wrapped in `optional`, so an article is returned even when one of
# them is missing; a missing value ends up as None in the DataFrame.
queryString = """
    prefix dcterm: <http://purl.org/dc/terms/> 
    prefix overheidrl: <http://linkeddata.overheid.nl/terms/>
    prefix owms: <http://standaarden.overheid.nl/owms/terms/>
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
    prefix skos: <http://www.w3.org/2004/02/skos/core#>
    select ?id ?title ?label ?authority
    {
        ?id rdf:type overheidrl:Artikel.
        optional {?id dcterm:title ?title.}
        optional {?id owms:authority ?authority.}
        optional {?id skos:prefLabel ?label}
    }
"""
# Run the query on the shared `sparql` wrapper with JSON output and keep the table.
sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
article_nodes = sparql_result_to_df(result)
# Display the (rows, columns) shape of the node table.
article_nodes.shape

In [None]:
# Save the raw (not yet de-duplicated) article nodes to the shared "derived" folder.
article_nodes.to_csv(
    path_or_buf='/media/sf_VBox_Shared/CaseLaw/2018-01-29-lido/derived/article_nodes.csv',
    index=False,
)

In [None]:
# Preview the first five article rows.
article_nodes.head(n=5)

Sometimes a law or article changes its name, so a single id can have multiple titles/labels. 
Unfortunately, we don't know which one is the latest version.
Therefore, we just keep the alphabetically first option for each id.

In [None]:
# Collapse to one row per article id. Rows are sorted by title, then label, then
# authority, and `first()` then takes the first non-null value of each column
# within every id group. The group key `id` becomes the index of the result.
article_nodes_dedup = (
    article_nodes
    .sort_values(by=['title', 'label', 'authority'])
    .groupby('id')
    .first()
)
article_nodes_dedup.shape

In [None]:
# Save the de-duplicated article nodes as UTF-8. The index is written by default,
# which keeps the `id` group key in the file.
article_nodes_dedup.to_csv(
    path_or_buf='/media/sf_VBox_Shared/CaseLaw/2018-01-29-lido/derived/article_nodes_nodup.csv',
    encoding='utf-8',
)

In [None]:
# Count the missing values in each column of the de-duplicated node table.
article_nodes_dedup.isna().sum()