In [1]:
from SPARQLWrapper import SPARQLWrapper, JSON
import os
import pandas as pd

%matplotlib inline

In [2]:
sparql = SPARQLWrapper("http://localhost:9999/blazegraph/namespace/kb/sparql")

In [3]:
def sparql_result_to_df(result):
    cols = result['head']['vars']
    rows_dicts = result['results']['bindings']
    rows = [{c: r.get(c, {'value': None})['value'] for c in cols} for r in rows_dicts]
    return pd.DataFrame(rows)[cols]

In [5]:
queryString = """
    prefix dcterm: <http://purl.org/dc/terms/> 
    select ?type (count(?s) as ?count)
    {
        ?s dcterm:type ?type.
    }
    group by ?type
"""

sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
sparql_result_to_df(result)

Unnamed: 0,type,count
0,Jurisprudentie,162514
1,Verdrag,703
2,Wet,11879
3,Uitvoeringsinformatie,21
4,Officiele overheidspublicatie,853
5,Europese Regelgeving,384
6,BWB Beleidsregel,42
7,Amvb,888
8,MinisteriÃ«le-regeling,464
9,Beleidsregel,14


In [10]:
# wich attributes can appear multiple times?
queryString = """
    prefix dcterm: <http://purl.org/dc/terms/> 
    select ?p (count(?o) as ?count) 
    {
        ?s dcterm:type "Jurisprudentie".
        ?s ?p ?o.
    }
    group by ?p
    having(count(?o)>1)
    order by ?p
"""

sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
sparql_result_to_df(result)

Unnamed: 0,p,count
0,http://linkeddata.overheid.nl/terms/kleur,162514
1,http://linkeddata.overheid.nl/terms/volgorde,162514
2,http://purl.org/dc/terms/creator,68338
3,http://purl.org/dc/terms/hasVersion,162528
4,http://purl.org/dc/terms/identifier,326093
5,http://purl.org/dc/terms/modified,163236
6,http://purl.org/dc/terms/title,162521
7,http://purl.org/dc/terms/type,162514


In [14]:
# Nodes: all jurispidentie that includes a creator
queryString = """
    prefix dcterm: <http://purl.org/dc/terms/> 
    select ?id ?creator ?title
    {
        ?id dcterm:type "Jurisprudentie".
         ?id dcterm:creator ?creator. 
        optional { ?id dcterm:title ?title }
    }
"""

sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
cases = sparql_result_to_df(result)
ecli_regex = '(ECLI:[A-Z]{2}:[A-Z]*:[0-9]{4}:[0-9A-Z\.]{1,25}$)'
cases['ecli'] = cases.id.str.extract(ecli_regex, expand=False)
cases['court'] = cases.ecli.str.extract('ECLI:[A-Z]{2}:([A-Z]*):[0-9]{4}:[0-9A-Z\.]{1,25}$', expand=False)
cases.shape

(68339, 5)

In [15]:
# Links between cases
queryString = """
    prefix dcterm: <http://purl.org/dc/terms/> 
    prefix overheidrl: <http://linkeddata.overheid.nl/terms/>
    select ?link_id ?source ?target ?link_type ?link_type_label
    {
        ?source dcterm:type "Jurisprudentie".
        ?target dcterm:type "Jurisprudentie".
        ?link_id overheidrl:heeftLinktype ?link_type.
        ?link_id overheidrl:linktVan  ?source.
        ?link_id overheidrl:linktNaar  ?target.
        ?link_type rdfs:label ?link_type_label.
        ?source dcterm:creator ?creator_source. 
        ?target dcterm:creator ?creator_target. 
    }
"""

sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
links = sparql_result_to_df(result)
links.shape

(17240, 5)

In [None]:
# Legislation titles
queryString = """
    prefix dcterm: <http://purl.org/dc/terms/> 
    prefix overheidrl: <http://linkeddata.overheid.nl/terms/>
    select ?title (count(*) as ?cnt)
    {
        ?source dcterm:type "Jurisprudentie".
        ?article dcterm:type "Wet".
        ?link_id overheidrl:heeftLinktype ?link_type.
        ?link_id overheidrl:linktVan  ?source.
        ?link_id overheidrl:linktNaar  ?article.
        ?link_type rdfs:label ?link_type_label.
        ?article dcterm:title ?title
    }
    group by ?title
"""

sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
law_titles = sparql_result_to_df(result)
law_titles.sort_values('cnt', ascending=False)

In [7]:
law_titles.sort_values('cnt', ascending=False).head(10)

Unnamed: 0,title,cnt
1398,"Wetboek van Strafvordering, Artikel 218",99
869,"Wetboek van Strafrecht, Artikel 326",96
326,"Wetboek van Strafvordering, Artikel 361",96
2650,"Algemene wet bestuursrecht, Artikel 8:42",92
239,"Wetboek van Strafvordering, Artikel 98",91
5168,"Wetboek van Strafrecht, Artikel 15i",9
4680,"Algemene wet bestuursrecht, Artikel 6:12",9
494,"Gezondheids- en welzijnswet voor dieren, Artik...",9
1140,"Wetboek van Strafvordering, Artikel 282",9
2337,Wet financiering volksverzekeringen,9


In [9]:
# Legislation references
# Links between cases
queryString = """
    prefix dcterm: <http://purl.org/dc/terms/> 
    prefix overheidrl: <http://linkeddata.overheid.nl/terms/>
    select ?link_id ?source ?article ?link_type ?link_type_label  ?title
    {
        ?source dcterm:type "Jurisprudentie".
        ?article dcterm:type "Wet".
        ?link_id overheidrl:heeftLinktype ?link_type.
        ?link_id overheidrl:linktVan  ?source.
        ?link_id overheidrl:linktNaar  ?article.
        ?link_type rdfs:label ?link_type_label.
        optional {?article dcterm:title ?title}
    }
"""

sparql.setQuery(queryString)
sparql.setReturnFormat(JSON)
ret = sparql.query()
result = ret.convert()
legislation = sparql_result_to_df(result)
legislation.shape

(87481, 6)

In [12]:
legislation['title'].value_counts().head(20)

Wet op de rechterlijke organisatie, Artikel 81     14555
Wetboek van Strafvordering                          3641
Wetboek van Strafrecht                              2428
Wet op de rechterlijke organisatie, Artikel 80a     1239
Wetboek van Strafvordering, Artikel 359              731
Opiumwet                                             720
Wetboek van Burgerlijke Rechtsvordering              715
Burgerlijk Wetboek Boek 6                            714
Wet waardering onroerende zaken                      714
Wetboek van Strafvordering, Artikel 552a             700
Wetboek van Strafvordering, Artikel 457              658
Burgerlijk Wetboek Boek 7                            475
Wetboek van Strafvordering, Artikel 359a             443
Wetboek van Strafvordering, Artikel 440              438
Wet op de omzetbelasting 1968                        429
Algemene wet bestuursrecht                           424
Algemene wet inzake rijksbelastingen                 424
Wetboek van Strafvordering, Art

In [16]:
cases['court'].value_counts()

HR    68339
Name: court, dtype: int64

In [19]:
# Filter cases on 81 RO - maybe not because can still contain references
ref81ro = legislation[legislation['title']=='Wet op de rechterlijke organisatie, Artikel 81']
cases_filtered = cases[~cases.id.isin(ref81ro['source'].unique())]
cases_filtered.shape

(55607, 5)

In [28]:
links_from_81ro = links[links.source.isin(ref81ro['source'].unique())]
[print(l) for l in links_from_81ro['source'][:10]]

http://linkeddata.overheid.nl/terms/jurisprudentie/id/ECLI:NL:HR:2015:743
http://linkeddata.overheid.nl/terms/jurisprudentie/id/ECLI:NL:HR:2011:BP8952
http://linkeddata.overheid.nl/terms/jurisprudentie/id/ECLI:NL:HR:2006:AU9096
http://linkeddata.overheid.nl/terms/jurisprudentie/id/ECLI:NL:HR:2014:1299
http://linkeddata.overheid.nl/terms/jurisprudentie/id/ECLI:NL:HR:2007:BA1792
http://linkeddata.overheid.nl/terms/jurisprudentie/id/ECLI:NL:HR:2004:AR6885
http://linkeddata.overheid.nl/terms/jurisprudentie/id/ECLI:NL:HR:2010:BM8075
http://linkeddata.overheid.nl/terms/jurisprudentie/id/ECLI:NL:HR:2015:471
http://linkeddata.overheid.nl/terms/jurisprudentie/id/ECLI:NL:HR:2005:AS8645
http://linkeddata.overheid.nl/terms/jurisprudentie/id/ECLI:NL:HR:2008:BE9104


[None, None, None, None, None, None, None, None, None, None]

In [43]:
# Filter self-loops
links = links[~(links['source']==links['target'])]

In [30]:
import caselawnet

[nltk_data] Error loading punkt: <urlopen error [Errno -2] Name or
[nltk_data]     service not known>


In [35]:
outpath = '/media/sf_VBox_Shared/CaseLaw/graphs/lido/'

In [34]:
# Export nodes and links
caselawnet.utils.to_sigma_json(cases.to_dict(orient='records'),
                               links.to_dict(orient='records'),
                               'Hoge Raad',
                               filename=os.path.join('hr_simple.json')
                              )

In [80]:
cases.to_csv(os.path.join(outpath, 'hr_simple_nodes.csv'), index=False)
links.to_csv(os.path.join(outpath, 'hr_simple_links.csv'), index=False)

## Make bi-modal network

In [64]:
legislation.to_csv(os.path.join(outpath, 'hr_simple_legislation_links.csv'), index=False)

In [56]:
leg_per_node = legislation.groupby('source')['title'].apply(lambda l: "|".join(list(sorted(l))))

In [59]:
cases = cases.set_index('id')

In [61]:
cases['leg'] = leg_per_node

In [72]:
cases = cases.reset_index()

In [67]:
legislation.groupby('title')['source'].nunique().sort_values(ascending=False).head(20)

title
Wet op de rechterlijke organisatie, Artikel 81     12732
Wetboek van Strafvordering                          2043
Wetboek van Strafrecht                              1395
Wet op de rechterlijke organisatie, Artikel 80a     1222
Wet waardering onroerende zaken                      708
Opiumwet                                             624
Wetboek van Strafvordering, Artikel 457              614
Wetboek van Strafvordering, Artikel 359              603
Wetboek van Strafvordering, Artikel 552a             567
Wetboek van Strafvordering, Artikel 440              432
Wet op de omzetbelasting 1968                        413
Burgerlijk Wetboek Boek 6                            410
Wetboek van Burgerlijke Rechtsvordering              402
Algemene wet inzake rijksbelastingen                 400
Wetboek van Strafvordering, Artikel 365a             397
Wetboek van Strafvordering, Artikel 359a             338
Algemene wet bestuursrecht, Artikel 8:41             337
Algemene wet bestuursrech

## Create subnetwork

In [77]:
def create_subnetwork(articles, legislation, nodes_in, links_in):
    case_ids = legislation[legislation['title'].isin(articles)]['source'].unique()
    nodes_sub = nodes_in[nodes_in['id'].isin(case_ids)]
    links_sub = links_in[links_in['source'].isin(case_ids)]
    links_sub = links_sub[links_sub['target'].isin(case_ids)]
    return nodes_sub, links_sub

In [79]:
nodes_wga, links_wga = create_subnetwork(
    ["Burgerlijk Wetboek Boek 7, Artikel 658", "Burgerlijk Wetboek Boek 7, Artikel 611"], 
    legislation, cases, links)
print(nodes_wga.shape, links_wga.shape)

(127, 6) (53, 5)


## Enrich the nodes

In [None]:
nodes_enriched = caselawnet.enrich_eclis(list(cases['ecli']), rootpath='/media/sf_VBox_Shared/CaseLaw/data_april/')

In [209]:
nodes_enriched_df = pd.DataFrame(nodes_enriched)
cases_merged = cases.merge(nodes_enriched_df[['ecli', 'subject', 'creator', 'year', 'date', 'abstract']], on='ecli')

In [210]:
cases_merged['creator'] = [c['creator_x'] if c['creator_x'] else c['creator_y'].split('/')[-1].replace('_', ' ')
                           for i, c in cases_merged.iterrows()]

In [None]:
cases_merged