In [67]:
import os
import pandas as pd
import caselawnet

In [96]:
# imports
inputpath = '/media/sf_VBox_Shared/CaseLaw/graphs/lido/'
cases = pd.read_csv(os.path.join(inputpath, 'hr_enriched_nodes_2.csv'))
case_to_leg = pd.read_csv(os.path.join(inputpath, 'hr_simple_legislation_links.csv'))
cases_links = pd.read_csv(os.path.join(inputpath, 'hr_simple_links.csv'))
leg_to_leg_nodes = pd.read_csv(os.path.join(inputpath, 'leg_to_leg_nodes_min10.csv'))
communities = pd.read_csv(os.path.join(inputpath, 'leg_to_leg_communities.csv'))

In [97]:
cases = cases.set_index('id')
cases['articles'] = case_to_leg.groupby('source')['title'].apply(lambda l: list(l)) #".".join(list(sorted(l))))
cases = cases.reset_index()

In [71]:
def create_subnetwork(articles, legislation, nodes_in, links_in, 
                      include_referenced=False,
                     exclude_singletons=False):
    case_ids = legislation[legislation['title'].isin(articles)]['source'].unique()
    
    if include_referenced:
        links_sub = links_in[links_in['source'].isin(case_ids) | links_in['target'].isin(case_ids)]
        case_ids_extended = set(links_sub['source']).union(set(links_sub['target'])).union(case_ids)
        nodes_sub = nodes_in[nodes_in['id'].isin(case_ids_extended)].copy()
        nodes_sub['in_set'] = nodes_sub['id'].isin(case_ids)
    else:
        nodes_sub = nodes_in[nodes_in['id'].isin(case_ids)]
        links_sub = links_in[links_in['source'].isin(case_ids)]
        links_sub = links_sub[links_sub['target'].isin(case_ids)]
    
    if exclude_singletons:
        case_ids = set(links_sub['source']).union(set(links_sub['target']))
        nodes_sub = nodes_in[nodes_in['id'].isin(case_ids)]

    return nodes_sub, links_sub

In [78]:
def save_network(nodes_sub, links_sub, title, filename):
    nodes = nodes_sub.copy()
    links = links_sub.copy()
    
    lido_url = 'http://linkeddata.overheid.nl/terms/jurisprudentie/id/'
    rechtspraak_url = 'https://uitspraken.rechtspraak.nl/inziendocument?id='
    nodes['id'] = nodes['id'].str.replace(lido_url, rechtspraak_url)
    for a in ['link_id', 'source', 'target']:
        links[a] = links[a].str.replace(lido_url, rechtspraak_url)
    links = links.rename(columns={'link_id': 'id'})
    nodes = nodes.fillna('')
    nodes, links = caselawnet.get_network(nodes.to_dict(orient='rows'), links.to_dict(orient='rows'))
    caselawnet.utils.to_sigma_json(nodes, links, title, filename)

# Example: employer liability

In [91]:
nodes_wga, links_wga = create_subnetwork(
    ["Burgerlijk Wetboek Boek 7, Artikel 658", "Burgerlijk Wetboek Boek 7, Artikel 611"], 
    case_to_leg, cases, cases_links)
save_network(nodes_wga, links_wga, 'wga', os.path.join(inputpath, 'subnetworks/wga.json'))
print(nodes_wga.shape, links_wga.shape)

(127, 9) (53, 5)


In [92]:
nodes_wga_extended, links_wga_extended = create_subnetwork(
    ["Burgerlijk Wetboek Boek 7, Artikel 658", "Burgerlijk Wetboek Boek 7, Artikel 611"], 
    case_to_leg, cases, cases_links, include_referenced=True)
save_network(nodes_wga_extended, links_wga_extended, 'wga extended', os.path.join(inputpath, 'subnetworks/wga_extended.json'))
print(nodes_wga_extended.shape, links_wga_extended.shape)

(197, 10) (154, 5)


In [93]:
nodes_wga_extended2, links_wga_extended2 = create_subnetwork(
    ["Burgerlijk Wetboek Boek 7, Artikel 658", "Burgerlijk Wetboek Boek 7, Artikel 611"], 
    case_to_leg, cases, cases_links, include_referenced=True, exclude_singletons=True)
save_network(nodes_wga_extended2, links_wga_extended2, 'wga extended', os.path.join(inputpath, 'subnetworks/wga_extended_connected.json'))
print(nodes_wga_extended2.shape, links_wga_extended2.shape)

(146, 9) (154, 5)


# Now for all communities

In [109]:
links.head()

Unnamed: 0,link_id,source,target,link_type,link_type_label
6582,http://linkeddata.overheid.nl/terms/linktype/i...,http://linkeddata.overheid.nl/terms/jurisprude...,http://linkeddata.overheid.nl/terms/jurisprude...,http://linkeddata.overheid.nl/terms/linktype/i...,Door computer herkende referentie
6835,http://linkeddata.overheid.nl/terms/linktype/i...,http://linkeddata.overheid.nl/terms/jurisprude...,http://linkeddata.overheid.nl/terms/jurisprude...,http://linkeddata.overheid.nl/terms/linktype/i...,Door computer herkende referentie
6866,http://linkeddata.overheid.nl/terms/linktype/i...,http://linkeddata.overheid.nl/terms/jurisprude...,http://linkeddata.overheid.nl/terms/jurisprude...,http://linkeddata.overheid.nl/terms/linktype/i...,Door computer herkende referentie
6878,http://linkeddata.overheid.nl/terms/linktype/i...,http://linkeddata.overheid.nl/terms/jurisprude...,http://linkeddata.overheid.nl/terms/jurisprude...,http://linkeddata.overheid.nl/terms/linktype/i...,Door computer herkende referentie
6879,http://linkeddata.overheid.nl/terms/linktype/i...,http://linkeddata.overheid.nl/terms/jurisprude...,http://linkeddata.overheid.nl/terms/jurisprude...,http://linkeddata.overheid.nl/terms/linktype/i...,Door computer herkende referentie


In [115]:
for i, comm in communities.iterrows():
        comm_nr = comm['community']
        leg_names = list(leg_to_leg_nodes[leg_to_leg_nodes['louvain_weighted_sub']==comm_nr]['name'])
        # Regular
        nodes, links = create_subnetwork(leg_names, case_to_leg, cases, cases_links)
        save_network(nodes, links, comm['nodes'], os.path.join(inputpath, 'subnetworks', str(comm_nr)+'.json'))
        # No singletons
        nodes, links = create_subnetwork(leg_names, case_to_leg, cases, cases_links, exclude_singletons=True)
        communities.loc[i, 'nr_connected'] = len(nodes)
        save_network(nodes, links, comm['nodes'], 
                     os.path.join(inputpath, 'subnetworks', str(comm_nr)+'_connected.json'))
         # Extended
        nodes, links = create_subnetwork(leg_names, case_to_leg, cases, cases_links, include_referenced=True)
        communities.loc[i, 'nr_extended'] = len(nodes)
        save_network(nodes, links, comm['nodes'], 
                     os.path.join(inputpath, 'subnetworks', str(comm_nr)+'_extended.json'))
        # Extended, no singletons
        nodes, links = create_subnetwork(leg_names, case_to_leg, cases, cases_links, include_referenced=True, 
                                         exclude_singletons=True)
        communities.loc[i, 'nr_extended_connected'] = len(nodes)
        save_network(nodes, links, comm['nodes'], 
                     os.path.join(inputpath, 'subnetworks', str(comm_nr)+'_extended_connected.json'))



In [123]:
communities.to_csv(os.path.join(inputpath, 'leg_to_leg_communities_counts.csv'), index=False)