In [3]:
import os
import pandas as pd

In [4]:
# Load the case nodes and the legislation links from the `inputpath` directory.
# NOTE(review): no `links` table is loaded in this cell, although the
# create_subnetwork call further down passes a variable named `links` —
# confirm where it should come from.
inputpath = '/media/sf_VBox_Shared/CaseLaw/graphs/lido/'
cases = pd.read_csv(os.path.join(inputpath, 'hr_simple_nodes.csv'))
legislation = pd.read_csv(os.path.join(inputpath, 'hr_simple_legislation_links.csv'))

In [5]:
# For every case (`source`), collect the legislation titles it references into
# one sorted, "|"-separated string.
titles_per_case = legislation.groupby('source')['title']
leg_per_node = titles_per_case.apply(lambda titles: "|".join(sorted(titles)))

In [6]:
# Attach the joined titles as a `leg` column. The Series is aligned on the case
# id, so cases without any legislation reference get NaN.
cases = cases.set_index('id').assign(leg=leg_per_node).reset_index()

In [7]:
legislation.groupby('title')['source'].nunique().sort_values(ascending=False).head(20)

title
Wet op de rechterlijke organisatie, Artikel 81     12732
Wetboek van Strafvordering                          2043
Wetboek van Strafrecht                              1395
Wet op de rechterlijke organisatie, Artikel 80a     1222
Wet waardering onroerende zaken                      708
Opiumwet                                             624
Wetboek van Strafvordering, Artikel 457              614
Wetboek van Strafvordering, Artikel 359              603
Wetboek van Strafvordering, Artikel 552a             567
Wetboek van Strafvordering, Artikel 440              432
Wet op de omzetbelasting 1968                        413
Burgerlijk Wetboek Boek 6                            410
Wetboek van Burgerlijke Rechtsvordering              402
Algemene wet inzake rijksbelastingen                 400
Wetboek van Strafvordering, Artikel 365a             397
Wetboek van Strafvordering, Artikel 359a             338
Algemene wet bestuursrecht, Artikel 8:41             337
Algemene wet bestuursrech

## Investigate legislation network

In [8]:
legislation.columns

Index(['link_id', 'source', 'article', 'link_type', 'link_type_label',
       'title'],
      dtype='object')

In [9]:
legislation.groupby('title')['article'].nunique().sort_values(ascending=False)

title
Wetboek van Strafvordering                                                                  81
Wetboek van Strafrecht                                                                      70
Algemene wet bestuursrecht                                                                  68
Wet inkomstenbelasting 2001                                                                 54
Algemene wet inzake rijksbelastingen                                                        50
Wet milieubeheer                                                                            49
Wegenverkeerswet 1994                                                                       46
Gemeentewet                                                                                 45
Wet op de vennootschapsbelasting 1969                                                       43
Wetboek van Burgerlijke Rechtsvordering                                                     41
Invorderingswet 1990                        

In [10]:
legislation_title_references = legislation[['source', 'title']].drop_duplicates()

In [11]:
legislation_title_references.shape

(71834, 2)

In [21]:
# Export the de-duplicated (case, legislation title) pairs.
# NOTE(review): unlike the later exports, this call does not pass index=False,
# so the leftover row labels from `legislation` are written as an unnamed first
# column — confirm this is intended.
legislation_title_references.to_csv(os.path.join(inputpath, 'hr_case_leg_titles.csv'))

## Collapse network

In [16]:
import scipy.sparse

In [12]:
# Binary case x legislation-title incidence table: one row per case, one column
# per title, 1 where the case references the title and 0 elsewhere.
ltr_df = legislation_title_references.set_index(['source', 'title']).assign(cnt=1)
case_leg_adj_df = ltr_df.unstack(fill_value=0)

In [17]:
case_leg_adj_m = scipy.sparse.coo_matrix(case_leg_adj_df.values)

In [18]:
case_leg_adj_m.shape

(25446, 7171)

In [20]:
# Project the bipartite case/legislation incidence matrix onto each side:
#   case_to_case[i, j] = number of legislation titles cases i and j share
#   leg_to_leg[i, j]   = number of cases referencing both titles i and j
case_leg_adj_t = case_leg_adj_m.T
case_to_case = case_leg_adj_m.dot(case_leg_adj_t)
leg_to_leg = case_leg_adj_t.dot(case_leg_adj_m)

In [32]:
case_to_case = case_to_case.asformat('coo')

In [34]:
leg_to_leg = leg_to_leg.tocoo()

In [77]:
leg_to_leg = scipy.sparse.triu(leg_to_leg, 1)

case_to_case = scipy.sparse.triu(case_to_case, 1)

In [79]:
case_to_case, leg_to_leg

(<25446x25446 sparse matrix of type '<class 'numpy.int64'>'
 	with 86460934 stored elements in COOrdinate format>,
 <7171x7171 sparse matrix of type '<class 'numpy.int64'>'
 	with 70999 stored elements in COOrdinate format>)

In [80]:
# Translate the COO row/column positions of leg_to_leg back to legislation titles.
# `.values` replaces `Series.as_matrix()`, which was deprecated and later
# removed from pandas; it yields the same NumPy array of labels.
leg_labels = case_leg_adj_df.columns.get_level_values(1).values
sources = leg_labels[leg_to_leg.row]
targets = leg_labels[leg_to_leg.col]
values = leg_to_leg.data

In [81]:
len(sources), len(targets), len(values)

(70999, 70999, 70999)

In [82]:
# Edge list of legislation titles referenced by the same case; `cnt` is the
# number of cases that reference both titles.
# NOTE(review): the rendered output below orders the columns cnt/source/target
# rather than in dict order — pass columns=['source', 'target', 'cnt'] if a
# fixed column order is needed.
leg_to_leg_df = pd.DataFrame(data={'source': sources, 'target': targets, 'cnt': values})
leg_to_leg_df.head()

Unnamed: 0,cnt,source,target
0,1,Aanbestedingswet 2012,"Aanbestedingswet 2012, Artikel 4.30"
1,1,Aanbestedingswet 2012,"Aanbestedingswet 2012, Artikel 1.4"
2,1,Aanbestedingswet 2012,Wet implementatie rechtsbeschermingsrichtlijne...
3,1,Aanbestedingswet 2012,Wet implementatie rechtsbeschermingsrichtlijne...
4,1,Aanbestedingswet 2012,Wet implementatie rechtsbeschermingsrichtlijne...


In [84]:
leg_to_leg_df.to_csv(os.path.join(inputpath, '2018-01-03 leg_to_leg_titles.csv'), index=False)

In [85]:
# Keep only title pairs referenced together by at least 10 cases and export
# them as a tab-separated file.
at_least_10 = leg_to_leg_df['cnt'] >= 10
leg_to_leg_df_min10 = leg_to_leg_df[at_least_10]
min10_path = os.path.join(inputpath, '2018-01-03 leg_to_leg_titles_min10.tsv')
leg_to_leg_df_min10.to_csv(min10_path, sep='\t', index=False)

In [None]:
# NOTE(review): unfinished placeholder — `case_to_case_min2` is not assigned in
# any visible cell, so running this cell would raise NameError. Presumably meant
# to hold the case-to-case links with cnt >= 2 (by analogy with
# leg_to_leg_df_min10) — confirm or delete.
case_to_case_min2 

In [86]:
# Translate the COO row/column positions of case_to_case back to case ids.
# `.values` replaces `Series.as_matrix()`, which was deprecated and later
# removed from pandas; it yields the same NumPy array of labels.
case_labels = case_leg_adj_df.index.values
sources = case_labels[case_to_case.row]
targets = case_labels[case_to_case.col]
values = case_to_case.data

In [88]:
# TODO: this is a very dense network (86,460,934 stored edges), so building
# this frame is memory-hungry even in the correct layout.
# Pass the arrays as named columns: a list of three arrays is read as three
# *rows* of len(sources) columns each, which does not match the three column
# names and forces one enormous object-typed block.
case_to_case_df = pd.DataFrame(
    {'source': sources, 'target': targets, 'cnt': values},
    columns=['source', 'target', 'cnt'])
case_to_case_df.head()

MemoryError: 

## Create subnetwork

In [3]:
def create_subnetwork(articles, legislation, nodes_in, links_in):
    """Restrict a case network to the cases that reference any of the given articles.

    articles: collection of legislation titles matched against legislation['title'].
    legislation: DataFrame with at least 'source' (case id) and 'title' columns.
    nodes_in: DataFrame of cases with an 'id' column.
    links_in: DataFrame of links with 'source' and 'target' columns.

    Returns (nodes_sub, links_sub): the rows of nodes_in whose id is a matching
    case, and the rows of links_in whose source and target are both matching cases.
    """
    cites_article = legislation['title'].isin(articles)
    case_ids = legislation.loc[cites_article, 'source'].unique()

    keep_node = nodes_in['id'].isin(case_ids)
    # A link survives only when both of its endpoints are selected cases.
    keep_link = links_in['source'].isin(case_ids) & links_in['target'].isin(case_ids)
    return nodes_in[keep_node], links_in[keep_link]

In [4]:
# Sub-network of the cases that reference Burgerlijk Wetboek Boek 7, Artikel 658 or Artikel 611.
# NOTE(review): this cell needs `legislation`, `cases` and `links` in the kernel.
# The NameError recorded below shows `legislation` was not defined when it ran
# (the execution counts restart at 3 in this section), and no visible cell
# assigns `links` — confirm where it is loaded.
nodes_wga, links_wga = create_subnetwork(
    ["Burgerlijk Wetboek Boek 7, Artikel 658", "Burgerlijk Wetboek Boek 7, Artikel 611"], 
    legislation, cases, links)
print(nodes_wga.shape, links_wga.shape)

NameError: name 'legislation' is not defined