In [None]:
%load_ext rich

In [None]:
from corpus_x import Statute
from corpus_x.resources import STATUTE_PATH
# Bare last expression: echoes STATUTE_PATH as the cell output so the value can
# be checked before loading (presumably the statute source location; confirm).
STATUTE_PATH

In [3]:
from sqlpyd import Connection

# Database file used by every later cell through the shared connection `c`.
db_file = "x.db"
c = Connection(DatabasePath=db_file, WAL=True)

In [None]:
from corpus_pax.utils import delete_tables_with_prefix

# Prefixes handed to the cleanup helper; going by its name it removes the
# matching tables so the cells below can recreate them (verify in corpus_pax).
stale_prefixes = ["lex_tbl"]
delete_tables_with_prefix(c=c, target_prefixes=stale_prefixes)

In [None]:
# Create the statute tables on connection `c`; rows are added by a separate
# Statute.add_rows(c) call (presumably schema only here; confirm in corpus_x).
Statute.make_tables(c)

In [None]:
# Populate the statute tables through connection `c`.
# NOTE(review): presumably reads the files under STATUTE_PATH; confirm.
Statute.add_rows(c)

In [None]:
from corpus_x.statutes import StatuteFoundInUnit
# Fill in statute ids on StatuteFoundInUnit records.
# NOTE(review): presumably needs the statute rows to be loaded first; confirm.
StatuteFoundInUnit.update_statute_ids(c)

In [None]:
# CodeRow is imported here but not used in this cell.
from corpus_x.codifications import Codification, CodeRow
# Create the codification tables on connection `c`.
Codification.make_tables(c)

In [None]:
# Populate the codification tables through connection `c`.
Codification.add_rows(c)

In [None]:
from corpus_x.codifications import CodeRow

# Snapshot the ids first so the lazy SELECT behind `.rows` is fully consumed
# before any per-row call runs (set_update_units presumably writes back to the
# same table; reading and writing it concurrently is best avoided).
code_row_ids = [row["id"] for row in c.db[CodeRow.__tablename__].rows]

# Plain loop: the calls are made for their side effects, so there is no reason
# to build a list of return values and echo it as the cell output.
for code_row_id in code_row_ids:
    CodeRow.set_update_units(c, code_row_id)

## Corpus-X

### Preparatory steps from files to db

The pre-processed data can now be used to insert related Statutes and Citations of each Opinion back into the database. 

The statute and inclusion tables need to be created before the pre-processed data can be inserted.

In [None]:
from corpus_x.inclusions import Inclusion
# Create the inclusion tables on connection `c`.
# Ordering constraint: the statute tables must already exist at this point.
Inclusion.make_tables(c) # note that statutes need to exist first

### Move content from files to db

Collect the pre-processed data and insert it into the newly created database tables.

Estimate as of the end of 2022 (the figures depend on the following factors):

1. the last time data was scraped as raw files,
2. the time separate opinions were manually included

Result: roughly 484k `CitationInOpinion` and 99k `StatuteInOpinion` records.

In [None]:
from corpus_x.inclusions import populate_db_with_inclusions
# Insert the pre-processed inclusion data through connection `c`.
# NOTE(review): presumably requires Inclusion.make_tables(c) to have run; confirm.
populate_db_with_inclusions(c)

### Ensure existence of component elements

At this point the database holds references to statutes, but not the statutes themselves. In other words, the foreign key in the `StatuteInOpinion` table does not yet have a counterpart in the `StatuteRow` table.

Note that `CitationInOpinion` records will already have a counterpart in the `DecisionRow` table, since that table was populated first.

In [None]:
from corpus_x.inclusions import StatuteInOpinion, CitationInOpinion
# Three steps, in order: add statutes, then resolve statute ids, then resolve
# decision ids.
# NOTE(review): the same three calls also appear as separate cells further
# down in this notebook; confirm which set is meant to be run.
StatuteInOpinion.add_statutes(c) # takes 2-3 minutes to store 500 objects
StatuteInOpinion.update_statute_ids(c)
CitationInOpinion.update_decision_ids(c)

In [1]:
from corpus_x.inclusions import StatuteInOpinion, CitationInOpinion

Executing <Task pending name='Task-3' coro=<Kernel.dispatch_queue() running at /Users/mv/Code/corpus-x/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py:512> wait_for=<Future pending cb=[Task.task_wakeup()] created at /Users/mv/Code/corpus-x/.venv/lib/python3.11/site-packages/tornado/queues.py:248> cb=[IOLoop.add_future.<locals>.<lambda>() at /Users/mv/Code/corpus-x/.venv/lib/python3.11/site-packages/tornado/ioloop.py:687] created at /Users/mv/.pyenv/versions/3.11.0/lib/python3.11/asyncio/tasks.py:670> took 0.394 seconds


In [4]:
# Slow step: the recorded run took about 127 seconds.
# The recorded output reports "Did not make statute ..." with an IntegrityError
# (UNIQUE constraint on lex_tbl_statutes.id) for several statutes, so these
# failures are reported by the call rather than raised to the notebook.
StatuteInOpinion.add_statutes(c)

Did not make statute content_file=PosixPath('/Users/mv/code/corpus/statutes/ra/386/details.yaml'); e=IntegrityError('UNIQUE constraint failed: lex_tbl_statutes.id')
Did not make statute content_file=PosixPath('/Users/mv/code/corpus/statutes/act/3815/details.yaml'); e=IntegrityError('UNIQUE constraint failed: lex_tbl_statutes.id')
Did not make statute content_file=PosixPath('/Users/mv/code/corpus/statutes/const/1987/details.yaml'); e=IntegrityError('UNIQUE constraint failed: lex_tbl_statutes.id')
Did not make statute content_file=PosixPath('/Users/mv/code/corpus/statutes/ra/7659/details.yaml'); e=IntegrityError('UNIQUE constraint failed: lex_tbl_statutes.id')
Did not make statute content_file=PosixPath('/Users/mv/code/corpus/statutes/spain/penal/details.yaml'); e=IntegrityError('UNIQUE constraint failed: lex_tbl_statutes.id')
Did not make statute content_file=PosixPath('/Users/mv/code/corpus/statutes/roc/cpr/details.yaml'); e=IntegrityError('UNIQUE constraint failed: lex_tbl_statutes.id

Executing <Task pending name='Task-3' coro=<Kernel.dispatch_queue() running at /Users/mv/Code/corpus-x/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py:512> wait_for=<Future pending cb=[Task.task_wakeup()] created at /Users/mv/Code/corpus-x/.venv/lib/python3.11/site-packages/tornado/queues.py:248> cb=[IOLoop.add_future.<locals>.<lambda>() at /Users/mv/Code/corpus-x/.venv/lib/python3.11/site-packages/tornado/ioloop.py:687] created at /Users/mv/.pyenv/versions/3.11.0/lib/python3.11/asyncio/tasks.py:670> took 126.824 seconds


In [5]:
# Per the recorded output this returns a sqlite3.Cursor, which the notebook
# echoes because the call is the last expression in the cell.
StatuteInOpinion.update_statute_ids(c)

<sqlite3.Cursor at 0x1077e7c80>

Executing <Task pending name='Task-3' coro=<Kernel.dispatch_queue() running at /Users/mv/Code/corpus-x/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py:512> wait_for=<Future pending cb=[Task.task_wakeup()] created at /Users/mv/Code/corpus-x/.venv/lib/python3.11/site-packages/tornado/queues.py:248> cb=[IOLoop.add_future.<locals>.<lambda>() at /Users/mv/Code/corpus-x/.venv/lib/python3.11/site-packages/tornado/ioloop.py:687] created at /Users/mv/.pyenv/versions/3.11.0/lib/python3.11/asyncio/tasks.py:670> took 0.893 seconds


In [6]:
# Per the recorded output this returns a sqlite3.Cursor and took about
# 11 seconds in the recorded run.
CitationInOpinion.update_decision_ids(c)

<sqlite3.Cursor at 0x1125dc5f0>

Executing <Task pending name='Task-3' coro=<Kernel.dispatch_queue() running at /Users/mv/Code/corpus-x/.venv/lib/python3.11/site-packages/ipykernel/kernelbase.py:512> wait_for=<Future pending cb=[Task.task_wakeup()] created at /Users/mv/Code/corpus-x/.venv/lib/python3.11/site-packages/tornado/queues.py:248> cb=[IOLoop.add_future.<locals>.<lambda>() at /Users/mv/Code/corpus-x/.venv/lib/python3.11/site-packages/tornado/ioloop.py:687] created at /Users/mv/.pyenv/versions/3.11.0/lib/python3.11/asyncio/tasks.py:670> took 10.959 seconds


With the `StatuteRow` and `CitationRow` tables already in existence, we can proceed to add the `CodeRow` table.

### Add Codifications 

In [None]:
# CodeStatuteEvent is imported here but not used in this cell.
from corpus_x.codifications import Codification, CodeStatuteEvent
# NOTE(review): Codification.make_tables / add_rows are also called in earlier
# cells of this notebook; confirm that running both sets is intended.
Codification.make_tables(c)
Codification.add_rows(c) # takes about 1-2 minutes


Determine which Codifications are missing affector paths, i.e. where `item`, `caption`, or `content` was used improperly when matching an event to a Statute unit.

In [None]:
from corpus_x.codifications import CodeStatuteEvent

# Fetch the unmaterialized events and report only when the result is truthy;
# the message carries the count, details are left to a manual SQL review.
matches = CodeStatuteEvent.fetch_unmaterialized(c)
if matches:
    print(f"Violating {len(matches)=}; review violators via SQL.")