In [1]:
# Quietly (-q) move the kernel's working directory two levels up so that the
# relative paths used in later cells ("api_log.db", "data/cap/...") resolve
# from there.
# NOTE(review): presumably that is the repository root -- confirm. The path is
# relative, so re-running this cell moves up another two levels.
%cd -q ../..

In [None]:
import asyncio
import json
import os
import sqlite3
from pathlib import Path
from dotenv import load_dotenv

import aiohttp

from scotus_metalang.diachronic_analysis import cap

In [40]:
# Retrieve all null author opinions from API based on opinions table
#
# Builds {selected_case_id: docket_number} for every case that has at least one
# opinions row whose cap_author is NULL. The join yields one row per such
# opinion; keying the dict by case id collapses those duplicates.
connection = sqlite3.connect("api_log.db")

# `with connection:` only scopes a transaction and never closes the handle, so
# close it explicitly once the read-only query has been fetched.
try:
    rows = connection.execute("""--sql
                              SELECT cases.selected_case_id, cases.docket_number
                              FROM cases
                              INNER JOIN opinions ON cases.docket_number = opinions.docket_number
                              WHERE opinions.cap_author IS NULL
                              """).fetchall()
finally:
    connection.close()

case_ids_with_null_author_op = {row[0]: row[1] for row in rows}
len(case_ids_with_null_author_op)

68

In [21]:
# load_dotenv() (python-dotenv) is expected to populate os.environ from a local
# .env file -- TODO confirm where that file lives relative to the working dir.
load_dotenv()
# Indexing os.environ (rather than .get) makes a missing token fail right here
# with KeyError instead of later as an authentication error.
CAP_TOKEN = os.environ["CAP_TOKEN"]


In [46]:
async def save_null_author_opinions(case_id, docket_number: str, session):
    """Fetch one case and write each of its author-less opinions to disk.

    The case JSON is obtained through ``cap.case_json_by_id``. Every opinion
    whose ``"author"`` is ``None`` is written as a small JSON document to
    ``data/cap/null_authors/<docket_number>_<i>.json``, where ``i`` is the
    opinion's position in the case's opinion list.

    Args:
        case_id: Identifier passed to ``cap.case_json_by_id``; stored in the
            output as ``"cap_id"``.
        docket_number: Docket number used in the output file name and stored
            in the output.
        session: Object forwarded unchanged to ``cap.case_json_by_id``.

    Returns:
        None.

    Raises:
        RuntimeError: If the case body status is anything other than ``"ok"``.
    """
    case_json = await cap.case_json_by_id(case_id, session)
    status = case_json["casebody"]["status"]

    if status != "ok":
        # E.g. 'error_limit_exceeded'
        raise RuntimeError(f"Bad API response status: {status}")

    if not cap.opinions_key_exists(case_json):
        print(f"opinion key doesn't exist for: {docket_number}")
        # Nothing to iterate over; indexing ["opinions"] below would presumably
        # raise KeyError right after this message, so stop here instead.
        return

    # The target directory does not depend on the opinion, so prepare it once.
    save_dir = Path("data/cap/null_authors")
    save_dir.mkdir(parents=True, exist_ok=True)

    for i, opinion in enumerate(case_json["casebody"]["data"]["opinions"]):
        if opinion["author"] is not None:
            continue

        simplified_json = {"cap_id": case_id, "docket_number": docket_number, "decision_date": case_json["decision_date"],
                           "author": None, "opinion_type": opinion["type"].lower(), "text": opinion["text"]}

        # `i` keeps file names unique when a case has several null-author opinions.
        with open(save_dir / f"{docket_number}_{i}.json", "w", encoding="utf-8") as f:
            json.dump(simplified_json, f)


In [45]:
async def main():
    """Download and save null-author opinions for every collected case.

    Opens a single authenticated aiohttp session and walks
    ``case_ids_with_null_author_op`` one case at a time, awaiting
    ``save_null_author_opinions`` for each and printing a progress line with
    the zero-based position afterwards. Returns None.
    """
    tcp_connector = aiohttp.TCPConnector(limit_per_host=10)
    auth_headers = {"Authorization": f"Token {CAP_TOKEN}"}
    async with aiohttp.ClientSession(connector=tcp_connector, headers=auth_headers) as session:
        pending = case_ids_with_null_author_op.items()
        for position, (case_id, docket_number) in enumerate(pending):
            # Cases are awaited sequentially, so at most one request is in flight.
            await save_null_author_opinions(case_id, docket_number, session)
            print("done with ", position)

In [None]:
# NOTE(review): unexecuted scratch cell comparing two ways of running main().
# Running it as-is performs the whole download twice: once via the direct
# await and once more via the task below.
# main() has no return statement, so `x` is None after this line.
x = await main()

# Schedule main() as a task on the running loop, wait for it, then read its
# result; `y` is None as well, for the same reason.
x = asyncio.create_task(main())
await x
y = x.result()

In [47]:
# Run the download loop; main() prints one "done with  <i>" line per case.
await main()

done with  0
done with  1
done with  2
done with  3
done with  4
done with  5
done with  6
done with  7
done with  8
done with  9
done with  10
done with  11
done with  12
done with  13
done with  14
done with  15
done with  16
done with  17
done with  18
done with  19
done with  20
done with  21
done with  22
done with  23
done with  24
done with  25
done with  26
done with  27
done with  28
done with  29
done with  30
done with  31
done with  32
done with  33
done with  34
done with  35
done with  36
done with  37
done with  38
done with  39
done with  40
done with  41
done with  42
done with  43
done with  44
done with  45
done with  46
done with  47
done with  48
done with  49
done with  50
done with  51
done with  52
done with  53
done with  54
done with  55
done with  56
done with  57
done with  58
done with  59
done with  60
done with  61
done with  62
done with  63
done with  64
done with  65
done with  66
done with  67


In [48]:
import csv

In [57]:
# Summarise every saved null-author opinion in fix.csv: one row per JSON file
# with its docket number, CAP id and the first 100 characters of the text.
# newline="" is what the csv module requires of files handed to csv.writer
# (otherwise extra blank lines can appear on Windows); UTF-8 is set explicitly
# so non-ASCII opinion text does not depend on the platform's default encoding.
with open("data/cap/null_authors/fix.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["docket_number", "cap_id", "first_100_chars"])
    # glob() yields files in arbitrary order; sorting keeps the row order stable
    # between runs.
    for filepath in sorted(Path("data/cap/null_authors").glob("*.json")):
        with open(filepath, "r", encoding="utf-8") as g:
            opinion = json.load(g)
        # Newlines in the preview are replaced by a single backslash so each
        # preview stays on one physical line.
        writer.writerow([opinion["docket_number"], opinion["cap_id"], opinion["text"][:100].replace("\n", "\\")])

In [None]:
# NOTE(review): unexecuted scratch cell. It cannot run as written: the two
# `return` statements further down sit at module level, outside any function,
# and `opinion_params` is never defined in this notebook.
load_dotenv()
CAP_TOKEN = os.environ["CAP_TOKEN"]
# NOTE(review): this redefines `main`; if the cell were executed it would
# replace the earlier `main` that loops over case_ids_with_null_author_op.
async def main():
    connector = aiohttp.TCPConnector(limit_per_host=10)
    headers={"Authorization": f"Token {CAP_TOKEN}"}
    async with aiohttp.ClientSession(connector=connector, headers=headers) as session:
        # `docket_number` is neither a parameter nor a local here; it would have
        # to exist as a notebook global.
        api_response = await cap.cases_by_docket_number(docket_number, session)
        case_id = cap.id_of_longest_casebody(api_response)
        # `case_json` is fetched but not used or returned by this function.
        case_json = await cap.case_json_by_id(case_id, session)
# Case not found
# NOTE(review): `api_response` is local to main() above, so it is not visible
# at this level; these lines look like fragments of a function body -- confirm
# where they were meant to live.
if api_response["count"] == 0:
    case_params = {"docket_number": docket_number,
                    "case_status": "not_found",
                    "selected_case_id": None,
                    "decision_date": None}
    return case_params, []



if (cap_author := opinion["author"]) is None:
        return opinion_params

In [None]:
# Scratch copy of the per-opinion record; it relies on `case_id`,
# `docket_number`, `case_json` and `opinion` already existing in the kernel.
simplified_json = {
    "cap_id": case_id,
    "docket_number": docket_number,
    "decision_date": case_json["decision_date"],
    "author": None,
    "opinion_type": opinion["type"].lower(),
    "text": opinion["text"],
}

In [None]:
# Save all null_author opinions to null_author folder

# Create CSV with [docket_number, author, text[:100]]

# Open CSV and add authors where applicable, otherwise n/a

# Read CSV and move/rename opinions that are not actually null into appropriate author folders