In [1]:
from eyecite import get_citations
from eyecite.models import FullCaseCitation, ShortCaseCitation
import polars as pl

In [2]:
## Get all case law: stack the two WestLaw CSV exports into a single frame
wl_match = pl.read_csv("./Cases/WestLawMatch.csv")
wl_not_match = pl.read_csv("./Cases/WestLawNotMatch.csv")
wl = pl.concat([wl_match, wl_not_match])

In [3]:
## Opinions for the cases that are matched
df = pl.read_csv("./Cases/CourtListenerOpinions.csv")
## Citation strings of all fair use cases, as a plain Python list
fair_use = wl["Citation"].to_list()

In [4]:
## Get Fair Use Case Matches
def get_fair_use_citations(doc, fair_use = fair_use):
    """Return the fair use citations that appear in a document.

    Parameters
    ----------
    doc : str or None
        Opinion text handed to eyecite's ``get_citations``. A null or empty
        document yields an empty set instead of being parsed.
    fair_use : iterable of str
        Citation strings to look for. Defaults to the notebook-level
        ``fair_use`` list as bound when this cell is executed.

    Returns
    -------
    set of str
        Matched citation texts from ``doc`` that are also in ``fair_use``.
    """
    # Nothing to scan: a missing or empty document cites nothing.
    if not doc:
        return set()

    # Keep only full and short case citations; other citation kinds that
    # eyecite reports are ignored.
    found = {
        c.matched_text()
        for c in get_citations(doc)
        if isinstance(c, (FullCaseCitation, ShortCaseCitation))
    }

    # NOTE(review): matching is exact string equality on matched_text(), so a
    # citation spelled differently from the entry in `fair_use` (e.g. reporter
    # spacing, or a short-form cite) presumably will not match -- confirm.
    return found.intersection(fair_use)

In [5]:
## Construct a list of all cited cases
# One set of fair use citations per opinion, computed from its "Document" text.
cited_cases_expr = pl.col("Document").map_elements(get_fair_use_citations).alias("CitedCases")
df = df.with_columns(cited_cases_expr)

In [7]:
# Preview the first five rows of the opinions frame.
df.head(n=5)

OpinionURL,CourtListenerCaseName,Citation,ClusterID,DocketID,SubOpinions,PrecedentialStatus,OpinionType,Document,CitedCases
str,str,str,i64,i64,str,str,str,str,object
"""/opinion/73319…","""Gym Door Repai…","""206 F.Supp.3d …",7331944,64320429,"""https://www.co…","""Published""","""020lead""","""<opinion type=…","{'471 U.S. 539', '206 F.Supp.3d 869'}"
"""/opinion/73316…","""Bell v. Moawad…","""326 F.Supp.3d …",7331657,64320141,"""https://www.co…","""Published""","""020lead""","""<opinion type=…","{'336 F.3d 811', '471 U.S. 539', '869 F.3d 848', '796 F.2d 1148', '464 U.S. 417', '227 F.3d 1110', '512 F.3d 522', '74 F.Supp.3d 605', '510 U.S. 569', '508 F.3d 1146', '725 F.3d 1170', '239 F.3d 1004', '447 F.3d 769'}"
"""/opinion/73319…","""Gym Door Repai…","""331 F.Supp.3d …",7331944,64320429,"""https://www.co…","""Published""","""020lead""","""<opinion type=…","{'471 U.S. 539', '206 F.Supp.3d 869'}"
"""/opinion/48608…","""Midlevelu, LLC…","""989 F.3d 1205""",4860821,59701417,"""https://www.co…","""Published""","""010combined""","""<pre class=""in…","{'804 F.3d 202', '471 U.S. 539', '755 F.3d 87', '902 F.2d 829', '510 U.S. 569', '918 F.3d 723', '495 U.S. 207'}"
"""/opinion/24565…","""Infinity Broad…","""150 F.3d 104""",2456581,2328910,"""https://www.co…","""Published""","""010combined""","""<div> <center>…",set()


In [8]:
## Obtain "adjacency list" of cases and their cited counterparts
df_citation = df.select(["CourtListenerCaseName", "CitedCases"])

citation_rel = (
    df_citation
    # Each cell holds a Python set; convert it to a list so it can be exploded.
    .with_columns(pl.col("CitedCases").map_elements(list))
    # One row per (citing case, cited citation) pair.
    .explode("CitedCases")
    .unique()
    # Sort on both columns: unique() does not keep row order and set iteration
    # order is arbitrary, so sorting on the case name alone would leave the
    # order of a case's citations different from run to run.
    .sort(["CourtListenerCaseName", "CitedCases"])
)

In [9]:
# Lookup table from a citation string to the title of the cited case.
name_citation_map = wl.select(
    [pl.col("Title").alias("CitedCaseName"), pl.col("Citation")]
).unique()

In [None]:
# Attach the cited case's name to every edge, then save the edge list.
cl_edges = citation_rel.rename({"CitedCases": "Citation"})
cl_named_edges = cl_edges.join(name_citation_map, on="Citation", how="inner")
cl_named_edges.write_csv("./Cases/CL_CitationRelationship.csv")

In [12]:
## For WestLaw No Match
wl_nm = pl.read_csv("./Cases/WLNM_Opinion.csv")

## Get citations
wl_nm = wl_nm.with_columns(
    pl.col("Opinion").map_elements(get_fair_use_citations).alias("CitedCases")
)

## Obtain "adjacency list" of cases and their cited counterparts
wl_nm_citation = wl_nm.select(["Title", "CitedCases"])

wl_nm_citation_rel = (
    wl_nm_citation
    # Each cell holds a Python set; convert it to a list so it can be exploded.
    .with_columns(pl.col("CitedCases").map_elements(list))
    # One row per (citing case, cited citation) pair.
    .explode("CitedCases")
    .unique()
    # Sort on both columns: unique() does not keep row order and set iteration
    # order is arbitrary, so sorting on the title alone would leave the order
    # of a case's citations different from run to run.
    .sort(["Title", "CitedCases"])
)

In [14]:
# Attach the cited case's name to every edge, then save the edge list.
wl_edges = wl_nm_citation_rel.rename({"CitedCases": "Citation"})
wl_named_edges = wl_edges.join(name_citation_map, on="Citation", how="inner")
wl_named_edges.write_csv("./Cases/WL_CitationRelationship.csv")