In [2]:
import polars as pl
import requests
import os
import urllib.parse
from fuzzywuzzy import fuzz

# CourtListener API token, read from the CL_API_KEY environment variable.
# Indexing os.environ directly raises KeyError if the variable is unset,
# so the notebook fails here instead of sending unauthenticated requests.
API_KEY = os.environ["CL_API_KEY"]

In [3]:
# Load the two Westlaw search exports. Both reads pass
# truncate_ragged_lines=True so rows with extra trailing fields do not
# abort the parse.
fact_pattern = pl.read_csv(
    "./Cases/Westlaw/Fair Use Fact Pattern Precision Search.csv",
    truncate_ragged_lines=True,
)
defense = pl.read_csv(
    "./Cases/Westlaw/Fair Use Defense Precision Search.csv",
    truncate_ragged_lines=True,
)

In [None]:
# Stack both exports, keep only the identifying columns, and drop rows that
# are exact duplicates across those columns (a case can appear in both searches).
westlaw = (
    pl.concat([fact_pattern, defense])
    .select(["Title", "Court Line", "Citation", "Filed Date"])
    .unique()
)

westlaw.head()

Title,Court Line,Citation
str,str,str
"""Lombardo v. Dr…","""United States …","""279 F.Supp.3d …"
"""Hosseinzadeh v…","""United States …","""276 F.Supp.3d …"
"""TCA Television…","""United States …","""839 F.3d 168"""
"""Perfect 10, In…","""United States …","""508 F.3d 1146"""
"""Basic Books, I…","""United States …","""758 F.Supp. 15…"


In [None]:
def find_opinion_by_case_name(case_name):
    """Look up a case on CourtListener by name and return its top search hit.

    Sends the name, wrapped in single quotes, as the ``q`` parameter of the
    v4 search endpoint with ``type=o`` and the module-level ``API_KEY`` as a
    token header.

    Parameters
    ----------
    case_name : str
        Case title to search for (e.g. a Westlaw "Title" value).

    Returns
    -------
    tuple
        ``(absolute_url, caseName)`` of the first result, or ``(None, None)``
        when the response is not OK or contains no results. The return is
        always a 2-tuple so callers can index or unpack it unconditionally.

    Raises
    ------
    requests.RequestException
        Propagated from ``requests.get`` on connection failure or timeout.
    """
    response = requests.get(
        url="https://www.courtlistener.com/api/rest/v4/search/",
        # Let requests build and percent-encode the query string.
        params={"type": "o", "q": f"'{case_name}'"},
        headers={"Authorization": f"Token {API_KEY}"},
        # Bound the wait so one stalled request cannot hang a row-wise map.
        timeout=30,
    )

    if not response.ok:
        return None, None

    # Parse the body once; treat a missing or empty result list the same way.
    results = response.json().get("results") or []
    if not results:
        return None, None

    top_hit = results[0]
    return top_hit["absolute_url"], top_hit["caseName"]
        

In [82]:
# Query CourtListener once per Westlaw title; each result is stored in a
# temporary "cases" column that holds whatever find_opinion_by_case_name returned.
westlaw = westlaw.with_columns(
    cases=pl.col("Title").map_elements(find_opinion_by_case_name)
)

In [86]:
# Split the temporary "cases" pair into its two parts, then drop it:
# element 0 is the opinion URL and element 1 is CourtListener's case name.
westlaw = westlaw.with_columns(
    OpinionURL=pl.col("cases").map_elements(lambda pair: pair[0]),
    CourtListenerCaseName=pl.col("cases").map_elements(lambda pair: pair[1]),
).drop("cases")

In [None]:
# Score how closely each CourtListener case name matches the Westlaw title,
# then keep only the rows scoring below 95, worst matches first.
# NOTE: this overwrites `westlaw` with the filtered subset.
westlaw = (
    westlaw.with_columns(
        Match=pl.struct(["Title", "CourtListenerCaseName"]).map_elements(
            lambda row: fuzz.token_set_ratio(row["Title"], row["CourtListenerCaseName"])
        )
    )
    .filter(pl.col("Match") < 95)
    .sort("Match")
)

In [77]:
def find_opinion_by_citation(case_citation):
    """Resolve a citation string through CourtListener's citation-lookup endpoint.

    POSTs ``case_citation`` as the ``text`` form field, authenticated with the
    module-level ``API_KEY``.

    Parameters
    ----------
    case_citation : str
        Citation text to look up (e.g. ``"839 F.3d 168"``).

    Returns
    -------
    tuple or None
        ``(cluster_id, absolute_url, docket_id)`` taken from the first cluster
        when the response holds exactly one lookup entry whose ``status`` is
        200. ``None`` when the HTTP response is not OK, the response does not
        hold exactly one entry, that entry's status is not 200, or it lists
        no clusters.

    Raises
    ------
    requests.RequestException
        Propagated from ``requests.post`` on connection failure or timeout.
    """
    # Send a POST request; the citation travels in the form body.
    r = requests.post(
        url="https://www.courtlistener.com/api/rest/v4/citation-lookup/",
        headers={"Authorization": f"Token {API_KEY}"},
        data={"text": case_citation},
        # Bound the wait so one stalled request cannot hang the notebook.
        timeout=30,
    )

    if not r.ok:
        return None

    lookups = r.json()

    # Only an unambiguous single-entry response is accepted.
    if len(lookups) != 1:
        return None

    lookup = lookups[0]

    # Guard the empty-clusters case so indexing [0] cannot raise IndexError.
    if lookup["status"] != 200 or not lookup["clusters"]:
        return None

    cluster = lookup["clusters"][0]
    return cluster["id"], cluster["absolute_url"], cluster["docket_id"]