In [34]:
from __future__ import annotations

import sys
import platform

from pathlib import Path
from datetime import datetime

In [35]:
# Resolve the project root. When the working directory is a folder named
# "notebooks", its parent is used; otherwise the working directory itself.
root = Path.cwd().resolve()
root = root.parent if root.name == "notebooks" else root

# Data folders, all located under <root>/data.
data = root.joinpath("data")
raw, interim, processed = (
    data.joinpath(sub) for sub in ("raw", "interim", "processed")
)

# Report folders, all located under <root>/reports.
reports = root.joinpath("reports")
figures, tables, notes = (
    reports.joinpath(sub) for sub in ("figures", "tables", "notes")
)

print(f"Project Root: {root}")
print(f"Raw Data Folder: {raw}")

Project Root: /Users/Sumaitat/Coding Projects/ML_SupremeCourtOutcomePrediction
Raw Data Folder: /Users/Sumaitat/Coding Projects/ML_SupremeCourtOutcomePrediction/data/raw


In [36]:
# Paths that must exist for the project layout to be considered valid.
required = [root / entry for entry in ("README.md", "src", "notebooks")]
required += [data, reports]

# Keep only the entries that are absent on disk; fail with the full list.
missing = list(filter(lambda path: not path.exists(), required))
if missing:
    raise FileNotFoundError("Missing Paths:\n" + "\n".join(map(str, missing)))

print("Project structure is correct.")

Project structure is correct.


In [37]:
# Environment report: interpreter version and location, working directory,
# virtual-environment status and operating system.
print("Python Version:", sys.version.split()[0])
print("Python Executable:", sys.executable)
print("Working Directory:", Path.cwd().resolve())
# A venv/virtualenv is active when the running prefix differs from the base
# interpreter's prefix. Unlike a substring test on the executable path, this
# does not depend on the environment folder being named ".venv".
print("Virtual Environment Active:", sys.prefix != sys.base_prefix)
print("OS:", platform.platform())

Python Version: 3.14.2
Python Executable: /Users/Sumaitat/Coding Projects/ML_SupremeCourtOutcomePrediction/.venv/bin/python
Working Directory: /Users/Sumaitat/Coding Projects/ML_SupremeCourtOutcomePrediction/notebooks
Virtual Environment Active: True
OS: macOS-26.2-arm64-arm-64bit-Mach-O


In [38]:
# Raw dataset files expected inside the raw data folder.
conv_fp = raw / "supreme.conversations.txt"
votes_fp = raw / "supreme.votes.txt"
out_fp = raw / "supreme.outcome.txt"
gender_fp = raw / "supreme.gender.txt"

files = [conv_fp, votes_fp, out_fp, gender_fp]

# Report every missing file at once, with full paths, mirroring the project
# structure check. is_file() also rejects a directory that happens to carry
# one of the expected names.
missing_files = [fp for fp in files if not fp.is_file()]
if missing_files:
    raise FileNotFoundError(
        "Missing dataset files:\n" + "\n".join(str(fp) for fp in missing_files)
    )

for fp in files:
    print("Found file:", fp.name)

# Delimiter that separates the fields within each line of the dataset files.
sep = "+++$+++"


def peek(fp: Path, n: int = 2) -> list[str]:
    """Return up to the first ``n`` lines of a text file.

    Args:
        fp: Path of the file to read.
        n: Maximum number of lines to return (default 2). Values below 1
            yield an empty list.

    Returns:
        The leading lines with their trailing newline removed. The list is
        shorter than ``n`` (possibly empty) when the file has fewer lines,
        instead of raising ``StopIteration``.
    """
    # Local import so the function does not depend on an import made elsewhere.
    from itertools import islice

    # The file is decoded as UTF-8; undecodable bytes are replaced rather
    # than raising, so a quick look never fails on a stray byte.
    with fp.open("r", encoding="utf-8", errors="replace") as f:
        return [line.rstrip("\n") for line in islice(f, max(n, 0))]

Found file: supreme.conversations.txt
Found file: supreme.votes.txt
Found file: supreme.outcome.txt
Found file: supreme.gender.txt
Checking file formats.


In [39]:
# Inspect the first two lines of each dataset file: count the delimited
# fields per line and print a sample. `show` caps the number of sample
# characters printed; None means the whole line.
for name, fp, show in (
    ("Conversations", conv_fp, 160),
    ("Outcome", out_fp, None),
    ("Votes", votes_fp, 160),
    ("Gender", gender_fp, None),
):
    lines = peek(fp, 2)
    # Splitting on the delimiter yields one more field than there are
    # delimiter occurrences.
    cnt = [line.count(sep) + 1 for line in lines]
    print(f"{name} fields per line:", cnt)
    # A slice stop of None keeps the entire string, so a single expression
    # covers both the truncated and the untruncated samples.
    print("Sample:", lines[0][:show])

# Make the `src` package importable from this notebook. Only an import
# failure triggers the sys.path fallback; any other error raised while `src`
# is being executed propagates with its original traceback instead of being
# swallowed and retried.
try:
    import src  # noqa: F401
except ImportError:
    # Put the project root first on sys.path (once), then retry the import.
    if str(root) not in sys.path:
        sys.path.insert(0, str(root))
    import src  # noqa: F401
    print("Imports from src work after fixing sys.path.")
else:
    print("Imports from src work.")

# Create the report and intermediate-data folders if they are not there yet;
# existing folders are left as they are and missing parents are created too.
for out_dir in (figures, tables, notes, interim, processed):
    out_dir.mkdir(parents=True, exist_ok=True)

Conversations fields per line: [8, 8]
Sample: 02-1472 +++$+++ 2 +++$+++ FALSE +++$+++ JUSTICE STEVENS +++$+++ JUSTICE +++$+++ PETITIONER +++$+++  +++$+++ We will now hear argument in the Cherokee Nation aga
Outcome fields per line: [2, 2]
Sample: 04-373 +++$+++ RESPONDENT
Votes fields per line: [10, 10]
Sample: 02-1472 +++$+++ THOMAS::PETITIONER +++$+++ STEVENS::PETITIONER +++$+++ KENNEDY::PETITIONER +++$+++ REHNQUIST::NA +++$+++ GINSBURG::PETITIONER +++$+++ OCONNOR::P
Gender fields per line: [2, 2]
Sample: BRUNSTAD +++$+++ male
Imports from src work.
