In [319]:
%%writefile  README.md
# The Investigation Kit (TIK) — Orwell-style Investigator Workbench

## Overview
TIK is a desktop investigation workbench that mirrors the UI/UX rhythm of *Orwell*: a two-pane layout (Reader/Listener/Insider/Profiler/Objectives/Log), auto-highlighted "datachunks" in sources, drag-drop of datachunks into a subject profile, conflict grouping & resolution, objectives evaluation, graph & timeline, and an advisor overlay.

### Why **DataChunk-first**
Instead of mutating entity fields directly, every extracted fact is stored as a **DataChunk** with provenance (source_id), confidence, and an optional `conflict_set_id`. This preserves auditability, enables reversible decisions, and allows parallel contradictory evidence until resolution rules (or the analyst) choose a winner.

## Architecture
- **UI (PyQt6)**: `ui/qt` — QMainWindow Shell → `QSplitter` with Left (Profile/Graph/Timeline tabs) & Right (Reader via `QWebEngineView`). Drag-drop from HTML `<span data-chunk>` into profile fields.
- **API (FastAPI)**: `backend/app` — routers for cases, entities, sources, chunks, objectives, graph. Services: `highlight` (spaCy+regex), `conflict` (group & winner), `objective_rules`.
- **DB (SQLAlchemy + Alembic)**: Core schema below; demo uses SQLite, Postgres ready.
- **Search**: simple ILIKE; trigram stub with upgrade path to ES/PG trigram.
- **NLP**: spaCy (optional at runtime) + regex to produce HTML with `<span draggable data-chunk='...'>`.

### Core Schema (simplified)
- `case`
- `entity` (type, attrs JSON)
- `source` (kind, raw, parsed_html, case_id, meta)
- `data_chunk` (field, value, source_id, entity_id?, confidence, conflict_set_id?, case_id)
- `edge` (src_entity, dst_entity, type, evidence JSON)
- `objective` (rule JSON, status)
- `audit` (who, what, when, details JSON)

## Flow
**Open Case → Reader highlight → DragDrop → Commit DataChunk → Conflict grouping → Objectives evaluate → Graph/Timeline update → Advisor overlay**

## Quickstart
```bash
make setup
make db.init
make db.seed
make dev
```


Overwriting README.md


In [320]:
%%writefile requirements.txt
fastapi==0.111.0
uvicorn[standard]==0.30.1
pydantic==2.7.3
pydantic-settings==2.3.4
SQLAlchemy==2.0.31
alembic==1.13.2
psycopg2-binary==2.9.9
jinja2==3.1.4
orjson==3.10.6
python-dotenv==1.0.1
loguru==0.7.2
httpx==0.27.0

# NLP
spacy==3.7.4
# demo uses en_core_web_sm via scripts/load_spacy.sh

# UI
PyQt6==6.7.0
PyQt6-WebEngine==6.7.0
networkx==3.3

# Tests
pytest==8.2.0


Overwriting requirements.txt


In [321]:
%%writefile .env.example
# Database: choose one
SQLITE_URL=sqlite:///./tik_demo.db
# Example Postgres URL (uncomment to use)
# POSTGRES_URL=postgresql+psycopg2://user:password@localhost:5432/tik

API_HOST=127.0.0.1
API_PORT=8000

SPACY_MODEL=en_core_web_sm
LOG_LEVEL=INFO


Overwriting .env.example


In [322]:
%%writefile backend/app/main.py
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from loguru import logger

from backend.app.config import settings
from backend.app.db import init_db
from backend.app.routers import cases, entities, sources, chunks, objectives, graph

app = FastAPI(title="TIK API", version="0.1.0")

# CORS policy. A wildcard origin must not be combined with credentialed
# requests: that would let any website call the API with the user's cookies.
# Credentials are therefore disabled while every origin is allowed; to enable
# them, replace "*" with an explicit list of trusted origins.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False, allow_methods=["*"], allow_headers=["*"],
)

@app.on_event("startup")
def on_startup():
    """Configure loguru at the configured level, probe the database, log readiness."""

    def _stdout_sink(msg):
        # loguru messages already end with a newline, so none is appended here.
        print(msg, end="")

    logger.remove()  # drop loguru's default handler before installing ours
    logger.add(_stdout_sink, level=settings.LOG_LEVEL)
    init_db()
    logger.info("API started")

@app.get("/health")
def health():
    """Liveness probe: always answers with a fixed OK payload."""
    payload = {"status": "ok"}
    return payload

# Mount every feature router under "/<name>" and tag it with the same name.
for _router_module, _router_name in (
    (cases, "cases"),
    (entities, "entities"),
    (sources, "sources"),
    (chunks, "chunks"),
    (objectives, "objectives"),
    (graph, "graph"),
):
    app.include_router(_router_module.router, prefix=f"/{_router_name}", tags=[_router_name])


Overwriting backend/app/main.py


In [323]:
%%writefile backend/app/config.py
from pydantic_settings import BaseSettings, SettingsConfigDict

class Settings(BaseSettings):
    """Application settings read from the environment and an optional `.env` file."""

    SQLITE_URL: str = "sqlite:///./tik_demo.db"
    POSTGRES_URL: str | None = None
    API_HOST: str = "127.0.0.1"
    API_PORT: int = 8000
    SPACY_MODEL: str = "en_core_web_sm"
    LOG_LEVEL: str = "INFO"

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    @property
    def database_url(self) -> str:
        """Return the Postgres URL when one is set (non-empty), else the SQLite URL."""
        if self.POSTGRES_URL:
            return self.POSTGRES_URL
        return self.SQLITE_URL

# Module-level settings instance, built once at import time and shared by importers.
settings = Settings()


Overwriting backend/app/config.py


In [324]:
%%writefile backend/app/db.py
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker, DeclarativeBase
from backend.app.config import settings

class Base(DeclarativeBase):
    """Declarative base class that all ORM models in this project inherit from."""

# One engine per process, bound to whichever URL the settings resolve to.
engine = create_engine(
    settings.database_url,
    future=True,
)
# Session factory: callers flush and commit explicitly.
SessionLocal = sessionmaker(
    bind=engine,
    autoflush=False,
    autocommit=False,
    future=True,
)

def get_db():
    """Yield a fresh session and close it once the consumer is finished.

    Written as a generator so it can be used as a FastAPI dependency; the
    session is closed whether or not the consumer raised.
    """
    with SessionLocal() as session:
        yield session

def init_db() -> None:
    """Open and immediately release a connection to verify the engine is reachable.

    No tables are created here; real DDL is applied via Alembic.
    """
    probe = engine.connect()
    probe.close()


Overwriting backend/app/db.py


In [325]:
%%writefile backend/app/models.py
from __future__ import annotations
from sqlalchemy import String, Integer, Float, ForeignKey, JSON, DateTime, Text
from sqlalchemy.orm import Mapped, mapped_column, relationship
from datetime import datetime
from backend.app.db import Base

class Case(Base):
    """An investigation case: the parent row that entities and sources reference.

    Deleting a case through the ORM cascades to its entities and sources.
    """
    __tablename__ = "cases"
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    # Case names are unique across the whole table.
    name: Mapped[str] = mapped_column(String(200), unique=True, nullable=False)
    # Naive timestamp taken from datetime.utcnow at insert time.
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)

    entities: Mapped[list[Entity]] = relationship(back_populates="case", cascade="all,delete")
    sources: Mapped[list[Source]] = relationship(back_populates="case", cascade="all,delete")

class Entity(Base):
    """A subject within a case (type defaults to "person") with free-form JSON attrs."""
    __tablename__ = "entities"
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    case_id: Mapped[int] = mapped_column(ForeignKey("cases.id"), index=True)
    type: Mapped[str] = mapped_column(String(50), default="person")
    name: Mapped[str] = mapped_column(String(200))
    # Plain JSON column: in-place mutation of the dict is not change-tracked,
    # so writers must assign a new dict for an update to be persisted.
    attrs: Mapped[dict] = mapped_column(JSON, default=dict)

    case: Mapped[Case] = relationship(back_populates="entities")
    chunks: Mapped[list[DataChunk]] = relationship(back_populates="entity", cascade="all,delete")

class Source(Base):
    """A document attached to a case: raw text plus optional pre-rendered highlight HTML."""
    __tablename__ = "sources"
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    case_id: Mapped[int] = mapped_column(ForeignKey("cases.id"), index=True)
    kind: Mapped[str] = mapped_column(String(50))  # news|chat|email|post
    title: Mapped[str] = mapped_column(String(300))
    raw: Mapped[str] = mapped_column(Text)
    # Cached highlight HTML; NULL until something fills it in.
    parsed_html: Mapped[str | None] = mapped_column(Text, nullable=True)
    # Free-form metadata; the timeline endpoint reads its "timestamp" key.
    meta: Mapped[dict] = mapped_column(JSON, default=dict)

    case: Mapped[Case] = relationship(back_populates="sources")
    chunks: Mapped[list[DataChunk]] = relationship(back_populates="source", cascade="all,delete")

class DataChunk(Base):
    """One extracted fact (field/value) with its originating source and a confidence.

    `entity_id` is optional, so a chunk may exist without being attached to an
    entity. `conflict_set_id` groups chunks that contradict each other.
    """
    __tablename__ = "data_chunks"
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    case_id: Mapped[int] = mapped_column(ForeignKey("cases.id"), index=True)
    source_id: Mapped[int] = mapped_column(ForeignKey("sources.id"), index=True)
    entity_id: Mapped[int | None] = mapped_column(ForeignKey("entities.id"), nullable=True)
    field: Mapped[str] = mapped_column(String(100))
    value: Mapped[str] = mapped_column(String(500))
    confidence: Mapped[float] = mapped_column(Float, default=0.5)
    # NULL while the chunk is not part of any conflict group.
    conflict_set_id: Mapped[str | None] = mapped_column(String(64), nullable=True)
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)

    source: Mapped[Source] = relationship(back_populates="chunks")
    entity: Mapped[Entity | None] = relationship(back_populates="chunks")

class Edge(Base):
    """A typed, directed link between two entities of a case, with JSON evidence."""
    __tablename__ = "edges"
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    case_id: Mapped[int] = mapped_column(ForeignKey("cases.id"), index=True)
    src_entity_id: Mapped[int] = mapped_column(ForeignKey("entities.id"))
    dst_entity_id: Mapped[int] = mapped_column(ForeignKey("entities.id"))
    type: Mapped[str] = mapped_column(String(50))
    # List of evidence records; the record schema is not defined in this module.
    evidence: Mapped[list[dict]] = mapped_column(JSON, default=list)

class Objective(Base):
    """A named goal for a case, expressed as a JSON rule with a stored status."""
    __tablename__ = "objectives"
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    case_id: Mapped[int] = mapped_column(ForeignKey("cases.id"), index=True)
    name: Mapped[str] = mapped_column(String(200))
    rule: Mapped[dict] = mapped_column(JSON, default=dict)  # e.g., {"all_of":[{"field_exists":["dob"]}]}
    status: Mapped[str] = mapped_column(String(20), default="pending")  # pending|met|failed

class Audit(Base):
    """Audit-trail row: which actor performed which action on a case, and when."""
    __tablename__ = "audit"
    id: Mapped[int] = mapped_column(Integer, primary_key=True)
    case_id: Mapped[int] = mapped_column(ForeignKey("cases.id"), index=True)
    action: Mapped[str] = mapped_column(String(100))
    # Defaults to "system" when the writer does not name an actor.
    actor: Mapped[str] = mapped_column(String(100), default="system")
    details: Mapped[dict] = mapped_column(JSON, default=dict)
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)


Overwriting backend/app/models.py


In [326]:
%%writefile backend/app/schemas.py
from pydantic import BaseModel, Field
from typing import Any

# Request body for creating a case.
class CaseIn(BaseModel):
    name: str

# Response shape for a case: its id and name.
class CaseOut(BaseModel):
    id: int
    name: str

# Request body for creating an entity inside a case.
class EntityIn(BaseModel):
    case_id: int
    type: str = "person"
    name: str
    # default_factory gives every instance its own empty dict.
    attrs: dict[str, Any] = Field(default_factory=dict)

# Response shape for an entity, including its attrs dict.
class EntityOut(BaseModel):
    id: int
    case_id: int
    type: str
    name: str
    attrs: dict[str, Any]

# Request body for creating a source; `raw` carries the document text.
class SourceIn(BaseModel):
    case_id: int
    kind: str
    title: str
    raw: str

# Response shape for a source; omits the raw text and the rendered HTML.
class SourceOut(BaseModel):
    id: int
    case_id: int
    kind: str
    title: str

# Request body for committing a data chunk; entity_id may be omitted.
class ChunkIn(BaseModel):
    case_id: int
    source_id: int
    entity_id: int | None = None
    field: str
    value: str
    confidence: float = 0.5

# Response shape for a committed chunk.
class ChunkOut(BaseModel):
    id: int
    field: str
    value: str
    conflict_set_id: str | None = None
    # Outcome of conflict detection as reported by the chunks router.
    status: str

# Request body for creating or updating an objective of a case.
class ObjectiveIn(BaseModel):
    case_id: int
    name: str
    rule: dict

# Response shape for an objective: its current status and its rule.
class ObjectiveOut(BaseModel):
    id: int
    name: str
    status: str
    rule: dict


Overwriting backend/app/schemas.py


In [327]:
%%writefile backend/app/repo.py
from __future__ import annotations
from sqlalchemy.orm import Session
from sqlalchemy import select, update
from backend.app import models
from backend.app.services.conflict import detect_and_group_conflict

def create_case(db: Session, name: str) -> models.Case:
    """Insert a case with the given name, commit, and return the refreshed row."""
    case = models.Case(name=name)
    db.add(case)
    db.commit()
    db.refresh(case)
    return case

def list_cases(db: Session) -> list[models.Case]:
    """Return every case, ordered by id."""
    stmt = select(models.Case).order_by(models.Case.id)
    rows = db.scalars(stmt)
    return list(rows)

def create_entity(db: Session, data: dict) -> models.Entity:
    """Build an Entity from the keyword dict, commit it, and return the refreshed row."""
    entity = models.Entity(**data)
    db.add(entity)
    db.commit()
    db.refresh(entity)
    return entity

def get_entity(db: Session, entity_id: int) -> models.Entity | None:
    """Look up an entity by primary key; None when no such row exists."""
    entity = db.get(models.Entity, entity_id)
    return entity

def update_entity_attrs(db: Session, entity_id: int, attrs: dict) -> models.Entity | None:
    """Merge `attrs` into an entity's attribute dict and persist the result.

    Args:
        db: Active session.
        entity_id: Primary key of the entity to update.
        attrs: Keys to add or overwrite; existing keys not listed are kept.

    Returns:
        The refreshed entity, or None when no entity has that id.
    """
    ent = db.get(models.Entity, entity_id)
    if not ent:
        return None
    # The attrs column is a plain JSON type, so mutating the dict in place is
    # not detected by the ORM and the commit would write nothing. Assigning a
    # new dict marks the attribute as changed.
    ent.attrs = {**(ent.attrs or {}), **attrs}
    db.commit()
    db.refresh(ent)
    return ent

def create_source(db: Session, data: dict) -> models.Source:
    """Build a Source from the keyword dict, commit it, and return the refreshed row."""
    source = models.Source(**data)
    db.add(source)
    db.commit()
    db.refresh(source)
    return source

def list_sources(db: Session, case_id: int) -> list[models.Source]:
    """Return the sources of one case, ordered by id."""
    belongs_to_case = models.Source.case_id == case_id
    query = select(models.Source).where(belongs_to_case).order_by(models.Source.id)
    return list(db.scalars(query))

def get_source(db: Session, source_id: int) -> models.Source | None:
    """Look up a source by primary key; None when no such row exists."""
    source = db.get(models.Source, source_id)
    return source

def commit_chunk(db: Session, data: dict) -> tuple[models.DataChunk, str]:
    """Store a new data chunk and run conflict detection on it.

    Returns the refreshed chunk together with the status string produced by
    `detect_and_group_conflict`.
    """
    chunk = models.DataChunk(**data)
    db.add(chunk)
    # Flush before detection so the chunk has an id to exclude from the lookup.
    db.flush()
    outcome = detect_and_group_conflict(db, chunk)
    db.commit()
    db.refresh(chunk)
    return chunk, outcome

def list_objectives(db: Session, case_id: int) -> list[models.Objective]:
    """Return the objectives of one case, ordered by id."""
    belongs_to_case = models.Objective.case_id == case_id
    query = select(models.Objective).where(belongs_to_case).order_by(models.Objective.id)
    return list(db.scalars(query))

def upsert_objective(db: Session, case_id: int, name: str, rule: dict) -> models.Objective:
    """Create the objective (case_id, name), or replace its rule if it already exists."""
    lookup = select(models.Objective).where(
        models.Objective.case_id==case_id,
        models.Objective.name==name,
    )
    objective = db.scalars(lookup).first()
    if not objective:
        objective = models.Objective(case_id=case_id, name=name, rule=rule)
        db.add(objective)
    else:
        objective.rule = rule
    db.commit()
    db.refresh(objective)
    return objective


Overwriting backend/app/repo.py


In [328]:
%%writefile backend/app/routers/cases.py
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from backend.app.db import get_db
from backend.app.schemas import CaseIn, CaseOut
from backend.app import repo

# Router holding the case endpoints defined in this module.
router = APIRouter()

@router.get("/", response_model=list[CaseOut])
def list_cases(db: Session = Depends(get_db)):
    """List all cases as id/name pairs."""
    result = []
    for case in repo.list_cases(db):
        result.append(CaseOut(id=case.id, name=case.name))
    return result

@router.post("/", response_model=CaseOut)
def create_case(data: CaseIn, db: Session = Depends(get_db)):
    """Create a case.

    Case names are unique, so a duplicate name is answered with HTTP 409
    instead of surfacing as an unhandled database error.
    """
    # Local imports keep this module's import block unchanged.
    from fastapi import HTTPException
    from sqlalchemy.exc import IntegrityError

    try:
        c = repo.create_case(db, data.name)
    except IntegrityError:
        # Leave the session usable for the dependency's cleanup.
        db.rollback()
        raise HTTPException(409, f"Case '{data.name}' already exists")
    return CaseOut(id=c.id, name=c.name)


Overwriting backend/app/routers/cases.py


In [329]:
%%writefile backend/app/routers/entities.py
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
from backend.app.db import get_db
from backend.app.schemas import EntityIn, EntityOut
from backend.app import repo

# Router holding the entity endpoints defined in this module.
router = APIRouter()

@router.post("/", response_model=EntityOut)
def create_entity(data: EntityIn, db: Session = Depends(get_db)):
    """Create an entity from the request body and return it."""
    payload = data.model_dump()
    entity = repo.create_entity(db, payload)
    return EntityOut(
        id=entity.id,
        case_id=entity.case_id,
        type=entity.type,
        name=entity.name,
        attrs=entity.attrs,
    )

@router.get("/{entity_id}", response_model=EntityOut)
def get_entity(entity_id: int, db: Session = Depends(get_db)):
    """Return one entity, or 404 when the id is unknown."""
    entity = repo.get_entity(db, entity_id)
    if not entity:
        raise HTTPException(404, "Entity not found")
    return EntityOut(
        id=entity.id,
        case_id=entity.case_id,
        type=entity.type,
        name=entity.name,
        attrs=entity.attrs,
    )

@router.patch("/{entity_id}/attrs", response_model=EntityOut)
def patch_attrs(entity_id: int, attrs: dict, db: Session = Depends(get_db)):
    """Merge the request body into an entity's attrs; 404 when the id is unknown."""
    entity = repo.update_entity_attrs(db, entity_id, attrs)
    if not entity:
        raise HTTPException(404, "Entity not found")
    return EntityOut(
        id=entity.id,
        case_id=entity.case_id,
        type=entity.type,
        name=entity.name,
        attrs=entity.attrs,
    )


Overwriting backend/app/routers/entities.py


In [330]:
%%writefile backend/app/routers/sources.py
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
from backend.app.db import get_db
from backend.app.schemas import SourceIn, SourceOut
from backend.app import repo
from backend.app.services.highlight import parse_to_highlight_html

# Router holding the source endpoints defined in this module.
router = APIRouter()

@router.post("/", response_model=SourceOut)
def create_source(data: SourceIn, db: Session = Depends(get_db)):
    """Create a source from the request body and return its summary."""
    payload = data.model_dump()
    source = repo.create_source(db, payload)
    return SourceOut(
        id=source.id,
        case_id=source.case_id,
        kind=source.kind,
        title=source.title,
    )

@router.get("/by_case/{case_id}", response_model=list[SourceOut])
def list_sources(case_id: int, db: Session = Depends(get_db)):
    """List the summaries of all sources attached to a case."""
    summaries = []
    for source in repo.list_sources(db, case_id):
        summaries.append(
            SourceOut(id=source.id, case_id=source.case_id, kind=source.kind, title=source.title)
        )
    return summaries

@router.get("/{source_id}/html")
def get_highlight_html(source_id: int, db: Session = Depends(get_db)):
    """Return the highlight HTML of a source; 404 when the id is unknown.

    Stored HTML is served when present; otherwise it is rendered on the fly
    (the rendered result is not written back).
    """
    source = repo.get_source(db, source_id)
    if not source:
        raise HTTPException(404, "Source not found")
    rendered = source.parsed_html
    if not rendered:
        rendered = parse_to_highlight_html(source)
    return {"html": rendered}


Overwriting backend/app/routers/sources.py


In [331]:
%%writefile backend/app/routers/chunks.py
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from backend.app.db import get_db
from backend.app.schemas import ChunkIn, ChunkOut
from backend.app import repo

# Router holding the data-chunk endpoints defined in this module.
router = APIRouter()

@router.post("/", response_model=ChunkOut)
def commit_chunk(data: ChunkIn, db: Session = Depends(get_db)):
    """Persist a data chunk and report the conflict-detection outcome."""
    stored, outcome = repo.commit_chunk(db, data.model_dump())
    return ChunkOut(
        id=stored.id,
        field=stored.field,
        value=stored.value,
        conflict_set_id=stored.conflict_set_id,
        status=outcome,
    )


Overwriting backend/app/routers/chunks.py


In [332]:
%%writefile backend/app/routers/objectives.py
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from backend.app.db import get_db
from backend.app.schemas import ObjectiveIn, ObjectiveOut
from backend.app import repo
from backend.app.services.objective_rules import evaluate_objectives

# Router holding the objective endpoints defined in this module.
router = APIRouter()

@router.get("/by_case/{case_id}", response_model=list[ObjectiveOut])
def list_objectives(case_id: int, db: Session = Depends(get_db)):
    """List the objectives of a case with their stored status."""
    listing = []
    for objective in repo.list_objectives(db, case_id):
        listing.append(
            ObjectiveOut(id=objective.id, name=objective.name, status=objective.status, rule=objective.rule)
        )
    return listing

@router.post("/", response_model=ObjectiveOut)
def upsert(obj: ObjectiveIn, db: Session = Depends(get_db)):
    """Create an objective, or replace the rule of the one with the same case and name."""
    saved = repo.upsert_objective(db, obj.case_id, obj.name, obj.rule)
    return ObjectiveOut(
        id=saved.id,
        name=saved.name,
        status=saved.status,
        rule=saved.rule,
    )

@router.post("/evaluate/{case_id}", response_model=list[ObjectiveOut])
def evaluate(case_id: int, db: Session = Depends(get_db)):
    """Re-evaluate every objective of a case and return the updated list."""
    evaluated = evaluate_objectives(db, case_id)
    listing = []
    for objective in evaluated:
        listing.append(
            ObjectiveOut(id=objective.id, name=objective.name, status=objective.status, rule=objective.rule)
        )
    return listing


Overwriting backend/app/routers/objectives.py


In [333]:
%%writefile backend/app/routers/graph.py
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from sqlalchemy import select
from backend.app.db import get_db
from backend.app import models

# Router holding the graph and timeline endpoints defined in this module.
router = APIRouter()

@router.get("/nodes_edges/{case_id}")
def nodes_edges(case_id: int, db: Session = Depends(get_db)):
    """Return the entities of a case as graph nodes and its edges as links."""
    entity_query = select(models.Entity).where(models.Entity.case_id==case_id)
    edge_query = select(models.Edge).where(models.Edge.case_id==case_id)
    entity_rows = list(db.scalars(entity_query))
    edge_rows = list(db.scalars(edge_query))

    nodes = []
    for entity in entity_rows:
        nodes.append({"id": entity.id, "label": entity.name, "type": entity.type})

    links = []
    for edge in edge_rows:
        links.append(
            {"id": edge.id, "src": edge.src_entity_id, "dst": edge.dst_entity_id, "type": edge.type}
        )

    return {"nodes": nodes, "edges": links}

@router.get("/timeline/{case_id}")
def timeline(case_id: int, db: Session = Depends(get_db)):
    """Return the sources of a case ordered by their meta "timestamp".

    Sources without a timestamp sort first, because a missing value is
    compared as the empty string.
    """
    query = select(models.Source).where(models.Source.case_id==case_id)
    rows = list(db.scalars(query))
    events = [
        {
            "id": row.id,
            "title": row.title,
            "kind": row.kind,
            "timestamp": row.meta.get("timestamp") if row.meta else None,
        }
        for row in rows
    ]
    return sorted(events, key=lambda event: event.get("timestamp") or "")


Overwriting backend/app/routers/graph.py


In [334]:
%%writefile backend/app/services/highlight.py
from __future__ import annotations
import json, re
from jinja2 import Template
from loguru import logger
from typing import Iterable
try:
    import spacy
except Exception:
    spacy = None

# Jinja2 page template for the Reader pane. `title` is rendered as-is and
# `body` is marked `|safe`, so both must already be HTML-safe when passed in.
# The inline script makes every `.chunk` element draggable and puts its
# `data-chunk` attribute on the drag payload as JSON and as plain text.
# NOTE(review): the constant name is spelled "HMTL" (sic); other functions in
# this module reference it under that spelling.
HMTL_TEMPLATE = """
<!doctype html><html><head><meta charset="utf-8">
<style>body{font-family:system-ui;margin:12px;} .chunk{background:#e6f2ff;border-radius:4px;padding:0 2px;cursor:grab}</style>
</head><body>
<h3>{{ title }}</h3>
<div id="content">{{ body|safe }}</div>
<script>
document.querySelectorAll('.chunk').forEach(function(el){
  el.setAttribute('draggable','true');
  el.addEventListener('dragstart', function(e){
    const payload = el.getAttribute('data-chunk');
    try{
      e.dataTransfer.setData('application/json', payload);
      e.dataTransfer.setData('text/plain', payload);
    }catch(err){}
  });
});
</script>
</body></html>
"""

def _regex_spans(text: str) -> Iterable[tuple[int,int,str,str]]:
    patterns = [
        (r"\b(\d{4}-\d{2}-\d{2})\b", "dob"),
        (r"\b\d{1,3}\s+[A-Za-z][\w\s]+(Street|St|Ave|Road|Rd)\b", "address"),
    ]
    for pat, field in patterns:
        for m in re.finditer(pat, text):
            yield m.start(1) if m.lastindex else m.start(), m.end(), field, m.group(0)

def _spacy_spans(text: str):
    """Return `(start, end, "name", text)` for every PERSON entity spaCy finds.

    Returns an empty list when spaCy is not installed or no pipeline can be
    built. The pipeline is loaded once and cached on the function object.
    """
    if spacy is None:
        return []
    nlp = getattr(_spacy_spans, "_nlp", None)
    if nlp is None:
        try:
            # A blank pipeline has no NER component and never yields entities,
            # so load the configured trained model.
            from backend.app.config import settings
            nlp = spacy.load(settings.SPACY_MODEL)
        except Exception:
            # Model missing or config unavailable: fall back to the blank
            # pipeline so highlighting still works on regex hits alone.
            logger.warning("spaCy model unavailable; name highlighting disabled")
            try:
                nlp = spacy.blank("en")
            except Exception:
                return []
        _spacy_spans._nlp = nlp
    doc = nlp(text)
    return [
        (ent.start_char, ent.end_char, "name", ent.text)
        for ent in doc.ents
        if ent.label_ in ("PERSON",)
    ]

def _inject_spans(text: str, title: str, case_id: int, source_id: int) -> str:
    """Render `text` as an HTML page with every detected span wrapped in a chunk.

    Each hit becomes `<span class='chunk' data-chunk='…'>` whose attribute
    holds the JSON payload (case_id, source_id, field, value, confidence) that
    the drag script puts on the drag event.

    Source text, title and the JSON attribute are HTML-escaped, so markup or
    quotes inside a document cannot break out of the page structure. When two
    detected spans overlap, only the one that starts first is emitted.
    """
    from html import escape  # local import: the local list used to shadow `html`

    spans = list(_regex_spans(text)) + list(_spacy_spans(text))
    # Earliest start first; for equal starts prefer the longer span.
    spans.sort(key=lambda x: (x[0], -x[1]))
    parts = []
    i = 0
    for s, e, field, val in spans:
        if s < i:
            # Overlaps a span already emitted; wrapping it too would repeat text.
            continue
        parts.append(escape(text[i:s]))
        payload = {"case_id": case_id, "source_id": source_id, "field": field, "value": val, "confidence": 0.7}
        # quote=True also escapes "'" so the single-quoted attribute stays
        # intact; the browser hands the original JSON back via getAttribute().
        data = escape(json.dumps(payload), quote=True)
        parts.append(f"<span class='chunk' data-chunk='{data}'>{escape(text[s:e])}</span>")
        i = e
    parts.append(escape(text[i:]))
    return Template(HMTL_TEMPLATE).render(title=escape(title), body="".join(parts))

def parse_to_highlight_html(source) -> str:
    """Return highlight HTML for `source` (reads `.raw`, `.title`, `.case_id`, `.id`).

    If span injection fails for any reason the error is logged and the page is
    rendered without highlights. That fallback HTML-escapes the title and raw
    text, because the template inserts the body without escaping.
    """
    from html import escape

    try:
        return _inject_spans(source.raw, source.title, source.case_id, source.id)
    except Exception:
        logger.exception("highlight error")
        return Template(HMTL_TEMPLATE).render(title=escape(source.title), body=escape(source.raw))


Overwriting backend/app/services/highlight.py


In [335]:
%%writefile backend/app/services/conflict.py
from __future__ import annotations
from sqlalchemy.orm import Session
from sqlalchemy import select, update
from backend.app import models
import uuid

def detect_and_group_conflict(db: Session, chunk: models.DataChunk) -> str:
    """Group `chunk` with contradicting chunks of the same entity and field.

    A conflict exists when another chunk in the same case, for the same entity
    and field, carries a different value. Conflicting chunks share one
    `conflict_set_id`: an existing id is reused, otherwise a new one is
    generated. Changes are made on the session objects; the caller commits.

    Returns:
        "conflict" when the chunk was grouped, otherwise "ok".
    """
    # A chunk not attached to any entity cannot contradict a subject's profile.
    # Without this guard the entity filter matches every other unassigned
    # chunk (NULL = NULL is rendered as IS NULL) and flags unrelated facts.
    if chunk.entity_id is None:
        return "ok"
    # conflict: same entity + field but different value
    stmt = select(models.DataChunk).where(
        models.DataChunk.case_id==chunk.case_id,
        models.DataChunk.entity_id==chunk.entity_id,
        models.DataChunk.field==chunk.field,
        models.DataChunk.id != chunk.id,
    )
    others = list(db.scalars(stmt))
    different = [c for c in others if c.value != chunk.value]
    if not different:
        return "ok"
    # Reuse the first existing set id so a third contradicting value joins the
    # group that already exists; otherwise mint a short random id.
    cid = next((c.conflict_set_id for c in different if c.conflict_set_id), None) or uuid.uuid4().hex[:16]
    chunk.conflict_set_id = cid
    for c in different:
        if not c.conflict_set_id:
            c.conflict_set_id = cid
    return "conflict"


Overwriting backend/app/services/conflict.py


In [336]:
%%writefile backend/app/services/objective_rules.py
from __future__ import annotations
from sqlalchemy.orm import Session
from sqlalchemy import select
from backend.app import models

def field_exists(db: Session, case_id: int, field: str) -> bool:
    """Tell whether the case has at least one data chunk for `field`."""
    matches_case = models.DataChunk.case_id==case_id
    matches_field = models.DataChunk.field==field
    first_hit = db.scalars(select(models.DataChunk).where(matches_case, matches_field)).first()
    return first_hit is not None

def evaluate_objectives(db: Session, case_id: int) -> list[models.Objective]:
    """Recompute and persist the status of every objective of a case.

    A rule with "all_of" needs every clause to hold; otherwise a rule with
    "any_of" needs at least one; any other non-empty rule is treated as a
    single clause. An empty rule is never met. Status becomes "met" or
    "pending".
    """
    objectives = db.scalars(select(models.Objective).where(models.Objective.case_id==case_id)).all()
    for objective in objectives:
        rule = objective.rule or {}
        if "all_of" in rule:
            satisfied = all(_eval_clause(db, case_id, clause) for clause in rule["all_of"])
        elif "any_of" in rule:
            satisfied = any(_eval_clause(db, case_id, clause) for clause in rule["any_of"])
        elif rule:
            satisfied = _eval_clause(db, case_id, rule)
        else:
            satisfied = False
        objective.status = "met" if satisfied else "pending"
    db.commit()
    return objectives

def _eval_clause(db: Session, case_id: int, clause: dict) -> bool:
    if "field_exists" in clause:
        return all(field_exists(db, case_id, f) for f in clause["field_exists"])
    return False


Overwriting backend/app/services/objective_rules.py


In [337]:
%%writefile backend/app/seed.py
from sqlalchemy.orm import Session
from backend.app.db import SessionLocal
from backend.app import models
from backend.app.services.highlight import parse_to_highlight_html

def main():
    """Seed the demo case: 2 entities, 2 sources with pre-rendered HTML, 1 objective.

    Everything is written in one transaction. The session is closed even when
    seeding fails, and re-running the script is a no-op once "Case-001" exists
    (the case name is unique, so a second insert would otherwise crash).
    """
    from sqlalchemy import select

    with SessionLocal() as db:
        with db.begin():
            existing_id = db.scalar(select(models.Case.id).where(models.Case.name == "Case-001"))
            if existing_id is not None:
                print("Demo data already present: Case-001; nothing to seed")
                return

            case = models.Case(name="Case-001")
            db.add(case); db.flush()

            alice = models.Entity(case_id=case.id, type="person", name="Alice Meyer", attrs={"role":"suspect"})
            bob = models.Entity(case_id=case.id, type="person", name="Bob Tran", attrs={"role":"accomplice"})
            db.add_all([alice, bob]); db.flush()

            s1 = models.Source(case_id=case.id, kind="news", title="Local News",
                               raw="Alice Meyer, born 1990-05-20, lives at 12 River Road. She met Bob yesterday.",
                               meta={"timestamp":"2024-12-01T10:00:00Z"})
            s2 = models.Source(case_id=case.id, kind="chat", title="Chat log",
                               raw="A: My birthday is 1991-05-20. Address: 14 River Rd. B: ok",
                               meta={"timestamp":"2024-12-02T09:00:00Z"})
            # Flush so both sources have ids before the HTML payloads embed them.
            db.add_all([s1, s2]); db.flush()

            # pre-parse highlight HTML
            s1.parsed_html = parse_to_highlight_html(s1)
            s2.parsed_html = parse_to_highlight_html(s2)

            # seed objectives
            obj1 = models.Objective(case_id=case.id, name="Identify DOB and Address",
                                    rule={"all_of":[{"field_exists":["dob"]},{"field_exists":["address"]}]})
            db.add(obj1)

    print("Seeded demo data: Case-001 with 2 entities, 2 sources, 1 objective")

if __name__ == "__main__":
    main()


Overwriting backend/app/seed.py


In [338]:
%%writefile alembic.ini
[alembic]
script_location = backend/migrations
sqlalchemy.url = sqlite:///./tik_demo.db

[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console

[logger_sqlalchemy]
level = WARN
handlers = console
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers = console
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s


Overwriting alembic.ini


In [339]:
%%writefile backend/migrations/env.py
from logging.config import fileConfig
from sqlalchemy import engine_from_config, pool
from alembic import context
from backend.app.db import Base
from backend.app import models  # noqa

config = context.config
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Migrate the same database the application connects to. Without this,
# Alembic always uses the URL hard-coded in alembic.ini, so a Postgres
# deployment would get its tables created in the SQLite demo file instead.
# "%" is doubled because the option value goes through ConfigParser interpolation.
from backend.app.config import settings  # noqa: E402
config.set_main_option("sqlalchemy.url", settings.database_url.replace("%", "%%"))

target_metadata = Base.metadata

def run_migrations_offline():
    """Run migrations in offline mode, configured from the URL only (no engine)."""
    database_url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=database_url,
        target_metadata=target_metadata,
        literal_binds=True,
    )
    with context.begin_transaction():
        context.run_migrations()

def run_migrations_online():
    """Run migrations over a live connection built from the ini section."""
    ini_section = config.get_section(config.config_ini_section)
    # NullPool: no connections are kept after the migration run.
    migration_engine = engine_from_config(ini_section, prefix='sqlalchemy.', poolclass=pool.NullPool)
    with migration_engine.connect() as live_connection:
        context.configure(connection=live_connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()

# Entry point executed by Alembic: pick the runner that matches the mode.
_run_migrations = run_migrations_offline if context.is_offline_mode() else run_migrations_online
_run_migrations()


Overwriting backend/migrations/env.py


In [340]:
%%writefile backend/migrations/versions/0001_init.py
from alembic import op
import sqlalchemy as sa

# Alembic revision identifiers. down_revision is None, so this is the first
# revision in the migration history.
revision = "0001_init"
down_revision = None
branch_labels = None
depends_on = None

def upgrade():
    """Create the initial schema.

    Tables are created parents-first so every foreign key points at a table
    that already exists: cases, then entities and sources, then the tables
    referencing those.
    """
    # NOTE(review): the ORM models declare index=True on several foreign-key
    # columns and non-nullable columns; this migration creates no indexes and
    # leaves most columns nullable. Confirm which side is authoritative.
    op.create_table("cases",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("name", sa.String(200), nullable=False, unique=True),
        sa.Column("created_at", sa.DateTime)
    )
    op.create_table("entities",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("case_id", sa.Integer, sa.ForeignKey("cases.id")),
        sa.Column("type", sa.String(50)),
        sa.Column("name", sa.String(200)),
        sa.Column("attrs", sa.JSON),
    )
    op.create_table("sources",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("case_id", sa.Integer, sa.ForeignKey("cases.id")),
        sa.Column("kind", sa.String(50)),
        sa.Column("title", sa.String(300)),
        sa.Column("raw", sa.Text),
        sa.Column("parsed_html", sa.Text),
        sa.Column("meta", sa.JSON),
    )
    # References cases, sources and entities, so it must come after all three.
    op.create_table("data_chunks",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("case_id", sa.Integer, sa.ForeignKey("cases.id")),
        sa.Column("source_id", sa.Integer, sa.ForeignKey("sources.id")),
        sa.Column("entity_id", sa.Integer, sa.ForeignKey("entities.id")),
        sa.Column("field", sa.String(100)),
        sa.Column("value", sa.String(500)),
        sa.Column("confidence", sa.Float),
        sa.Column("conflict_set_id", sa.String(64)),
        sa.Column("created_at", sa.DateTime),
    )
    op.create_table("edges",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("case_id", sa.Integer, sa.ForeignKey("cases.id")),
        sa.Column("src_entity_id", sa.Integer, sa.ForeignKey("entities.id")),
        sa.Column("dst_entity_id", sa.Integer, sa.ForeignKey("entities.id")),
        sa.Column("type", sa.String(50)),
        sa.Column("evidence", sa.JSON),
    )
    op.create_table("objectives",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("case_id", sa.Integer, sa.ForeignKey("cases.id")),
        sa.Column("name", sa.String(200)),
        sa.Column("rule", sa.JSON),
        sa.Column("status", sa.String(20)),
    )
    op.create_table("audit",
        sa.Column("id", sa.Integer, primary_key=True),
        sa.Column("case_id", sa.Integer, sa.ForeignKey("cases.id")),
        sa.Column("action", sa.String(100)),
        sa.Column("actor", sa.String(100)),
        sa.Column("details", sa.JSON),
        sa.Column("created_at", sa.DateTime),
    )

def downgrade():
    """Drop every table of the initial schema, dependants before their parents."""
    drop_order = (
        "audit",
        "objectives",
        "edges",
        "data_chunks",
        "sources",
        "entities",
        "cases",
    )
    for table_name in drop_order:
        op.drop_table(table_name)


Overwriting backend/migrations/versions/0001_init.py


In [341]:
%%writefile ui/qt/app.py
import os, sys
from PyQt6.QtWidgets import QApplication
from dotenv import load_dotenv
from loguru import logger
from ui.qt.shell import MainWindow

def main():
    """Start the Qt application and show the main window bound to the API.

    The API base URL is built from API_HOST / API_PORT, read from the
    environment after loading `.env`.
    """
    load_dotenv()
    host = os.getenv('API_HOST','127.0.0.1')
    port = os.getenv('API_PORT','8000')
    api_base = f"http://{host}:{port}"

    qt_app = QApplication(sys.argv)
    qt_app.setApplicationName("TIK")

    window = MainWindow(api_base=api_base)
    window.show()
    logger.info(f"TIK UI started, API={api_base}")

    exit_code = qt_app.exec()
    sys.exit(exit_code)

if __name__ == "__main__":
    main()


Overwriting ui/qt/app.py


In [342]:
%%writefile ui/qt/shell.py
from PyQt6.QtCore import QTimer, Qt, QTime
from PyQt6.QtGui import QAction
from PyQt6.QtWidgets import QMainWindow, QToolBar, QLabel, QDialog
from ui.qt.workspace import Workspace
from ui.qt.objectives_dialog import ObjectivesDialog
from ui.qt.advisor_overlay import AdvisorOverlay
import httpx

class MainWindow(QMainWindow):
    """Application shell: toolbar, status bar with a clock, workspace and advisor overlay.

    Every 3 seconds the window asks the API to evaluate the objectives of the
    workspace's current case and shows the advisor overlay while at least one
    objective reports status ``"met"``.
    """

    def __init__(self, api_base: str):
        """Build the window.

        Args:
            api_base: Base URL of the backend API, e.g. ``http://127.0.0.1:8000``.
        """
        super().__init__()
        self.api_base = api_base
        # True while the most recent objectives poll failed; used to restore the
        # status text once polling succeeds again.
        self._poll_failed = False
        self.setWindowTitle("The Investigation Kit")
        self.resize(1200, 800)

        self.toolbar = QToolBar("Main")
        self.addToolBar(self.toolbar)
        self._add_actions()

        self.status_label = QLabel("Ready")
        self.clock = QLabel()
        self.statusBar().addWidget(self.status_label)
        self.statusBar().addPermanentWidget(self.clock)
        self._start_clock()

        self.workspace = Workspace(api_base=self.api_base)
        self.setCentralWidget(self.workspace)

        self.overlay = AdvisorOverlay(self)
        self.overlay.hide()

        self.poll_timer = QTimer(self)
        self.poll_timer.timeout.connect(self._poll_objectives)
        self.poll_timer.start(3000)

    def _add_actions(self):
        """Populate the toolbar with the "Objectives" and "Log" actions."""
        act_obj = QAction("Objectives", self)
        act_obj.triggered.connect(self._open_objectives)
        self.toolbar.addAction(act_obj)

        # NOTE(review): the "Log" action has no handler connected yet.
        act_log = QAction("Log", self)
        self.toolbar.addAction(act_log)

    def _open_objectives(self):
        """Show the objectives dialog modally."""
        dlg = ObjectivesDialog(self.api_base, self)
        dlg.exec()

    def _start_clock(self):
        """Show the current time in the status bar and refresh it every second."""
        timer = QTimer(self)  # parented to the window, so it outlives this method
        timer.timeout.connect(self._tick_clock)
        self._tick_clock()  # paint immediately instead of staying blank for 1 s
        timer.start(1000)

    def _tick_clock(self):
        """Write the current wall-clock time into the clock label."""
        self.clock.setText(QTime.currentTime().toString("HH:mm:ss"))

    def _poll_objectives(self):
        """Re-evaluate the current case's objectives and toggle the overlay.

        Does nothing when no case is selected. Transport errors, non-2xx
        responses and undecodable bodies are reported in the status bar and the
        overlay is left as it was, instead of being silently discarded.
        """
        case_id = self.workspace.current_case_id()
        if not case_id:
            return
        try:
            # NOTE: this is a blocking call on the UI thread, bounded by the 2 s timeout.
            with httpx.Client(timeout=2.0) as c:
                resp = c.post(f"{self.api_base}/objectives/evaluate/{case_id}")
                resp.raise_for_status()
                objs = resp.json()
        except (httpx.HTTPError, ValueError) as exc:
            self._poll_failed = True
            self.status_label.setText(f"Objectives poll failed: {exc}")
            return

        if self._poll_failed:
            self._poll_failed = False
            self.status_label.setText("Ready")
        if not isinstance(objs, list):
            # Unexpected payload shape; keep the overlay unchanged.
            return
        any_met = any(isinstance(o, dict) and o.get("status") == "met" for o in objs)
        self.overlay.setVisible(any_met)


Overwriting ui/qt/shell.py


In [343]:
%%writefile ui/qt/workspace.py
from PyQt6.QtWidgets import QWidget, QSplitter, QTabWidget, QVBoxLayout
from PyQt6.QtCore import Qt
from ui.qt.profile_panel import ProfilePanel
from ui.qt.graph_panel import GraphPanel
from ui.qt.timeline_panel import TimelinePanel
from ui.qt.source_view import SourceView
import httpx

class Workspace(QWidget):
    """Two-pane central widget: Profile/Graph/Timeline tabs on the left, source reader on the right.

    The workspace holds the currently selected case id and entity id and hands
    them to the child panels through `_provide_ids`.
    """

    def __init__(self, api_base: str):
        """Build the panes and try to select a demo case and entity.

        Args:
            api_base: Base URL of the backend API.
        """
        super().__init__()
        self.api_base = api_base
        self._case_id = None
        self._entity_id = None

        layout = QVBoxLayout(self)
        splitter = QSplitter(Qt.Orientation.Horizontal)
        self.left_tabs = QTabWidget()
        self.profile = ProfilePanel(self.api_base, self._provide_ids)
        self.graph = GraphPanel(self.api_base, self._provide_ids)
        self.timeline = TimelinePanel(self.api_base, self._provide_ids)
        self.left_tabs.addTab(self.profile, "Profile")
        self.left_tabs.addTab(self.graph, "Graph")
        self.left_tabs.addTab(self.timeline, "Timeline")

        self.reader = SourceView(self.api_base, self._on_chunk_dragged)

        splitter.addWidget(self.left_tabs)
        splitter.addWidget(self.reader)
        splitter.setSizes([500, 700])
        layout.addWidget(splitter)

        self._bootstrap_demo()

    def _provide_ids(self):
        """Return ``(case_id, entity_id)``; either may be ``None``."""
        return self._case_id, self._entity_id

    def _on_chunk_dragged(self, payload: dict):
        """Commit a dragged chunk to the API and refresh the dependent panels.

        Args:
            payload: Chunk description; its ``"entity_id"`` key is overwritten
                in place with the workspace's current entity id.

        HTTP failures are logged and the panels are left as they were, so an
        API hiccup does not raise out of a UI callback.
        """
        import logging

        # UI side augment: fill entity_id if known
        payload["entity_id"] = self._entity_id
        try:
            with httpx.Client(timeout=3.0) as c:
                r = c.post(f"{self.api_base}/chunks/", json=payload)
                r.raise_for_status()
            self.profile.refresh_entity()
            self.graph.refresh()
            self.timeline.refresh()
        except (httpx.HTTPError, ValueError) as exc:
            logging.getLogger(__name__).warning("Chunk commit failed: %s", exc)

    def current_case_id(self):
        """Return the id of the selected case, or ``None`` when there is none."""
        return self._case_id

    def _bootstrap_demo(self):
        """Select the first case and its first source, then create a "Target" entity.

        Called from ``__init__``. If the API is unreachable or answers with an
        error, the failure is logged and the workspace keeps whatever state was
        reached, so the window can still open.
        NOTE(review): a new "Target" entity is posted on every start — confirm
        whether the API de-duplicates it.
        """
        import logging

        # pick first case and first entity for MVP
        try:
            with httpx.Client(timeout=5.0) as c:
                resp = c.get(f"{self.api_base}/cases/")
                resp.raise_for_status()
                cases = resp.json()
                if not cases:
                    self._case_id = None
                    return
                self._case_id = cases[0]["id"]
                # load sources list, choose first
                resp = c.get(f"{self.api_base}/sources/by_case/{self._case_id}")
                resp.raise_for_status()
                srcs = resp.json()
                if srcs:
                    self.reader.load_source(srcs[0]["id"])
                # create/select entity
                resp = c.post(
                    f"{self.api_base}/entities/",
                    json={"case_id": self._case_id, "type":"person","name":"Target","attrs":{}},
                )
                resp.raise_for_status()
                ent = resp.json()
                self._entity_id = ent["id"]
                self.profile.set_entity(ent)
                self.graph.refresh()
                self.timeline.refresh()
        except (httpx.HTTPError, ValueError) as exc:
            logging.getLogger(__name__).warning("Workspace bootstrap failed: %s", exc)


Overwriting ui/qt/workspace.py


In [344]:
%%writefile ui/qt/profile_panel.py
from PyQt6.QtWidgets import QWidget, QVBoxLayout, QLabel, QFormLayout, QFrame
from PyQt6.QtCore import Qt, QMimeData
from PyQt6.QtGui import QDragEnterEvent, QDropEvent
import httpx

class DropField(QLabel):
    """Label that accepts a dropped datachunk and posts it to the API for one profile field."""

    def __init__(self, title: str, api_base: str, get_ids):
        """Create the drop target.

        Args:
            title: Field title; its lower-cased form is sent as the chunk's ``field``.
            api_base: Base URL of the backend API.
            get_ids: Callable returning ``(case_id, entity_id)``.
        """
        super().__init__("<i>drop here</i>")
        self.setFrameShape(QFrame.Shape.StyledPanel)
        self.setMinimumHeight(28)
        self.setAcceptDrops(True)
        self.title = title
        self.api_base = api_base
        self.get_ids = get_ids

    def dragEnterEvent(self, e: QDragEnterEvent):
        """Accept drags that carry ``application/json`` data or plain text."""
        mime = e.mimeData()
        if mime.hasFormat("application/json") or mime.hasText():
            e.acceptProposedAction()

    def dropEvent(self, e: QDropEvent):
        """Decode the dropped JSON object, tag it with ids and field, and post it.

        The drop is ignored (nothing is posted) when the data is not a JSON
        object or when the API call fails; plain text is accepted by
        `dragEnterEvent`, so arbitrary non-JSON text can reach this handler.
        """
        import json

        mime = e.mimeData()
        try:
            if mime.hasFormat("application/json"):
                txt = mime.data("application/json").data().decode()
            else:
                txt = mime.text()
            payload = json.loads(txt)
        except ValueError:
            # Covers both invalid JSON and undecodable bytes.
            e.ignore()
            return
        if not isinstance(payload, dict):
            e.ignore()
            return

        case_id, entity_id = self.get_ids()
        payload["case_id"] = case_id
        payload["entity_id"] = entity_id
        payload["field"] = self.title.lower()
        try:
            with httpx.Client(timeout=3.0) as c:
                r = c.post(f"{self.api_base}/chunks/", json=payload)
                r.raise_for_status()
        except httpx.HTTPError:
            e.ignore()
            return

        e.acceptProposedAction()
        owner = self.parent()
        # The owning panel is expected to expose refresh_entity(); skip quietly otherwise.
        if owner is not None and hasattr(owner, "refresh_entity"):
            owner.refresh_entity()

class ProfilePanel(QWidget):
    """Profile tab: shows the entity's name plus DOB and Address drop fields."""

    _PLACEHOLDER = "<i>drop here</i>"

    def __init__(self, api_base: str, get_ids):
        """Build the form.

        Args:
            api_base: Base URL of the backend API.
            get_ids: Callable returning ``(case_id, entity_id)``; forwarded to the drop fields.
        """
        super().__init__()
        self.api_base = api_base
        self.get_ids = get_ids
        self.entity = None

        layout = QVBoxLayout(self)
        self.header = QLabel("<b>Target</b>")
        layout.addWidget(self.header)
        form = QFormLayout()
        self.name = QLabel("-")
        self.dob = DropField("DOB", api_base, get_ids)
        self.address = DropField("Address", api_base, get_ids)
        form.addRow("Name", self.name)
        form.addRow("DOB", self.dob)
        form.addRow("Address", self.address)
        layout.addLayout(form)

    def set_entity(self, ent: dict):
        """Display ``ent`` (an entity dict with ``id``, ``name`` and ``attrs`` keys)."""
        self.entity = ent
        self._render()

    def refresh_entity(self):
        """Re-fetch the current entity from the API and re-render it.

        Does nothing when no entity is set. On an HTTP or decoding failure the
        previously displayed entity is kept, because this method is invoked
        from a drop handler where an exception would escape into Qt.
        """
        if not self.entity:
            return
        try:
            resp = httpx.get(f"{self.api_base}/entities/{self.entity['id']}")
            resp.raise_for_status()
            fetched = resp.json()
        except (httpx.HTTPError, ValueError):
            return
        if not isinstance(fetched, dict):
            return
        self.entity = fetched
        self._render()

    def _render(self):
        """Push the entity's name and attributes into the labels.

        Missing or null values fall back to the defaults (``Target`` / ``-`` /
        the drop placeholder); values are converted with ``str`` because
        ``setText`` only accepts strings.
        """
        name = self.entity.get("name")
        attrs = self.entity.get("attrs") or {}
        self.header.setText(f"<b>{name if name is not None else 'Target'}</b>")
        self.name.setText(str(name) if name is not None else "-")
        dob = attrs.get("dob")
        address = attrs.get("address")
        self.dob.setText(str(dob) if dob else self._PLACEHOLDER)
        self.address.setText(str(address) if address else self._PLACEHOLDER)


Overwriting ui/qt/profile_panel.py


In [345]:
%%writefile ui/qt/graph_panel.py
from PyQt6.QtWidgets import QWidget, QVBoxLayout, QGraphicsView, QGraphicsScene, QGraphicsEllipseItem, QGraphicsTextItem
from PyQt6.QtCore import QRectF
import httpx, math

class GraphPanel(QWidget):
    """Graph tab: draws the case's nodes as labelled circles arranged on a ring."""

    def __init__(self, api_base: str, get_ids):
        """Create the view and draw once.

        Args:
            api_base: Base URL of the backend API.
            get_ids: Callable returning ``(case_id, entity_id)``.
        """
        super().__init__()
        self.api_base = api_base
        self.get_ids = get_ids
        self.view = QGraphicsView()
        box = QVBoxLayout(self)
        box.addWidget(self.view)
        self.refresh()

    def refresh(self):
        """Fetch nodes and edges for the current case and rebuild the scene.

        Returns early when no case is selected. Edges are fetched and iterated
        but not drawn.
        """
        case_id, _entity_id = self.get_ids()
        if not case_id:
            return
        payload = httpx.get(f"{self.api_base}/graph/nodes_edges/{case_id}").json()
        canvas = QGraphicsScene()
        node_list = payload["nodes"]
        edge_list = payload["edges"]

        slots = max(1, len(node_list))  # avoid dividing by zero for an empty graph
        ring_radius = 150
        centers = {}
        for index, node in enumerate(node_list):
            theta = 2*math.pi*index/slots
            cx = 200+ring_radius*math.cos(theta)
            cy = 200+ring_radius*math.sin(theta)
            centers[node["id"]] = (cx, cy)
            # 40x40 circle centred on (cx, cy)
            canvas.addItem(QGraphicsEllipseItem(QRectF(cx-20, cy-20, 40, 40)))
            caption = QGraphicsTextItem(node["label"])
            caption.setPos(cx-20, cy+20)
            canvas.addItem(caption)

        for _edge in edge_list:
            # (simple; skip drawing lines for MVP brevity)
            continue
        self.view.setScene(canvas)


Overwriting ui/qt/graph_panel.py


In [346]:
%%writefile ui/qt/timeline_panel.py
from PyQt6.QtWidgets import QWidget, QVBoxLayout, QListWidget, QListWidgetItem
import httpx

class TimelinePanel(QWidget):
    """Timeline tab: a flat list of the case's timeline entries."""

    def __init__(self, api_base: str, get_ids):
        """Create the list widget.

        Args:
            api_base: Base URL of the backend API.
            get_ids: Callable returning ``(case_id, entity_id)``.
        """
        super().__init__()
        self.api_base = api_base
        self.get_ids = get_ids
        self.list = QListWidget()
        box = QVBoxLayout(self)
        box.addWidget(self.list)

    def refresh(self):
        """Reload the timeline of the current case; no-op when no case is selected.

        Each row reads ``[<timestamp>] <KIND> - <title>``; a missing timestamp
        renders as an empty string.
        """
        case_id, _entity_id = self.get_ids()
        if not case_id:
            return
        entries = httpx.get(f"{self.api_base}/graph/timeline/{case_id}").json()
        self.list.clear()
        for entry in entries:
            stamp = entry.get("timestamp","")
            kind = entry['kind'].upper()
            self.list.addItem(f"[{stamp}] {kind} - {entry['title']}")


Overwriting ui/qt/timeline_panel.py


In [347]:
%%writefile ui/qt/source_view.py
from PyQt6.QtWebEngineWidgets import QWebEngineView
from PyQt6.QtCore import QUrl
import httpx

HTML_EMPTY = "<html><body><i>No source loaded</i></body></html>"

class SourceView(QWebEngineView):
    """Reader pane that renders a source's HTML fetched from the API."""

    def __init__(self, api_base: str, on_dragged):
        """Start with the "no source" placeholder page.

        Args:
            api_base: Base URL of the backend API.
            on_dragged: Callback stored on the instance; this class does not
                invoke it itself.
        """
        super().__init__()
        self.api_base = api_base
        self.on_dragged = on_dragged
        self.setHtml(HTML_EMPTY)

    def load_source(self, source_id: int):
        """Fetch the HTML of ``source_id`` and display it."""
        endpoint = f"{self.api_base}/sources/{source_id}/html"
        response = httpx.get(endpoint, timeout=5.0)
        markup = response.json()["html"]
        self.setHtml(markup, baseUrl=QUrl("http://local/"))


Overwriting ui/qt/source_view.py


In [348]:
%%writefile ui/qt/dragdrop.py
# Helper kept for future extension (currently logic lives in DropField)
# Intentionally minimal in MVP to reduce complexity.


Overwriting ui/qt/dragdrop.py


In [349]:
%%writefile ui/qt/objectives_dialog.py
from PyQt6.QtWidgets import QDialog, QVBoxLayout, QListWidget, QPushButton, QHBoxLayout
import httpx

class ObjectivesDialog(QDialog):
    """Modal dialog listing the objectives of the current case with their status."""

    def __init__(self, api_base: str, parent=None):
        """Build the dialog and load the list once.

        Args:
            api_base: Base URL of the backend API.
            parent: Optional parent; when it has a ``workspace`` attribute the
                case id is taken from ``workspace.current_case_id()``.
        """
        super().__init__(parent)
        self.api_base = api_base
        self.setWindowTitle("Objectives")
        self.resize(400,300)
        layout = QVBoxLayout(self)
        self.list = QListWidget()
        layout.addWidget(self.list)
        btns = QHBoxLayout()
        self.btn_refresh = QPushButton("Refresh")
        self.btn_refresh.clicked.connect(self.refresh)
        btns.addWidget(self.btn_refresh)
        layout.addLayout(btns)
        self.refresh()

    def refresh(self):
        """Reload the objectives list.

        Falls back to case id 1 only when the parent exposes no workspace
        (MVP behaviour). When the workspace has no case, or the request fails,
        a single explanatory row is shown instead of raising out of the
        constructor or the button slot.
        """
        owner = self.parent()
        case_id = owner.workspace.current_case_id() if hasattr(owner, "workspace") else 1
        self.list.clear()
        if case_id is None:
            # Avoid requesting ".../by_case/None".
            self.list.addItem("No case loaded")
            return
        try:
            resp = httpx.get(f"{self.api_base}/objectives/by_case/{case_id}")
            resp.raise_for_status()
            objs = resp.json()
        except (httpx.HTTPError, ValueError) as exc:
            self.list.addItem(f"Failed to load objectives: {exc}")
            return
        for o in objs:
            self.list.addItem(f"{o['name']} — {o['status']}")


Overwriting ui/qt/objectives_dialog.py


In [350]:
%%writefile ui/qt/advisor_overlay.py
from PyQt6.QtWidgets import QWidget, QLabel
from PyQt6.QtCore import Qt

class AdvisorOverlay(QWidget):
    """Click-through banner overlay that covers its parent widget.

    The overlay watches its parent for resize events so that it always spans
    the parent's full area; the message label stays horizontally centred
    24 px from the top.
    """

    def __init__(self, parent=None):
        """Create the overlay.

        Args:
            parent: Widget to cover. With ``None`` the overlay is a plain
                stand-alone widget and no geometry tracking is installed.
        """
        super().__init__(parent)
        self.setAttribute(Qt.WidgetAttribute.WA_TransparentForMouseEvents)
        # A plain QWidget subclass only paints its stylesheet background when
        # styled-background painting is enabled.
        self.setAttribute(Qt.WidgetAttribute.WA_StyledBackground, True)
        self.setStyleSheet("background: rgba(0,0,0,0.35);")
        self.label = QLabel("Objective met! Review findings.", self)
        self.label.setStyleSheet("color:white; font-size:20px; background:rgba(0,0,0,0.6); padding:8px; border-radius:8px;")
        self.label.adjustSize()
        if parent is not None:
            # Follow the parent's size; the overlay's own resizeEvent is not
            # triggered by parent resizes.
            parent.installEventFilter(self)
            self._fit_to_parent()

    def _fit_to_parent(self):
        """Resize the overlay to cover the whole parent, if there is one."""
        host = self.parent()
        if host is not None:
            self.setGeometry(0, 0, host.width(), host.height())

    def eventFilter(self, watched, ev):
        """Re-fit the overlay whenever the parent is resized; never consumes the event."""
        from PyQt6.QtCore import QEvent

        if watched == self.parent() and ev.type() == QEvent.Type.Resize:
            self._fit_to_parent()
        return super().eventFilter(watched, ev)

    def resizeEvent(self, ev):
        """Keep the message label horizontally centred after a size change."""
        super().resizeEvent(ev)
        self.label.move(int(self.width()/2 - self.label.width()/2), 24)


Overwriting ui/qt/advisor_overlay.py


In [351]:
%%writefile ui/qt/assets/theme.qss
/* Dark colour theme for the TIK Qt widgets.
   NOTE(review): the visible ui/qt modules never call setStyleSheet with this
   file, so it may not be applied yet; confirm where it is loaded. */
QMainWindow { background: #0f1419; color: #e6edf3; }
/* Toolbar and status bar share the same panel colour. */
QToolBar { background: #17212b; spacing: 6px; }
QStatusBar { background: #17212b; color: #9da5b4; }
/* Tabs have rounded top corners; the selected tab uses a lighter shade. */
QTabBar::tab { background: #1f2933; color: #e6edf3; padding: 6px 10px; border-top-left-radius: 6px; border-top-right-radius: 6px; }
QTabBar::tab:selected { background: #283341; }
QLabel { color: #e6edf3; }
/* NOTE(review): a type selector also matches subclasses and QLabel derives
   from QFrame, so this border presumably shows on every label as well;
   confirm that is intended. */
QFrame { border: 1px solid #3b4652; border-radius: 6px; }


Overwriting ui/qt/assets/theme.qss


In [352]:
%%writefile tests/test_highlight.py
from backend.app.services.highlight import _inject_spans

def test_inject_adds_spans():
    """The rendered HTML carries a data-chunk marker and keeps the date and street text."""
    sample = "DOB 1990-01-01 lives at 10 Maple St"
    rendered = _inject_spans(sample, "t", 1, 1)
    for fragment in ("data-chunk", "1990-01-01", "Maple"):
        assert fragment in rendered


Overwriting tests/test_highlight.py


In [353]:
%%writefile tests/test_commit_conflict.py
from sqlalchemy.orm import Session
from backend.app.db import SessionLocal
from backend.app import models
from backend.app.repo import commit_chunk

def setup_case(db: Session):
    """Insert one case with one person entity and one news source.

    Rows are flushed (not committed) so their ids are populated.

    Returns:
        Tuple ``(case, entity, source)``.
    """
    case = models.Case(name="T")
    db.add(case)
    db.flush()  # assigns case.id for the rows below

    entity = models.Entity(case_id=case.id, type="person", name="X", attrs={})
    source = models.Source(case_id=case.id, kind="news", title="t", raw="x")
    db.add_all([entity, source])
    db.flush()
    return case, entity, source

def test_conflict_grouping():
    """Two different DOB values for one entity must end up in the same conflict set."""
    db = SessionLocal()
    try:
        with db.begin():
            c,e,s = setup_case(db)
            ch1,_ = commit_chunk(db, {"case_id":c.id,"source_id":s.id,"entity_id":e.id,"field":"dob","value":"1990-01-01","confidence":0.7})
            ch2,_ = commit_chunk(db, {"case_id":c.id,"source_id":s.id,"entity_id":e.id,"field":"dob","value":"1991-01-01","confidence":0.7})
            # Guard against a vacuous pass where neither chunk was grouped (None == None).
            assert ch2.conflict_set_id is not None
            assert ch1.conflict_set_id == ch2.conflict_set_id
    finally:
        # Release the connection even when an assertion fails.
        db.close()


Overwriting tests/test_commit_conflict.py


In [354]:
%%writefile tests/test_objectives.py
from sqlalchemy.orm import Session
from backend.app.db import SessionLocal
from backend.app import models
from backend.app.services.objective_rules import evaluate_objectives

def test_field_exists_objective():
    """A ``field_exists: ["dob"]`` objective is met once a dob chunk exists for the case."""
    db: Session = SessionLocal()
    try:
        with db.begin():
            c = models.Case(name="C1"); db.add(c); db.flush()
            # Capture the id inside the transaction; the commit may expire `c`.
            case_id = c.id
            s = models.Source(case_id=case_id, kind="news", title="t", raw="x"); db.add(s); db.flush()
            db.add(models.DataChunk(case_id=case_id, source_id=s.id, field="dob", value="1990-01-01", confidence=0.7))
            o = models.Objective(case_id=case_id, name="Need DOB", rule={"field_exists":["dob"]})
            db.add(o)
        objs = evaluate_objectives(db, case_id)
        assert any(ob.status=="met" for ob in objs)
    finally:
        # Release the connection even when the assertion fails.
        db.close()


Overwriting tests/test_objectives.py


In [355]:
%%writefile scripts/run_api.sh
#!/usr/bin/env bash
# Start the FastAPI backend with auto-reload on all interfaces.
# The port comes from $API_PORT (default 8000). Run from the repository root.
set -euo pipefail
source .venv/bin/activate
port="${API_PORT:-8000}"
uvicorn backend.app.main:app --reload --host 0.0.0.0 --port "$port"


Writing scripts/run_api.sh


In [356]:
%%writefile scripts/run_ui.sh
#!/usr/bin/env bash
# Start the PyQt6 desktop UI. Run from the repository root.
set -euo pipefail
. .venv/bin/activate
# Run as a module so the repository root is on sys.path; app.py imports
# `ui.qt.shell`, which cannot be resolved when the file is run by path
# (that puts ui/qt, not the root, on sys.path).
python -m ui.qt.app


Writing scripts/run_ui.sh


In [357]:
%%writefile scripts/dev_all.sh
#!/usr/bin/env bash
# Start the API in the background, then the UI in the foreground.
# The API is stopped when the UI exits. Run from the repository root.
set -e
. .venv/bin/activate
uvicorn backend.app.main:app --reload --port "${API_PORT:-8000}" &
API_PID=$!
# Stop the API on every exit path; with `set -e` a failing UI would otherwise
# end the script before the kill and leave uvicorn running.
cleanup() { kill "$API_PID" 2>/dev/null || true; }
trap cleanup EXIT
sleep 1  # crude wait for the API to start listening
# Run as a module so `ui.qt.*` imports resolve from the repository root.
python -m ui.qt.app


Writing scripts/dev_all.sh


In [358]:
%%writefile scripts/load_spacy.sh
#!/usr/bin/env bash
# Make sure the spaCy model named by $SPACY_MODEL (default en_core_web_sm)
# can be imported; download it with `python -m spacy download` otherwise.
set -e
source .venv/bin/activate
python - <<'PY'
import os
import subprocess
import sys

model_name = os.getenv("SPACY_MODEL", "en_core_web_sm")
module_name = model_name.replace("-", "_")  # import names use underscores
try:
    __import__(module_name)
except Exception:
    print("Downloading spaCy model:", model_name)
    subprocess.check_call([sys.executable, "-m", "spacy", "download", model_name])
else:
    print("spaCy model already present:", model_name)
PY


Writing scripts/load_spacy.sh


# Cách chạy nhanh (Local)
```bash
make setup
make db.init
make db.seed
make dev
```


In [360]:
%%writefile constraints-spacy312-win64.txt
# spaCy 3.7.4 pins for Python 3.12 on Windows x86_64
spacy==3.7.4
thinc==8.2.5
srsly==2.5.1
cymem==2.0.11
preshed==3.0.10
murmurhash==1.0.13
blis==0.7.11
wasabi==1.1.3
weasel==0.3.4
confection==0.1.5
langcodes==3.5.0
catalogue==2.0.10
pathy==0.11.0
cloudpathlib==0.16.0
tqdm==4.67.1
smart-open==6.4.0
spacy-legacy==3.0.12
spacy-loggers==1.0.5
typer==0.9.4



Writing constraints-spacy312-win64.txt
