In [1]:
from __future__ import annotations

import datetime
from importlib import reload
from pathlib import Path
from typing import (
    TypeVar,
)

import rich
from mysoc_validator import Transcript

from parl_motion_detector import agreements, mapper, motions
from parl_motion_detector.downloader import get_latest_for_date

# Generic type variable.
T = TypeVar("T")

# Data root; a relative path, so it resolves against the current working directory.
data_dir = Path("..") / "data"

# The tests/mapper sub-directory of the data root.
tests_path = data_dir / "tests" / "mapper"

In [None]:
# Re-import the project modules so edits made on disk are picked up without
# restarting the kernel. The order is significant and kept as-is: mapper is
# reloaded last, after motions and agreements.
for project_module in (motions, agreements, mapper):
    reload(project_module)

"""
- Standard opposition day motion with amendment - 2023-06-27 - tick
- Passing of net zero target (agreement on SI) - 2019-06-24 - tick
- Gaza ceasefire votes (amended motion, two agreements in a row, motion to sit in silence) - 2024-02-21 - tick
- Day with lots of divisions on amendments to be connected (and random other votes) - 2024-04-24 - done
- Fracking Opposition Day Amendment - timetable change - complicated motion.  - 2022-10-19 - done
- Vote on government agenda - 2024-07-23 - done
- disagreeing with lords amendments - 2024-04-22
"""

# Run configuration: the calendar year to process and the chamber to map.
year = 2024
current_date = datetime.date.today()
chamber = Transcript.Chamber.COMMONS

# Every calendar day of `year` up to and including today, as ISO "YYYY-MM-DD"
# strings. The number of days is derived from the calendar instead of being
# hard-coded to 365, so 31 December is not silently dropped in leap years
# (2024 is one).
year_start = datetime.date(year, 1, 1)
days_in_year = (datetime.date(year + 1, 1, 1) - year_start).days
dates_in_year = [
    day.isoformat()
    for day in (year_start + datetime.timedelta(days=i) for i in range(days_in_year))
    if day <= current_date
]

# dates_in_year = ["2023-10-25"]

# Run the motion mapper for each date in turn and write that day's results to
# the interim results directory. Dates for which no transcript file can be
# found are skipped.
for debate_date in dates_in_year:
    sitting_day = datetime.date.fromisoformat(debate_date)
    try:
        transcript_path = get_latest_for_date(sitting_day, download_path=data_dir)
    except FileNotFoundError:
        # No transcript for this date; move on to the next one.
        continue

    # fix 2019 error: if the file contains the literal "21&#10;14", replace it
    # with "2114" and write the corrected text back to disk before parsing.
    xml_text = transcript_path.read_text()
    if "21&#10;14" in xml_text:
        xml_text = xml_text.replace("21&#10;14", "2114")
        transcript_path.write_text(xml_text)

    transcript = Transcript.from_xml_path(transcript_path)

    # `mm` is deliberately left bound after the loop: later cells inspect the
    # mapper for the last date that was processed.
    mm = mapper.MotionMapper(
        transcript,
        debate_date=debate_date,
        data_dir=data_dir,
        chamber=chamber,
    )
    mm.assign()

    results = mm.export()
    results.to_data_dir(data_dir / "interim" / "results")

2024-01-01
2024-01-02
2024-01-03
2024-01-04
2024-01-05
2024-01-06
2024-01-07
2024-01-08
2024-01-09
2024-01-10
2024-01-11
2024-01-12
2024-01-13
2024-01-14
2024-01-15
2024-01-16
2024-01-17
2024-01-18
2024-01-19
2024-01-20
2024-01-21
2024-01-22
2024-01-23
2024-01-24
2024-01-25
2024-01-26
2024-01-27
2024-01-28
2024-01-29
2024-01-30
2024-01-31
2024-02-01
2024-02-02
2024-02-03
2024-02-04
2024-02-05
2024-02-06
2024-02-07
2024-02-08
2024-02-09
2024-02-10
2024-02-11
2024-02-12
2024-02-13
2024-02-14
2024-02-15
2024-02-16
2024-02-17
2024-02-18
2024-02-19
2024-02-20
2024-02-21
2024-02-22
2024-02-23
2024-02-24
2024-02-25
2024-02-26
2024-02-27
2024-02-28
2024-02-29
2024-03-01
2024-03-02
2024-03-03
2024-03-04
2024-03-05
2024-03-06
2024-03-07
2024-03-08
2024-03-09
2024-03-10
2024-03-11
2024-03-12
2024-03-13
2024-03-14
2024-03-15
2024-03-16
2024-03-17
2024-03-18
2024-03-19
2024-03-20
2024-03-21
2024-03-22
2024-03-23
2024-03-24
2024-03-25
2024-03-26
2024-03-27
2024-03-28
2024-03-29
2024-03-30
2024-03-31

2024-10-15
2024-10-16
2024-10-17
2024-10-18
2024-10-19
2024-10-20
2024-10-21
2024-10-22
2024-10-23
2024-10-24
2024-10-25
2024-10-26
2024-10-27
2024-10-28
2024-10-29
2024-10-30
2024-10-31
2024-11-01
2024-11-02
2024-11-03
2024-11-04
2024-11-05
2024-11-06
2024-11-07
2024-11-08
2024-11-09
2024-11-10
2024-11-11
2024-11-12


In [2]:
# Build a ResultsHolder from the interim results directory for date "2024" in
# the Commons, then export it to the processed parquet directory.
# NOTE(review): presumably this combines the per-day files written by the
# processing loop into one set for the year - confirm against ResultsHolder.
interim_results_dir = data_dir / "interim" / "results"
parquet_dir = data_dir / "processed" / "parquet"

rh = mapper.ResultsHolder.from_data_dir_composite(
    interim_results_dir,
    date="2024",
    chamber=Transcript.Chamber.COMMONS,
)

rh.export(parquet_dir)

In [5]:
# Re-export whatever mapper `mm` is currently bound in the kernel, show the
# exported results, and write them to the interim results directory.
interim_results_dir = data_dir / "interim" / "results"

results = mm.export()
rich.print(results)
results.to_data_dir(interim_results_dir)

In [4]:
# Print every agreement found by `mm` that belongs to one specific major heading.
# NOTE(review): the id below refers to 2023-03-06, whereas the processing loop
# shown in this notebook runs over 2024 dates - confirm which date `mm` is
# expected to hold when this cell is run.
target_heading_id = "uk.org.publicwhip/debate/2023-03-06d.72.0"

for agreement in mm.found_agreements:
    if agreement.major_heading_id != target_heading_id:
        continue
    rich.print(agreement)

In [5]:
# Show the motions held by the mapper currently bound to `mm`.
detected_motions = mm.found_motions
rich.print(detected_motions)