In [7]:
# This notebook imports legacy PublicWhip policy information into the YAML files that power the new database.

from pathlib import Path
import os
import re
import pandas as pd
from datetime import date

# Make the project root (the nearest ancestor directory containing
# pyproject.toml) the working directory, so that the project-relative paths
# used by later cells resolve.
#
# The search is bounded on purpose: at the filesystem root `os.chdir("..")`
# is a no-op, so an open-ended `while` loop would spin forever if the kernel
# was started outside the project tree. Here that case fails fast instead.
for _candidate_root in (Path.cwd(), *Path.cwd().parents):
    if (_candidate_root / "pyproject.toml").exists():
        os.chdir(_candidate_root)
        break
else:
    raise FileNotFoundError(
        "Could not find pyproject.toml in the current directory or any parent directory"
    )

from twfy_votes.internal.db import duck_core
from twfy_votes.apps.legacy.data_sources import duck as tables_duck
from twfy_votes.apps.policies.models import (
    PolicyDirection,
    PartialPolicy,
    PolicyStrength,
    PartialDivision,
    PartialPolicyDecisionLink,
    PolicyStatus,
    PolicyGroup,
    StrengthMeaning,
)

# Top-level `await` is used in this cell, so it relies on a kernel that
# supports awaiting outside a coroutine.
# presumably this returns the project's shared DuckDB wrapper — confirm
# against twfy_votes.internal.db.
duck = await duck_core.get_core()

# Compile and run the definitions from the legacy data_sources module.
# presumably this is what makes the pw_dyn_* tables queried by later cells
# available — TODO confirm.
await duck.compile(tables_duck).run()

In [2]:
# Select every row of pw_dyn_dreammp, exposing dream_id as `id` and
# description as `policy_description`.
query = """
select dream_id as id, description as policy_description from pw_dyn_dreammp
"""

# NOTE(review): full_desc_df is not referenced by any other cell visible in
# this notebook — confirm whether it is still needed.
full_desc_df = await duck.compile(query).df()

In [3]:
# Select every row of pw_dyn_dreamvote, ordered by policy (dream_id), house,
# division date and division number.
# NOTE: this rebinds the name `query`, replacing whatever an earlier cell
# stored in it.
query = """
select * from pw_dyn_dreamvote order by dream_id, house, division_date, division_number
"""


def get_direction(s: str) -> PolicyDirection:
    """
    Translate a legacy vote code into a policy direction.

    "aye" and "aye3" give AGREE, "no" and "no3" give AGAINST, and "both"
    gives NEUTRAL.

    Raises:
        ValueError: if `s` is not one of the five known codes.
    """
    if s in ("aye", "aye3"):
        return PolicyDirection.AGREE
    if s in ("no", "no3"):
        return PolicyDirection.AGAINST
    if s == "both":
        return PolicyDirection.NEUTRAL
    raise ValueError(f"Unknown vote direction {s}")


def get_strength(s: str) -> PolicyStrength:
    """
    Translate a legacy vote code into a policy strength.

    The "3"-suffixed codes ("aye3", "no3") give STRONG; the plain codes
    ("aye", "no", "both") give WEAK.

    Raises:
        ValueError: if `s` is not one of the five known codes.
    """
    if s in ("aye", "no", "both"):
        return PolicyStrength.WEAK
    if s in ("aye3", "no3"):
        return PolicyStrength.STRONG
    raise ValueError(f"Unknown vote strength {s}")


def get_partial(s: pd.Series) -> PartialDivision:
    """
    Build the PartialDivision referenced by one vote row.

    Reads the row's "house", "division_date" and "division_number" entries;
    the division number is coerced to `int` before being passed on.
    """
    chamber = s["house"]
    held_on = s["division_date"]
    number = int(s["division_number"])
    return PartialDivision(chamber_slug=chamber, date=held_on, division_number=number)


def get_links(s: pd.Series) -> PartialPolicyDecisionLink:
    """
    Build the policy/decision link for one vote row.

    Expects the row to already carry the derived "partial_division",
    "direction" and "strength" entries.
    """
    division = s["partial_division"]
    alignment = s["direction"]
    strength = s["strength"]
    return PartialPolicyDecisionLink(
        division=division,
        alignment=alignment,
        strength=strength,
    )


votes_df = await duck.compile(query).df()
votes_df["direction"] = votes_df["vote"].apply(get_direction)
votes_df["strength"] = votes_df["vote"].apply(get_strength)
votes_df["partial_division"] = votes_df.apply(get_partial, axis=1)
votes_df["links"] = votes_df.apply(get_links, axis=1)

# create a dict vote_lookup that maps from dream_id to a list of PolicyDivisionLink objects

vote_lookup = votes_df.groupby("dream_id").agg({"links": list}).to_dict()["links"]

In [10]:
# Load the 2023 proposed-policies sheet. The path is relative, so it depends
# on the working directory having been moved to the project root earlier.
# NOTE(review): the frame displayed below has an "Unnamed: 0" column, so the
# CSV appears to contain a saved index; `index_col=0` would drop it — confirm.
df = pd.read_csv(Path("data", "raw", "proposed_policies_2023.csv"))  # type: ignore


def fix_wording(s: str) -> str:
    """
    Normalise a candidate policy wording.

    Keeps only the first line, removes the "[MP name] generally voted for "
    lead-in (any following word such as "more" is kept), trims surrounding
    whitespace, and converts markdown ``**bold**`` spans to ``<b>bold</b>``.
    """
    first_line, _, _ = s.partition("\n")
    phrase = first_line.replace("[MP name] generally voted for ", "").strip()
    # Non-greedy match so several bold spans on one line are converted separately.
    return re.sub(r"\*\*(.*?)\*\*", r"<b>\1</b>", phrase)


# Overwrite the raw wording column in place with its cleaned form
# (first line only, lead-in removed, bold markers converted to <b> tags).
df["candidate_wording"] = df["candidate_wording"].apply(fix_wording)


def get_group_ids(s: str) -> list[PolicyGroup]:
    """
    Convert a "|"-separated string of group descriptions into group items.

    Each description is looked up in the inverse of
    `PolicyGroup.policy_descs`, so a description that is not an exact value
    of that mapping raises KeyError.
    """
    descriptions = s.split("|")

    # Invert the item -> description mapping so descriptions can be looked up.
    item_by_desc = {desc: item for item, desc in PolicyGroup.policy_descs.items()}

    groups = []
    for desc in descriptions:
        groups.append(item_by_desc[desc])
    return groups


# Derive the list of policy groups for each row from its "|"-separated
# `group` description(s).
df["group_ids"] = df["group"].apply(get_group_ids)

# Show the prepared frame as the cell output.
df

Unnamed: 0,policy_id,title,group,status,candidate_wording,desc,group_ids
0,856,Powers of Government Ministers,Constitutional Reform,draft,more <b>powers for government ministers</b>,There have been votes in Parliament on whether Gove...,[reform]
1,6917,Powers of the Monarch,Constitutional Reform,rejected,more <b>powers for the monarch</b>,There have been votes in Parliament on whether the ...,[reform]
2,6788,Increase the State Pension Age for Women More ...,"Welfare, Benefits and Pensions",candidate,<b>increasing the state pension age for women ...,There have been votes in Parliament on the spe...,[welfare]
3,6971,Suspending MPs or Reducing their Salaries,Miscellaneous Topics,draft,<b>suspending MPs or reducing their salaries</...,There have been votes in Parliament on whether to s...,[misc]
4,6789,Powers of the Devolved Administration in North...,Constitutional Reform,candidate,<b>more powers for</b> the devolved administra...,There have been votes in Parliament on the pow...,[reform]
5,6831,Employment rights,Taxation and Employment,rejected,<b>employment rights</b>,There have been votes in Parliament on making ...,[taxation]
6,6844,Child Protection,Home Affairs,rejected,<b>child protection</b> measures,There have been votes in Parliament on protect...,[home]
7,6852,Sentencing,Justice,rejected,<b>tougher sentences for crimes</b>,There have been votes in Parliament on the max...,[justice]
8,6854,Fire Safety,Housing,candidate,<b>fire safety measures</b>,There have been votes in Parliament on fire sa...,[housing]
9,6885,Support for Small and Medium Sized Businesses,Business and the Economy,rejected,support for <b>small and medium sized business...,There have been votes in Parliament on specifi...,[business]


In [15]:
# Build one PartialPolicy per proposed policy, skipping rows whose status is
# "rejected". Every policy is created for the commons chamber with the
# classic strength meaning, and picks up its decision links from vote_lookup
# (a policy_id missing from vote_lookup raises KeyError).
policies: list[PartialPolicy] = []

for _, row in df.iterrows():
    if row["status"] != "rejected":
        policy = PartialPolicy(
            id=row["policy_id"],
            name=row["title"],
            chamber_id="commons",
            status=row["status"],
            group_ids=row["group_ids"],
            strength_meaning=StrengthMeaning.CLASSIC,
            highlightable=False,
            context_description=row["candidate_wording"],
            policy_description=row["desc"],
            decision_links_refs=vote_lookup[row["policy_id"]],
        )
        policies.append(policy)

In [16]:
# Spot-check the most recently built policy. Reading it from `policies`
# rather than from the loop variable left behind by the previous cell avoids
# showing a stale object when that loop appended nothing on a re-run.
print(policies[-1])

id=6780 name='Openness and Transparency' chamber_id='commons' context_description='<b>openness and transparency</b>.' policy_description="Parliament has voted on access to information laws, and occasionally on whether specific material should be published. There have also been votes on what information companies must disclose as well as on the transparency of our political system, including representatives' interests, allowances and expenses." notes='' status='draft' group_ids=['misc'] strength_meaning='classic' highlightable=False decision_links_refs=[PartialPolicyDecisionLink(division=PartialDivision(chamber_slug='commons', date=datetime.date(1999, 12, 7), division_number=12, key='commons-1999-12-07-12'), agreement=None, alignment='against', strength='strong', status='active', notes='', decision_type='division', decision_key='commons-1999-12-07-12'), PartialPolicyDecisionLink(division=PartialDivision(chamber_slug='commons', date=datetime.date(1999, 12, 7), division_number=13, key='co

In [None]:
# Write each policy to data/policies/<id>.yml using its reduced model dump.
data_path = Path("data", "policies")
for partial_policy in policies:
    target_file = data_path / f"{partial_policy.id}.yml"
    partial_policy.to_path(target_file, partial_policy.model_dump_reduced())