In [1]:
import datetime
from pathlib import Path

from mini_transcript_search import Criteria, ModelHandler

# Data folder, located one level above the notebook's working directory.
data_dir = Path("..") / "data"

# Search handler; its storage_path points at data_dir / "parlparse_xmls".
handler = ModelHandler(
    storage_path=data_dir / "parlparse_xmls",
    override_stored=False,
    use_local_model=False,
)


# Example phrasings of a member declaring a registered interest; these are
# the query texts handed to Criteria.
# NOTE(review): "my interests in register?" looks like it is missing "the".
# Left untouched because editing a query phrase would change the search —
# confirm whether the wording is intentional.
declaration_phrases = [
    "register of members interests",
    "May I draw attention to my interests as registered in the Register of Members Financial Interests",
    "May I draw attention to my entry in the Register of Members’ Financial Interests?",
    "May I draw attention to my interests in register?",
    "May I draw attention to my interests as declared in the register?",
    "I refer Members to my registered interest.",
]

search_criteria = Criteria(declaration_phrases, score_type="nearest")

# Fixed search window: 2023-09-01 to 2024-09-26.
start_date = datetime.date.fromisoformat("2023-09-01")
end_date = datetime.date.fromisoformat("2024-09-26")
last_year = ModelHandler.DateRange(start_date=start_date, end_date=end_date)

# Run the search over House of Commons debates within that window.
# NOTE(review): threshold=0.4 presumably bounds the `distance` column seen in
# the results — confirm against the mini_transcript_search documentation.
results = handler.query(
    search_criteria,
    date_range=last_year,
    chamber=ModelHandler.Chamber.COMMONS,
    transcript_type=ModelHandler.TranscriptType.DEBATES,
    threshold=0.4,
)

# Convert the results to a DataFrame and preview the first rows.
df = results.df()

df.head()

100%|██████████| 280/280 [00:38<00:00,  7.28it/s]


Unnamed: 0,search_query,date_range_start,date_range_end,distance,matched_text,speaker_name,person_id,chamber,transcript_type,speech_id,debate_url
0,register of members interests,2023-09-01,2024-09-26,0.044254,May I draw the House’s attention to my entry i...,David Simmonds,uk.org.publicwhip/person/25892,house-of-commons,debates,uk.org.publicwhip/debate/2024-07-23d.605.0#d60...,https://www.theyworkforyou.com/debates/?id=202...
1,register of members interests,2023-09-01,2024-09-26,0.044254,May I draw the House’s attention to my entry i...,David Simmonds,uk.org.publicwhip/person/25892,house-of-commons,debates,uk.org.publicwhip/debate/2024-07-23d.605.0#d60...,https://www.theyworkforyou.com/debates/?id=202...
2,register of members interests,2023-09-01,2024-09-26,0.064175,I draw Members’ attention to my entry in the R...,Robert Syms,uk.org.publicwhip/person/10582,house-of-commons,debates,uk.org.publicwhip/debate/2023-10-23c.669.0#c66...,https://www.theyworkforyou.com/debates/?id=202...
3,register of members interests,2023-09-01,2024-09-26,0.064296,May I start by drawing the Committee’s attenti...,David Simmonds,uk.org.publicwhip/person/25892,house-of-commons,debates,uk.org.publicwhip/debate/2024-01-17c.907.0#c90...,https://www.theyworkforyou.com/debates/?id=202...
4,register of members interests,2023-09-01,2024-09-26,0.064296,May I start by drawing the Committee’s attenti...,David Simmonds,uk.org.publicwhip/person/25892,house-of-commons,debates,uk.org.publicwhip/debate/2024-01-17c.907.0#c90...,https://www.theyworkforyou.com/debates/?id=202...


In [4]:
# Persist the query results in two formats (parquet and Excel) under
# data_dir / "interim".
interim_dir = data_dir / "interim"

# Create the output folder if it is missing so this cell also works on a
# fresh checkout; without this the writers fail when "interim" does not exist.
interim_dir.mkdir(parents=True, exist_ok=True)

# Shared file stem so both exports are guaranteed to use the same name.
export_stem = "register_of_members_financial_interests_one_year"

df.to_parquet(interim_dir / f"{export_stem}.parquet", index=False)

# NOTE(review): the saved output of this cell is a long run of `warn(` lines,
# presumably emitted while writing the Excel file — confirm the cause and
# either resolve it or clear the output before sharing the notebook.
df.to_excel(interim_dir / f"{export_stem}.xlsx", index=False)

df.head()

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


Unnamed: 0,search_query,date_range_start,date_range_end,distance,matched_text,speaker_name,person_id,chamber,transcript_type,speech_id,debate_url
0,register of members interests,2023-09-01,2024-09-26,0.044254,May I draw the House’s attention to my entry i...,David Simmonds,uk.org.publicwhip/person/25892,house-of-commons,debates,uk.org.publicwhip/debate/2024-07-23d.605.0#d60...,https://www.theyworkforyou.com/debates/?id=202...
1,register of members interests,2023-09-01,2024-09-26,0.044254,May I draw the House’s attention to my entry i...,David Simmonds,uk.org.publicwhip/person/25892,house-of-commons,debates,uk.org.publicwhip/debate/2024-07-23d.605.0#d60...,https://www.theyworkforyou.com/debates/?id=202...
2,register of members interests,2023-09-01,2024-09-26,0.064175,I draw Members’ attention to my entry in the R...,Robert Syms,uk.org.publicwhip/person/10582,house-of-commons,debates,uk.org.publicwhip/debate/2023-10-23c.669.0#c66...,https://www.theyworkforyou.com/debates/?id=202...
3,register of members interests,2023-09-01,2024-09-26,0.064296,May I start by drawing the Committee’s attenti...,David Simmonds,uk.org.publicwhip/person/25892,house-of-commons,debates,uk.org.publicwhip/debate/2024-01-17c.907.0#c90...,https://www.theyworkforyou.com/debates/?id=202...
4,register of members interests,2023-09-01,2024-09-26,0.064296,May I start by drawing the Committee’s attenti...,David Simmonds,uk.org.publicwhip/person/25892,house-of-commons,debates,uk.org.publicwhip/debate/2024-01-17c.907.0#c90...,https://www.theyworkforyou.com/debates/?id=202...


In [3]:
# Number of rows in the results frame.
df.shape[0]

87221