In [1]:
from discovery_utils.getters import hansard
from datetime import datetime, timedelta
import pandas as pd

In [2]:
# Create the Hansard getter and download the debates table and the keyword label store.
# NOTE(review): `Hansard` is an instance, not a class, despite the capitalised name.
Hansard = hansard.HansardGetter()
debates_df = Hansard.get_debates_parquet()
labelstore_df = Hansard.get_labelstore()
# Most recent debate date present in the downloaded data
debates_df.date.max()

2024-12-03 17:30:42,972 - discovery_utils.getters.hansard - INFO - Downloading debates parquet file: data/policy_scanning_data/enriched/HansardDebates.parquet
2024-12-03 17:31:54,420 - discovery_utils.getters.hansard - INFO - Attempting to download label store: data/policy_scanning_data/enriched/HansardDebates_LabelStore_keywords.csv


'2024-11-29'

In [3]:
people_dict = Hansard.get_people_metadata()

2024-12-03 17:33:25,904 - discovery_utils.getters.hansard - INFO - Downloading people metadata
2024-12-03 17:33:28,899 - discovery_utils.getters.hansard - INFO - Successfully downloaded and saved people metadata


## Get the relevant debates

In [35]:
# Last day of the one-week window to analyse (alternative: "2024-11-08")
# end_date = "2024-11-08"
end_date = "2024-10-10"
# Parse the end date so that date arithmetic can be applied
end_date_dt = datetime.strptime(end_date, "%Y-%m-%d")
# The window starts seven days before the end date
one_week_ago = end_date_dt - timedelta(weeks=1)
# Back to a "YYYY-MM-DD" string, the same format as end_date, for the comparison below
start_date = one_week_ago.strftime("%Y-%m-%d")

# Keep the speeches inside the window (both bounds inclusive) and attach their mission and
# topic labels; the merge is an inner join, so speeches without a label store row are dropped
weekly_speeches_df = (
    debates_df[(debates_df["date"] >= start_date) & (debates_df["date"] <= end_date)]
    .merge(labelstore_df[["id", "mission_labels", "topic_labels"]], left_on="speech_id", right_on="id")
)

## Find relevant debates

1. See if keywords were mentioned in the debates
2. See if debate title is relevant?

In [28]:
from discovery_utils.utils import keywords

In [84]:
# Drop repeated (speaker, speech) pairs and build a combined "major minor" heading per speech.
# Note that this rebinds weekly_speeches_df, replacing the frame created in the earlier cell.
weekly_speeches_df = (
    weekly_speeches_df
    .drop_duplicates(subset=["speakername", "speech"])
    .assign(headings = lambda df: df.major_heading.fillna("") + " " + df.minor_heading.fillna(""))
)
# One row per distinct heading, lower-cased, with a positional id used to join labels back on
unique_headings = weekly_speeches_df.headings.unique()
debate_titles_df = (
    pd.DataFrame(data={'id': range(len(unique_headings)), 'text': unique_headings})
    .assign(text = lambda df: df.text.str.lower())
)

In [85]:
labels_df = keywords.enrich_keyword_labels(debate_titles_df, "ASF", split_sentences=False)
labels_df

Unnamed: 0,id,topic_label,mission_label
0,17,CCUS,ASF
1,22,Wind,ASF


In [86]:
debate_titles_df.merge(labels_df, on="id", how="left").dropna(subset=["mission_label"])

Unnamed: 0,id,text,topic_label,mission_label
17,17,"carbon capture, usage and storage",CCUS,ASF
22,22,energy security and net zero onshore wind energy,Wind,ASF


In [87]:
# Organisation lookup table (id -> name). The people metadata is loaded as `people_dict`
# earlier in the notebook; no `people_df` variable is ever defined.
orgs_df = pd.DataFrame(people_dict['organizations'])[['id', 'name']]

In [88]:
# Keep only the most recent membership of each person and attach the name of the
# organisation it is held on behalf of (column `name_org`).
# The people metadata is loaded as `people_dict`; no `people_df` variable is ever defined.
parties_df = (
    pd.DataFrame(people_dict['memberships'])
    .sort_values('start_date', ascending=False)
    .drop_duplicates('person_id')
    .merge(orgs_df, left_on='on_behalf_of_id', right_on='id', how='left', suffixes=("_", "_org"))
)[['person_id', 'post_id', 'start_date', 'start_reason', 'name_org']]
# Spot check a single person
parties_df.query('person_id == "uk.org.publicwhip/person/11545"')

Unnamed: 0,person_id,post_id,start_date,start_reason,name_org
125,uk.org.publicwhip/person/11545,uk.org.publicwhip/cons/2423,2024-07-05,general_election,Labour


In [89]:
# Speeches whose mission label is exactly 'ASF'
# NOTE(review): an equality test will miss rows if mission_labels can hold several labels - confirm
mission_debates = weekly_speeches_df.query("mission_labels == 'ASF'")
# Attach each speaker's party name (left join, so speakers without a match get NaN)
mission_debates = mission_debates.merge(parties_df[['person_id', 'name_org']], on = 'person_id', how='left', suffixes=('', '_person'))
# Distinct (date, major heading) pairs, in chronological order
debates_dates = mission_debates[['date', 'major_heading']].drop_duplicates().sort_values('date')

In [93]:
debate_titles_df = mission_debates.groupby("major_heading").agg(counts=("speech_id", "count")).reset_index()
debate_titles_df

Unnamed: 0,major_heading,counts
0,Business of the House,2
1,"Carbon Capture, Usage and Storage",15
2,Energy Security and Net Zero,25
3,Farming and Food Security,1
4,Prime Minister,2


## Summarise the debates

In [99]:
debates_dates.iloc[1]

date                               2024-10-08
major_heading    Energy Security and Net Zero
Name: 15, dtype: object

In [107]:
# Print every mission-labelled speech of the first debate in debates_dates
# (debates_dates is sorted by date, so this is the earliest one)
row = debates_dates.iloc[0]
debate = row.major_heading
print("======")
print(debate)
print(row.date)
print("======")
# One dict per speech made under this major heading
speech = mission_debates.query("major_heading == @debate").to_dict(orient='records')
for s in speech:
    print(s['date'])
    print(f"{s['speakername']} ({s['name_org']})")
    print(f"Topics: {s['topic_labels']}")
    print(s['speech'])
    print("-----")

Carbon Capture, Usage and Storage
2024-10-07
2024-10-07
Bill Esterson (Labour)
Topics: Hydrogen energy
Unlike the shadow Secretary of State, I am very pleased that the Secretary of State has announced jobs in Teesside—jobs from which my constituents in the north-west of England will potentially benefit. I am also very pleased that we have a Government who are committed to an industrial strategy, and who believe in Government working in partnership with business. The Secretary of State mentioned just how important it is that we have this technology if we are to decarbonise; he quoted James Richardson in making the case. It will be crucial for the abatement of heavy industries such as chemicals, glass—the Secretary of State went to visit a glass factory in the north-west on Friday—and cement, but it will also be crucial for hydrogen production, for the new gas-fired power stations and, indeed, for converting waste into energy. How long does he think we will need this technology for the a

In [74]:
# for _, row in debates_dates.iterrows():
#     debate = row.major_heading
#     print("======")
#     print(debate)
#     print(row.date)
#     print("======")
#     speech = mission_debates.query("major_heading == @debate").to_dict(orient='records')
#     for s in speech:
#         print(s['date'])
#         print(f"{s['speakername']} ({s['name_org']})")
#         print(s['speech'])
#         print("-----")

## Testing utils

In [246]:
from src import synthesis_utils
import importlib
importlib.reload(synthesis_utils);

In [309]:
row = debates_dates.iloc[1]
debate = row.major_heading
print(debate)

Energy Security and Net Zero


In [310]:
# Attach party names to every speech of the week
_weekly_speeches_df = weekly_speeches_df.merge(
    parties_df[['person_id', 'name_org']],
    on='person_id',
    how='left',
    suffixes=('', '_person'),
)
# Speeches of the selected debate, ordered by speech_id, as one dict per speech
debs = (
    _weekly_speeches_df
    .query("major_heading == @debate")
    .sort_values('speech_id')
    .to_dict(orient='records')
)
# Render each speech as "<speaker> (<party>)", the speech text and a "-----" separator line
debate_text = "".join(
    f"{s['speakername']} ({s['name_org']})" + "\n" + s['speech'] + "\n" + "-----" + "\n"
    for s in debs
)

In [311]:
debate_instance = synthesis_utils.Debate(heading=debate, content=debate_text)

In [312]:
synthesis_utils.is_debate_relevant(debate_instance, synthesis_utils.config['mission_statements']['ASF'])

{'is_relevant': True, 'heading': 'Energy Security and Net Zero'}

In [313]:
result = synthesis_utils.summarise_debate_with_structure(debate_instance, synthesis_utils.config['mission_statements']['ASF'])

In [314]:
result.positives

['Simon Opher (Lab) highlighted the excitement and support for sustainable goals in Stroud, particularly the community energy programme for solar panels on schools and public buildings.',
 'Ed Miliband (Lab) discussed the Great British Energy plans to work with local schools and hospitals to implement solar panel programmes, which would help decarbonise and reduce bills.',
 "Alex Barros-Curtis (Lab) celebrated Cantonian high school achieving operational net zero and the Fairwater community campus's energy-efficient buildings, aligning with Cardiff's One Planet strategy.",
 'Sarah Jones (Lab) noted that carbon capture and storage could create 4,000 jobs in the short term and up to 50,000 jobs over the next decade, contributing to economic growth in low-carbon sectors.']

In [315]:
result_ = synthesis_utils.generate_crisp_summary(result)

In [316]:
print(result_.summary)

- The debate centers on the UK government's goal of achieving 100% sustainability by 2030, focusing on renewable energy and community involvement.
- Positives include community energy programs in Stroud (Opher, Lab), Great British Energy plans for schools and hospitals (Miliband, Lab), and job creation through carbon capture (Jones, Lab).
- Negatives involve concerns over airport expansions contradicting net zero targets (Wilson, Lib Dem), potential conflicts of interest in climate envoy appointment (Morrissey, Con), and rising energy bills (Coutinho, Con).
- Next steps include assessing offshore wind cabling routes (Miliband, Lab), a warm homes initiative for renters (Fahnbulleh, Lab/Co-op), and improving grid connections for renewable projects (Shanks, Lab).


In [331]:
importlib.reload(synthesis_utils);

In [322]:
result = synthesis_utils.generate_direct_summary(debate_instance, synthesis_utils.config['mission_statements']['ASF'])

In [323]:
print(result.summary)

- The debate focused on the UK government's commitment to achieving 100% sustainable energy by 2030, with speakers highlighting initiatives like community solar projects in Stroud and Cardiff, and the importance of decarbonising public buildings (Opher, Miliband, Barros-Curtis, Labour).  
- Positives included the announcement of 131 renewable electricity projects capable of powering 11 million homes, and plans for carbon capture and storage expected to create up to 50,000 jobs over the next decade (Shanks, Jones, Labour).  
- Negatives raised included concerns over airport expansions contradicting net zero targets, and the legacy of the previous government’s inaction on energy infrastructure, which has led to delays in renewable projects (Wilson, Coutinho, Conservative; Miliband, Labour).  
- Next steps involve assessing alternative cabling routes for offshore wind farms, enhancing grid capacity, and implementing a warm homes plan to tackle fuel poverty, with a focus on community energ

In [334]:
result__ = synthesis_utils.generate_nesta_summary(debate_instance)

In [335]:
print(result__.summary)

1. Miatta Fahnbulleh (Labour/Co-operative) announced the Warm Homes Plan, which aims to upgrade homes across the UK with grants and low-interest loans for insulation and low-carbon heating, targeting fuel poverty and improving energy efficiency.
2. Ed Miliband (Labour) highlighted the importance of Great British Energy's plans to work with local schools and hospitals to implement solar panel programmes, which will help decarbonise public buildings and reduce energy bills.
3. Michael Shanks (Labour) discussed the government's commitment to halving the development time for new transmission infrastructure, essential for achieving clean power by 2030 and meeting future electricity demands.
4. Sarah Jones (Labour) emphasized the government's proactive approach to carbon capture and storage, which is expected to create thousands of jobs and support the transition to a low-carbon economy.
5. Miatta Fahnbulleh (Labour/Co-operative) stated that the commitment to improve energy efficiency standa

## Simple pipeline

In [127]:
import importlib
from src import synthesis_utils
importlib.reload(synthesis_utils);
import numpy as np
from typing import Literal, Tuple, List, Dict

from src import logging
from discovery_utils.utils import keywords

In [8]:
from slack_sdk.webhook import WebhookClient
import os
slack_webhook = WebhookClient(os.environ["SLACK_WEBHOOK_URL_TESTING"])

In [18]:
from typing import List, Dict

def mission_header(mission: str) -> Dict:
    """Build the section block that introduces a mission

    Args:
        mission (str): Mission code, one of "ASF", "AFS" or "AHL"

    Returns:
        Dict: A "section" block whose mrkdwn text is the mission emoji and bold name

    Raises:
        ValueError: If the mission code is not one of the three known codes
    """
    titles = {
        "ASF": ":potted_plant: *A Sustainable Future*",
        "AFS": ":hatched_chick: *A Fairer Start*",
        "AHL": ":mending_heart: *A Healthier Life*",
    }
    try:
        title = titles[mission]
    except (KeyError, TypeError):
        # Unknown (or unhashable) codes are reported the same way
        raise ValueError(f"Invalid mission: {mission}") from None

    return {"type": "section", "text": {"type": "mrkdwn", "text": title}}

def message_header(message_date:str, data_start_date:str, data_end_date:str) -> List[Dict]:
    """Build the two blocks that open a policy update message

    The dates are inserted into the text verbatim, so they appear in whatever
    format the caller passes them in.

    Args:
        message_date (str): Date of posting, shown in the message title
        data_start_date (str): First day of the period covered by the data
        data_end_date (str): Last day of the period covered by the data

    Returns:
        List[Dict]: A "header" block with the message title, followed by a "context"
            block naming the data source and the date range
    """
    title_text = f"Policy update {message_date}"
    source_text = f"House of Commons debates ({data_start_date} - {data_end_date})"
    return [
        {"type": "header", "text": {"type": "plain_text", "text": title_text}},
        {"type": "context", "elements": [{"type": "mrkdwn", "text": source_text}]},
    ]
    

def divider() -> Dict:
    """Build a divider block, used as a visual separator between sections"""
    return dict(type="divider")

def _bullet_point_string(points: List[str]) -> str:
    """Construct a string of bullet points from a list of strings"""
    return "\n".join([f"• {point}" for point in points])     

def debate_summary(debate: Dict) -> List[Dict]:
    """Construct the blocks for a single debate summary

    Args:
        debate (Dict): Dictionary with keys "title", "purpose", "positives", "negatives", and "next_steps".
            For example: {
                "title": "Title of the debate",
                "purpose": "Summary of the debate",
                "positives": ["Positive point 1", "Positive point 2"],
                "negatives": ["Negative point 1", "Negative point 2"],
                "next_steps": ["Next step 1", "Next step 2"],
            }

    Returns:
        List[Dict]: Four "section" blocks (title with purpose, positives, negatives,
            next steps) followed by a divider block
    """
    # The purpose often already ends with a full stop; only add one when the text
    # has no closing punctuation, otherwise the message shows "..".
    purpose = str(debate['purpose']).rstrip()
    if purpose and not purpose.endswith((".", "!", "?")):
        purpose += "."

    blocks = [
        {
            "type": "section",
            "text": {
                "type": "mrkdwn",
                "text": f"*{debate['title']}*\n{purpose}"
            }
        }
    ]
    # One bulleted section per list of points, in a fixed order
    bulleted_sections = (
        ("Positives", "positives"),
        ("Negatives", "negatives"),
        ("Next Steps", "next_steps"),
    )
    for heading, key in bulleted_sections:
        blocks.append(
            {
                "type": "section",
                "text": {
                    "type": "mrkdwn",
                    "text": f"*{heading}*\n{_bullet_point_string(debate[key])}"
                }
            }
        )
    blocks.append(divider())
    return blocks

def quote_block(quote: Dict) -> Dict:
    """Build a section block that attributes a quoted passage to its speaker

    Args:
        quote (Dict): Dictionary with keys "name", "party", "category", "debate", and "text".

    Returns:
        Dict: A "section" block; the first line says who mentioned which category in which
            debate, followed by the text as a mrkdwn block quote
    """
    attribution = f"*{quote['name']}* ({quote['party']}) mentioned *{quote['category']}* in *{quote['debate']}*"
    quoted_text = f"> {quote['text']}"
    return {
        "type": "section",
        "text": {"type": "mrkdwn", "text": attribution + "\n\n" + quoted_text},
    }

In [10]:

def people_party_memberships(people_dict: dict) -> pd.DataFrame:
    """Return each person's latest membership together with the organisation name

    Args:
        people_dict: People metadata from the Hansard data; its 'memberships' and
            'organizations' entries are read

    Returns:
        A DataFrame with one row per person and the columns 'person_id', 'post_id',
        'start_date', 'start_reason' and 'name_org' (the organisation name)
    """
    organisations = pd.DataFrame(people_dict['organizations'])[['id', 'name']]
    memberships = pd.DataFrame(people_dict['memberships'])
    # Newest first, so that dropping duplicates keeps each person's latest membership
    latest = memberships.sort_values('start_date', ascending=False).drop_duplicates('person_id')
    # Columns present on both sides get "_" (membership) and "_org" (organisation) suffixes
    with_org = latest.merge(
        organisations,
        left_on='on_behalf_of_id',
        right_on='id',
        how='left',
        suffixes=("_", "_org"),
    )
    return with_org[['person_id', 'post_id', 'start_date', 'start_reason', 'name_org']]

def get_weekly_start_date(end_date: str, weeks:int=1) -> str:
    """Return the date that lies a whole number of weeks before end_date

    Args:
        end_date: The end date of the period, in the format "YYYY-MM-DD"
        weeks: How many weeks the period spans

    Returns:
        The start date of the period, in the format "YYYY-MM-DD"
    """
    date_format = "%Y-%m-%d"
    period_start = datetime.strptime(end_date, date_format) - timedelta(weeks=weeks)
    return period_start.strftime(date_format)


def get_speeches_for_period(
    debates_df: pd.DataFrame,
    labelstore_df: pd.DataFrame,
    start_date: str,
    end_date: str,
    party_memberships_df: pd.DataFrame = None,
) -> pd.DataFrame:
    """Get the labelled speeches for a given period, with headings and party names

    Args:
        debates_df: The DataFrame with debates
        labelstore_df: The DataFrame with labels
        start_date: The start date of the period, in the format "YYYY-MM-DD"
        end_date: The end date of the period, in the format "YYYY-MM-DD"
        party_memberships_df: DataFrame with 'person_id' and 'name_org' columns, used to
            attach each speaker's party. When omitted, the notebook-level `parties_df`
            is used, as before.

    Returns:
        The speeches dated within [start_date, end_date] that have a row in the label
        store, without repeated (speakername, speech) pairs, with 'mission_labels',
        'topic_labels', a combined 'headings' column and the speaker's 'name_org'
    """
    if party_memberships_df is None:
        # Backward-compatible fallback on the notebook global
        party_memberships_df = parties_df

    labelled_speeches = (
        debates_df
        .query("date >= @start_date and date <= @end_date")
        # Inner join: speeches without a label store row are dropped
        .merge(labelstore_df[["id", "mission_labels", "topic_labels"]], left_on="speech_id", right_on="id")
        .drop_duplicates(subset=["speakername", "speech"])
    )
    return (
        labelled_speeches
        .assign(
            headings=lambda df: np.where(
                df.minor_heading.notna() & (df.minor_heading != ""),  # Is there a minor heading?
                df.major_heading.fillna("") + ": " + df.minor_heading,  # "major: minor"
                df.major_heading.fillna("")  # Major heading only
            )
        )
        # Left join: speakers without a known membership keep a missing name_org
        .merge(party_memberships_df[['person_id', 'name_org']], on='person_id', how='left', suffixes=('', '_person'))
    )

def get_debates_headings(debates_df: pd.DataFrame) -> pd.DataFrame:
    """Count speeches per date and per major / minor / combined heading, ordered by date"""
    group_keys = ['date', 'major_heading', 'minor_heading', 'headings']
    # Missing headings become empty strings so that their rows are not dropped by the groupby
    filled = debates_df.fillna({"major_heading": "", "minor_heading": ""})
    speech_counts = filled.groupby(group_keys).agg(counts=('speech_id', 'count'))
    return speech_counts.sort_values('date').reset_index()

def get_debates_major_headings(debates_df: pd.DataFrame) -> pd.DataFrame:
    """Count speeches per date and major heading, ordered by date"""
    per_heading = debates_df.groupby(["date", "major_heading"])
    speech_counts = per_heading.agg(counts=("speech_id", "count"))
    return speech_counts.sort_values("date").reset_index()


def get_debate_text(debates_df: pd.DataFrame, debate_title: str) -> str:
    """Render all speeches made under a major heading as one block of text

    Args:
        debates_df: The DataFrame with debates; the columns 'major_heading', 'speech_id',
            'speakername', 'name_org' and 'speech' are read
        debate_title: The major heading of the debate

    Returns:
        The title and a "-----" line, then for every speech (ordered by speech_id) a
        "<speaker> (<party>)" line, the speech text and a "-----" line
    """
    speeches = debates_df.query("major_heading == @debate_title").sort_values('speech_id')
    parts = [debate_title + "\n-----\n"]
    for speech in speeches.to_dict(orient='records'):
        parts.append(f"{speech['speakername']} ({speech['name_org']})" + "\n")
        parts.append(speech['speech'] + "\n")
        parts.append("-----" + "\n")
    return "".join(parts)


def relevance_check(df: pd.DataFrame, threshold: int = 10, filter: Literal[None, 'relevant', 'not relevant']=None) -> pd.DataFrame:
    """Flag rows whose 'counts' reach the threshold and optionally keep one side only

    A boolean 'relevant' column is added (counts >= threshold). With filter='relevant'
    only the flagged rows are returned, with filter='not relevant' only the others;
    any other value returns every row.
    """
    flagged = df.assign(relevant=df['counts'] >= threshold)
    if filter not in ('relevant', 'not relevant'):
        return flagged
    expression = "relevant" if filter == 'relevant' else "not relevant"
    return flagged.query(expression)

In [172]:
import re

def get_keyword_hits(speech: str, keywords_dict: dict) -> Tuple[List, List, List]:
    """Find keyword hits in a speech and highlight them in the matching sentences

    The speech is split into sentences with `keywords.split_sentences`, and every keyword
    of every category is checked against them with `keywords.find_keyword_hits`. Each
    (keyword, sentence) hit adds one entry to each returned list, so a sentence that is
    hit by several keywords appears several times.

    Args:
        speech: Text of a single speech
        keywords_dict: Mapping from category to the keywords of that category.
            NOTE(review): each keyword is iterated over below, so it is presumably a
            list of terms rather than a plain string - confirm against `keywords.get_keywords`

    Returns:
        Three parallel lists: the category of each hit, the keyword that was hit, and the
        sentence with every word containing one of the keyword's terms wrapped in asterisks
    """
    hits_categories = []
    hits_keywords = []
    marked_sentences = []
    sents = keywords.split_sentences([speech], ids=[0])[0]

    for cat, category_keywords in keywords_dict.items():
        for kw in category_keywords:
            hits = keywords.find_keyword_hits(kw, sents)
            for i, hit in enumerate(hits):
                if not hit:
                    continue
                hits_categories.append(cat)
                hits_keywords.append(kw)

                # Wrap the full words that contain each term in asterisks
                marked_sentence = sents[i]
                for keyword in kw:
                    # \S* on both sides expands the match from the term to the whole word
                    pattern = r'\b(\S*' + re.escape(keyword) + r'\S*)\b'
                    marked_sentence = re.sub(pattern, r'*\1*', marked_sentence)
                marked_sentences.append(marked_sentence)

    return hits_categories, hits_keywords, marked_sentences

In [97]:
from datetime import datetime, timedelta

def get_all_fridays_last_month(today: str) -> list:
    """List every Friday of the calendar month before the one containing `today`

    Args:
        today: Reference date, in the format "YYYY-MM-DD"

    Returns:
        The Fridays of the previous month in chronological order, as "YYYY-MM-DD" strings
    """
    reference_day = datetime.strptime(today, "%Y-%m-%d")
    # The day before the first of this month is the last day of the previous month
    month_end = reference_day.replace(day=1) - timedelta(days=1)
    month_start = month_end.replace(day=1)
    days_in_month = (month_end - month_start).days + 1
    month_days = (month_start + timedelta(days=offset) for offset in range(days_in_month))
    # weekday() counts from Monday = 0, so Friday is 4
    return [day.strftime("%Y-%m-%d") for day in month_days if day.weekday() == 4]

# Get all Fridays of the past month
fridays_last_month = get_all_fridays_last_month("2024-11-03")

# Display the result
fridays_last_month

['2024-10-04', '2024-10-11', '2024-10-18', '2024-10-25']

In [11]:
parties_df = people_party_memberships(people_dict)

In [109]:
# Keywords for each of the three missions, keyed by mission code.
# Note: the loop leaves `mission` bound to 'AHL' in the notebook namespace; the
# pipeline cell further down assigns `mission` explicitly before using it.
keywords_dict = {}
for mission in ['ASF', 'AFS', 'AHL']:
    keywords_dict[mission] = keywords.get_keywords(mission)

In [110]:
# for message_date in fridays_last_month:
#     mission = 'AHL'
#     # message_date = datetime.now().strftime("%Y-%m-%d")
#     # message_date = "2024-10-10"
#     # message_date = "2024-11-10"
#     data_end_date = message_date
#     data_start_date = get_weekly_start_date(data_end_date, weeks=1)

#     # Get the speeches of the preceding week
#     weekly_speeches_df = get_speeches_for_period(
#         debates_df=debates_df,
#         labelstore_df=labelstore_df,
#         start_date=data_start_date,
#         end_date=data_end_date
#     )
#     # Select only debates related to one of the missions
#     mission_debates_df = weekly_speeches_df.query("mission_labels == @mission")
#     # Get the major headings
#     mission_debates_major_headings_df = get_debates_major_headings(mission_debates_df)
#     # Filter the debates by relevance (simple threshold)
#     debates_to_summarise_df = relevance_check(mission_debates_major_headings_df, filter='relevant')
#     # Get unique debate titles
#     debate_titles = debates_to_summarise_df.major_heading.unique()

#     print(message_date)
#     print(debate_titles)

In [177]:
# Mission to report on (alternative: 'ASF')
# mission = 'ASF'
mission = 'AFS'
# The message date doubles as the last day of the data window (alternatives kept for reference)
# message_date = datetime.now().strftime("%Y-%m-%d")
# message_date = "2024-11-08"
message_date = "2024-11-22"
# message_date = "2024-10-10"
data_end_date = message_date
data_start_date = get_weekly_start_date(data_end_date, weeks=1)

# Get the labelled speeches of the week ending on data_end_date
weekly_speeches_df = get_speeches_for_period(
    debates_df=debates_df,
    labelstore_df=labelstore_df,
    start_date=data_start_date,
    end_date=data_end_date
)
# Keep only the speeches whose mission label equals the selected mission
mission_debates_df = weekly_speeches_df.query("mission_labels == @mission")
# Count the mission-labelled speeches per date and major heading
mission_debates_major_headings_df = get_debates_major_headings(mission_debates_df)
# Keep the debates that reach relevance_check's default threshold of labelled speeches
debates_to_summarise_df = relevance_check(mission_debates_major_headings_df, filter='relevant')
# Unique titles of the debates that will be summarised
debate_titles = debates_to_summarise_df.major_heading.unique()
debates_to_summarise_df

Unnamed: 0,date,major_heading,counts,relevant
1,2024-11-18,Children’s Social Care,12,True


In [178]:
importlib.reload(synthesis_utils);

In [179]:
# Summarise every selected debate and collect the results as plain dictionaries
debate_dicts = []
for debate_title in debate_titles:
    logging.info(f"Summarising debate: {debate_title}")
    # Build the text from all of the week's speeches under this heading
    # (weekly_speeches_df, not only the mission-labelled ones)
    debate_text = get_debate_text(weekly_speeches_df, debate_title)
    debate_instance = synthesis_utils.Debate(heading=debate_title, content=debate_text)   
    # Call the summariser; None is passed in place of a mission statement
    result = synthesis_utils.summarise_debate_with_structure(debate_instance, None)
    # Convert the result to a dictionary and record which debate it belongs to
    debate_dict = result.model_dump()
    debate_dict['title'] = debate_title
    debate_dicts.append(debate_dict)


2024-12-04 11:51:47,229 - root - INFO - Summarising debate: Children’s Social Care


In [180]:
# Debates with few mission-labelled speeches (below the relevance threshold) are not
# summarised; the sentences that mention mission keywords are quoted instead
quotes = []
debates_not_relevant_df = relevance_check(mission_debates_major_headings_df, filter='not relevant')

for debate_title in debates_not_relevant_df.major_heading.to_list():
    # Mission-labelled speeches made under this heading
    df = mission_debates_df.query("major_heading == @debate_title")
    for _, row in df.iterrows():
        # cat_hits is returned by the helper but not used here
        cat_hits, kw_hits, sentences = get_keyword_hits(row['speech'], keywords_dict[mission])
        # Join the highlighted sentences into one excerpt
        # NOTE(review): a quote is appended even when `sentences` is empty - confirm that is intended
        text = " .. ".join(sentences)

        quote = {
            "name": row['speakername'],
            "party": row['name_org'],
            "category": row['topic_labels'],
            "debate": debate_title,
            "text": text,
            "keywords": kw_hits
        }
        quotes.append(quote)



In [182]:
# quotes

In [183]:
# Each debate yields a list of blocks; flatten them into a single list
debate_blocks = [block for debate in debate_dicts for block in debate_summary(debate)]

quote_blocks = [quote_block(quote) for quote in quotes]

# Message layout: header and context, mission banner, debate summaries, keyword quotes
blocks = [
    *message_header(message_date, data_start_date, data_end_date),
    mission_header(mission),
    *debate_blocks,
    *quote_blocks,
]

In [184]:
# pretty print the blocks
import json

print(json.dumps(blocks, indent=2))

[
  {
    "type": "header",
    "text": {
      "type": "plain_text",
      "text": "Policy update 2024-11-22"
    }
  },
  {
    "type": "context",
    "elements": [
      {
        "type": "mrkdwn",
        "text": "House of Commons debates (2024-11-15 - 2024-11-22)"
      }
    ]
  },
  {
    "type": "section",
    "text": {
      "type": "mrkdwn",
      "text": ":hatched_chick: *A Fairer Start*"
    }
  },
  {
    "type": "section",
    "text": {
      "type": "mrkdwn",
      "text": "*Children\u2019s Social Care*\nTo discuss the Government's plans for reforming children's social care, focusing on improving outcomes for vulnerable children and addressing issues of profiteering in the sector.."
    }
  },
  {
    "type": "section",
    "text": {
      "type": "mrkdwn",
      "text": "*Positives*\n\u2022 Labour's commitment to early intervention and family support (Phillipson, Lab)\n\u2022 Investment of \u00a390 million to expand capacity for children in care (Phillipson, Lab)\n\u202

In [92]:
importlib.reload(synthesis_utils);

In [185]:
# Send the message with blocks
response = slack_webhook.send(blocks=blocks)

In [156]:
response.status_code

200

In [613]:
# add title as another key


In [514]:
# pd.set_option('display.max_colwidth', 100)
# get_speeches_for_period(
#     debates_df=debates_df,
#     labelstore_df=labelstore_df,
#     start_date=get_weekly_start_date(end_date, weeks=1),
#     end_date=end_date
# )[['headings']]

In [492]:
# Mission and window for this exploration (alternative end dates kept for reference)
mission = 'ASF'
# end_date = "2024-11-08"
end_date = "2024-10-10"
# end_date = "2024-11-14"

# Parse the end date so that date arithmetic can be applied
end_date_dt = datetime.strptime(end_date, "%Y-%m-%d")
# The window starts seven days before the end date
one_week_ago = end_date_dt - timedelta(weeks=1)
# Back to a "YYYY-MM-DD" string
start_date = one_week_ago.strftime("%Y-%m-%d")

# NOTE(review): start_date and end_date are not used below. The speeches come from whatever
# weekly_speeches_df an earlier cell left in the kernel, so the window shown depends on run order.

mission_debates = weekly_speeches_df.query("mission_labels == @mission")
# Attach party names; if weekly_speeches_df already has a name_org column, the one added
# here is suffixed "_person"
mission_debates = mission_debates.merge(parties_df[['person_id', 'name_org']], on = 'person_id', how='left', suffixes=('', '_person'))
# Distinct (date, major heading) pairs, in chronological order
debates_dates = mission_debates[['date', 'major_heading']].drop_duplicates().sort_values('date')

# Number of mission-labelled speeches per major heading, joined onto the dated list
debate_titles_df = mission_debates.groupby(["major_heading"]).agg(counts=("speech_id", "count")).reset_index()
debates_dates = debates_dates.merge(debate_titles_df, on="major_heading", how="left").sort_values('date')
debates_dates

Unnamed: 0,date,major_heading,counts
0,2024-10-07,"Carbon Capture, Usage and Storage",15
1,2024-10-08,Energy Security and Net Zero,25
2,2024-10-08,Farming and Food Security,1
3,2024-10-09,Prime Minister,2
4,2024-10-10,Business of the House,2


In [421]:
# Attach party names once: the merge does not depend on the debate, so it is done
# outside the loop instead of being recomputed for every heading
_weekly_speeches_df = weekly_speeches_df.merge(parties_df[['person_id', 'name_org']], on = 'person_id', how='left', suffixes=('', '_person'))
for debate in debates_dates.major_heading.to_list():
    # All of the week's speeches under this heading, in speech_id order
    debs = _weekly_speeches_df.query("major_heading == @debate").sort_values('speech_id').to_dict(orient='records')
    debate_text = ""
    for s in debs:
        debate_text = debate_text + f"{s['speakername']} ({s['name_org']})" + "\n"
        debate_text = debate_text + s['speech'] + "\n"
        debate_text = debate_text + "-----" + "\n"
    debate_instance = synthesis_utils.Debate(heading=debate, content=debate_text)
    # Check relevance against the statement of the mission that debates_dates was built
    # for, rather than a hard-coded 'ASF'
    is_relevant = synthesis_utils.is_debate_relevant(debate_instance, synthesis_utils.config['mission_statements'][mission])['is_relevant']
    if is_relevant:
        # generate_simple_summary or generate_nesta_summary can be swapped in here to
        # compare the different summary styles
        result = synthesis_utils.generate_direct_summary(debate_instance, None)
        print(debate)
        print("------")
        print("Summary")
        print("------")
        print(result.summary)
        print("======")
        print("======")
    

Carbon Capture, Usage and Storage
------
Summary
------
Main purpose: Discussion on the UK Government's carbon capture programme and its implications for energy and jobs.

Positives: £21.7 billion funding for carbon capture projects (Miliband, Lab). Creation of 4,000 jobs in industrial heartlands (Miliband, Lab). Commitment to decarbonising hard-to-abate sectors (Miliband, Lab). Support for existing industries and clean energy transition (Heylings, Lib Dem).

Negatives: Criticism of previous government’s inaction on carbon capture (Coutinho, Con). Concerns over job losses in steel and energy sectors (Coutinho, Con). Doubts about the effectiveness of carbon capture technology (Adrian, Green). Potential increase in global emissions due to imports (Coutinho, Con).

Next steps: Further announcements on carbon capture sites expected (Miliband, Lab). Engagement with stakeholders on job creation and training (Miliband, Lab). Commitment to support Acorn project in Scotland (Doogan, SNP).
Energ

## Keyword quotes

In [445]:
debates_with_mentions_df = debates_dates[debates_dates.counts < 10]

In [446]:
debates_with_mentions_df

Unnamed: 0,date,major_heading,counts
2,2024-10-08,Farming and Food Security,1
3,2024-10-09,Prime Minister,2
4,2024-10-10,Business of the House,2


In [461]:
_weekly_speeches_df = _weekly_speeches_df.assign(ordering = list(range(len(_weekly_speeches_df))))

In [467]:
# Pick one of the low-count debates and print each mission-labelled speech together
# with the speeches immediately before and after it
debate = debates_with_mentions_df.iloc[2].major_heading
# NOTE(review): confirm that "@mission in mission_labels" filters rows as intended; the
# other cells select the mission with "mission_labels == @mission"
df = (
    _weekly_speeches_df
    .query("major_heading == @debate")
    .query("@mission in mission_labels")
)
# The next line has no effect: it is not the last expression of the cell, so nothing is displayed
_weekly_speeches_df.index
# for each debate get the speech and one speech before
df = df.sort_values('speech_id')
for i in range(len(df)):
    speech_id = df.iloc[i].speech_id
    speech = _weekly_speeches_df.query("speech_id == @speech_id").iloc[0]
    # `ordering` is the positional counter added to _weekly_speeches_df in the previous cell
    speech_index = speech.ordering
    # get the previous and the next speech by position
    # NOTE(review): .iloc[0] raises IndexError for the first / last speech of the frame, and
    # the neighbours are taken from the whole week, not restricted to this debate
    speech_previous = _weekly_speeches_df.query("ordering == @speech_index - 1").iloc[0]
    speech_next = _weekly_speeches_df.query("ordering == @speech_index + 1").iloc[0]
    print("------") 
    print(speech_previous.speakername)   
    print(speech_previous.speech)
    print("------")  
    print(speech.speakername)  
    print(speech.speech)
    print("------")    
    print(speech_next.speakername)  
    print(speech_next.speech)
    print("------")        

------
Lucy Powell
The hon. Member would not expect me to comment on the individual cases that he outlines. We do have Home Office questions coming up on 21 October and I am sure that he can raise those matters there. One thing that we have found since the election is that morale and leadership in our police forces are not what they could be, which is why the Home Secretary is  embarking on a recruitment drive and reforms in our police force to ensure that we have all the police that we need.
------
Chris Murray
Around half a million people rely on heat networks, which is good in achieving our climate goals, but heat networks are not included in the Ofgem price cap. My constituents in Craigmillar are therefore facing a 400% increase in their energy bills. That is terrifying for them and completely unacceptable. Heat networks should have the same protections as traditional heating systems. Will the Leader of the House allow a debate in Government time so that we can address this terribl

In [469]:
# Load the keyword definitions for `mission`. Despite the `_df` suffix, the
# value displayed further down is a dict mapping a category name to a list of
# keyword groups (each group being a list of strings).
keywords_df = keywords.get_keywords(mission)

In [471]:
# Example speech (copied from the printed debate output) used to try out the
# keyword matching; written as one literal per sentence for readability.
text = (
    "Around half a million people rely on heat networks, which is good in achieving our climate goals, "
    "but heat networks are not included in the Ofgem price cap. "
    "My constituents in Craigmillar are therefore facing a 400% increase in their energy bills. "
    "That is terrifying for them and completely unacceptable. "
    "Heat networks should have the same protections as traditional heating systems. "
    "Will the Leader of the House allow a debate in Government time so that we can address this terrible anomaly?"
)

In [480]:
# Split the single example text into sentences; only one text (id 0) is passed
# in, so keep just the entry for it.
sents = keywords.split_sentences([text], ids=[0])[0]

In [481]:
# Check every keyword group of every category against the example sentences
# and report each sentence flagged as a hit.
for keyword_groups in keywords_df.values():
    for keyword_group in keyword_groups:
        hit_flags = keywords.find_keyword_hits(keyword_group, sents)
        matched_positions = [pos for pos, flag in enumerate(hit_flags) if flag]
        for pos in matched_positions:
            print(f"Hit for {keyword_group} in sentence {sents[pos]}")

Hit for ['heat network'] in sentence Around half a million people rely on heat networks, which is good in achieving our climate goals, but heat networks are not included in the Ofgem price cap.


In [470]:
# TODO: write a function that finds the sentences containing the keywords
# and extracts them.
keywords_df

{'Bioenergy': [['bioenergy']],
 'Biomass heating': [['biomass boiler'], ['biomass heat']],
 'Built environment': [['decarbon', 'build'],
  ['decarbon', 'built'],
  ['decarbon', 'built environment'],
  ['decarbon', 'home'],
  ['decarbon', 'house'],
  ['low carbon', 'build'],
  ['low carbon', 'built'],
  ['low carbon', 'built environment'],
  ['low carbon', 'home'],
  ['low carbon', 'house']],
 'CCUS': [['carbon capture'], ['carbon capture', 'storage']],
 'Decarbonisation - General': [['climate tech'],
  ['green tech'],
  ['net zero material'],
  ['sustainability']],
 'District heating': [['district', 'heat'], ['heat network']],
 'Energy efficiency': [['energy efficiency', 'build'],
  ['energy efficiency', 'built'],
  ['energy efficiency', 'home'],
  ['energy efficiency', 'house'],
  ['energy management'],
  ['insulation', 'build'],
  ['insulation', 'home'],
  ['insulation', 'house'],
  ['retrofit'],
  ['smart meter'],
  ['smart thermostat']],
 'Energy grid': [['demand response'], ['elec

## Sending the results to Slack


In [486]:
# Placeholder values for the Slack message: the date of the message and the
# date range of the underlying data (day-month-year strings).
message_date = "03-12-2024"
data_start_date, data_end_date = "01-12-2024", "03-12-2024"

# Dummy debate summary with the fields used for one debate entry.
debate = dict(
    title="Title of the debate",
    summary="Summary of the debate",
    positives=["Positive point 1", "Positive point 2"],
    negatives=["Negative point 1", "Negative point 2"],
    next_steps=["Next step 1", "Next step 2"],
)

# Dummy quote with the fields used for one quote entry.
quote = dict(
    name="John Doe",
    party="Labour",
    category="Climate change",
    debate="Title of the debate",
    text="This is a quote from the debate.",
)

KeyError: 'SLACK_WEBHOOK_URL_TESTING'