# Voting records storage

This notebook creates a CSV, named with today's date, of all the current policy positions and calculated party distances of MPs in a local instance of TWFY.

This is meant to be run before and after making changes to the voting records comparison system, to evaluate how those changes affect the information that is displayed.

In [1]:
import pandas as pd
import urllib.request
from bs4 import BeautifulSoup
from pathlib import Path
from datetime import date
from dataclasses import dataclass
# Base URL of the local TWFY instance (no trailing slash).
twfy_instance = "http://127.0.0.1:8000"
# Address of the MP listing page on that instance.
test_url = twfy_instance + "/mps/"


In [2]:

def get_text(v):
    """
    return the stripped text of an element, or the value itself if it is falsy
    """
    # Falsy values (e.g. None when a lookup found nothing) are passed
    # straight back to the caller unchanged.
    if not v:
        return v
    text = v.get_text()
    return text.strip()

@dataclass
class TWFYAnalysis:
    """
    scrape the policy positions shown for every MP on a TWFY instance

    twfy_instance is the base URL of the site; page paths such as "/mps/"
    are appended to it directly, so it should not end with a slash.
    """

    twfy_instance: str

    def _fetch_soup(self, url: str):
        """
        download a page and parse it into a BeautifulSoup tree
        """
        # Close each HTTP response once its body is read; a full run makes
        # one request per MP, so leaked connections add up.
        with urllib.request.urlopen(url) as response:
            html = response.read()
        # Name the parser explicitly: otherwise bs4 warns and picks whichever
        # parser happens to be installed, which can differ between machines.
        return BeautifulSoup(html, "html.parser")

    def get_mps(self) -> list:
        """
        get the href of every MP linked from the /mps/ listing page
        """
        mps_page_url = self.twfy_instance + "/mps/"
        print(mps_page_url)
        soup = self._fetch_soup(mps_page_url)

        links = soup.find_all("a", {"class": "people-list__person"})
        return [link.attrs["href"] for link in links]

    def get_df_from_mp(self, url: str) -> "pd.DataFrame | None":
        """
        extract policy from mp page

        url is the MP's href as returned by get_mps. It must split on "/"
        into exactly five parts (a leading empty part plus four segments);
        the third and fourth parts are used as the MP id and name.

        Returns one row per "vote-description" list item, sorted by
        "policy-id", or None when the page lists no such items.
        Raises ValueError if url does not have the expected shape.
        """
        # The final segment is not needed (presumably the constituency).
        _, _, mp_id, mp_name, _ = url.split("/")
        print(mp_name)
        soup = self._fetch_soup(self.twfy_instance + url + "/votes")

        prefix = "data-"
        total = []
        for li in soup.find_all("li", {"class": "vote-description"}):
            # Keep only genuine data-* attributes and strip just the leading
            # prefix, so a name containing "data-" elsewhere is not mangled.
            results = {
                name[len(prefix):]: value
                for name, value in li.attrs.items()
                if name.startswith(prefix)
            }
            results["evidence"] = get_text(
                li.find("a", {"class": "vote-description__evidence"})
            )
            results["mp_id"] = mp_id
            results["mp_name"] = mp_name
            total.append(results)

        if not total:
            return None

        df = pd.DataFrame(total).sort_values("policy-id")
        print(len(df))
        return df

    def get_all_votes_for_mps(self) -> pd.DataFrame:
        """
        combine the policy rows of every MP into a single DataFrame

        MPs whose page yields no policies are skipped. pd.concat raises
        ValueError if no MP yielded any rows at all.
        """
        mp_urls = self.get_mps()
        print(len(mp_urls))
        dfs = [self.get_df_from_mp(url) for url in mp_urls]
        return pd.concat([df for df in dfs if df is not None])

    def get_today_date(self) -> None:
        """
        write all MPs' policy rows to data/policy/mp_policy_scores_<today>.csv

        Rows are sorted by "mp_id"; the path is relative to the current
        working directory and <today> is the ISO-formatted date.
        """
        today = date.today().isoformat()
        df = self.get_all_votes_for_mps().sort_values("mp_id")
        out_dir = Path("data", "policy")
        # to_csv does not create missing folders, so make sure the target
        # directory exists before writing (matters on a fresh checkout).
        out_dir.mkdir(parents=True, exist_ok=True)
        df.to_csv(out_dir / f"mp_policy_scores_{today}.csv", index=False)

# Scrape every MP's policy page on the configured instance and write the
# dated CSV snapshot. This makes one HTTP request per MP, so it needs the
# instance to be running and takes a while; progress is printed as it goes.
ta = TWFYAnalysis(twfy_instance)
ta.get_today_date()

http://127.0.0.1:8000/mps/
650
diane_abbott
99
debbie_abrahams
89
nigel_adams
93
bim_afolami
31
adam_afriyie
96
nickie_aiken
11
peter_aldous
93
rushanara_ali
93
tahir_ali
11
lucy_allan
60
rosena_allin-khan
44
mike_amesbury
31
sir_david_amess
99
fleur_anderson
11
lee_anderson
11
stuart_anderson
11
stuart_andrew
93
caroline_ansell
50
tonia_antoniazzi
31
edward_argar
60
jon_ashworth
87
sarah_atherton
11
victoria_atkins
60
gareth_bacon
11
richard_bacon
99
kemi_badenoch
31
shaun_bailey
11
siobhan_baillie
11
duncan_baker
11
steven_baker
93
harriett_baldwin
93
steve_barclay
93
hannah_bardell
60
paula_barker
11
john_baron
99
simon_baynes
11
margaret_beckett
99
orfhlaith_begley
apsana_begum
11
aaron_bell
11
hilary_benn
99
scott_benton
11
paul_beresford
99
jake_berry
93
clive_betts
99
saqib_bhatti
11
mhairi_black
60
ian_blackford
60
bob_blackman
93
kirsty_blackman
60
olivia_blake
11
paul_blomfield
93
crispin_blunt
99
peter_bone
96
steven_bonnar
11
peter_bottomley
99
andrew_bowie
31
ben_bradley
3