In [1]:
import pandas as pd
from pathlib import Path
from climate_mrp_polling.convert_polling import convert_polling_from_con_to_la

# this moved into convert_specific_polling.py

In [2]:
# Load the RenewableUK MRP constituency toplines and key them by constituency
# code ("Group" in the spreadsheet becomes "PCON2010").
renewableuk_path = Path(
    "..",
    "data",
    "raw",
    "polling",
    "RenewableUK-MRP-Constituency-Topline.xlsx",
)
renewableuk_raw = pd.read_excel(str(renewableuk_path))
renewableuk_toplines = renewableuk_raw.drop(columns=["Variable", "Name"]).rename(
    columns={"Group": "PCON2010"}
)

# The constituency code is parked in the index while dividing so that only the
# value columns are scaled by 1/100 (presumably percentages -> fractions).
polling_df = (renewableuk_toplines.set_index("PCON2010") / 100).reset_index()

# Re-express the constituency-level figures at local-authority level.
df = convert_polling_from_con_to_la(polling_df)

In [3]:
# Reshape wide -> long: one row per (local authority, question) pair, with the
# former column name in "question" and its value in "percentage".
authority_id_columns = ["local-authority-code", "official-name"]
melted_df = df.melt(
    id_vars=authority_id_columns,
    var_name="question",
    value_name="percentage",
)

Unnamed: 0,local-authority-code,official-name,question,percentage
0,ABD,Aberdeenshire Council,1) Percentage of constituency who are consider...,0.366647
1,ABE,Aberdeen City Council,1) Percentage of constituency who are consider...,0.383017
2,ADU,Adur District Council,1) Percentage of constituency who are consider...,0.360000
3,AGB,Argyll and Bute Council,1) Percentage of constituency who are consider...,0.360000
4,AGY,Isle of Anglesey County Council,1) Percentage of constituency who are consider...,0.490000
...,...,...,...,...
4961,WYCA,West Yorkshire Combined Authority,10) Percentage of constituency that believe th...,0.241664
4962,WYE,Wyre Forest District Council,10) Percentage of constituency that believe th...,0.230000
4963,WYR,Wyre Borough Council,10) Percentage of constituency that believe th...,0.228425
4964,YOR,City of York Council,10) Percentage of constituency that believe th...,0.217397


In [6]:
# Read the "MRP Results" sheet of the Onward / Public First workbook.
onward_results_path = Path(
    "..",
    "data",
    "raw",
    "polling",
    "Public-First-Poll-for-Onward-MRP-Model.xlsx",
)
df = pd.read_excel(str(onward_results_path), sheet_name="MRP Results")

# Keep the constituency name and the Q* result columns, but drop any column
# whose name ends in "Winner".
valid_columns = [
    name
    for name in df.columns
    if not name.endswith("Winner")
    and (name == "Constituency" or name.startswith("Q"))
]
df = df[valid_columns]


def sort_name(s: str) -> str:
    """
    Reduce a constituency name to a rough, word-order-insensitive matching key.

    The name is passed through a fixed sequence of textual substitutions
    (special-case spellings, hyphens to spaces, lower-casing, removal of
    filler words and punctuation), then split on spaces; the resulting words
    are sorted alphabetically and re-joined with single spaces.

    Two spellings of the same constituency are expected to produce the same
    key, so the function must be applied to both sides of any lookup.
    """

    # Substitutions that rely on the original casing; applied before lower().
    cased_rules = (
        ("Na h-Eileanan An Iar (Western Isles)", "Na h-Eileanan an Iar"),
        (" (Yorks)", ""),
        ("Môn", "Mon"),
        ("-", " "),
    )
    # Substitutions on the lower-cased text. Order matters: commas are removed
    # before the " st," rule runs, and the bare " of" rule runs after " of ".
    lowered_rules = (
        (" & ", " and "),
        (" of ", " "),
        (" the ", " "),
        (",", ""),
        (" st ", " saint "),
        (" st.", " saint "),
        (" st,", " saint "),
        (" of", " "),
        ("kingston upon hull", "hull"),
        (")", ""),
        ("(", ""),
    )

    key = s
    for old, new in cased_rules:
        key = key.replace(old, new)
    key = key.lower()
    for old, new in lowered_rules:
        key = key.replace(old, new)

    # Dropping empty pieces after splitting on single spaces has the same
    # effect as collapsing runs of spaces first.
    words = [word for word in key.strip().split(" ") if word]
    return " ".join(sorted(words))


# Build a {normalised constituency name -> PCON20CD code} mapping from the
# official names-and-codes file.
lookup_df = pd.read_csv(
    Path(
        "..",
        "data",
        "raw",
        "Westminster_Parliamentary_Constituencies_(Dec_2020)_Names_and_Codes_in_the_United_Kingdom.csv",
    )
)
# NOTE(review): if two official names normalise to the same key, the later row
# silently wins here (as it did before) - worth confirming there are none.
lookup = dict(zip(lookup_df["PCON20NM"].apply(sort_name), lookup_df["PCON20CD"]))

# Normalise the poll's constituency names the same way and fail loudly, listing
# every name that has no match, rather than stopping at the first one with only
# its token-sorted key in the error.
normalised_names = df["Constituency"].apply(sort_name)
unmatched_names = sorted(
    {
        original
        for original, normalised in zip(df["Constituency"], normalised_names)
        if normalised not in lookup
    }
)
if unmatched_names:
    raise KeyError(f"No constituency code found for: {unmatched_names}")
constituency_codes = normalised_names.map(lookup)

# Replace the constituency name with its code, placed as the first column.
df = df.drop(columns=["Constituency"])
df.insert(0, "PCON2010", constituency_codes)

# Re-express the constituency-level figures at local-authority level.
df = convert_polling_from_con_to_la(df)

# Reshape wide -> long: one row per (local authority, question) pair.
melted_df = df.melt(
    id_vars=["local-authority-code", "official-name"],
    value_name="percentage",
    var_name="question",
)
# Tag every row with the poll it came from, as the first column.
melted_df.insert(0, "source", "Onward2022")


melted_df

Unnamed: 0,source,local-authority-code,official-name,question,percentage
0,Onward2022,ABD,Aberdeenshire Council,Q01_CON,0.224289
1,Onward2022,ABE,Aberdeen City Council,Q01_CON,0.153876
2,Onward2022,ADU,Adur District Council,Q01_CON,0.232528
3,Onward2022,AGB,Argyll and Bute Council,Q01_CON,0.190588
4,Onward2022,AGY,Isle of Anglesey County Council,Q01_CON,0.181368
...,...,...,...,...,...
37049,Onward2022,WYCA,West Yorkshire Combined Authority,Q08_DK,0.201779
37050,Onward2022,WYE,Wyre Forest District Council,Q08_DK,0.178992
37051,Onward2022,WYR,Wyre Borough Council,Q08_DK,0.184496
37052,Onward2022,YOR,City of York Council,Q08_DK,0.178778


In [10]:
# Build a table mapping each short result-column key (e.g. "Q01_CON") to a
# readable "question -- answer" label, using the workbook's GUIDE sheet.
guide_path = str(
    Path(
        "..",
        "data",
        "raw",
        "polling",
        "Public-First-Poll-for-Onward-MRP-Model.xlsx",
    )
)

guide_df = pd.read_excel(guide_path, sheet_name="GUIDE")

# First part of the sheet: the first 11 question rows share the first 8 answer
# options, so every (question, answer) combination gets its own key.
party_qs = guide_df[["Key", "Question"]].head(11)
party_as = guide_df[["Answer Key", "Answer Options"]].head(8)

key_labels = {}
for _, row in party_qs.iterrows():
    for _, answer_row in party_as.iterrows():
        key = row["Key"] + "_" + answer_row["Answer Key"]
        answer = row["Question"] + " -- " + answer_row["Answer Options"]
        key_labels[key] = answer

# Second part of the sheet: re-read it with the first 15 rows skipped so the
# next row is used as the header.
# NOTE(review): the original comment here said "start at row 14" while the code
# passes skiprows=15 - confirm which row the second table really starts on.
guide_df = pd.read_excel(guide_path, sheet_name="GUIDE", skiprows=15)

# Key / Question are only filled on the first row of each question, so carry
# them down over the blank cells. `.ffill()` replaces the deprecated
# `fillna(method="ffill")` spelling and behaves the same.
guide_df["Key"] = guide_df["Key"].ffill()
guide_df["Question"] = guide_df["Question"].ffill()
guide_df["Real answer"] = guide_df["Question"] + " -- " + guide_df["Answer Options"]
new_dict = guide_df.set_index("Answer Key")["Real answer"].to_dict()
key_labels.update(new_dict)

# Convert to a dataframe with one row per short key.
new_keys = pd.DataFrame.from_dict(key_labels, orient="index").reset_index()
new_keys.columns = ["short", "question"]
new_keys["source"] = "Onward2022"
# Rearrange to source, question, short.
new_keys = new_keys[["source", "question", "short"]]
new_keys

Unnamed: 0,Key,Question,Answer Key,Answer Options
0,Q01,"Voting Intention (VI) - ""And, if a general ele...",CON,Conservative
1,Q03,VI With Pledge “If I was made Conservative Par...,LAB,Labour
2,Q04,VI with pledge “If I was made Conservative Par...,LIB,Liberal Democrat
3,Q05,VI with pledge “If I was made Conservative Par...,Reform,Reform UK
4,Q06,VI with pledge “If I was made Conservative Par...,Green,Green
5,Q09,"VI Drop NZ - ""If there was a general election ...",NAT,Plaid Cymru/SNP
6,Q10,"VI if - ""The Conservative Party makes a commit...",OTH,Another party
7,Q11,"VI if - ""The Conservative Party makes a commit...",ZNV,I would not vote
8,Q12,"VI If - ""The Conservative Party makes a commit...",,
9,Q13,"VI If - ""The Conservative Party makes a commit...",,


Unnamed: 0,Answer Key,Answer Options
0,CON,Conservative
1,LAB,Labour
2,LIB,Liberal Democrat
3,Reform,Reform UK
4,Green,Green
5,NAT,Plaid Cymru/SNP
6,OTH,Another party
7,ZNV,I would not vote


Unnamed: 0,source,question,short
0,Onward2022,"Voting Intention (VI) - ""And, if a general ele...",Q01_CON
1,Onward2022,"Voting Intention (VI) - ""And, if a general ele...",Q01_LAB
2,Onward2022,"Voting Intention (VI) - ""And, if a general ele...",Q01_LIB
3,Onward2022,"Voting Intention (VI) - ""And, if a general ele...",Q01_Reform
4,Onward2022,"Voting Intention (VI) - ""And, if a general ele...",Q01_Green
...,...,...,...
92,Onward2022,"""How seriously do you take the issue of climat...",Q07_High
93,Onward2022,"""How seriously do you take the issue of climat...",Q07_DK
94,Onward2022,"""Which of the following comes closest to your ...",Q08_Keep
95,Onward2022,"""Which of the following comes closest to your ...",Q08_Scrap
