In [None]:
import json
import os
from itertools import cycle, islice
from pathlib import Path
from textwrap import fill

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import spacy
from ipywidgets import interact
from wordcloud import WordCloud

nlp = spacy.load("en_core_web_md")

In [None]:
# Set filepaths
# ABSOLUTE_HERE is the parent of the current working directory; the processed
# data folder is looked up one level above that again.
ABSOLUTE_HERE = Path(os.getcwd()).parent
processed_data_dir = ABSOLUTE_HERE.parent / "data/processed"
data_filepath = processed_data_dir / "survey-responses.csv"
questions_filepath = processed_data_dir / "questions_metadata.json"

In [None]:
# Load datasets
# JSON is read as UTF-8 explicitly so the result does not depend on the
# platform's default text encoding.
with open(questions_filepath, encoding="utf-8") as stream:
    question_metadata = json.load(stream)

# Survey responses, indexed by the "Voter" column.
data = pd.read_csv(data_filepath, index_col="Voter")

# Find filter columns
# The dropdown used by every interactive plot offers "All" plus each choice
# of the first question in the metadata.
filters = ["All"] + question_metadata[0]["choices"]

In [None]:
def generate_colors(df):
    """Return one hex colour per element of ``df``, repeating a fixed palette.

    Only ``len(df)`` is used; the palette of five colours wraps around when
    more colours are needed than it contains.
    """
    palette = ["#e66581", "#5799c9", "#f5a252", "#9ebd6e", "#e1f0c4"]
    palette_size = len(palette)
    return [palette[position % palette_size] for position in range(len(df))]

In [None]:
def autolabel(rects, ax):
    """Write each bar's height just above it.

    ``rects`` is an iterable of objects exposing ``get_x``, ``get_width`` and
    ``get_height``; ``ax`` is the object whose ``annotate`` method draws the
    text. The label is horizontally centred on the bar and offset 5 points
    upwards from its top edge.
    """
    label_style = {
        "xytext": (0, 5),
        "textcoords": "offset points",
        "ha": "center",
        "va": "bottom",
    }
    for bar in rects:
        height = bar.get_height()
        centre_x = bar.get_x() + (bar.get_width() / 2.0)
        ax.annotate(f"{height}", (centre_x, height), **label_style)

### Question 1

In [None]:
# Bar chart of how many respondents picked each choice of the first question.
question = question_metadata[0]["question"]
choices = [
    "Other" if "Other - please share in the document" in choice else choice
    for choice in question_metadata[0]["choices"]
]

# reindex() orders the counts like the metadata's choice list and, through
# fill_value, gives choices nobody selected an explicit integer zero. This
# replaces growing the Series with pd.concat inside a loop.
hist = data[question].value_counts().reindex(choices, fill_value=0)

_, ax = plt.subplots(figsize=(12, 8))
hist.plot(kind="bar", ax=ax, color=generate_colors(hist), rot=0)

# Wrap long choice texts so neighbouring tick labels do not overlap.
xlabels = [fill(x, 20) for x in choices]
ax.set_xticklabels(xlabels)

ax.set_title(question, fontsize=14, y=1.08)
# The y axis is hidden because autolabel() prints each count above its bar.
ax.set_yticks([])
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["left"].set_visible(False)

autolabel(ax.patches, ax)

plt.show()

### Question 2

In [None]:
@interact
def plot_q2(group=filters):
    """Plot the mean response of each ``question_metadata[1]`` column.

    Every column of ``data`` whose name contains the question text is
    averaged and drawn as one horizontal bar, sorted ascending, on an x axis
    fixed to the range 0.5-5.5 with ticks at 1-5.

    Parameters
    ----------
    group : str
        "All" to use every respondent; any other value keeps only the rows
        whose answer to the first question equals ``group``.
    """
    question = question_metadata[1]["question"]
    choices = [
        "Other" if "Other - please share in the document" in choice else choice
        for choice in question_metadata[1]["choices"]
    ]
    question_cols = [col for col in data.columns if question in col]

    # Select the respondents once instead of rebuilding the mask per column.
    if group != "All":
        subset = data.loc[
            data[question_metadata[0]["question"]] == group, question_cols
        ]
    else:
        subset = data.loc[:, question_cols]

    # Columns are labelled positionally with the choices, as before, but the
    # Series is built in one step rather than by repeated pd.concat calls.
    # A group without answers yields NaN means, which are drawn as 0.
    new_series = pd.Series(
        [subset[col].mean() for col in question_cols],
        index=choices[: len(question_cols)],
        dtype=float,
    ).fillna(0)
    ranked = new_series.sort_values(ascending=True)

    # Bar positions follow the values actually plotted, so a mismatch between
    # the number of columns and the number of choices cannot break barh().
    y_pos = np.arange(len(ranked))

    _, ax1 = plt.subplots(figsize=(12, 8))

    hbars = ax1.barh(
        y_pos,
        ranked.values,
        color=generate_colors(ranked)[::-1],
    )

    ax1.set_title(question, fontsize=14)
    ax1.set_xlim(0.5, 5.5)
    ax1.set_xticks([1, 2, 3, 4, 5])
    ax1.set_xlabel("")
    ax1.set_yticks([])
    # The two y-axis labels act as captions for the low and high ends of the
    # horizontal scale.
    ax1.set_ylabel("Strongly Disagree", fontsize=18)
    ax1.spines["top"].set_visible(False)

    # First pass writes the numeric value at the bar end, second pass writes
    # the choice text inside the bar.
    ax1.bar_label(hbars, padding=8, fontsize=14)
    ax1.bar_label(
        hbars,
        labels=ranked.index,
        label_type="center",
        color="w",
        fontsize=14,
        padding=8,
    )

    ax2 = ax1.twinx()
    ax2.set_yticks([])
    ax2.set_ylabel("Strongly Agree", fontsize=18)
    ax2.spines["top"].set_visible(False)

    plt.show()

### Question 3

In [None]:
@interact
def plot_q3(group=filters):
    """Draw a bar chart of answer counts for ``question_metadata[2]``.

    Parameters
    ----------
    group : str
        "All" to count every respondent; any other value keeps only the rows
        whose answer to the first question equals ``group``.
    """
    question = question_metadata[2]["question"]
    choices = [
        "Other" if "Other - please share in the document" in choice else choice
        for choice in question_metadata[2]["choices"]
    ]

    answers = data[question]
    # The y-axis limit always comes from the unfiltered counts so bar heights
    # stay comparable when switching between groups.
    max_y = answers.value_counts().max()

    if group != "All":
        answers = answers[data[question_metadata[0]["question"]] == group]

    # fill_value=0 matters: a plain reindex() leaves NaN for choices nobody
    # picked, which turns the counts into floats and produces "nan"/"3.0"
    # bar labels.
    hist = answers.value_counts().reindex(choices, fill_value=0)

    _, ax = plt.subplots(figsize=(12, 8))
    hist.plot(kind="bar", ax=ax, color=generate_colors(hist), rot=0)

    # Wrap long choice texts so neighbouring tick labels do not overlap.
    xlabels = [fill(x, 20) for x in choices]
    ax.set_xticklabels(xlabels)

    ax.set_title(question, fontsize=14, y=1.08)
    ax.set_ylim(0, max_y)
    # The y axis is hidden because autolabel() prints each count above its bar.
    ax.set_yticks([])
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)

    autolabel(ax.patches, ax)

    plt.show()

### Question 4

In [None]:
@interact
def plot_q4(group=filters):
    """Plot a score per ``question_metadata[3]`` column as horizontal bars.

    Each column of ``data`` whose name contains the question text is labelled
    with the text after its last ":" and scored as
    ``len(choices) - mean(column)``. Choices without a matching column are
    added with a score of 0. Bars are sorted ascending.

    Parameters
    ----------
    group : str
        "All" to use every respondent; any other value keeps only the rows
        whose answer to the first question equals ``group``.
    """
    question = question_metadata[3]["question"]
    choices = [
        "Other" if "Other - please share in the document" in choice else choice
        for choice in question_metadata[3]["choices"]
    ]
    question_cols = [col for col in data.columns if question in col]
    column_mapping = {col: col.split(":")[-1].strip() for col in question_cols}

    if group != "All":
        df = data.loc[data[question_metadata[0]["question"]] == group, question_cols]
    else:
        df = data.loc[:, question_cols]

    # Labels and scores are collected in plain lists and turned into a Series
    # once, instead of renaming in place and concatenating inside loops.
    labels = [column_mapping[col] for col in question_cols]
    scores = [len(choices) - df[col].mean() for col in question_cols]

    present = set(labels)
    missing = [choice for choice in choices if choice not in present]

    # A group without answers yields NaN scores, which are drawn as 0.
    new_series = pd.Series(
        scores + [0] * len(missing),
        index=labels + missing,
        dtype=float,
    ).fillna(0)
    ranked = new_series.sort_values(ascending=True)

    # Sized from the plotted values: the series can hold more entries than
    # `choices` when a column suffix does not match any choice text.
    y_pos = np.arange(len(ranked))

    _, ax1 = plt.subplots(figsize=(12, 8))

    hbars = ax1.barh(
        y_pos,
        ranked.values,
        color=generate_colors(ranked)[::-1],
    )

    ax1.set_title(question, fontsize=14)
    ax1.set_xlim(0, len(choices))
    ax1.set_xticks([])
    ax1.set_xlabel("")
    ax1.set_yticks([])
    ax1.spines["top"].set_visible(False)
    ax1.spines["right"].set_visible(False)
    ax1.spines["bottom"].set_visible(False)

    # First pass writes the numeric value at the bar end, second pass writes
    # the label text inside the bar.
    ax1.bar_label(hbars, padding=8, fontsize=14)
    ax1.bar_label(
        hbars,
        labels=ranked.index,
        label_type="center",
        color="w",
        fontsize=14,
        padding=8,
    )

    plt.show()

### Question 5

In [None]:
@interact
def plot_q5(group=filters):
    """Draw a bar chart of answer counts for ``question_metadata[4]``.

    Parameters
    ----------
    group : str
        "All" to count every respondent; any other value keeps only the rows
        whose answer to the first question equals ``group``.
    """
    question = question_metadata[4]["question"]
    choices = [
        "Other" if "Other - please share in the document" in choice else choice
        for choice in question_metadata[4]["choices"]
    ]

    responses = data[question]
    # Axis height is taken from the unfiltered counts so the scale does not
    # change when a different group is selected.
    max_y = responses.value_counts().max()

    if group != "All":
        responses = responses[data[question_metadata[0]["question"]] == group]

    # Zero-fill while reindexing; filling before the reindex (as was done
    # previously) left NaN for unselected choices and float-typed counts.
    hist = responses.value_counts().reindex(choices, fill_value=0)

    _, ax = plt.subplots(figsize=(12, 8))
    hist.plot(kind="bar", ax=ax, color=generate_colors(hist), rot=0)

    # Wrap long choice texts so neighbouring tick labels do not overlap.
    xlabels = [fill(x, 20) for x in choices]

    ax.set_title(question, fontsize=14, y=1.08)
    ax.set_xticklabels(xlabels)
    ax.set_ylim(0, max_y)
    # The y axis is hidden because autolabel() prints each count above its bar.
    ax.set_yticks([])
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)

    autolabel(ax.patches, ax)

    plt.show()

### Question 6

In [None]:
@interact
def plot_q6(group=filters):
    """Render a word cloud of the free-text answers to ``question_metadata[5]``.

    All string answers longer than three characters, taken from every column
    of ``data`` whose name contains the question text, are joined and passed
    to ``WordCloud``. When there is no such answer only the title is drawn.

    Parameters
    ----------
    group : str
        "All" to use every respondent; any other value keeps only the rows
        whose answer to the first question equals ``group``.
    """
    question = question_metadata[5]["question"]
    question_cols = [col for col in data.columns if question in col]

    if group != "All":
        df = data.loc[data[question_metadata[0]["question"]] == group, question_cols]
    else:
        df = data.loc[:, question_cols]

    # Only the answer text feeds the word cloud. The spaCy vector that used
    # to be computed for every answer was never read, so it is no longer
    # calculated on each widget change.
    answers = [
        answer
        for _, row in df.iterrows()
        for answer in row.dropna().values
        if isinstance(answer, str) and len(answer) > 3
    ]
    comment_words = " ".join(answers)

    plt.figure(figsize=(8, 8), facecolor=None)
    plt.title(question, fontsize=16, y=1.08)
    plt.axis("off")

    if comment_words:
        wordcloud = WordCloud(
            width=800, height=800, background_color="white", min_font_size=10
        ).generate(comment_words)

        plt.imshow(wordcloud)

    plt.tight_layout(pad=0)
    # Explicit show(), matching the other plotting cells in this notebook.
    plt.show()

### Question 7

In [None]:
@interact
def plot_q7(group=filters):
    """Render a word cloud of the free-text answers to ``question_metadata[6]``.

    All string answers longer than three characters, taken from every column
    of ``data`` whose name contains the question text, are joined and passed
    to ``WordCloud``. When there is no such answer only the title is drawn.

    Parameters
    ----------
    group : str
        "All" to use every respondent; any other value keeps only the rows
        whose answer to the first question equals ``group``.
    """
    question = question_metadata[6]["question"]
    question_cols = [col for col in data.columns if question in col]

    if group != "All":
        df = data.loc[data[question_metadata[0]["question"]] == group, question_cols]
    else:
        df = data.loc[:, question_cols]

    # The per-answer spaCy embedding was computed but never used; dropping it
    # removes a full NLP pipeline run per answer on every interaction.
    answers = [
        answer
        for _, row in df.iterrows()
        for answer in row.dropna().values
        if isinstance(answer, str) and len(answer) > 3
    ]
    comment_words = " ".join(answers)

    plt.figure(figsize=(8, 8), facecolor=None)
    plt.title(question, fontsize=16, y=1.08)
    plt.axis("off")

    if comment_words:
        wordcloud = WordCloud(
            width=800, height=800, background_color="white", min_font_size=10
        ).generate(comment_words)

        plt.imshow(wordcloud)

    plt.tight_layout(pad=0)
    # Explicit show(), matching the other plotting cells in this notebook.
    plt.show()

### Question 8

In [None]:
@interact
def plot_q8(group=filters):
    """Draw a bar chart of answer counts for ``question_metadata[7]``.

    Parameters
    ----------
    group : str
        "All" to count every respondent; any other value keeps only the rows
        whose answer to the first question equals ``group``.
    """
    question = question_metadata[7]["question"]
    choices = [
        "Other" if "Other - please share in the document" in choice else choice
        for choice in question_metadata[7]["choices"]
    ]

    column = data[question]
    # Upper y limit comes from the counts over all respondents, whatever
    # group is selected.
    max_y = column.value_counts().max()

    if group != "All":
        column = column[data[question_metadata[0]["question"]] == group]

    # Choices absent from the (possibly filtered) answers must become 0 here:
    # reindex() without fill_value inserts NaN, and the earlier fillna() call
    # ran too early to catch it.
    hist = column.value_counts().reindex(choices, fill_value=0)

    _, ax = plt.subplots(figsize=(12, 8))
    hist.plot(kind="bar", ax=ax, color=generate_colors(hist), rot=0)

    # Wrap long choice texts so neighbouring tick labels do not overlap.
    xlabels = [fill(x, 20) for x in choices]

    ax.set_title(question, fontsize=14, y=1.08)
    ax.set_xticklabels(xlabels)
    ax.set_ylim(0, max_y)
    # The y axis is hidden because autolabel() prints each count above its bar.
    ax.set_yticks([])
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)

    autolabel(ax.patches, ax)

    plt.tight_layout()
    plt.show()

### Question 9

In [None]:
@interact
def plot_q9(group=filters):
    """Draw a bar chart of answer counts for ``question_metadata[8]``.

    Parameters
    ----------
    group : str
        "All" to count every respondent; any other value keeps only the rows
        whose answer to the first question equals ``group``.
    """
    question = question_metadata[8]["question"]
    choices = [
        "Other" if "Other - please share in the document" in choice else choice
        for choice in question_metadata[8]["choices"]
    ]

    answers = data[question]
    # The tallest bar over all respondents fixes the y range for every group.
    max_y = answers.value_counts().max()

    if group != "All":
        group_mask = data[question_metadata[0]["question"]] == group
        answers = answers[group_mask]

    # Order the counts like the metadata choices and give unselected choices
    # an integer 0. Previously they ended up as NaN because fillna() was
    # applied before reindex().
    hist = answers.value_counts().reindex(choices, fill_value=0)

    _, ax = plt.subplots(figsize=(12, 8))
    hist.plot(kind="bar", ax=ax, color=generate_colors(hist), rot=0)

    # Wrap long choice texts so neighbouring tick labels do not overlap.
    xlabels = [fill(x, 20) for x in choices]

    ax.set_title(question, fontsize=14, y=1.08)
    ax.set_xticklabels(xlabels)
    ax.set_ylim(0, max_y)
    # The y axis is hidden because autolabel() prints each count above its bar.
    ax.set_yticks([])
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)

    autolabel(ax.patches, ax)

    plt.show()

### Question 10

In [None]:
@interact
def plot_q10(group=filters):
    """Draw a bar chart of answer counts for ``question_metadata[9]``.

    Parameters
    ----------
    group : str
        "All" to count every respondent; any other value keeps only the rows
        whose answer to the first question equals ``group``.
    """
    question = question_metadata[9]["question"]
    choices = [
        "Other" if "Other - please share in the document" in choice else choice
        for choice in question_metadata[9]["choices"]
    ]

    selected = data[question]
    # Keep one y scale for all groups: the limit is the largest count over
    # the complete data set.
    max_y = selected.value_counts().max()

    if group != "All":
        selected = selected[data[question_metadata[0]["question"]] == group]

    # reindex(..., fill_value=0) both orders the bars and zero-fills choices
    # with no answers. The old fillna-then-reindex sequence left those as NaN.
    hist = selected.value_counts().reindex(choices, fill_value=0)

    _, ax = plt.subplots(figsize=(12, 8))
    hist.plot(kind="bar", ax=ax, color=generate_colors(hist), rot=0)

    # Wrap long choice texts so neighbouring tick labels do not overlap.
    xlabels = [fill(x, 20) for x in choices]

    ax.set_title(question, fontsize=14, y=1.08)
    ax.set_xticklabels(xlabels)
    ax.set_ylim(0, max_y)
    # The y axis is hidden because autolabel() prints each count above its bar.
    ax.set_yticks([])
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)

    autolabel(ax.patches, ax)

    plt.show()

### Question 11

In [None]:
@interact
def plot_q11(group=filters):
    """Render a word cloud of the free-text answers to ``question_metadata[10]``.

    All string answers longer than three characters, taken from every column
    of ``data`` whose name contains the question text, are joined and passed
    to ``WordCloud``. When there is no such answer only the title is drawn.

    Parameters
    ----------
    group : str
        "All" to use every respondent; any other value keeps only the rows
        whose answer to the first question equals ``group``.
    """
    question = question_metadata[10]["question"]
    question_cols = [col for col in data.columns if question in col]

    if group != "All":
        df = data.loc[data[question_metadata[0]["question"]] == group, question_cols]
    else:
        df = data.loc[:, question_cols]

    # Collect just the texts: the spaCy vectors formerly built alongside them
    # were discarded unused, so that costly step has been removed.
    answers = [
        answer
        for _, row in df.iterrows()
        for answer in row.dropna().values
        if isinstance(answer, str) and len(answer) > 3
    ]
    comment_words = " ".join(answers)

    plt.figure(figsize=(8, 8), facecolor=None)
    plt.title(question, fontsize=16, y=1.08)
    plt.axis("off")

    if comment_words:
        wordcloud = WordCloud(
            width=800, height=800, background_color="white", min_font_size=10
        ).generate(comment_words)

        plt.imshow(wordcloud)

    plt.tight_layout(pad=0)
    # Explicit show(), matching the other plotting cells in this notebook.
    plt.show()

### Question 12

In [None]:
@interact
def plot_q12(group=filters):
    """Draw a bar chart of answer counts for ``question_metadata[11]``.

    Parameters
    ----------
    group : str
        "All" to count every respondent; any other value keeps only the rows
        whose answer to the first question equals ``group``.
    """
    question = question_metadata[11]["question"]
    # NOTE(review): the previous code reversed `choices` and then reversed it
    # again for reindex(), so bars were drawn in metadata order while the tick
    # labels ran in the opposite order and did not match their bars. Bar order
    # is kept as it was; if reversed bars were intended, reverse this list.
    choices = [
        "Other" if "Other - please share in the document" in choice else choice
        for choice in question_metadata[11]["choices"]
    ]

    answers = data[question]
    # The y-axis limit always comes from the unfiltered counts so bar heights
    # stay comparable when switching between groups.
    max_y = answers.value_counts().max()

    if group != "All":
        answers = answers[data[question_metadata[0]["question"]] == group]

    # fill_value=0 keeps unselected choices as integer zeros instead of the
    # NaN a plain reindex() would insert.
    hist = answers.value_counts().reindex(choices, fill_value=0)

    _, ax = plt.subplots(figsize=(12, 8))
    hist.plot(kind="bar", ax=ax, color=generate_colors(hist), rot=0)

    # Tick labels are derived from the plotted index so that each label is
    # guaranteed to describe the bar it sits under.
    xlabels = [fill(x, 20) for x in hist.index]

    ax.set_title(question, fontsize=14, y=1.08)
    ax.set_xticklabels(xlabels)
    ax.set_ylim(0, max_y)
    # The y axis is hidden because autolabel() prints each count above its bar.
    ax.set_yticks([])
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.spines["left"].set_visible(False)

    autolabel(ax.patches, ax)

    plt.show()

### Question 13

In [None]:
@interact
def plot_q13(group=filters):
    """Render a word cloud of the free-text answers to ``question_metadata[12]``.

    All string answers longer than three characters, taken from every column
    of ``data`` whose name contains the question text, are joined and passed
    to ``WordCloud``. When there is no such answer only the title is drawn.

    Parameters
    ----------
    group : str
        "All" to use every respondent; any other value keeps only the rows
        whose answer to the first question equals ``group``.
    """
    question = question_metadata[12]["question"]
    question_cols = [col for col in data.columns if question in col]

    if group != "All":
        df = data.loc[data[question_metadata[0]["question"]] == group, question_cols]
    else:
        df = data.loc[:, question_cols]

    # No embedding is computed here any more: the vectors were stored next to
    # each answer but nothing ever consumed them.
    answers = [
        answer
        for _, row in df.iterrows()
        for answer in row.dropna().values
        if isinstance(answer, str) and len(answer) > 3
    ]
    comment_words = " ".join(answers)

    plt.figure(figsize=(8, 8), facecolor=None)
    plt.title(question, fontsize=16, y=1.08)
    plt.axis("off")

    if comment_words:
        wordcloud = WordCloud(
            width=800, height=800, background_color="white", min_font_size=10
        ).generate(comment_words)

        plt.imshow(wordcloud)

    plt.tight_layout(pad=0)
    # Explicit show(), matching the other plotting cells in this notebook.
    plt.show()