In [None]:
import json
import os
from itertools import cycle, islice
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ipywidgets import interact

In [None]:
def generate_colors(df):
    """Return one hex colour per element of ``df``.

    The five-colour palette is repeated from the start as often as needed,
    so the result always has exactly ``len(df)`` entries.

    Parameters
    ----------
    df : any sized object
        Only ``len(df)`` is used.

    Returns
    -------
    list of str
        Hex colour strings, in palette order.
    """
    palette = ("#e66581", "#5799c9", "#f5a252", "#9ebd6e", "#e1f0c4")
    # Wrap around the palette with modulo indexing.
    return [palette[position % len(palette)] for position in range(len(df))]

In [None]:
def autolabel(rects, ax):
    """Write each bar's height as a text label just above the bar.

    Parameters
    ----------
    rects : iterable
        Bar objects exposing ``get_x()``, ``get_width()`` and ``get_height()``.
    ax : object with an ``annotate`` method
        The axes the labels are drawn on.
    """
    # Placement shared by every label: centred horizontally and lifted
    # 5 points above the anchor.
    label_style = {
        "xytext": (0, 5),
        "textcoords": "offset points",
        "ha": "center",
        "va": "bottom",
    }

    for bar in rects:
        height = bar.get_height()
        # Anchor at the horizontal midpoint of the bar, at its top edge.
        centre = bar.get_x() + (bar.get_width() / 2.0)
        ax.annotate(f"{height}", (centre, height), **label_style)

In [None]:
def plot_choices_question(data, question, choices, num):
    """Draw a bar chart of how often each answer to ``question`` was given.

    Parameters
    ----------
    data : pandas.DataFrame
        Responses; must contain a column named ``question``.
    question : str
        Column to tabulate; also shown in the chart title.
    choices : iterable
        Answer options. Any option that never occurs in the column is still
        drawn, as a zero-height bar.
    num : int
        Zero-based question index; the title shows ``num + 1``.
    """
    plt.figure(figsize=(12, 8))
    counts = data[question].value_counts()

    # value_counts() omits options nobody picked; append them with a count of
    # zero. dict.fromkeys drops repeated options while keeping their order.
    unseen = [
        option for option in dict.fromkeys(choices) if option not in counts.index
    ]
    if unseen:
        counts = pd.concat([counts, pd.Series(0, index=unseen)])

    ax = counts.plot(kind="bar", color=generate_colors(counts), rot=0)
    ax.set_title(f"Question {num + 1}: {question}", fontsize=14)

    # The counts are written above the bars, so the y axis itself is hidden.
    ax.set_yticks([])
    for side in ("top", "right", "left"):
        ax.spines[side].set_visible(False)

    autolabel(ax.patches, ax)

    plt.show()

In [None]:
def plot_scales_question(data, question, choices, num):
    """Draw a horizontal bar chart of the mean response per statement.

    Every column of ``data`` whose name contains ``question`` contributes one
    bar, labelled with the entry of ``choices`` at the same position.

    Parameters
    ----------
    data : pandas.DataFrame
        Responses; the matching columns are averaged with ``.mean()``.
    question : str
        Substring used to select columns; also shown in the chart title.
    choices : sequence
        Bar labels, indexed by the position of each matching column.
    num : int
        Zero-based question index; the title shows ``num + 1``.
    """
    matching_cols = [name for name in data.columns if question in name]

    # One single-entry Series per matching column, joined in a single concat.
    # The empty float Series keeps the result defined when nothing matches.
    per_column_means = [
        pd.Series(data={choices[position]: data[name].mean()})
        for position, name in enumerate(matching_cols)
    ]
    averages = pd.concat([pd.Series(dtype=float), *per_column_means])

    plt.figure(figsize=(12, 8))
    bar_colors = generate_colors(averages)[::-1]
    left_axis = averages.sort_values(ascending=True).plot(
        kind="barh", color=bar_colors
    )

    left_axis.set_title(f"Question {num + 1}: {question}", fontsize=14)
    # Fixed 1-5 range with no ticks; the two y-axis labels name the extremes
    # of the scale instead.
    left_axis.set_xlim(1, 5)
    left_axis.set_xticks([])
    left_axis.set_xlabel("")
    left_axis.set_ylabel("Strongly Disagree", fontsize=18)

    # A twin axis is used only to place a second label on the right-hand side.
    right_axis = left_axis.twinx()
    right_axis.set_yticks([])
    right_axis.set_ylabel("Strongly Agree", fontsize=18)

    for axis in (left_axis, right_axis):
        for side in ("top", "bottom"):
            axis.spines[side].set_visible(False)

    plt.show()

In [None]:
# Set filepaths
# All inputs are resolved relative to the current working directory: the
# processed data is expected two levels up, under data/processed.
ABSOLUTE_HERE = Path.cwd().parent
processed_data_dir = ABSOLUTE_HERE.parent / "data" / "processed"
data_filepath = processed_data_dir / "survey-responses.csv"
questions_filepath = processed_data_dir / "questions_metadata.json"

In [None]:
# Load datasets
# JSON is UTF-8 by specification; pin the encoding so the read does not fall
# back to the platform's locale default (which is not UTF-8 everywhere).
with open(questions_filepath, encoding="utf-8") as stream:
    question_metadata = json.load(stream)

# Survey responses, with rows indexed by the "Voter" column.
data = pd.read_csv(data_filepath, index_col="Voter")

In [None]:
# Draw one chart per question, picking the plotting function from the
# question type in the metadata. Types with no entry here are not plotted.
PLOTTERS_BY_TYPE = {
    "choices": plot_choices_question,
    "scales": plot_scales_question,
}

for i, qmd in enumerate(question_metadata):
    question = qmd["question"]
    # Shorten the long free-text option to a plain "Other" label.
    choices = [
        choice if "Other - please share in the document" not in choice else "Other"
        for choice in qmd["choices"]
    ]

    plot_question = PLOTTERS_BY_TYPE.get(qmd["type"])
    if plot_question is not None:
        plot_question(data, question, choices, i)

    # Blank lines separate consecutive questions in the cell output.
    print("\n" * 4)

In [None]:
# Question 4 - ranking
# Build a table of response counts: one row per distinct response value, one
# column per choice, each cell holding how often that value was given.
RANKING_QUESTION_INDEX = 3  # zero-based position of Question 4 in the metadata

question = question_metadata[RANKING_QUESTION_INDEX]["question"]
choices = question_metadata[RANKING_QUESTION_INDEX]["choices"]
choices = [
    "Other" if "please share in the document" in choice else choice
    for choice in choices
]

question_cols = [col for col in data.columns if question in col]

# Collect one single-column frame per matching column and concatenate once,
# rather than re-concatenating and re-casting the growing frame on every
# iteration. The empty seed frame keeps the result defined when no column
# matches.
# NOTE(review): assumes the i-th matching column corresponds to choices[i] —
# confirm against how the processed CSV is generated.
count_frames = [pd.DataFrame({})]
for i, col in enumerate(question_cols):
    counts = data[col].value_counts()
    count_frames.append(
        pd.DataFrame({choices[i]: counts.values.tolist()}, index=counts.index)
    )

# Stacking leaves NaN wherever a choice never received a given value; treat
# those as zero counts, then merge the rows that share a response value.
df = pd.concat(count_frames).fillna(0).astype(int)
df = df.groupby(level=0).sum()

# Choices with no matching column still get an all-zero column.
for choice in choices:
    if choice not in df.columns:
        df[choice] = np.zeros(len(df), dtype=int)

In [None]:
# List each question's one-based number next to its declared type.
for i, qmd in enumerate(question_metadata):
    print(f"{i + 1} {qmd['type']}")