In [7]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl 
from IPython.display import display
import os 
import json
import importlib
import plot_questions

['Noteworthy', 'cmsy10', 'Khmer MN', 'Noto Sans Tai Viet', 'Khmer Sangam MN', 'Mukta Mahee', 'InaiMathi', 'DejaVu Serif Display', 'Noto Sans Mahajani', 'American Typewriter', 'Noto Sans Yi', 'Gujarati MT', 'STIXVariants', 'Oriya Sangam MN', 'Noto Sans Duployan', 'Party LET', 'Gurmukhi MN', 'Kohinoor Bangla', 'Noto Sans Old Italic', 'Noto Sans Kannada', 'Noto Sans Syriac', 'Athelas', 'Noto Sans Modi', 'Apple Symbols', 'Noto Sans Manichaean', 'SignPainter', 'Muna', 'Helvetica Neue', 'Charter', 'Lao MN', 'Noto Sans Hanunoo', 'Bodoni 72 Oldstyle', 'Noto Sans Coptic', 'Noto Sans Avestan', 'Kokonor', 'Noto Sans Takri', 'Noto Sans Khojki', 'Kohinoor Telugu', 'Noto Sans Psalter Pahlavi', 'Noto Sans Wancho', 'Mshtakan', 'STIXSizeThreeSym', 'Palatino', 'Malayalam Sangam MN', 'Futura', 'Malayalam MN', 'Marion', 'Noto Sans Bassa Vah', '.SF NS Rounded', 'Noto Sans Javanese', 'Noto Sans Syloti Nagri', 'Rockwell', 'STIXIntegralsUp', 'Courier New', 'Nadeem', 'Kohinoor Devanagari', 'cmss10', 'Noto Sans

# Load the responses 
All survey responses were encoded as JSON; we load those files first.

In [6]:
# Load responses from JSON.
# `survey_set` names the survey export to analyse: it is interpolated into the
# data file names below and passed on to the plotting helpers.
survey_set = "combined"

class NpEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy values to plain Python equivalents.

    Pass it as ``cls=NpEncoder`` to ``json.dump`` / ``json.dumps`` so that NumPy
    scalars and arrays can be serialised.
    """

    def default(self, obj):
        """Return a JSON-serialisable replacement for ``obj``.

        Args:
            obj: A value the standard encoder could not serialise by itself.

        Returns:
            ``bool`` for NumPy booleans, ``int`` for NumPy integers, ``float``
            for NumPy floats, and a (nested) ``list`` for NumPy arrays.

        Raises:
            TypeError: Raised by the base class for any other unsupported type.
        """
        # np.bool_ is not a subclass of np.integer, so it needs its own branch;
        # without it, boolean scalars fall through to the TypeError below.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

# Read both result files for the selected survey set.
# The encoding is pinned to UTF-8 so decoding does not depend on the platform's
# locale default, and json.load parses straight from the file handle.
# `results` is later iterated by question; `results_percentages` maps each
# question to a dict of answer -> value (presumably percentages, judging by the
# file name -- confirm against the export script).
with open(f'./data/results_{survey_set}.json', 'r', encoding='utf-8') as fp:
    results = json.load(fp)

with open(f'./data/results_percentage_{survey_set}.json', 'r', encoding='utf-8') as fp:
    results_percentages = json.load(fp)

In [None]:
# LaTeX adaptation for the plots: figure size (width, height) as passed to
# matplotlib's 'figure.figsize'.
figsize = (2.24, 1.75) # For 3 plots in a row
# figsize = (1.88, 1.5) # For 4 plots in a row
# figsize = (1.68, 1.5) # For 4 plots in a row

# List the fonts matplotlib can find, to check that the serif face requested
# below is actually installed.
font_names = mpl.font_manager.get_font_names()
print(font_names)

# rc overrides shared by the plots. The original literal listed 'font.size'
# twice with the same value; the redundant entry is removed.
rc_dict = {
        'figure.figsize': figsize, # For 3 plots in a row
        'font.size': 8,
        'grid.linewidth': 0.8,
        'grid.alpha': 0.7,
        'font.family': 'serif',
        'font.serif': 'Times',
    }


# Font settings in the keyword layout accepted by matplotlib's `rc('font', ...)`.
# NOTE(review): not used in the visible cells -- confirm whether it is still needed.
font = {'family' : 'serif', 'weight': 'normal', 'size': 8}

In [None]:
# Re-import the plotting helpers so edits to plot_questions.py are picked up
# without restarting the kernel.
importlib.reload(plot_questions)

# Generate plots for demographic questions
all_questions = results.keys()

# demographic_questions = ["Please select the age group that matches your age. ", "Please select your gender. ", "Please select in which region you are located"]
# stalking_related_questions = ["Which gender did your stalker have?", "How long did the stalking continue?", "How often have you been pursued?", "Have you been a victim to ongoing harassment?", "How is your current relation to the stalker?", "What do you think has been the motivation of the stalker?", "What methods of stalking have been used?", "How has stalking impacted your life?"]
# questions = demographic_questions
# questions += stalking_related_questions
# questions  += ["Where was the tracking device located?"]

# Currently every question found in the results is considered for plotting.
questions = all_questions


for question in questions:
    answers = results_percentages[question]

    # Two-column frame: one row per answer option with its value.
    df_question = pd.DataFrame(
        {"answers": list(answers.keys()), "count": list(answers.values())}
    )

    # Age groups keep their label order; every other question is ranked by
    # value, largest first.
    df_question = (
        df_question.sort_values(by=['answers'])
        if question == "Please select the age group that matches your age. "
        else df_question.sort_values(by=['count'], ascending=False)
    )

    # Only questions with 2 to 20 distinct answers are plotted.
    if 1 < len(answers) <= 20:
        print(f"Trying to plot {question}")
        plot_questions.plot_choice_question_horizontal(df_question, question, survey_set=survey_set)

In [None]:
def barplot(df_plot:pd.DataFrame, ax:plt.Axes, title: str, x="Percentage", y="Tracker", bar_label_key="Count"):
    """Draw a horizontal, labelled bar chart of ``df_plot`` on ``ax``.

    Args:
        df_plot: Data to plot; must contain the columns named by ``x``, ``y``
            and ``bar_label_key``.
        ax: Axes to draw on.
        title: Title shown above the axes.
        x: Column holding the bar lengths. When it is ``"Percentage"`` the
            x-axis ticks are formatted as percentages.
        y: Column holding the category shown for each bar.
        bar_label_key: Column whose values are written next to the bars.
            ``"Count"`` is formatted as a thousands-separated integer and
            ``"Percentage"`` with two decimals and a ``%`` sign; any other
            column is rendered with ``str()``.

    Returns:
        None. The plot is drawn on ``ax`` in place.
    """
    ax = sns.barplot(y=y, x=x, data=df_plot, orient="h", ax=ax)

    # The categories and the title are self-explanatory, so drop axis labels.
    ax.set_xlabel(None)
    ax.set_ylabel(None)
    if x == "Percentage":
        ax.xaxis.set_major_formatter(mpl.ticker.PercentFormatter())
    ax.set_title(title, pad=4)

    # Build one text label per row. The fallback branch keeps `labels` defined
    # for arbitrary columns; previously any other key raised UnboundLocalError.
    label_values = df_plot[bar_label_key].tolist()
    if bar_label_key == "Count":
        labels = [f"{value:,.0f}" for value in label_values]
    elif bar_label_key == "Percentage":
        labels = [f"{value:.2f}%" for value in label_values]
    else:
        labels = [str(value) for value in label_values]

    ax.bar_label(ax.containers[0], labels, fontsize=7, padding=1.2)

    # Extend the right-hand x limit so the label of the longest bar stays
    # inside the axes.
    max_x = df_plot[x].max()
    left_limit, _ = ax.get_xlim()
    ax.set_xlim(left_limit, (max_x + 2 + max_x * 0.15))

In [None]:

# Denominators for the percentage columns.
# NOTE(review): presumably the total number of responses and the number of
# free-text "Other" responses -- confirm against the survey export.
overall_responses = 5254
other_responses = 1684

df_tracking_actors_given = pd.read_csv("./data/Tracking_actors_given.csv")
df_tracking_actors_given["Percentage"] = (df_tracking_actors_given["Count"] / overall_responses) * 100

df_tracking_actors_other = pd.read_csv("./data/Tracking_actors_coded.csv")
df_tracking_actors_other["Percentage"] = (df_tracking_actors_other["Count"] / other_responses) * 100
# Sort by count so the largest bar is drawn first
df_tracking_actors_given = df_tracking_actors_given.sort_values(by="Count", ascending=False)
df_tracking_actors_other = df_tracking_actors_other.sort_values(by="Count", ascending=False)

# NOTE(review): `set_size` and `color_palette` are not defined in the visible
# part of the notebook -- make sure they are defined or imported before this
# cell so that Restart & Run All works.
latex_width = 241
fig_size = set_size(latex_width)
no_of_answers = 6
width = fig_size[0]
# Cap the per-panel height: a fixed margin plus a fixed height per answer row.
height_0 = min(fig_size[1], 0.25 + 0.2 * no_of_answers)

rc_dict["figure.figsize"] = fig_size
# Set the style BEFORE creating the figure: axes pick up the active style when
# they are created, so styling afterwards only affects later figures and makes
# a fresh-kernel run look different from a re-run.
sns.set(style="whitegrid", font="Times", font_scale=0.8, rc=rc_dict)
sns.set_palette(color_palette)

fig, axs = plt.subplots(2, 1, figsize=(width, height_0 * 2), sharex=True, sharey="row", height_ratios=[1, 0.9])

# NOTE(review): the tight_layout() call below recomputes the subplot spacing,
# so this hspace is likely overridden -- confirm which of the two is intended.
plt.subplots_adjust(hspace=0.12)

barplot(df_tracking_actors_given, axs[0], "Given responses", y="Response", x="Percentage")
barplot(df_tracking_actors_other, axs[1], "Other", y="Response", x="Percentage")

name = "tracking_actors.pdf"
out_dir = "./plots"
# Create the output directory if needed so savefig does not fail on a fresh checkout.
os.makedirs(out_dir, exist_ok=True)
path = os.path.join(out_dir, name)
print(f"Saving figure to {path}")
plt.tight_layout()
plt.savefig(path,bbox_inches='tight', pad_inches=0.01)
plt.show()
plt.close()