# Analysing the February 2020 mybinder.org user survey responses

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter

%matplotlib inline

In [None]:
# Load in the data. The header row of the CSV is replaced with short column
# names ("Q1".."Q3") that are easier to index by in the cells below.
data_path = "data/mybinder.org-user-survey-feb-2020.csv"
resps = pd.read_csv(
    data_path,
    header=0,
    names=["Timestamp", "Q1", "Q2", "Q3"],
)

# Get questions for plot titles. Only the header is read (nrows=0) so that the
# CSV parser handles quoting and encoding; splitting the raw line on '"' breaks
# as soon as a header field is unquoted or contains an escaped quote.
header = pd.read_csv(data_path, nrows=0).columns
titles = [str(col) for col in header if col != "Timestamp"]

# Calculate total number of responses
total_resps = len(resps)
print(f"Total number of responses to the survey: {total_resps}")

## Q1. Would you recommend mybinder.org to a friend?

In [None]:
# Count the Yes/No/Maybe responses for Question 1, leaving out unanswered rows.
# Missing values are dropped before counting instead of deleting an np.nan key
# afterwards: NaN never compares equal to itself, so a dict lookup only finds
# it when the stored key happens to be the very same object as np.nan.
bar_plot_dict = dict(Counter(resps["Q1"].dropna()))

# Create a bar plot of results, most common answer first
bar_plot_df = pd.Series(bar_plot_dict).sort_values(ascending=False)
bar_plot_df.plot(kind="bar", title=titles[0]);

## Q3. What do you (mainly) use mybinder.org for?

In [None]:
"""
Categories available for Question 3:

- Reproducible publishing
- Pre-university teaching
- University teaching
- Workshops/training courses
- Demos and talks
- Documentation and examples
- Sharing and collaborating with a team
- Other
"""
# Count categorical responses to Question 3, leaving out unanswered rows.
# Missing values are dropped before counting because an np.nan dict key can
# only be looked up (and deleted) when it is the identical np.nan object.
raw_dict = dict(Counter(resps["Q3"].dropna()))

# Filter out the defined categories. Any answer given at least 10 times is
# treated as one of the predefined options; rarer answers are taken to be
# free-text entries.
# NOTE(review): a predefined option chosen fewer than 10 times is therefore
# folded into "Other" as well - confirm that this is intended.
categories = [key for key, value in raw_dict.items() if value >= 10]

# Construct a dictionary with non-specified answers concatenated into "Other"
concat_dict = {"Other": 0}
for key, value in raw_dict.items():
    if key in categories:
        # Add rather than assign so that a literal "Other" answer does not
        # wipe out the free-text responses already tallied under that key.
        concat_dict[key] = concat_dict.get(key, 0) + value
    else:
        # Add the number of responses, not 1 per distinct answer text, so
        # that identical free-text answers are all counted.
        concat_dict["Other"] += value

# Create pie plot of results, largest slice first
concat_df = pd.Series(concat_dict).sort_values(ascending=False)
concat_df.plot(kind="pie", title=titles[2])
# Remove the default y-axis label that pandas adds to pie plots
plt.ylabel("");