# How many responses per contest?

This notebook analyzes two things:

1. the number of responses recorded in the CSVs
2. the number of responses recorded in `info.png`.

In [None]:
import os
import caption_contest_data as ccd

In [None]:
# Collect the identifiers that `ccd.summary` is later called with, one call
# per entry (presumably one identifier per contest -- confirm against the
# caption_contest_data documentation).
contests = ccd.summary_ids()
print(contests)

In [None]:
# Number of entries returned by `ccd.summary_ids()`.
len(contests)

In [None]:
# Load one summary per entry of `contests`, preserving their order.
summaries = list(map(ccd.summary, contests))

In [None]:
import pandas as pd

# Stack every per-contest summary row-wise into a single table.  Columns that
# are missing from some inputs are kept in first-seen order (sort=False).
summary = pd.concat(objs=summaries, axis=0, sort=False)

In [None]:
# Preview the first rows of the combined table (shown as the cell output).
summary.head()

In [None]:
import numpy as np

# Total the "count" column within each "contest" group; the result is indexed
# by contest and has a single "count" column.
#
# The aggregation is given as the string "sum" rather than `np.sum`: pandas
# >= 2.1 emits a FutureWarning when a NumPy reduction is passed here, because
# it is currently swapped for the pandas group-by sum and a future release
# will call the NumPy function directly.  The string alias keeps today's
# behaviour on every pandas version.
df = summary.pivot_table(index="contest", values="count", aggfunc="sum")
df.head()

In [None]:
# Relabel the aggregated "count" column as "responses".  `rename` silently
# skips labels that are not present, so re-running this cell after the column
# has already been renamed changes nothing.
df = df.rename(columns={"count": "responses"})

# "contest" is the index level created by the pivot; order the rows by it.
df = df.sort_values(by="contest")

In [None]:
# Work on an independent copy of the last ten rows so `df` is left untouched,
# and express the totals in millions for plotting.
show = df.tail(10).copy()
show["responses"] = show["responses"] / 1e6

In [None]:
import matplotlib.pyplot as plt

# Vertical bar chart of the rows held in `show` (values already in millions).
fig, ax = plt.subplots()
show.plot.bar(ax=ax)
ax.set_ylabel("responses (millions)")

# Drop the legend that the pandas plotting call attached to the axes.
ax.get_legend().remove()

In [None]:
# Horizontal bar chart of every row in `df`.
# `w` is a base size: the figure is 2*w wide and 5*w tall (presumably tall so
# that each contest's bar stays legible -- adjust `w` to rescale the figure).
w = 4
fig, ax = plt.subplots(figsize=(2 * w, 5*w))
df.plot.barh(ax=ax)
# alpha=1 draws the grid lines fully opaque.
ax.grid(alpha=1)
def func(value, _):
    """Format a tick value as millions with one decimal place.

    Has the two-argument shape expected by ``FuncFormatter``; the second
    argument (the tick position) is ignored.
    """
    millions = value / 1e6
    return f"{millions:0.1f}"
# Major ticks every 200,000 responses along the x axis ...
ax.xaxis.set_major_locator(plt.MultipleLocator(0.2e6))
# ... each labelled through `func`, i.e. in millions with one decimal place.
ax.xaxis.set_major_formatter(plt.FuncFormatter(func))
ax.set_xlabel(r"Responses ($\times 10^6$)")