# How many responses per contest?

This notebook analyzes the number of responses/captions per contest, both treating all contests equally and over time.

In [None]:
import os
import caption_contest_data as ccd

In [None]:
# Fetch the contest identifiers to analyze and show them.
# NOTE(review): presumably these are the contests for which summary data
# is available -- confirm against the caption_contest_data documentation.
contests = ccd.summary_ids()
print(contests)

In [None]:
# Number of contest identifiers in `contests`.
len(contests)

In [None]:
# Load one summary per contest identifier, in the same order as `contests`.
summaries = list(map(ccd.summary, contests))

In [None]:
import pandas as pd
# Stack the per-contest summaries into one table.
# sort=False leaves the columns in their existing order (no alphabetical
# re-sorting) if the individual summaries do not share identical columns.
summary = pd.concat(summaries, sort=False)

In [None]:
# Preview the first rows of the combined summary table.
summary.head()

In [None]:
import numpy as np  # not needed by this cell any more; kept in case other cells rely on `np`

# Build one row per contest with two columns:
#   count    -- sum of the `count` column over that contest's rows
#   n_unique -- number of distinct values in the `caption` column
#
# A single named aggregation replaces the previous two pivot tables, the
# merge, and the copy-then-drop rename. String aggregators are used because
# passing `np.sum` as an aggregation function emits a FutureWarning in
# recent pandas releases.
df = summary.groupby("contest").agg(
    count=("count", "sum"),
    n_unique=("caption", "nunique"),
)
df.head()

In [None]:
# Contest identifiers present in the per-contest table (its index is `contest`).
df.index.unique()

In [None]:
# Total of the `count` column across all contests, expressed in millions.
total_millions = df["count"].sum() / 1e6
"{} million human responses".format(total_millions)

In [None]:
# Sum of the per-contest unique-caption counts.
total_captions = df["n_unique"].sum()
"{} captions".format(total_captions)

In [None]:
# `df` is indexed by contest, so its row count is the number of contests.
n_contests = df.shape[0]
"{} contests".format(n_contests)

In [None]:
# Histogram of the per-contest response totals, in millions.
responses = df["count"].div(1e6)
ax = responses.plot(kind="hist", bins=20)
ax.set_xlabel("Responses (millions)")

In [None]:
# Histogram of the number of unique captions per contest.
# Note: this rebinds `responses`, which previously held the response totals.
responses = df["n_unique"]
ax = responses.plot(kind="hist", bins=20)
ax.set_xlabel("Unique captions")

In [None]:
# Line plot of the response total against contest, on a logarithmic y-axis.
# reset_index() turns the `contest` index back into a column so it can be
# used as the x variable.
per_contest = df.reset_index()
ax = per_contest.plot(x="contest", y="count")
ax.set_yscale("log")
# Major grid lines first, then fainter minor ones for the log scale.
ax.grid()
ax.grid(which="minor", alpha=0.4)
ax.set_ylabel("Number of responses")

In [None]:
# Line plot of the unique-caption count against contest.
per_contest = df.reset_index()
ax = per_contest.plot(x="contest", y="n_unique")
ax.set_ylabel("Number of captions")