# Result analysis
Let's see whether anyone actually learned AI-speak!

In [1]:
import os
import json
from types import SimpleNamespace
import numpy as np
import scipy.stats
import pandas as pd

In [2]:
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode
# Set up plotly for inline rendering in this notebook. NOTE(review): connected=True
# is understood to load plotly.js from a CDN instead of embedding it in the
# notebook, which would require network access when viewing — confirm in plotly docs.
init_notebook_mode(connected=True)

In [3]:
# Collect the JSON result files in the current directory.
# os.listdir returns entries in arbitrary order, so sort the names to keep the
# row order of every downstream array reproducible between runs and machines.
files = sorted(file for file in os.listdir('./') if file.endswith('.json'))


def _load_result(path):
    """Parse one JSON result file, turning every JSON object into a SimpleNamespace."""
    # The context manager closes the handle right away instead of leaking it;
    # JSON is UTF-8 by specification, so do not rely on the locale's default encoding.
    with open(path, encoding='utf-8') as handle:
        return json.load(handle, object_hook=lambda d: SimpleNamespace(**d))


# One parsed result per file, in the same (sorted) order as `files`.
results = [_load_result(file) for file in files]

In [4]:
# Matrix of `correct` flags: one row per loaded result file, one column per
# question entry inside that file.
correctness = np.array([
    [answer.correct for answer in participant]
    for participant in results
])
correctness.shape

(13, 64)

In [5]:
# Mean of the correctness flags over every row and question.
np.mean(correctness)

0.27884615384615385

In [6]:
# Running total of correct answers along the question axis (axis 1), per row.
correct_count = correctness.cumsum(axis=1)
correct_count.shape

(13, 64)

In [7]:
# Accuracy used for the 'Random guessing' reference curves in the plots below.
# NOTE(review): 1/4 presumably reflects four answer options per question — confirm.
baseline = 1 / 4

In [8]:
# 1-based question indices, used as the x-axis of the plots.
question_numbers = np.arange(correctness.shape[1]) + 1

# Mean running score across rows, compared with the score expected when every
# answer is right with probability `baseline`.
go.Figure(
    data=[
        go.Scatter(
            name='Cumulative score',
            x=question_numbers,
            y=correct_count.mean(0)
        ),
        go.Scatter(
            name='Random guessing',
            x=question_numbers,
            y=question_numbers * baseline
        ),
    ],
    layout=dict(
        title='Mean cumulative score',
        xaxis_title='Question number',
        yaxis_title='Mean number of correct answers up to given question',
        template='plotly_white'
    )
)

In [9]:
# Per-question accuracy averaged over rows, plus a centered 12-question rolling
# mean; the smoothed series is NaN near both ends where the window is incomplete.
accuracy = correctness.mean(axis=0)
smooth_accuracy = (
    pd.Series(accuracy, index=question_numbers)
    .rolling(window=12, center=True)
    .mean()
)

# Raw accuracy is drawn faintly behind the smoothed curve; the flat line marks
# the `baseline` accuracy.
go.Figure(
    data=[
        go.Scatter(
            name='Accuracy',
            x=question_numbers,
            y=accuracy,
            marker_color='mediumpurple',
            opacity=.25
        ),
        go.Scatter(
            name='Smoothed accuracy',
            x=question_numbers,
            y=smooth_accuracy,
            marker_color='mediumpurple'
        ),
        go.Scatter(
            name='Random guessing',
            x=question_numbers,
            y=np.full(question_numbers.shape, baseline)
        ),
    ],
    layout=dict(
        title='Mean accuracy',
        xaxis_title='Question number',
        yaxis_title='Mean accuracy',
        template='plotly_white'
    )
)

In [10]:
def p_value(num_trials, succes_probability, num_successes):
    """Return the one-sided (upper-tail) binomial p-value P(X >= num_successes).

    X is distributed as Binomial(num_trials, succes_probability).

    Args:
        num_trials: Total number of independent trials.
        succes_probability: Probability of success in a single trial under the
            null hypothesis.
        num_successes: Observed number of successes.

    Returns:
        The probability of observing at least `num_successes` successes.
    """
    binomial = scipy.stats.binom(num_trials, succes_probability)
    # sf(k) = P(X > k), so sf(num_successes - 1) = P(X >= num_successes).
    # Using the survival function instead of `1 - cdf(...)` avoids catastrophic
    # cancellation, which rounds very small p-values to exactly 0.
    return binomial.sf(num_successes - 1)

# Upper-tail binomial p-values pooled over all rows, tested against the same
# `baseline` accuracy that the plots use (instead of repeating the literal 0.25).
print(f'grand total p-value: {p_value(correctness.size, baseline, correctness.sum())}')

# Same test restricted to the final 32 questions of every row.
# NOTE(review): this second test is not corrected for multiple comparisons.
last_32 = correctness[:, -32:]
print(f'p-value in last 32 questions: {p_value(last_32.size, baseline, last_32.sum())}')

grand total p-value: 0.03104648685678424
p-value in last 32 questions: 0.032387603372705964


In [11]:
# P-value of the pooled score after each question: the number of trials is
# (rows in `correctness`) x (questions asked so far), and the successes are the
# cumulative correct counts summed over rows. The null success probability is
# `baseline`, matching the 'Random guessing' curves in the other plots.
go.Figure(layout=dict(
    title='P-value vs question number',
    xaxis_title='Question number',
    yaxis_title='P-value over all participants',
    template='plotly_white'
)).add_trace(go.Scatter(
    x=question_numbers,
    y=[
        p_value(len(correctness) * num_asked, baseline, total_correct)
        for num_asked, total_correct in zip(question_numbers, correct_count.sum(0))
    ]
))