In [2]:
! pip install --quiet pymongo[srv,tls] 

In [31]:
from pymongo import MongoClient
from dateutil.parser import parse
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.api as sm
from scipy.stats import mannwhitneyu, f_oneway
from statsmodels.iolib.summary2 import _df_to_simpletable
import os

In [2]:
client = MongoClient(os.environ.get('MONGO_HOST'), 
            username = os.environ.get('MONGO_USER'), 
                     password = os.environ.get('MONGO_PASS'))
coll = client['mab-survey'].trial

In [3]:
def make_df(l):
    played_again = [True if len(r['boxes']) > 2 else False for r in l]
    t = [i['treatment'] for i in l]
    r = [int(i['responses']['q1']['prob']) for i in l]

    df = pd.DataFrame({
        'played_again': played_again,
        'treatment': t,
        'response': r
    })
    return df

In [4]:
l1 = list(coll.find({ 'version': {'$in': ['0.2', '0.3', '0.1']}}))
l3 = list(coll.find({ 'version': {'$in': ['0.31']}}))
df1 = make_df(l1)
df3 = make_df(l3)

In [5]:
df1.shape[0], df3.shape[0]

(53, 30)

In [6]:
df1.groupby('treatment').median()

           played_again  response
treatment                        
a                  True        50
b                  True        28

In [20]:
f_oneway(df1[df1.treatment == 'a'].response, df1[df1.treatment == 'b'].response)

F_onewayResult(statistic=1.3321315302585928, pvalue=0.25380630237600216)

In [19]:
mannwhitneyu(df1[df1.treatment == 'a'].response, df1[df1.treatment == 'b'].response)

MannwhitneyuResult(statistic=277.0, pvalue=0.10357571101236501)

In [37]:
_ = (df3
     .assign(treatment = df3.treatment.map(lambda t: 'Group B' if t == 'a' else 'Group A'))
     .rename(columns = {'response': 'Estimated Probability of Winning', 'played_again': 'Played 2nd round'}))

_ = _.groupby('treatment').agg({ 'response': 'median', 'played_again': 'sum'})
_df_to_simpletable(_, float_format='%.2f')

<class 'statsmodels.iolib.table.SimpleTable'>

In [25]:
df3.groupby('treatment').sum()[['played_again']]

           played_again
treatment              
a                   9.0
b                   9.0

In [None]:
df3.groupby('treatment').median()

In [14]:
mannwhitneyu(df3[df3.treatment == 'a'].response, df3[df3.treatment == 'b'].response)

MannwhitneyuResult(statistic=103.5, pvalue=0.36146675912460824)