In [24]:
import pandas as pd
import os
import sys
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import re
from scipy.stats import mannwhitneyu

plt.rc('font', size = 9)
plt.rc('font', family='sans serif')
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
plt.rcParams['legend.frameon']=False
plt.rcParams['axes.grid']=False
plt.rcParams['legend.markerscale']=1
plt.rcParams['savefig.dpi'] = 600
sns.set_style("ticks")

In [2]:
df = pd.read_csv('/home/ngrav/project/wearables/results/train_v44.csv')
# add specifics of aug exp
df['aug_exp'] = [re.findall('l1l2(.*)_', s)[0] for s in df['exp']]

# ignore IT experiments
dt = df.loc[[True if 'cnn' in i else False for i in df['exp']], :]
dt['exp_trial'] = dt['exp'].astype(str) + '_' + dt['trial'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dt['exp_trial'] = dt['exp'].astype(str) + '_' + dt['trial'].astype(str)


In [3]:
# import evals from this
dfeval = pd.read_csv('/home/ngrav/project/wearables/results/eval_test_v44.csv')
dfeval.head()

Unnamed: 0.1,Unnamed: 0,exp_trial,y,yhat,loss,mae,rho,P_rho
0,0.0,cnnl1l2randaug_GA_1,"tensor([ 8., 34., 31., 31., 21., 6., 7., 14....","tensor([19.7984, 33.9053, 24.1894, 15.0190, 16...",100.6509780883789,8.12842845916748,0.3167976978128986,1.9596088166503975e-18
1,,exp_trial,y,yhat,loss,mae,rho,P_rho
2,0.0,cnnl1l2allaug_GA_1,"tensor([23., 33., 24., 11., 25., 8., 24., 9....","tensor([15.5564, 14.9340, 16.3986, 13.9485, 18...",109.62893676757812,8.425660133361816,0.3667706049034611,5.425507380905659e-26
3,,exp_trial,y,yhat,loss,mae,rho,P_rho
4,0.0,cnnl1l2randaug_GA_2,"tensor([34., 10., 36., 15., 10., 33., 23., 25....","tensor([21.0144, 16.8113, 20.8990, 11.3423, 13...",68.39281463623047,7.860438346862793,0.35554638257194554,7.538283269767302e-23


In [5]:
dt = dt.merge(dfeval, left_on='exp_trial', right_on='exp_trial')

In [11]:
def p_encoder(p):
    if p > 0.05:
        label = '' # n.s.
    elif p <= 0.001:
        label = '***'
    elif p <= 0.05 and p > 0.01:
        label = '*'
    elif p <= 0.01 and p > 0.001:
        label = '**'
    else: 
        label = 'Unclassified'
    return label


In [20]:
dt.loc[dt['aug_exp']==exp, 'mae'].astype(np.float32)

0    8.128428
2    7.860438
4    8.114739
5    7.747314
7    8.231212
8    7.893942
Name: mae, dtype: float32

In [27]:
exps = ['randaug', 'allaug', 'randaugperepoch', 'allaugperepoch']
df_res = pd.DataFrame(index=exps, columns=['MAE', 'Rho', 'maediffVrandaugperepoch'])
for exp in exps:
    df_res.loc[exp, 'MAE'] = '{:.2f} ({:.2f})'.format(np.mean(dt.loc[dt['aug_exp']==exp, 'mae'].astype(np.float32)), 
                                       np.std(dt.loc[dt['aug_exp']==exp, 'mae'].astype(np.float32)))
    df_res.loc[exp, 'Rho'] = '{:.2f}{} ({:.2f})'.format(np.mean(dt.loc[dt['aug_exp']==exp, 'rho'].astype(np.float32)),
                                                        p_encoder(np.mean(dt.loc[dt['aug_exp']==exp, 'P_rho'].astype(np.float32))),
                                       np.std(dt.loc[dt['aug_exp']==exp, 'rho'].astype(np.float32)))
    stat, p = mannwhitneyu(dt.loc[dt['aug_exp']==exp, 'mae'].astype(np.float32), dt.loc[dt['aug_exp']=='randaugperepoch', 'mae'].astype(np.float32))
    df_res.loc[exp, 'maediffVrandaugperepoch'] = '{:.2e}{}'.format(p, p_encoder(p))
    

In [29]:
df_res.to_csv('/home/ngrav/project/wearables/results/eval_summary_aug_exps.csv')

In [10]:
dt['aug_exp'].unique()

array(['randaug', 'allaug', 'randaugperepoch', 'allaugperepoch'],
      dtype=object)