In [1]:
%load_ext autoreload
%autoreload 2

In [19]:
import sys
sys.path.append('../../src/generic')
import csv
import os
import numpy as np
import pandas as pd
import seaborn as sns
sns.set_theme()
from sklearn.metrics import log_loss

In [3]:
from results.process_results import ResultProcessor

In [4]:
main_result_dir = "/data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf"

## Naive Baselines

In [5]:
data_dir = "/data/ddmg/redditlanguagemodeling/data/AmazonReviews/data"

In [6]:
data_df = pd.read_csv(os.path.join(data_dir, 'amazon_v2.0/reviews.csv'),
                      dtype={'reviewerID':str, 'asin':str, 'reviewTime':str,'unixReviewTime':int,
                             'reviewText':str,'summary':str,'verified':bool,'category':str, 'reviewYear':int},
                      keep_default_na=False, na_values=[], quoting=csv.QUOTE_NONNUMERIC)

In [7]:
split_df = pd.read_csv(os.path.join(data_dir, 'amazon_v2.0', 'splits', 'my_user_split.csv'))

In [8]:
# get select people
select_file = "/data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500/selected_people.txt"
with open(select_file, 'r') as f:
    people = f.read().splitlines()

In [9]:
data_df["split"] = split_df["split"]

In [10]:
select_df = data_df[data_df["reviewerID"].isin(people)]
len(select_df)

140326

In [11]:
test_df = select_df[split_df["split"] == 2]

  test_df = select_df[split_df["split"] == 2]


In [12]:
train_df = select_df[split_df["split"] == 0]

  train_df = select_df[split_df["split"] == 0]


In [13]:
def count_1(x):
    return sum(x == 1)

count_1.__name__ = "count_1"

def count_2(x):
    return sum(x == 2)

count_2.__name__ = "count_2"

def count_3(x):
    return sum(x == 3)

count_3.__name__ = "count_3"

def count_4(x):
    return sum(x == 4)

count_4.__name__ = "count_4"

def count_5(x):
    return sum(x == 5)

count_5.__name__ = "count_5"

In [14]:
count_fns = [count_1, count_2, count_3, count_4, count_5]

In [16]:
train_dist_by_user = train_df[["reviewerID", "overall"]].groupby(["reviewerID"]).agg(count_fns)
train_dist_by_user

Unnamed: 0_level_0,overall,overall,overall,overall,overall
Unnamed: 0_level_1,count_1,count_2,count_3,count_4,count_5
reviewerID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
A101S5PLO0VRHQ,5.0,29.0,20.0,24.0,25.0
A10E0V7PGY34UZ,0.0,0.0,0.0,4.0,41.0
A10O7THJ2O20AG,1.0,0.0,1.0,0.0,43.0
A11P853U6FIKAM,0.0,5.0,18.0,61.0,82.0
A12O5K3EQ4MC7Z,1.0,2.0,24.0,61.0,61.0
...,...,...,...,...,...
AYT4FJYVCHYLE,0.0,2.0,9.0,24.0,10.0
AYVW3O6W8S5S4,0.0,0.0,8.0,23.0,14.0
AZD488SA9QMYF,0.0,0.0,0.0,3.0,42.0
AZJ4DFLH9O4FZ,0.0,0.0,3.0,15.0,27.0


In [15]:
test_dist_by_user = test_df[["reviewerID", "overall"]].groupby(["reviewerID"]).agg(count_fns)
test_dist_by_user

Unnamed: 0_level_0,overall,overall,overall,overall,overall
Unnamed: 0_level_1,count_1,count_2,count_3,count_4,count_5
reviewerID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
A101S5PLO0VRHQ,2.0,11.0,11.0,5.0,6.0
A10E0V7PGY34UZ,0.0,0.0,0.0,1.0,14.0
A10O7THJ2O20AG,1.0,0.0,1.0,1.0,12.0
A11P853U6FIKAM,1.0,0.0,10.0,22.0,24.0
A12O5K3EQ4MC7Z,0.0,0.0,10.0,21.0,20.0
...,...,...,...,...,...
AYT4FJYVCHYLE,0.0,1.0,2.0,5.0,7.0
AYVW3O6W8S5S4,0.0,1.0,2.0,8.0,4.0
AZD488SA9QMYF,0.0,0.0,0.0,1.0,14.0
AZJ4DFLH9O4FZ,0.0,0.0,4.0,6.0,5.0


In [17]:
test_score_df = test_df[["reviewerID", "overall"]]

### Predict Test Distr. Baseline

In [127]:
# for each user predict test probs
users = []
perfs = []
for user, row in test_dist_by_user.iterrows():
    user_gt = test_df[test_df["reviewerID"] == user]["overall"].values
    score_counts = np.array([row["overall", "count_{}".format(i + 1)] for i in range(5)])
    norm_score_counts = np.expand_dims(score_counts / sum(score_counts), axis=0)
    preds = np.repeat(norm_score_counts, len(user_gt), axis=0)
    loss = log_loss(user_gt, preds, labels=np.arange(5) + 1)
    users.append(user)
    perfs.append(loss)

In [128]:
test_prob_df = pd.DataFrame({"reviewerID": users, "perf": perfs})

In [129]:
test_prob_df

Unnamed: 0,reviewerID,perf
0,A101S5PLO0VRHQ,1.471413
1,A10E0V7PGY34UZ,0.244930
2,A10O7THJ2O20AG,0.720125
3,A11P853U6FIKAM,1.107927
4,A12O5K3EQ4MC7Z,1.051915
...,...,...
495,AYT4FJYVCHYLE,1.171060
496,AYVW3O6W8S5S4,1.136917
497,AZD488SA9QMYF,0.244930
498,AZJ4DFLH9O4FZ,1.085189


In [130]:
print(test_prob_df.mean())
print(test_prob_df.std())
print(test_prob_df.quantile(q=[.2, .4, .6, .8, 1]))

perf    0.790699
dtype: float64
perf    0.388399
dtype: float64
         perf
0.2  0.481677
0.4  0.696330
0.6  0.927617
0.8  1.136917
1.0  1.586785


### Predict Train Distr. Baseline

In [131]:
users2 = []
perfs2 = []
for user, row in train_dist_by_user.iterrows():
    user_gt = test_df[test_df["reviewerID"] == user]["overall"].values
    score_counts = np.array([row["overall", "count_{}".format(i + 1)] for i in range(5)])
    norm_score_counts = np.expand_dims(score_counts / sum(score_counts), axis=0)
    preds = np.repeat(norm_score_counts, len(user_gt), axis=0)
    loss = log_loss(user_gt, preds, labels=np.arange(5) + 1)
    users2.append(user)
    perfs2.append(loss)

In [132]:
train_prob_df = pd.DataFrame({"reviewerID": users2, "perf": perfs2})

In [133]:
train_prob_df

Unnamed: 0,reviewerID,perf
0,A101S5PLO0VRHQ,1.537137
1,A10E0V7PGY34UZ,0.248242
2,A10O7THJ2O20AG,2.846510
3,A11P853U6FIKAM,1.679051
4,A12O5K3EQ4MC7Z,1.075978
...,...,...
495,AYT4FJYVCHYLE,1.333598
496,AYVW3O6W8S5S4,3.202199
497,AZD488SA9QMYF,0.244930
498,AZJ4DFLH9O4FZ,1.331867


In [134]:
print(train_prob_df.mean())
print(train_prob_df.std())
print(train_prob_df.quantile(q=[.2, .4, .6, .8, 1]))

perf    1.156198
dtype: float64
perf    0.966114
dtype: float64
         perf
0.2  0.527940
0.4  0.840569
0.6  1.094182
0.8  1.468325
1.0  7.833139


### Train Majority Class Baseline

In [137]:
user_maj_cls_train = train_df.groupby(["reviewerID"])[["overall"]].agg(lambda x: x.value_counts().index[0])
# predict that for test data
users = []
perfs = []
for user, row in user_maj_cls_train.iterrows():
    user_gt = test_df[test_df["reviewerID"] == user]["overall"].values
    pred_score = row["overall"]
    acc = sum(user_gt == pred_score) / len(user_gt)
    users.append(user)
    perfs.append(acc)
train_mc_df = pd.DataFrame({"reviewerID": users, "perf": perfs})
train_mc_df

Unnamed: 0,reviewerID,perf
0,A101S5PLO0VRHQ,0.314286
1,A10E0V7PGY34UZ,0.933333
2,A10O7THJ2O20AG,0.800000
3,A11P853U6FIKAM,0.421053
4,A12O5K3EQ4MC7Z,0.392157
...,...,...
495,AYT4FJYVCHYLE,0.333333
496,AYVW3O6W8S5S4,0.533333
497,AZD488SA9QMYF,0.933333
498,AZJ4DFLH9O4FZ,0.333333


In [138]:
print(train_mc_df.mean())
print(train_mc_df.std())
print(train_mc_df.quantile(q=[.2, .4, .6, .8, 1]))

perf    0.629733
dtype: float64
perf    0.219778
dtype: float64
         perf
0.2  0.411132
0.4  0.545455
0.6  0.679622
0.8  0.866667
1.0  1.000000


### Test Majority Class Baseline

In [139]:
test_mc_df = test_df.groupby(["reviewerID"])[["overall"]].agg(lambda x: x.value_counts(normalize=True).iloc[0])

In [140]:
print(test_mc_df.mean())
print(test_mc_df.std())
print(test_mc_df.quantile(q=[.2, .4, .6, .8, 1]))

overall    0.650542
dtype: float64
overall    0.195295
dtype: float64
      overall
0.2  0.466667
0.4  0.577671
0.6  0.686352
0.8  0.866667
1.0  1.000000


### Global Model

In [28]:
base_result_dir = os.path.join(main_result_dir, "from_embeds", "eval_global_model_early_stopping_across_users")
levels = ["train_seed", "eval_seed"]
global_results = ResultProcessor(base_result_dir, levels, verbose=True)

Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/eval_global_model_early_stopping_across_users: Found results for 3 train_seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/eval_global_model_early_stopping_across_users/43: Found results for 1 eval_seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/eval_global_model_early_stopping_across_users/44: Found results for 1 eval_seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/eval_global_model_early_stopping_across_users/42: Found results for 1 eval_seeds


In [29]:
# collect results by user
all_results = []
for col in global_results.results_df.columns:
    results = global_results.results_df[col].values
    if col[0] == 'A':
        for val, seed in zip(results, [43, 44, 42]):
            result_dict = {}
            result_dict["user"] = col.split("_")[0]
            result_dict["result"] = "_".join(col.split("_")[1:])
            result_dict["val"] = val
            result_dict["seed"] = seed
            all_results.append(result_dict)

In [30]:
global_df = pd.DataFrame(all_results)
global_df

Unnamed: 0,user,result,val,seed
0,A101S5PLO0VRHQ,0_accuracy,0.000000,43
1,A101S5PLO0VRHQ,0_accuracy,0.000000,44
2,A101S5PLO0VRHQ,0_accuracy,0.000000,42
3,A101S5PLO0VRHQ,1_accuracy,0.272727,43
4,A101S5PLO0VRHQ,1_accuracy,0.272727,44
...,...,...,...,...
14053,A39IY0JU5JI69G,0_accuracy,,44
14054,A39IY0JU5JI69G,0_accuracy,,42
14055,A3DFMKBGQT9QIH,1_accuracy,,43
14056,A3DFMKBGQT9QIH,1_accuracy,,44


In [75]:
global_df2 = global_df.groupby(["user", "result"]).agg(["mean", "std"])["val"].reset_index()
global_df2

Unnamed: 0,user,result,mean,std
0,A101S5PLO0VRHQ,0_accuracy,0.000000,0.000000
1,A101S5PLO0VRHQ,1_accuracy,0.303030,0.052486
2,A101S5PLO0VRHQ,2_accuracy,0.757576,0.052486
3,A101S5PLO0VRHQ,3_accuracy,0.800000,0.000000
4,A101S5PLO0VRHQ,4_accuracy,0.166667,0.000000
...,...,...,...,...
4681,AZZV9PDNMCOZW,loss,1.092888,0.043024
4682,AZZV9PDNMCOZW,runtime,0.014167,0.000404
4683,AZZV9PDNMCOZW,samples,15.000000,0.000000
4684,AZZV9PDNMCOZW,samples_per_second,1060.722667,29.035503


In [76]:
global_df2 = global_df2.pivot(index='user', columns='result', values=['mean', 'std'])
global_df2

Unnamed: 0_level_0,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,...,std,std,std,std,std,std,std,std,std,std
result,0_accuracy,1_accuracy,2_accuracy,3_accuracy,4_accuracy,accuracy,loss,runtime,samples,samples_per_second,...,1_accuracy,2_accuracy,3_accuracy,4_accuracy,accuracy,loss,runtime,samples,samples_per_second,steps_per_second
user,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
A101S5PLO0VRHQ,0.0,0.30303,0.757576,0.800000,0.166667,0.476190,1.120820,0.026733,35.0,1308.001333,...,0.052486,0.052486,0.000000,0.000000,0.016496,0.012142,0.000451,0.0,22.170650,1.266893
A10E0V7PGY34UZ,,,,0.000000,1.000000,0.933333,0.384862,0.013967,15.0,1074.945000,...,,,0.000000,0.000000,0.000000,0.028398,0.000153,0.0,14.577506,0.971937
A10O7THJ2O20AG,0.0,,0.666667,0.000000,0.666667,0.577778,1.242837,0.015267,15.0,998.874000,...,,0.577350,0.000000,0.000000,0.038490,0.030466,0.002479,0.0,149.499662,9.966971
A11P853U6FIKAM,0.0,,0.366667,0.227273,0.833333,0.502924,0.935463,0.036100,57.0,1579.611667,...,,0.057735,0.000000,0.000000,0.010129,0.009915,0.000265,0.0,12.468323,0.437578
A12O5K3EQ4MC7Z,,,0.600000,0.841270,0.716667,0.745098,0.725279,0.034367,51.0,1487.120667,...,,0.000000,0.027493,0.028868,0.000000,0.015350,0.001457,0.0,61.878556,2.426665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AYT4FJYVCHYLE,,0.00000,0.500000,0.600000,0.857143,0.666667,0.636464,0.014167,15.0,1059.953667,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.019655,0.000379,0.0,31.285691,2.085797
AYVW3O6W8S5S4,,0.00000,0.000000,0.125000,1.000000,0.333333,1.614175,0.013867,15.0,1082.509000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.035395,0.000306,0.0,20.844639,1.389373
AZD488SA9QMYF,,,,1.000000,0.928571,0.933333,0.262011,0.013833,15.0,1085.220667,...,,,0.000000,0.000000,0.000000,0.016293,0.000153,0.0,11.056036,0.737032
AZJ4DFLH9O4FZ,,,0.500000,0.722222,0.600000,0.622222,0.609930,0.014333,15.0,1047.548333,...,,0.000000,0.096225,0.000000,0.038490,0.028343,0.000666,0.0,46.899888,3.126892


In [77]:
global_df2 = global_df2.swaplevel(i=0, j=1, axis=1)
global_df2

result,0_accuracy,1_accuracy,2_accuracy,3_accuracy,4_accuracy,accuracy,loss,runtime,samples,samples_per_second,...,1_accuracy,2_accuracy,3_accuracy,4_accuracy,accuracy,loss,runtime,samples,samples_per_second,steps_per_second
Unnamed: 0_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,...,std,std,std,std,std,std,std,std,std,std
user,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
A101S5PLO0VRHQ,0.0,0.30303,0.757576,0.800000,0.166667,0.476190,1.120820,0.026733,35.0,1308.001333,...,0.052486,0.052486,0.000000,0.000000,0.016496,0.012142,0.000451,0.0,22.170650,1.266893
A10E0V7PGY34UZ,,,,0.000000,1.000000,0.933333,0.384862,0.013967,15.0,1074.945000,...,,,0.000000,0.000000,0.000000,0.028398,0.000153,0.0,14.577506,0.971937
A10O7THJ2O20AG,0.0,,0.666667,0.000000,0.666667,0.577778,1.242837,0.015267,15.0,998.874000,...,,0.577350,0.000000,0.000000,0.038490,0.030466,0.002479,0.0,149.499662,9.966971
A11P853U6FIKAM,0.0,,0.366667,0.227273,0.833333,0.502924,0.935463,0.036100,57.0,1579.611667,...,,0.057735,0.000000,0.000000,0.010129,0.009915,0.000265,0.0,12.468323,0.437578
A12O5K3EQ4MC7Z,,,0.600000,0.841270,0.716667,0.745098,0.725279,0.034367,51.0,1487.120667,...,,0.000000,0.027493,0.028868,0.000000,0.015350,0.001457,0.0,61.878556,2.426665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AYT4FJYVCHYLE,,0.00000,0.500000,0.600000,0.857143,0.666667,0.636464,0.014167,15.0,1059.953667,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.019655,0.000379,0.0,31.285691,2.085797
AYVW3O6W8S5S4,,0.00000,0.000000,0.125000,1.000000,0.333333,1.614175,0.013867,15.0,1082.509000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.035395,0.000306,0.0,20.844639,1.389373
AZD488SA9QMYF,,,,1.000000,0.928571,0.933333,0.262011,0.013833,15.0,1085.220667,...,,,0.000000,0.000000,0.000000,0.016293,0.000153,0.0,11.056036,0.737032
AZJ4DFLH9O4FZ,,,0.500000,0.722222,0.600000,0.622222,0.609930,0.014333,15.0,1047.548333,...,,0.000000,0.096225,0.000000,0.038490,0.028343,0.000666,0.0,46.899888,3.126892


In [80]:
global_df2 = global_df2[['accuracy', 'loss']]
global_df2

result,accuracy,accuracy,loss,loss
Unnamed: 0_level_1,mean,std,mean,std
user,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A101S5PLO0VRHQ,0.476190,0.016496,1.120820,0.012142
A10E0V7PGY34UZ,0.933333,0.000000,0.384862,0.028398
A10O7THJ2O20AG,0.577778,0.038490,1.242837,0.030466
A11P853U6FIKAM,0.502924,0.010129,0.935463,0.009915
A12O5K3EQ4MC7Z,0.745098,0.000000,0.725279,0.015350
...,...,...,...,...
AYT4FJYVCHYLE,0.666667,0.000000,0.636464,0.019655
AYVW3O6W8S5S4,0.333333,0.000000,1.614175,0.035395
AZD488SA9QMYF,0.933333,0.000000,0.262011,0.016293
AZJ4DFLH9O4FZ,0.622222,0.038490,0.609930,0.028343


In [81]:
global_df2.mean()

result        
accuracy  mean    0.689322
          std     0.020408
loss      mean    0.722369
          std     0.018607
dtype: float64

In [82]:
global_df2.std()

result        
accuracy  mean    0.183303
          std     0.022786
loss      mean    0.365459
          std     0.012726
dtype: float64

In [83]:
global_df2.quantile(q=[.2, .4, .6, .8, 1])

result,accuracy,accuracy,loss,loss
Unnamed: 0_level_1,mean,std,mean,std
0.2,0.533333,0.0,0.414325,0.008161
0.4,0.644444,0.001293,0.595947,0.012964
0.6,0.737354,0.024054,0.774307,0.018287
0.8,0.866667,0.03849,0.996846,0.028578
1.0,1.0,0.138778,2.115849,0.082612


### Person-Specific Models

In [36]:
base_result_dir = os.path.join(main_result_dir, "from_embeds", "person_specific_my_split_n_500_early_stopping")
levels = ["user", "seed"]
ps_results = ResultProcessor(base_result_dir, levels, verbose=True)

Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping: Found results for 500 users
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2VV2MISGXFY1O: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A1XC010CS7NV8Z: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/AP4FQR3BIIYEW: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A27HJP68CKWXP5: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/AKDF3N2SXY3SP: Found results for 3 s

Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2N5Z2JKJCUPFN: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A13WOT3RSXKRD5: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/AYVW3O6W8S5S4: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A1YFB1OF0XKJOD: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A37SZWL3R0LEQ3: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A3QFDDODG2X2HL: Found r

Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A1ZOL59JA86C7Y: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A1ZR6YCZ3HDDKI: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A6VXZ1EEPRTLV: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2NDDUI5L26D3Q: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A3ANKK0ES1RRKJ: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A3W2VH8BN66IIR: Found r

Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A3KMEQP8CSCPC2: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2V8X8MTCE868O: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2WDC81C1MQUAS: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2RQOO8VYAEZZG: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A3DZT870KCFD1: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/AJA8J1GZ35AOI: Found re

Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2UM2ABAII4QTT: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A1U0RS0JIDAHDM: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A1GARI2JT6EAWA: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2YOFCOEKH3KB: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A25HRZR92Z74V5: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A1NZLRAZJGD99W: Found r

Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A3U10P2GT1GE1Y: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A1LT13C3BKQ9CU: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A3HUO6O1NQ5JPA: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/AONUCJBC85BL: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A1Z7I6TXMXFP3G: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/AGARMSTYE4ZYE: Found res

Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/ATC0DD938W4QM: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A21Q18JIMSWIFA: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A3O5UR6NHR4MRP: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2IH37N7L0QFX3: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A3B56DW0T1PWII: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2SQJPUCZNHMZE: Found r

Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/AWOV7K1S25VT7: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A37VPW84TDLVNA: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2HPAI1FOYRPVM: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2XRZV63X79YSJ: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A26KWG162U8VBQ: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/ANAYSRE3LX8GZ: Found re

Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A35K5WZ8XQY599: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A1ACIK34MC3R7W: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/APGJC13RI61U0: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A9XKEV93OF0W5: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A3IAIWZ3FYGYI7: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2AHXQYWRVSFLX: Found re

Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A13QONG1RD57UW: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2H44WVZS59KKT: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A14VYB264WN1PM: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2I4SQOSEF7WH5: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A1FWGW8LGG2J37: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2XHH5DF8628J8: Found 

Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/AU45V4AMZVHFL: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A1UAO54QQBULXD: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A1LZQPBALDJ0Y5: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A2ZJHEB8K8JZR3: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A3O22PG4MHDI4Y: Found results for 3 seeds
Base dir /data/ddmg/redditlanguagemodeling/results/amazon_reviews/clf/from_embeds/person_specific_my_split_n_500_early_stopping/A101S5PLO0VRHQ: Found r

In [37]:
# examine performance
user_results_ps = ps_results.results_df.groupby(["user"]).agg(["mean", "std"])[["test_loss", "test_accuracy"]]
user_results_ps

Unnamed: 0_level_0,test_loss,test_loss,test_accuracy,test_accuracy
Unnamed: 0_level_1,mean,std,mean,std
user,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A101S5PLO0VRHQ,1.057531,0.014976,0.571429,0.000000
A10E0V7PGY34UZ,0.240300,0.003728,0.933333,0.000000
A10O7THJ2O20AG,0.649746,0.005187,0.800000,0.000000
A11P853U6FIKAM,1.055255,0.004417,0.385965,0.000000
A12O5K3EQ4MC7Z,0.738925,0.007110,0.705882,0.019608
...,...,...,...,...
AYT4FJYVCHYLE,1.220608,0.017522,0.288889,0.076980
AYVW3O6W8S5S4,1.252717,0.033645,0.600000,0.000000
AZD488SA9QMYF,0.239735,0.006056,0.933333,0.000000
AZJ4DFLH9O4FZ,0.560942,0.018114,0.666667,0.000000


In [38]:
print(user_results_ps.mean())
print(user_results_ps.std())

test_loss      mean    0.684446
               std     0.011241
test_accuracy  mean    0.722472
               std     0.011299
dtype: float64
test_loss      mean    0.370047
               std     0.012054
test_accuracy  mean    0.180346
               std     0.021178
dtype: float64


In [156]:
user_results_ps.quantile(q=[0, .2, .4, .6, .8, 1])

Unnamed: 0_level_0,loss,loss,accuracy,accuracy
Unnamed: 0_level_1,mean,std,mean,std
0.0,0.006371,2.3e-05,0.2,0.0
0.2,0.376093,0.00321,0.555556,0.0
0.4,0.581101,0.006072,0.682803,0.0
0.6,0.761795,0.010643,0.798125,0.0
0.8,0.996006,0.016626,0.873413,0.022553
1.0,2.207498,0.14446,1.0,0.176383


### Per-User Differences in Method Performance

In [84]:
global_df2.head(5)

result,accuracy,accuracy,loss,loss
Unnamed: 0_level_1,mean,std,mean,std
user,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A101S5PLO0VRHQ,0.47619,0.016496,1.12082,0.012142
A10E0V7PGY34UZ,0.933333,0.0,0.384862,0.028398
A10O7THJ2O20AG,0.577778,0.03849,1.242837,0.030466
A11P853U6FIKAM,0.502924,0.010129,0.935463,0.009915
A12O5K3EQ4MC7Z,0.745098,0.0,0.725279,0.01535


In [86]:
user_results_ps = user_results_ps.rename(columns={'test_loss': 'loss', 'test_accuracy': 'accuracy'})
user_results_ps.head(5)

Unnamed: 0_level_0,loss,loss,accuracy,accuracy
Unnamed: 0_level_1,mean,std,mean,std
user,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A101S5PLO0VRHQ,1.057531,0.014976,0.571429,0.0
A10E0V7PGY34UZ,0.2403,0.003728,0.933333,0.0
A10O7THJ2O20AG,0.649746,0.005187,0.8,0.0
A11P853U6FIKAM,1.055255,0.004417,0.385965,0.0
A12O5K3EQ4MC7Z,0.738925,0.00711,0.705882,0.019608


In [144]:
# add results from all methods to shared dataframe
combined_results = user_results_ps.merge(global_df2, how='inner', left_index=True, right_index=True, suffixes=["_local", "_global"])

In [105]:
combined_results.head()

Unnamed: 0_level_0,loss_local,loss_local,accuracy_local,accuracy_local,accuracy_global,accuracy_global,loss_global,loss_global
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std
user,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
A101S5PLO0VRHQ,1.057531,0.014976,0.571429,0.0,0.47619,0.016496,1.12082,0.012142
A10E0V7PGY34UZ,0.2403,0.003728,0.933333,0.0,0.933333,0.0,0.384862,0.028398
A10O7THJ2O20AG,0.649746,0.005187,0.8,0.0,0.577778,0.03849,1.242837,0.030466
A11P853U6FIKAM,1.055255,0.004417,0.385965,0.0,0.502924,0.010129,0.935463,0.009915
A12O5K3EQ4MC7Z,0.738925,0.00711,0.705882,0.019608,0.745098,0.0,0.725279,0.01535


In [141]:
train_mc_df = train_mc_df.rename(columns={"reviewerID": "user", "perf": "accuracy_train_majority_cls"}).set_index('user')
train_prob_df = train_prob_df.rename(columns={"reviewerID": "user", "perf": "loss_train_prob_baseline"}).set_index('user')
test_mc_df = test_mc_df.reset_index().rename(columns={"reviewerID": "user", "overall": "accuracy_test_majority_cls"}).set_index('user')
test_prob_df = test_prob_df.rename(columns={"reviewerID": "user", "perf": "loss_test_prob_baseline"}).set_index('user')

In [145]:
# add in baselines
for df in [train_mc_df, train_prob_df, test_mc_df, test_prob_df]:
    combined_results = combined_results.merge(df, how='inner', left_index=True, right_index=True)



In [146]:
combined_results

Unnamed: 0_level_0,"(loss_local, mean)","(loss_local, std)","(accuracy_local, mean)","(accuracy_local, std)","(accuracy_global, mean)","(accuracy_global, std)","(loss_global, mean)","(loss_global, std)",accuracy_train_majority_cls,loss_train_prob_baseline,accuracy_test_majority_cls,loss_test_prob_baseline
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
A101S5PLO0VRHQ,1.057531,0.014976,0.571429,0.000000,0.476190,0.016496,1.120820,0.012142,0.314286,1.537137,0.314286,1.471413
A10E0V7PGY34UZ,0.240300,0.003728,0.933333,0.000000,0.933333,0.000000,0.384862,0.028398,0.933333,0.248242,0.933333,0.244930
A10O7THJ2O20AG,0.649746,0.005187,0.800000,0.000000,0.577778,0.038490,1.242837,0.030466,0.800000,2.846510,0.800000,0.720125
A11P853U6FIKAM,1.055255,0.004417,0.385965,0.000000,0.502924,0.010129,0.935463,0.009915,0.421053,1.679051,0.421053,1.107927
A12O5K3EQ4MC7Z,0.738925,0.007110,0.705882,0.019608,0.745098,0.000000,0.725279,0.015350,0.392157,1.075978,0.411765,1.051915
...,...,...,...,...,...,...,...,...,...,...,...,...
AYT4FJYVCHYLE,1.220608,0.017522,0.288889,0.076980,0.666667,0.000000,0.636464,0.019655,0.333333,1.333598,0.466667,1.171060
AYVW3O6W8S5S4,1.252717,0.033645,0.600000,0.000000,0.333333,0.000000,1.614175,0.035395,0.533333,3.202199,0.533333,1.136917
AZD488SA9QMYF,0.239735,0.006056,0.933333,0.000000,0.933333,0.000000,0.262011,0.016293,0.933333,0.244930,0.933333,0.244930
AZJ4DFLH9O4FZ,0.560942,0.018114,0.666667,0.000000,0.622222,0.038490,0.609930,0.028343,0.333333,1.331867,0.400000,1.085189


In [148]:
# get difference in global vs local perf
combined_results['global-local_loss'] = combined_results[('loss_global', 'mean')] - combined_results[('loss_local', 'mean')]

In [149]:
combined_results['global-local_acc'] = combined_results[('accuracy_global', 'mean')] - combined_results[('accuracy_local', 'mean')]

In [159]:
# look at raw differences
print(combined_results[['global-local_loss', 'global-local_acc']].abs().mean())
print(combined_results[['global-local_loss', 'global-local_acc']].abs().std())
print(combined_results[['global-local_loss', 'global-local_acc']].abs().quantile(q=[0, .2, .4, .6, .8, 1]))

global-local_loss    0.163934
global-local_acc     0.091426
dtype: float64
global-local_loss    0.177669
global-local_acc     0.102404
dtype: float64
     global-local_loss  global-local_acc
0.0           0.000238          0.000000
0.2           0.038691          0.021692
0.4           0.076750          0.044444
0.6           0.144716          0.087675
0.8           0.255571          0.155556
1.0           1.498613          0.866667


In [153]:
# looks at differences between each model and baselines
combined_results['global-test_prob_dist_loss'] = combined_results[('loss_global', 'mean')] - combined_results['loss_test_prob_baseline']
combined_results['global-train_prob_dist_loss'] = combined_results[('loss_global', 'mean')] - combined_results['loss_train_prob_baseline']
combined_results['local-test_prob_dist_loss'] = combined_results[('loss_local', 'mean')] - combined_results['loss_test_prob_baseline']
combined_results['local-train_prob_dist_loss'] = combined_results[('loss_local', 'mean')] - combined_results['loss_train_prob_baseline']

In [157]:
print(combined_results[['global-test_prob_dist_loss', 'local-test_prob_dist_loss']].mean())
print(combined_results[['global-test_prob_dist_loss', 'local-test_prob_dist_loss']].std())
print(combined_results[['global-test_prob_dist_loss', 'local-test_prob_dist_loss']].quantile(q=[0, .2, .4, .6, .8, 1]))

global-test_prob_dist_loss   -0.068330
local-test_prob_dist_loss    -0.106253
dtype: float64
global-test_prob_dist_loss    0.324465
local-test_prob_dist_loss     0.222716
dtype: float64
     global-test_prob_dist_loss  local-test_prob_dist_loss
0.0                   -1.067018                  -0.960262
0.2                   -0.336674                  -0.282308
0.4                   -0.151714                  -0.128114
0.6                   -0.001755                  -0.021003
0.8                    0.157169                   0.056009
1.0                    1.389568                   0.885743


In [155]:
print(combined_results[['global-train_prob_dist_loss', 'local-train_prob_dist_loss']].mean())
print(combined_results[['global-train_prob_dist_loss', 'local-train_prob_dist_loss']].std())
print(combined_results[['global-train_prob_dist_loss', 'local-train_prob_dist_loss']].quantile(q=[0, .2, .4, .6, .8, 1]))

global-train_prob_dist_loss   -0.433828
local-train_prob_dist_loss    -0.471752
dtype: float64
global-train_prob_dist_loss    0.901789
local-train_prob_dist_loss     0.823324
dtype: float64
     global-train_prob_dist_loss  local-train_prob_dist_loss
0.0                    -6.698675                   -6.290011
0.2                    -0.618539                   -0.531391
0.4                    -0.304541                   -0.288254
0.6                    -0.117212                   -0.138281
0.8                     0.057346                   -0.012891
1.0                     1.366611                    0.198085
