In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules before each cell
# runs, so edits to the project modules imported below are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import sys
sys.path.append("../src/")
import json

import numpy as np
import pandas as pd
import math
from datasets import Dataset, list_metrics, load_metric
from transformers import AutoConfig, AutoTokenizer, AutoModelForMaskedLM, DataCollatorForLanguageModeling, Trainer

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()

from results_analysis.analyze_exp_results import ExpAnalyzer
from results_analysis.analyze_personalized_exp_results import PerPopulationExpAnalyzer
from results_analysis.analysis_utils import plot_user_perf_by_sample_count

from runners.runner_utils import loss_to_perplexity

In [3]:
# Root directory that holds one sub-directory per experiment (joined with experiment names
# in the cells below). Defaults to the shared cluster path; set REDDIT_LM_RESULTS_DIR to
# point the notebook at a different location without editing it.
base_result_dir = os.environ.get(
    "REDDIT_LM_RESULTS_DIR", "/data/ddmg/redditlanguagemodeling/results/"
)

In [4]:
# Load the user-filtered dataset CSV; later cells split it on `data_split`.
data_df = pd.read_csv(
    filepath_or_buffer="/data/ddmg/redditlanguagemodeling/data/3_all_data_user_filtered_2021-06-21.csv",
)

In [5]:
# Rows belonging to the validation split.
val_df = data_df.loc[lambda frame: frame["data_split"] == "val"]
# Distinct authors in that split.
# NOTE(review): the name says "test" but the set is built from the *val* split —
# confirm that the val and test splits share the same authors.
test_users = {*val_df["author"]}
# Display how many distinct authors there are.
len(test_users)

50

In [6]:
# Rows belonging to the training split.
train_df = data_df[data_df["data_split"] == "train"]
# Number of non-null `id` values per author in the training split. Selecting "id" before
# counting gives the same Series as `.count()["id"]` without counting every other column.
user_counts = train_df.groupby("author")["id"].count()
# Keep only the authors present in `test_users` (built in an earlier cell).
user_counts = user_counts[user_counts.index.isin(test_users)]

In [58]:
# Rows belonging to the test split; merged with per-sample losses further down.
test_df = data_df.loc[lambda frame: frame["data_split"] == "test"]

## Fine-Tune All

In [12]:
# Results directory for experiment "2b" (the "Fine-Tune All" run, per the section heading).
rd_2b = os.path.join(base_result_dir, "2b_distilbert_finetuned_constant_lr")
# Build the analyzer for that directory and display its per-data-split performance table.
ea_2b = ExpAnalyzer(rd_2b)
ea_2b.data_split_perf_df

Unnamed: 0,data_split,loss,perplexity
0,train,2.034876,7.651303
1,eval,2.131445,8.427035
2,test,2.146064,8.551135


In [14]:
# Display the per-author loss / perplexity table for experiment 2b.
ea_2b.author_result_df

Unnamed: 0,author,loss,perplexity
0,BRoccoli20,1.992776,7.335873
1,Chad_arbc,2.293011,9.904718
2,Creative310,2.572891,13.103652
3,Dotabjj,3.292901,26.920836
4,Eriflee,1.929266,6.884453
5,GaniB,1.782997,5.947657
6,HelloImLucas,2.221854,9.224417
7,IckyBelly,2.459537,11.699397
8,KarmaKingKong,2.015775,7.506542
9,Kennyv777,2.049708,7.765631


In [21]:
# Mean loss / perplexity across authors. numeric_only=True keeps the string `author`
# column out of the reduction; pandas >= 2.0 raises TypeError instead of dropping it.
ea_2b.author_result_df.mean(numeric_only=True)

loss          2.168301
perplexity    9.537226
dtype: float64

In [22]:
# Standard deviation of loss / perplexity across authors. numeric_only=True keeps the
# string `author` column out of the reduction (pandas >= 2.0 raises TypeError otherwise).
ea_2b.author_result_df.std(numeric_only=True)

loss          0.415313
perplexity    4.424397
dtype: float64

In [10]:
# Mean loss / perplexity over the rows of `sr_result_df`. numeric_only=True restricts the
# reduction to numeric columns, so any label column (presumably the subreddit name —
# not visible here) cannot trigger a TypeError on pandas >= 2.0.
ea_2b.sr_result_df.mean(numeric_only=True)

loss          2.052559
perplexity    7.906884
dtype: float64

In [11]:
# Standard deviation of loss / perplexity over the rows of `sr_result_df`, restricted to
# numeric columns so a label column cannot trigger a TypeError on pandas >= 2.0.
ea_2b.sr_result_df.std(numeric_only=True)

loss          0.192139
perplexity    1.489724
dtype: float64

## Fine-Tune User

In [17]:
# Results directory for experiment "3a" (the "Fine-Tune User" run, per the section heading).
rd_3a = os.path.join(base_result_dir, "3a_distilbert_finetuned_author_constant_lr")
# Build a per-population analyzer keyed on "author" and display its per-split table,
# which carries one train/eval/test row group per author (see the `author` column).
ea_3a = PerPopulationExpAnalyzer(rd_3a, "author")
ea_3a.data_split_perf_df

Found results for 50 sub-populations


Unnamed: 0,data_split,loss,perplexity,author
0,train,2.622854,13.774980,binarygold
1,eval,2.746658,15.590447,binarygold
2,test,2.897412,18.127168,binarygold
0,train,2.455534,11.652651,exab
1,eval,2.328001,10.257412,exab
...,...,...,...,...
1,eval,2.812618,16.653456,Creative310
2,test,3.080813,21.776103,Creative310
0,train,4.246050,69.829067,retardedinvestor
1,eval,3.445839,31.369589,retardedinvestor


In [18]:
# Keep only the test-split rows (one per author) of the 3a results.
test_df_3a = ea_3a.data_split_perf_df.loc[lambda perf: perf["data_split"] == "test"]

In [19]:
# Mean test loss / perplexity across authors. numeric_only=True skips the string columns
# (`data_split`, `author`); pandas >= 2.0 raises TypeError instead of dropping them.
test_df_3a.mean(numeric_only=True)

loss           2.578356
perplexity    14.790691
dtype: float64

In [20]:
# Standard deviation of test loss / perplexity across authors, numeric columns only
# (the string `data_split` / `author` columns raise TypeError on pandas >= 2.0).
test_df_3a.std(numeric_only=True)

loss          0.471111
perplexity    8.140301
dtype: float64

## Fine-Tune Subreddit

In [23]:
# Results directory for experiment "4a" (the "Fine-Tune Subreddit" run, per the section heading).
rd_4a = os.path.join(base_result_dir, "4a_distilbert_finetuned_subreddit_constant_lr")
# Build a per-population analyzer keyed on "subreddit" and display its per-split table.
ea_4a = PerPopulationExpAnalyzer(rd_4a, "subreddit")
ea_4a.data_split_perf_df

Found results for 6 sub-populations


Unnamed: 0,data_split,loss,perplexity,subreddit
0,train,1.605824,4.981964,investing
1,eval,2.3677,10.672818,investing
2,test,2.260057,9.583633,investing
0,train,2.151371,8.596637,relationship_advice
1,eval,2.005614,7.430652,relationship_advice
2,test,1.91443,6.783074,relationship_advice
0,train,2.23541,9.350319,AskDocs
1,eval,1.904507,6.716098,AskDocs
2,test,2.092766,8.107306,AskDocs
0,train,2.233693,9.334278,Bitcoin


In [24]:
# Keep only the test-split rows (one per subreddit) of the 4a results.
test_df_4a = ea_4a.data_split_perf_df.loc[lambda perf: perf["data_split"] == "test"]

In [25]:
# Mean test loss / perplexity across subreddits. numeric_only=True skips the string
# columns (`data_split`, `subreddit`), which raise TypeError on pandas >= 2.0.
test_df_4a.mean(numeric_only=True)

loss          2.123874
perplexity    8.444052
dtype: float64

In [26]:
# Standard deviation of test loss / perplexity across subreddits, numeric columns only
# (the string `data_split` / `subreddit` columns raise TypeError on pandas >= 2.0).
test_df_4a.std(numeric_only=True)

loss          0.152797
perplexity    1.262359
dtype: float64

## Multi-Stage Fine-Tune

#### lr = 5e-05

In [29]:
# Results directory for experiment 6, run "3_3_epochs_lr_5e-5" (the lr = 5e-05 variant of
# the multi-stage fine-tune, per the section headings).
rd_6_55 = os.path.join(base_result_dir, "6_distilbert_finetuned_all_author", "3_3_epochs_lr_5e-5")
# Build a per-author analyzer and display its per-split performance table.
ea_6_55 = PerPopulationExpAnalyzer(rd_6_55, "author")
ea_6_55.data_split_perf_df

Found results for 50 sub-populations


Unnamed: 0,data_split,loss,perplexity,author
0,train,2.080728,8.010300,wsace
1,eval,2.226663,9.268881,wsace
2,test,1.890351,6.621694,wsace
0,train,2.578425,13.176366,Dotabjj
1,eval,2.394565,10.963432,Dotabjj
...,...,...,...,...
1,eval,1.940132,6.959667,hgmichna
2,test,2.304577,10.019938,hgmichna
0,train,1.963913,7.127163,Olliebkl
1,eval,1.986492,7.289917,Olliebkl


In [30]:
# Keep only the test-split rows (one per author) of the lr = 5e-05 run.
test_df_6_55 = ea_6_55.data_split_perf_df.loc[lambda perf: perf["data_split"] == "test"]

In [32]:
# Mean test loss / perplexity across authors. numeric_only=True skips the string columns
# (`data_split`, `author`), which raise TypeError on pandas >= 2.0.
test_df_6_55.mean(numeric_only=True)

loss          2.141876
perplexity    9.302310
dtype: float64

In [33]:
# Standard deviation of test loss / perplexity across authors, numeric columns only
# (the string `data_split` / `author` columns raise TypeError on pandas >= 2.0).
test_df_6_55.std(numeric_only=True)

loss          0.420664
perplexity    4.291948
dtype: float64

#### lr=1e-05

In [35]:
# Results directory for experiment 6, run "3_3_epochs_lr_1e-5" (the lr = 1e-05 variant of
# the multi-stage fine-tune, per the section headings).
rd_6_15 = os.path.join(base_result_dir, "6_distilbert_finetuned_all_author", "3_3_epochs_lr_1e-5")
# Build a per-author analyzer and display its per-split performance table.
ea_6_15 = PerPopulationExpAnalyzer(rd_6_15, "author")
ea_6_15.data_split_perf_df

Found results for 50 sub-populations


Unnamed: 0,data_split,loss,perplexity,author
0,train,2.599961,13.463215,lightlasertower
1,eval,2.205059,9.070785,lightlasertower
2,test,2.828444,16.919122,lightlasertower
0,train,2.052993,7.791189,peimom123
1,eval,1.848669,6.351360,peimom123
...,...,...,...,...
1,eval,1.804049,6.074194,Kennyv777
2,test,2.027661,7.596295,Kennyv777
0,train,2.120001,8.331147,blossbloss
1,eval,2.204881,9.069174,blossbloss


In [36]:
# Keep only the test-split rows (one per author) of the lr = 1e-05 run.
test_df_6_15 = ea_6_15.data_split_perf_df.loc[lambda perf: perf["data_split"] == "test"]

In [37]:
# Mean test loss / perplexity across authors. numeric_only=True skips the string columns
# (`data_split`, `author`), which raise TypeError on pandas >= 2.0.
test_df_6_15.mean(numeric_only=True)

loss          2.152483
perplexity    9.392736
dtype: float64

In [38]:
# Standard deviation of test loss / perplexity across authors, numeric columns only
# (the string `data_split` / `author` columns raise TypeError on pandas >= 2.0).
test_df_6_15.std(numeric_only=True)

loss          0.416811
perplexity    4.362590
dtype: float64

#### lr=5e-06

In [39]:
# Results directory for experiment 6, run "3_3_epochs_lr_5e-6" (the lr = 5e-06 variant of
# the multi-stage fine-tune, per the section headings).
rd_6_56 = os.path.join(base_result_dir, "6_distilbert_finetuned_all_author", "3_3_epochs_lr_5e-6")
# Build a per-author analyzer and display its per-split performance table.
ea_6_56 = PerPopulationExpAnalyzer(rd_6_56, "author")
ea_6_56.data_split_perf_df

Found results for 50 sub-populations


Unnamed: 0,data_split,loss,perplexity,author
0,train,2.252515,9.511629,closingbell
1,eval,2.020747,7.543958,closingbell
2,test,2.169914,8.757531,closingbell
0,train,1.858269,6.412629,HelloImLucas
1,eval,1.946663,7.005270,HelloImLucas
...,...,...,...,...
1,eval,2.223102,9.235939,wsace
2,test,1.917891,6.806587,wsace
0,train,2.328436,10.261882,BRoccoli20
1,eval,2.130356,8.417864,BRoccoli20


In [40]:
# Keep only the test-split rows (one per author) of the lr = 5e-06 run.
test_df_6_56 = ea_6_56.data_split_perf_df.loc[lambda perf: perf["data_split"] == "test"]

In [41]:
# Mean test loss / perplexity across authors. numeric_only=True skips the string columns
# (`data_split`, `author`), which raise TypeError on pandas >= 2.0.
test_df_6_56.mean(numeric_only=True)

loss          2.159230
perplexity    9.454112
dtype: float64

In [42]:
# Standard deviation of test loss / perplexity across authors, numeric columns only
# (the string `data_split` / `author` columns raise TypeError on pandas >= 2.0).
test_df_6_56.std(numeric_only=True)

loss          0.416104
perplexity    4.390839
dtype: float64

## Fine-tune All Data (just 2 epochs)

In [43]:
# Results directory for the 2-epoch variant of experiment 2b (per the directory name).
rd_2b_2e = os.path.join(base_result_dir, "2b_distilbert_finetuned_constant_lr_2_epochs")
# Build the analyzer for that directory and display its per-data-split performance table.
ea_2b_2e = ExpAnalyzer(rd_2b_2e)
ea_2b_2e.data_split_perf_df

Unnamed: 0,data_split,loss,perplexity
0,train,2.096967,8.14144
1,eval,2.239995,9.393282
2,test,2.189264,8.92864


In [44]:
# Mean loss / perplexity across authors for the 2-epoch run. numeric_only=True keeps the
# string `author` column out of the reduction (pandas >= 2.0 raises TypeError otherwise).
ea_2b_2e.author_result_df.mean(numeric_only=True)

loss          2.216824
perplexity    9.983422
dtype: float64

In [46]:
# Standard deviation of loss / perplexity across authors for the 2-epoch run, numeric
# columns only (the string `author` column raises TypeError on pandas >= 2.0).
ea_2b_2e.author_result_df.std(numeric_only=True)

loss          0.412176
perplexity    4.454463
dtype: float64

## Fine-Tune All (2 epochs) + Fine-Tune User (1 epoch)

In [47]:
# Results directory for experiment 6, run "2_1_epochs_lr_5e-5" (presumably 2 epochs on all
# data followed by 1 epoch per user, going by the section heading — confirm).
rd_6_21 = os.path.join(base_result_dir, "6_distilbert_finetuned_all_author", "2_1_epochs_lr_5e-5")
# Build a per-author analyzer and display its per-split performance table.
ea_6_21 = PerPopulationExpAnalyzer(rd_6_21, "author")
ea_6_21.data_split_perf_df

Found results for 50 sub-populations


Unnamed: 0,data_split,loss,perplexity,author
0,train,2.302821,10.002361,BRoccoli20
1,eval,2.194993,8.979938,BRoccoli20
2,test,2.007158,7.442139,BRoccoli20
0,train,2.634305,13.933625,lightlasertower
1,eval,3.119262,22.629667,lightlasertower
...,...,...,...,...
1,eval,2.485313,12.004882,hrrm
2,test,2.172009,8.775893,hrrm
0,train,2.464687,11.759801,KarmaKingKong
1,eval,2.304032,10.014476,KarmaKingKong


In [48]:
# Keep only the test-split rows (one per author) of the "2_1_epochs" run.
test_df_6_21 = ea_6_21.data_split_perf_df.loc[lambda perf: perf["data_split"] == "test"]

In [49]:
# Mean test loss / perplexity across authors. numeric_only=True skips the string columns
# (`data_split`, `author`), which raise TypeError on pandas >= 2.0.
test_df_6_21.mean(numeric_only=True)

loss          2.201611
perplexity    9.846082
dtype: float64

In [50]:
# Standard deviation of test loss / perplexity across authors, numeric columns only
# (the string `data_split` / `author` columns raise TypeError on pandas >= 2.0).
test_df_6_21.std(numeric_only=True)

loss          0.414254
perplexity    4.437918
dtype: float64

## Fine-Tune All 6 epochs

In [52]:
# Results directory for experiment "2c" (fine-tune on all data for 6 epochs, per the
# section heading and directory name).
rd_2c = os.path.join(base_result_dir, "2c_distilbert_finetuned_all_6_epochs")
# Build the analyzer for that directory and display its per-data-split performance table.
ea_2c = ExpAnalyzer(rd_2c)
ea_2c.data_split_perf_df

Unnamed: 0,data_split,loss,perplexity
0,train,1.926833,6.867727
1,eval,2.095439,8.129012
2,test,2.075626,7.969537


In [53]:
# Mean loss / perplexity across authors for the 6-epoch run. numeric_only=True keeps the
# string `author` column out of the reduction (pandas >= 2.0 raises TypeError otherwise).
ea_2c.author_result_df.mean(numeric_only=True)

loss          2.094127
perplexity    8.878195
dtype: float64

In [54]:
# Standard deviation of loss / perplexity across authors for the 6-epoch run, numeric
# columns only (the string `author` column raises TypeError on pandas >= 2.0).
ea_2c.author_result_df.std(numeric_only=True)

loss          0.422024
perplexity    4.215700
dtype: float64

## Subreddit Fine-Tune - Per Sample Losses

In [61]:
# Results directory for experiment "4b" (subreddit fine-tune evaluated per sample, per the
# section heading and directory name).
rd_4b = os.path.join(base_result_dir, "4b_distilbert_finetuned_subreddit_sample_eval")
# Build a per-subreddit analyzer and display its per-split table (test rows only here,
# as the output shows).
ea_4b = PerPopulationExpAnalyzer(rd_4b, "subreddit")
ea_4b.data_split_perf_df

Found results for 6 sub-populations


Unnamed: 0,data_split,loss,perplexity,subreddit
0,test,1.984499,7.275399,relationships
0,test,2.199799,9.023196,mentalhealth
0,test,1.91443,6.783074,relationship_advice
0,test,2.260057,9.583633,investing
0,test,2.291697,9.891705,Bitcoin
0,test,2.092766,8.107306,AskDocs


In [62]:
# Display the per-sample losses (columns: id, loss, subreddit, plus an "Unnamed: 0" column
# that looks like a saved index from a CSV round-trip — TODO confirm).
ea_4b.sample_losses_df

Unnamed: 0.1,Unnamed: 0,id,loss,subreddit
0,0,3k1pb8,2.197476,relationships
1,1,3qx8q8,1.726905,relationships
2,2,3s4h59,1.331028,relationships
3,3,4i9hbw,2.645278,relationships
4,4,5sz6u7,2.322477,relationships
...,...,...,...,...
71,71,jlz5y6,1.971650,AskDocs
72,72,jyjrz1,2.721585,AskDocs
73,73,k2vcqn,0.954532,AskDocs
74,74,kdp5wv,2.133715,AskDocs


In [63]:
# Attach test-set metadata (e.g. `author`) to every per-sample loss by joining on the
# sample `id`; an inner join keeps only ids present in both frames.
merged_df_4b = pd.merge(ea_4b.sample_losses_df, test_df, on="id", how="inner")

In [64]:
# Number of rows that survived the inner join.
merged_df_4b.shape[0]

521

In [72]:
# Mean per-sample loss for each author, as a one-column frame indexed by author.
# "loss" is selected *before* aggregating: averaging the whole merged frame also tries to
# average its string columns (e.g. `id`, subreddit), which raises TypeError on
# pandas >= 2.0 and is wasted work on older versions.
user_results_4b = merged_df_4b.groupby("author")["loss"].mean().to_frame()

In [75]:
# Convert each author's mean loss to a perplexity with the project helper.
user_results_4b["perplexity"] = user_results_4b["loss"].apply(loss_to_perplexity)

In [77]:
# Mean of the per-author perplexities.
user_results_4b.loc[:, "perplexity"].mean()

10.230638442759604

In [78]:
# Standard deviation of the per-author perplexities.
user_results_4b.loc[:, "perplexity"].std()

5.537939771852542

## Sample Weighting - .9

In [8]:
# Results directory for experiment 5 (author sample weighting).
# NOTE(review): the directory name ends in ".9" — make sure the section heading states the
# same weight as the run loaded here.
rd_5 = os.path.join(base_result_dir, "5_distilbert_sw_author_.9")
# Build a per-author analyzer and display its per-split table (train metrics are NaN for
# this run, as the output shows).
ea_5 = PerPopulationExpAnalyzer(rd_5, "author")
ea_5.data_split_perf_df

Found results for 50 sub-populations


Unnamed: 0,data_split,loss,perplexity,author
0,train,,,glittertoes1
1,eval,1.714517,5.553990,glittertoes1
2,test,1.913946,6.779786,glittertoes1
0,train,,,exab
1,eval,2.010414,7.466405,exab
...,...,...,...,...
1,eval,1.746887,5.736717,goodluckchuck11
2,test,1.872536,6.504774,goodluckchuck11
0,train,,,Olliebkl
1,eval,2.026592,7.588184,Olliebkl


In [9]:
# Keep only the test-split rows (one per author) of the sample-weighting run.
test_df_5 = ea_5.data_split_perf_df.loc[lambda perf: perf["data_split"] == "test"]

In [10]:
# Mean test loss / perplexity across authors. numeric_only=True skips the string columns
# (`data_split`, `author`), which raise TypeError on pandas >= 2.0.
test_df_5.mean(numeric_only=True)

loss           2.233980
perplexity    10.195263
dtype: float64

In [11]:
# Standard deviation of test loss / perplexity across authors, numeric columns only
# (the string `data_split` / `author` columns raise TypeError on pandas >= 2.0).
test_df_5.std(numeric_only=True)

loss          0.419709
perplexity    4.689084
dtype: float64