# Triage all errors caught during experiments

In [1]:
# define the results file here. this file should be created by the script process_inbox.py
results_csv = "/Users/duncan/research/active_projects/reczilla/results/results.csv"

# define a function that takes a row as input an returns True if the row should be included in the meta-dataset and false otherwise
def include_row(row):
    if row["experiment_name"].startswith("neurips-"):
        return True
    else:
        return False


In [2]:
import pandas as pd
import numpy as np

df = pd.read_csv(results_csv, sep=";")

  interactivity=interactivity, compiler=compiler, result=result)


## Filter the results based on the function "include_row"

In [3]:
### keep experiments indicated by function include_row
experiment_prefix = "full-experiment-"
keep_rows = df.apply(include_row, axis=1)
df_expt = df.loc[keep_rows, :]


# Analyze all failed jobs and exceptions

In [4]:
#### count number of results for each dataset-alg pair
ignore_datasets = ["AmazonBooksReader", "AmazonPurchaseCirclesReader", "GoogleLocalReviewsReader"]
df_tmp = df_expt.loc[~df_expt["dataset_name"].isin(ignore_datasets), :].copy()
knn_rows = df_tmp["alg_name"].str.contains("KNN")

knn_basename = df_tmp.loc[knn_rows, "alg_name"].apply(lambda x: x.split("_")[0])
knn_sim = df_tmp.loc[knn_rows, "alg_name"].apply(lambda x: x.split("_")[1])
df_tmp.loc[knn_rows, "alg_name"] = knn_basename  # either UserKNN or ItemKNN

# number of samples for each dataset-alg pair
num_samples = df_tmp.groupby(["alg_name", "dataset_name"]).size().rename("num_samples").reset_index()


In [5]:
## get a complete list of all algs and datasets
all_datasets = list(df_tmp["dataset_name"].unique())
all_algs = list(df_tmp["alg_name"].unique())

### In num_samples, make sure there is one entry for each alg-dataset pair

In [8]:
# get every combination of alg-dataset
import itertools
alg_dataset_pairs = [x for x in itertools.product(all_algs, all_datasets)]
alg_list_tmp = [x[0] for x in alg_dataset_pairs]
dataset_list_tmp = [x[1] for x in alg_dataset_pairs]

num_samples = num_samples.merge(pd.DataFrame(
    {
        "alg_name": alg_list_tmp,
        "dataset_name": dataset_list_tmp,
        }), how="outer")

# set all new rows (with no samples) to zero
num_samples.loc[num_samples["num_samples"].isna(), "num_samples"] = 0

# list of algs that only have one sample (no hyperparams)
one_sample_algs = [
    "SlopeOne",
    "TopPop",
    "GlobalEffects", 
    "Random",
    ]

# Parameterized algs with fewer than 100 samples...

In [9]:
num_samples.loc[(num_samples["num_samples"] < 100) & ~num_samples["alg_name"].isin(one_sample_algs), :]

Unnamed: 0,alg_name,dataset_name,num_samples
1527,SLIMElasticNetRecommender,MarketBiasAmazonReader,0.0
1528,P3alphaRecommender,AmazonKitchenDiningReader,0.0
1529,MatrixFactorization_BPR_Cython,AmazonDigitalMusicReader,0.0


There are only three instances of this, and they all yield zero samples..

In [54]:
# look into these... (they have )
df[(df["alg_name"] == "P3alphaRecommender") & (df["dataset_name"] == "AmazonKitchenDiningReader")]

Unnamed: 0,alg_name,alg_seed,cutoff_list,dataset_name,exception,experiment_name,hyperparameters_source,num_samples,original_split_path,param_alpha,...,test_metric_USERS_IN_GT_cut_5,test_metric_USERS_IN_GT_cut_50,test_metric_USERS_IN_GT_cut_6,test_metric_USERS_IN_GT_cut_7,test_metric_USERS_IN_GT_cut_8,test_metric_USERS_IN_GT_cut_9,time,time_on_test,time_on_train,time_on_val


In [56]:
df[(df["alg_name"] == "MatrixFactorization_BPR_Cython") & (df["dataset_name"] == "AmazonDigitalMusicReader")]

Unnamed: 0,alg_name,alg_seed,cutoff_list,dataset_name,exception,experiment_name,hyperparameters_source,num_samples,original_split_path,param_alpha,...,test_metric_USERS_IN_GT_cut_5,test_metric_USERS_IN_GT_cut_50,test_metric_USERS_IN_GT_cut_6,test_metric_USERS_IN_GT_cut_7,test_metric_USERS_IN_GT_cut_8,test_metric_USERS_IN_GT_cut_9,time,time_on_test,time_on_train,time_on_val


To look into why these experiments failed, we will need to look at the log files.

- SLIMElasticNetRecommender	+ MarketBiasAmazonReader: (log file: log_340_050922_145822.txt): this job failed due to SSH timeout, not a code issue.
- P3alphaRecommender + AmazonKitchenDiningReader: (log file: log_305_050422_230905.txt):  also failed due to SSH timeout.
- MatrixFactorization_BPR_Cython + AmazonDigitalMusicReader: (log file: log_203_050722_191431.txt): same error as the other two...

here is an example of what this error looks like:
```
launching instance neurips-llo-a-305...
Created [https://www.googleapis.com/compute/v1/projects/research-collab-naszilla/zones/us-central1-a/instances/neurips-llo-a-305].
NAME               ZONE           MACHINE_TYPE  PREEMPTIBLE  INTERNAL_IP    EXTERNAL_IP    STATUS
neurips-llo-a-305  us-central1-a  n1-highmem-2               10.128.15.206  35.238.53.180  RUNNING
successfully created instance: neurips-llo-a-305
ssh: connect to host 35.238.53.180 port 22: Connection timed out

Recommendation: To check for possible causes of SSH connectivity issues and get
recommendations, rerun the ssh command with the --troubleshoot option.

gcloud compute ssh neurips-llo-a-305 --project=research-collab-naszilla --zone=us-central1-a --troubleshoot

Or, to investigate an IAP tunneling issue:

gcloud compute ssh neurips-llo-a-305 --project=research-collab-naszilla --zone=us-central1-a --troubleshoot --tunnel-through-iap

ERROR: (gcloud.compute.ssh) [/usr/bin/ssh] exited with return code [255].
failed to run experiment during attempt 2... (exit code: 255)
trying again in 30 seconds...
ssh: connect to host 35.238.53.180 port 22: Connection timed out

Recommendation: To check for possible causes of SSH connectivity issues and get
recommendations, rerun the ssh command with the --troubleshoot option.

gcloud compute ssh neurips-llo-a-305 --project=research-collab-naszilla --zone=us-central1-a --troubleshoot

Or, to investigate an IAP tunneling issue:

gcloud compute ssh neurips-llo-a-305 --project=research-collab-naszilla --zone=us-central1-a --troubleshoot --tunnel-through-iap

ERROR: (gcloud.compute.ssh) [/usr/bin/ssh] exited with return code [255].
failed to run experiment during attempt 3... (exit code: 255)
too many SSH attempts. giving up and deleting instance.
```


**CONCLUSION**:
- these three instances are one-off SSH errors, we could re-run them if we'd like, but I wouldn't bother. 

### Now look at one-sample jobs that failed

In [58]:
num_samples.loc[(num_samples["num_samples"] < 1) & num_samples["alg_name"].isin(one_sample_algs), :]

Unnamed: 0,alg_name,dataset_name,num_samples


it appears that there are results for all one-sample algs! this is good. let's look at the exceptions:

In [72]:
one_sample_exceptions = df_tmp.loc[df_tmp["alg_name"].isin(one_sample_algs), "exception"].unique()
print(f"number of exceptions for one-sample algs: {len(one_sample_exceptions)}")
print(f"total number of exceptions: {sum(~df_tmp.loc[df_tmp['alg_name'].isin(one_sample_algs), 'exception'].isna())}")

print(df_tmp[df_tmp['alg_name'].isin(one_sample_algs) & ~df_tmp["exception"].isna()][["alg_name", "dataset_name"]])

number of exceptions for one-sample algs: 4
total number of exceptions: 32
        alg_name                       dataset_name
1515    SlopeOne     AmazonAmazonInstantVideoReader
3330    SlopeOne         AmazonSportsOutdoorsReader
6915    SlopeOne     AmazonGroceryGourmetFoodReader
7266    SlopeOne             AmazonVideoGamesReader
12103   SlopeOne   AmazonToolsHomeImprovementReader
22917   SlopeOne             AmazonAutomotiveReader
23733   SlopeOne  AmazonCellPhonesAccessoriesReader
28479   SlopeOne        AmazonPatioLawnGardenReader
29071   SlopeOne                      GowallaReader
37021   SlopeOne                      RecipesReader
37154   SlopeOne            AmazonPetSuppliesReader
41954   SlopeOne                     EpinionsReader
52218   SlopeOne            AmazonElectronicsReader
65504   SlopeOne           AmazonDigitalMusicReader
69125   SlopeOne              AmazonToysGamesReader
72451   SlopeOne               AmazonSoftwareReader
73590   SlopeOne                       Da

**Note**: only SlopeOne failed... let's see why.



In [74]:
one_sample_exceptions[1]

'Traceback (most recent call last):\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 402, in _objective_function\n    result_dict, result_string, recommender_instance, train_time, evaluation_time = self._evaluate_on_validation(current_fit_parameters_dict)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/RandomSearch.py", line 50, in _evaluate_on_validation\n    current_fit_parameters\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 299, in _evaluate_on_validation\n    recommender_instance\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/Base/Evaluation/Evaluator.py", line 253, in evaluateRecommender\n    results_dict = self._run_evaluation_on_selected_users(recommender_object, self.users_to_evaluate)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/Base/Evaluation/Evaluator.py", line 455, in _run

In [75]:
one_sample_exceptions[2]

'Traceback (most recent call last):\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 402, in _objective_function\n    result_dict, result_string, recommender_instance, train_time, evaluation_time = self._evaluate_on_validation(current_fit_parameters_dict)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/RandomSearch.py", line 50, in _evaluate_on_validation\n    current_fit_parameters\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 293, in _evaluate_on_validation\n    recommender_instance, train_time = self._fit_model(current_fit_parameters)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 283, in _fit_model\n    **current_fit_parameters)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/SurpriseAlgorithms/Wrappers.py", line 94, in fit\n    self.surpri

In [76]:
one_sample_exceptions[3]

'Traceback (most recent call last):\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 402, in _objective_function\n    result_dict, result_string, recommender_instance, train_time, evaluation_time = self._evaluate_on_validation(current_fit_parameters_dict)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/RandomSearch.py", line 50, in _evaluate_on_validation\n    current_fit_parameters\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 299, in _evaluate_on_validation\n    recommender_instance\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/Base/Evaluation/Evaluator.py", line 253, in evaluateRecommender\n    results_dict = self._run_evaluation_on_selected_users(recommender_object, self.users_to_evaluate)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/Base/Evaluation/Evaluator.py", line 455, in _run

**CONCLUSION**:
- All of these SlopeOne errors are memory errors... There are 32 total. So we would need to re-run SlopeOne on, basically, all datasets with increased memory.

## Exceptions from Multiple-sample jobs


In [102]:
# first, count the total number of exceptions for each alg & dataset pair..
multi_sample_exceptions = df_tmp.loc[~df_tmp["alg_name"].isin(one_sample_algs), "exception"].unique()
print(f"number of exceptions for multi-sample algs: {len(multi_sample_exceptions)}")
print(f"total number of exceptions: {sum(~df_tmp.loc[~df_tmp['alg_name'].isin(one_sample_algs), 'exception'].isna())}")

# print(df_tmp[~df_tmp['alg_name'].isin(one_sample_algs) & ~df_tmp["exception"].isna()][["alg_name", "dataset_name"]])
num_exceptions = df_tmp[~df_tmp['alg_name'].isin(one_sample_algs) & ~df_tmp["exception"].isna()].groupby(["alg_name", "dataset_name"]).size().rename("num_except").reset_index()
# print(df_tmp[~df_tmp['alg_name'].isin(one_sample_algs) & ~df_tmp["exception"].isna()].groupby(["alg_name", "dataset_name"]).size())

# print this out for each alg
print("total num exceptions:")
print(num_exceptions.groupby("alg_name")["num_except"].sum())
# for alg in num_exceptions["alg_name"].unique():
#     total_exceptions = num_exceptions[num_exceptions["alg_name"] == alg]["num_except"].sum()
#     print(f"total exceptions for alg {alg}: \n{total_exceptions}")
    # print(num_exceptions[num_exceptions["alg_name"]? == alg])

number of exceptions for multi-sample algs: 63
total number of exceptions: 6322
total num exceptions:
alg_name
EASE_R_Recommender                    3299
IALSRecommender                          2
ItemKNNCF                               87
MatrixFactorization_AsySVD_Cython        1
MatrixFactorization_BPR_Cython           6
MatrixFactorization_FunkSVD_Cython       2
NMFRecommender                        2620
P3alphaRecommender                      98
PureSVDRecommender                      59
RP3betaRecommender                     100
SLIM_BPR_Cython                          4
UserKNNCF                               44
Name: num_except, dtype: int64


In [184]:
# now group by dataset
print("total num exceptions by dataset:")
print(num_exceptions.groupby("dataset_name")["num_except"].sum().sort_values(ascending=False))


total num exceptions by dataset:
dataset_name
Jester2Reader                        231
AnimeReader                          229
AmazonIndustrialScientificReader     149
AmazonAmazonInstantVideoReader       142
AmazonMusicalInstrumentsReader       127
AmazonArtsCraftsSewingReader         111
AmazonDigitalMusicReader             110
AmazonToolsHomeImprovementReader     110
AmazonVideoGamesReader               110
RecipesReader                        109
AmazonAppsforAndroidReader           108
AmazonPatioLawnGardenReader          107
MovieTweetingsReader                 107
AmazonOfficeProductsReader           106
AmazonPetSuppliesReader              106
DatingReader                         106
Movielens20MReader                   106
AmazonBabyReader                     106
AmazonBeautyReader                   106
AmazonAutomotiveReader               105
BookCrossingReader                   105
GowallaReader                        104
AmazonToysGamesReader                104
AmazonKindl

Most of the exceptions come from EASE_R - let's see what they are.

There is no especially problematic dataset -- though Jester and Anime have more than others. Probably because they are larger? 

In [105]:
# look at ease-r exceptions
ease_r_excepts = df_tmp.loc[(df_tmp["alg_name"] == "EASE_R_Recommender"), "exception"].unique()
print(f"{len(ease_r_excepts)} exceptions for ease-r")

3 exceptions for ease-r


In [107]:
ease_r_excepts[1]

'Traceback (most recent call last):\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 402, in _objective_function\n    result_dict, result_string, recommender_instance, train_time, evaluation_time = self._evaluate_on_validation(current_fit_parameters_dict)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/RandomSearch.py", line 50, in _evaluate_on_validation\n    current_fit_parameters\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 293, in _evaluate_on_validation\n    recommender_instance, train_time = self._fit_model(current_fit_parameters)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 283, in _fit_model\n    **current_fit_parameters)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/EASE_R/EASE_R_Recommender.py", line 56, in fit\n    grahm_matrix 

In [108]:
ease_r_excepts[2]

'Traceback (most recent call last):\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 402, in _objective_function\n    result_dict, result_string, recommender_instance, train_time, evaluation_time = self._evaluate_on_validation(current_fit_parameters_dict)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/RandomSearch.py", line 50, in _evaluate_on_validation\n    current_fit_parameters\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 293, in _evaluate_on_validation\n    recommender_instance, train_time = self._fit_model(current_fit_parameters)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 283, in _fit_model\n    **current_fit_parameters)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/EASE_R/EASE_R_Recommender.py", line 65, in fit\n    P = np.linalg

These are all memory errors...

**CONCLUSION**: 
- there were 3299 exceptions thrown for EASE-R - all memory errors 

### How many of these exceptions are memory errors?

In [111]:
num_mem_excepts = sum(
    ~df_tmp["alg_name"].isin(one_sample_algs) 
    & (df_tmp["exception"].str.contains("memory") | df_tmp["exception"].str.contains("Memory"))
)
num_total_excepts = sum(
    ~df_tmp["alg_name"].isin(one_sample_algs) 
    & ~df_tmp["exception"].isna()
)

In [112]:
print(f"there are {num_mem_excepts} memory exceptions out of {num_total_excepts} total")

there are 3402 memory exceptions out of 6322 total


In [156]:
## if we ignore NMF, how many memory errors?
num_mem_excepts_2 = sum(
    ~df_tmp["alg_name"].isin(one_sample_algs) & (df_tmp["alg_name"] != "NMFRecommender")
    & (df_tmp["exception"].str.contains("memory") | df_tmp["exception"].str.contains("Memory"))
)
num_total_excepts_2 = sum(
    ~df_tmp["alg_name"].isin(one_sample_algs) & (df_tmp["alg_name"] != "NMFRecommender")
    & ~df_tmp["exception"].isna()
)
print(f"ignoring NMF: there are {num_mem_excepts_2} memory exceptions out of {num_total_excepts_2} total")

ignoring NMF: there are 3383 memory exceptions out of 3702 total


### Look at NMFRecommender exceptions

In [138]:
# look at NMFRecommender exceptions
nmf_r_excepts = df_tmp.loc[(df_tmp["alg_name"] == "NMFRecommender"), "exception"].unique()
nmf_excepts_small = [x for x in nmf_r_excepts if "IndexError: index" not in str(x)]
print(f"{len(nmf_r_excepts)} exceptions for NMF")

38 exceptions for NMF


In [154]:
# print(len([x for x in nmf_r_excepts if ("memory" in str(x)) or ("Memory" in str(x))]))
# print(sum(
#     (df_tmp["alg_name"] == "NMFRecommender")
#     & (df_tmp["exception"].str.contains("memory") | df_tmp["exception"].str.contains("Memory"))
# ))
# 3
# 19
# print(len([x for x in nmf_r_excepts if ("kullback-leibler" in str(x))]))
# print(sum(
#     (df_tmp["alg_name"] == "NMFRecommender")
#     & (df_tmp["exception"].str.contains("kullback-leibler"))
# ))
# 1
# 1462
# print(len([x for x in nmf_r_excepts if ("Input contains NaN," in str(x))]))
# print(sum(
#     (df_tmp["alg_name"] == "NMFRecommender")
#     & (df_tmp["exception"].str.contains("Input contains NaN,"))
# ))
# 1
# 157
# print(len([x for x in nmf_r_excepts if ("IndexError: index" in str(x))]))
# print(sum(
#     (df_tmp["alg_name"] == "NMFRecommender")
#     & (df_tmp["exception"].str.contains("IndexError: index"))
# ))
# 31
# 782
# print(len([x for x in nmf_r_excepts if ("Negative values in data passed to " in str(x))]))
# print(sum(
#     (df_tmp["alg_name"] == "NMFRecommender")
#     & (df_tmp["exception"].str.contains("Negative values in data passed to "))
# ))
# 1
# 200


**CONCLUSION**: 
- So -- there are a lot of errors in NMF...
- only a small number of memory errors (19 total!)
- a ton of bad parameter sets -- using k-l parameter when it's not allowed (1462 instances)
- a bunch of ValueError occurrences (157)
- a bunch of IndexError occurrences (782)
- NMF can't handle negative values, so 200 occurrences here.

I'd just as well ignore all of these issues for now, since some wi?ll take time to debug.

## Now ItemKNNCF

In [157]:
# look at itemknn exceptions
itemknn_excepts = df_tmp.loc[(df_tmp["alg_name"] == "ItemKNNCF"), "exception"].unique()
print(f"{len(itemknn_excepts)} exceptions for itemknn")

8 exceptions for itemknn


In [162]:
# how many memory errors?
num_mem_excepts_knn = sum(
    (df_tmp["alg_name"] == "ItemKNNCF")
    & (df_tmp["exception"].str.contains("memory") | df_tmp["exception"].str.contains("Memory"))
)
num_total_excepts_knn = sum(
    (df_tmp["alg_name"] == "ItemKNNCF")
    & ~df_tmp["exception"].isna()
)
print(f"itemKNN: there are {num_mem_excepts_knn} memory exceptions out of {num_total_excepts_knn} total")

itemKNN: there are 57 memory exceptions out of 87 total


In [167]:
nonmem_itemknn_excepts = [e for e in itemknn_excepts if ("memory" not in str(e)) and ("Memory" not in str(e))]

# all of these are negative value errors.

**CONCLUSION**:
- all of the ItemKNN errors are memory errors... only 57 of these occurred.

### Now P3alphaRecommender

In [172]:
# look at P3alphaRecommender exceptions
p3alpha = df_tmp.loc[(df_tmp["alg_name"] == "P3alphaRecommender"), "exception"].unique()
print(f"{len(p3alpha)} exceptions for P3alphaRecommender")

2 exceptions for P3alphaRecommender


In [174]:
p3alpha[1]

'Traceback (most recent call last):\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 402, in _objective_function\n    result_dict, result_string, recommender_instance, train_time, evaluation_time = self._evaluate_on_validation(current_fit_parameters_dict)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/RandomSearch.py", line 50, in _evaluate_on_validation\n    current_fit_parameters\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 293, in _evaluate_on_validation\n    recommender_instance, train_time = self._fit_model(current_fit_parameters)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 283, in _fit_model\n    **current_fit_parameters)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/GraphBased/P3alphaRecommender.py", line 138, in fit\n    self.W_s

**CONCLUSION**:
- ~100 ValueError instances occur for p3alpha

### Now RP3betaRecommender

In [175]:
# look at rp3beta exceptions
rp3beta = df_tmp.loc[(df_tmp["alg_name"] == "RP3betaRecommender"), "exception"].unique()
print(f"{len(rp3beta)} exceptions for RP3betaRecommender")

2 exceptions for RP3betaRecommender


In [177]:
rp3beta[1]

'Traceback (most recent call last):\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 402, in _objective_function\n    result_dict, result_string, recommender_instance, train_time, evaluation_time = self._evaluate_on_validation(current_fit_parameters_dict)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/RandomSearch.py", line 50, in _evaluate_on_validation\n    current_fit_parameters\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 293, in _evaluate_on_validation\n    recommender_instance, train_time = self._fit_model(current_fit_parameters)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/ParameterTuning/SearchAbstractClass.py", line 283, in _fit_model\n    **current_fit_parameters)\n  File "/home/shared/reczilla/RecSys2019_DeepLearning_Evaluation/GraphBased/RP3betaRecommender.py", line 148, in fit\n    self.W_s

**CONCLUSION**:
- ~100 ValueError instances occur with rp3beta. 

# Number of Params per dataset

In [50]:

# only keep rows with some non-NAN metrics, and without an exception
metric_list = [c for c in df_expt.columns if "_metric" in c]
all_na_metrics = df_expt.loc[:, metric_list].isna().all(axis=1)


df_expt_clean = df_expt.loc[df_expt["exception"].isna() & ~all_na_metrics, :]

knn_rows = df_expt_clean["alg_name"].str.contains("KNN")

knn_basename = df_expt_clean.loc[knn_rows, "alg_name"].apply(lambda x: x.split("_")[0])
knn_sim = df_expt_clean.loc[knn_rows, "alg_name"].apply(lambda x: x.split("_")[1])
df_expt_clean.loc[knn_rows, "alg_name"] = knn_basename  # either UserKNN or ItemKNN


num_samples = df_expt_clean.groupby(["alg_name", "dataset_name"]).size().rename("count").reset_index()

#### add row for each dataset_name and alg_name, and add these to the num_samples df. use outer merge to make sure all combos are included
## get a complete list of all algs and datasets

all_datasets = list(df_expt_clean["dataset_name"].unique())
all_algs = list(df_expt_clean["alg_name"].unique())
print(f"found {len(all_datasets)} datasets and {len(all_algs)} algs")

num_samples = num_samples.merge(pd.DataFrame(
    {
        "alg_name": alg_list_tmp,
        "dataset_name": dataset_list_tmp,
        }), how="outer")

# set all new rows (with no samples) to zero
num_samples.loc[num_samples["count"].isna(), "count"] = 0

# add a col for max samples:
num_samples.loc[:, "max_samples"] = 100
num_samples.loc[num_samples["alg_name"].isin(one_sample_algs), "max_samples"] = 1




found 85 datasets and 18 algs


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [54]:
# which alg-dataset combinations have fewer than 100 samples?
num_samples.loc[num_samples["count"] < num_samples["max_samples"], :].sort_values(by="count", ascending=True)

Unnamed: 0,alg_name,dataset_name,count,max_samples
1529,NMFRecommender,AnimeReader,0.0,100
1481,P3alphaRecommender,AmazonKitchenDiningReader,0.0,100
1480,P3alphaRecommender,GowallaReader,0.0,100
1479,P3alphaRecommender,AmazonClothingShoesJewelryReader,0.0,100
1478,CoClustering,YahooMusicReader,0.0,100
1477,CoClustering,AmazonClothingShoesJewelryReader,0.0,100
1476,CoClustering,NetflixPrizeReader,0.0,100
1475,SLIM_BPR_Cython,YahooMusicReader,0.0,100
1474,SLIM_BPR_Cython,DatingReader,0.0,100
1473,SLIM_BPR_Cython,AmazonClothingShoesJewelryReader,0.0,100


In [64]:
# get only the "bad" results -- where < 10 and max_samples = 100
bad_examples = num_samples.loc[(num_samples["count"] < 9) & (num_samples["max_samples"] == 100), :].sort_values(by="count", ascending=True)

bad_examples = bad_examples.groupby("alg_name").size().rename("num_bad_datasets").reset_index()
# bad_examples.loc[:, "total_datasets"] = len(df_expt["dataset_name"].unique())
bad_examples.loc[:, "frac_bad_datasets"] = bad_examples["num_bad_datasets"] / len(df_expt_clean["dataset_name"].unique())
bad_examples.sort_values("frac_bad_datasets")

Unnamed: 0,alg_name,num_bad_datasets,frac_bad_datasets
3,ItemKNNCF,1,0.011765
13,UserKNNCF,2,0.023529
10,RP3betaRecommender,11,0.129412
8,P3alphaRecommender,12,0.141176
9,PureSVDRecommender,12,0.141176
5,MatrixFactorization_BPR_Cython,16,0.188235
0,CoClustering,21,0.247059
7,NMFRecommender,24,0.282353
6,MatrixFactorization_FunkSVD_Cython,25,0.294118
12,SLIM_BPR_Cython,32,0.376471


In [25]:
num_samples[num_samples["alg_name"] == "SLIM_BPR_Cython"]

Unnamed: 0,alg_name,dataset_name,count,max_samples
1179,SLIM_BPR_Cython,AmazonAllBeautyReader,100.0,100
1180,SLIM_BPR_Cython,AmazonAllElectronicsReader,100.0,100
1181,SLIM_BPR_Cython,AmazonAlternativeRockReader,100.0,100
1182,SLIM_BPR_Cython,AmazonAmazonFashionReader,100.0,100
1183,SLIM_BPR_Cython,AmazonAmazonInstantVideoReader,100.0,100
1184,SLIM_BPR_Cython,AmazonAppliancesReader,100.0,100
1185,SLIM_BPR_Cython,AmazonAppsforAndroidReader,99.0,100
1186,SLIM_BPR_Cython,AmazonAppstoreforAndroidReader,100.0,100
1187,SLIM_BPR_Cython,AmazonArtsCraftsSewingReader,100.0,100
1188,SLIM_BPR_Cython,AmazonAutomotiveReader,100.0,100


In [202]:
# now drop ease-r and NMF.. these are problematic..
# which alg-dataset combinations have fewer than 100 samples?
num_samples.loc[(num_samples["count"] < num_samples["max_samples"]) & ~num_samples["alg_name"].isin(["NMFRecommender", "EASE_R_Recommender"]), :].sort_values(by="count", ascending=True).reset_index()

Unnamed: 0,index,alg_name,dataset_name,count,max_samples
0,982,RP3betaRecommender,AnimeReader,50,100
1,991,RP3betaRecommender,Jester2Reader,50,100
2,817,P3alphaRecommender,Jester2Reader,51,100
3,808,P3alphaRecommender,AnimeReader,51,100
4,866,PureSVDRecommender,AmazonHomeImprovementReader,85,100
5,888,PureSVDRecommender,AmazonRockReader,87,100
6,870,PureSVDRecommender,AmazonJazzReader,89,100
7,877,PureSVDRecommender,AmazonMiscellaneousReader,90,100
8,502,MatrixFactorization_BPR_Cython,AmazonBooksReader,96,100
9,885,PureSVDRecommender,AmazonPopReader,96,100


In [31]:
# now look at SLIM_BPR_Cython specifically -- and look at a particular metric. to spot-check

METRIC = "test_metric_PRECISION_cut_10"

metric_list = [c for c in df_expt_clean.columns if "metric" in c]
df_expt_clean[(df_expt_clean["alg_name"] == "SLIM_BPR_Cython") & (df_expt_clean[METRIC].isna())][metric_list]

Unnamed: 0,param_asymmetric_alpha,param_symmetric,test_metric_ARHR_ALL_HITS_cut_1,test_metric_ARHR_ALL_HITS_cut_10,test_metric_ARHR_ALL_HITS_cut_15,test_metric_ARHR_ALL_HITS_cut_2,test_metric_ARHR_ALL_HITS_cut_20,test_metric_ARHR_ALL_HITS_cut_3,test_metric_ARHR_ALL_HITS_cut_30,test_metric_ARHR_ALL_HITS_cut_4,...,test_metric_USERS_IN_GT_cut_3,test_metric_USERS_IN_GT_cut_30,test_metric_USERS_IN_GT_cut_4,test_metric_USERS_IN_GT_cut_40,test_metric_USERS_IN_GT_cut_5,test_metric_USERS_IN_GT_cut_50,test_metric_USERS_IN_GT_cut_6,test_metric_USERS_IN_GT_cut_7,test_metric_USERS_IN_GT_cut_8,test_metric_USERS_IN_GT_cut_9
367,,,,,,,,,,,...,,,,,,,,,,
368,,,,,,,,,,,...,,,,,,,,,,
369,,,,,,,,,,,...,,,,,,,,,,
370,,,,,,,,,,,...,,,,,,,,,,
371,,,,,,,,,,,...,,,,,,,,,,
372,,,,,,,,,,,...,,,,,,,,,,
373,,,,,,,,,,,...,,,,,,,,,,
374,,,,,,,,,,,...,,,,,,,,,,
375,,,,,,,,,,,...,,,,,,,,,,
376,,,,,,,,,,,...,,,,,,,,,,


**CONCLUSION**:
- if we exclude the two problem algs - NMF and EASE-R - there are 30 cases where we don't get 100 samples. for these, we have at least 50 samples for all alg-dataset combos.  