Skip to content
This repository has been archived by the owner on Jun 22, 2022. It is now read-only.

DOC: Updates tutorial 4 #11

Merged
1 commit merged on Oct 14, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
50 changes (23 additions & 27 deletions) in tutorials/4-caching-persistence.ipynb
Expand Up @@ -271,7 +271,7 @@
"\n",
"class SparseLogRegProbaTransformer(BaseTransformer):\n",
" def __init__(self):\n",
" self.estimator = LogisticRegression(penalty='l1')\n",
" self.estimator = LogisticRegression(penalty='l1', multi_class='auto', solver='liblinear')\n",
" \n",
" def fit(self, X, y):\n",
" self.estimator.fit(X, y)\n",
Expand Down Expand Up @@ -300,15 +300,15 @@
" input_data=['input'],\n",
" adapter=Adapter({'X': E('input', 'text')}),\n",
" experiment_directory=EXPERIMENT_DIR_A,\n",
" is_trainable=True)\n",
" is_fittable=True)\n",
"\n",
"tfidf_step = Step(name='TF-IDF',\n",
" transformer=StepsTfidfTransformer(),\n",
" input_steps=[count_vec_step], \n",
" experiment_directory=EXPERIMENT_DIR_A,\n",
" persist_output=True,\n",
" load_persisted_output=True, # This breaks when switching from training data to val data or test data!\n",
" is_trainable=True\n",
" is_fittable=True\n",
" )\n",
"\n",
"logreg_step = Step(name='SparseLogReg',\n",
Expand All @@ -319,7 +319,7 @@
" 'y': E('input', 'label')\n",
" }),\n",
" experiment_directory=EXPERIMENT_DIR_A,\n",
" is_trainable=True)"
" is_fittable=True)"
]
},
{
Expand Down Expand Up @@ -366,7 +366,7 @@
"outputs": [],
"source": [
"# Bug workaround: manually delete saved output when switching datasets\n",
"os.remove(os.path.join(EXPERIMENT_DIR_A, 'outputs', 'TF-IDF'))\n",
"os.remove(tfidf_step.experiment_directory_output_step)\n",
"preds_linear_val = logreg_step.transform(data_val)"
]
},
Expand Down Expand Up @@ -436,7 +436,7 @@
" 'y': E('input', 'label')\n",
" }),\n",
" experiment_directory=EXPERIMENT_DIR_A,\n",
" is_trainable=True)"
" is_fittable=True)"
]
},
{
Expand All @@ -462,8 +462,7 @@
"outputs": [],
"source": [
"# Bug workaround: manually delete saved output when switching datasets\n",
"os.remove(os.path.join(EXPERIMENT_DIR_A, 'outputs', 'TF-IDF'))\n",
"\n",
"os.remove(tfidf_step.experiment_directory_output_step)\n",
"preds_rf_fit = rf_step.fit_transform(data_fit)"
]
},
Expand All @@ -484,8 +483,7 @@
"outputs": [],
"source": [
"# Bug workaround: manually delete saved output when switching datasets\n",
"os.remove(os.path.join(EXPERIMENT_DIR_A, 'outputs', 'TF-IDF'))\n",
"\n",
"os.remove(tfidf_step.experiment_directory_output_step)\n",
"preds_rf_val = rf_step.transform(data_val)"
]
},
Expand Down Expand Up @@ -552,7 +550,7 @@
" 'y_proba_2': E('RF', 'y_proba'),\n",
" }),\n",
" experiment_directory=EXPERIMENT_DIR_A,\n",
" is_trainable=True)"
" is_fittable=True)"
]
},
{
Expand All @@ -577,10 +575,10 @@
"metadata": {},
"outputs": [],
"source": [
"os.remove(os.path.join(EXPERIMENT_DIR_A, 'outputs', 'TF-IDF')) # Bug workaround: manually delete saved output when switching datasets\n",
"os.remove(tfidf_step.experiment_directory_output_step) # Bug workaround: manually delete saved output when switching datasets\n",
"preds_ens_val = ens_step.fit_transform(data_val)\n",
"\n",
"os.remove(os.path.join(EXPERIMENT_DIR_A, 'outputs', 'TF-IDF')) # Bug workaround: manually delete saved output when switching datasets\n",
"os.remove(tfidf_step.experiment_directory_output_step) # Bug workaround: manually delete saved output when switching datasets\n",
"preds_ens_test = ens_step.transform(data_test)"
]
},
Expand Down Expand Up @@ -622,14 +620,15 @@
" input_data=['input'],\n",
" adapter=Adapter({'X': E('input', 'text')}),\n",
" experiment_directory=EXPERIMENT_DIR_B,\n",
" is_trainable=True)\n",
" is_fittable=True)\n",
"\n",
"new_tfidf_step = Step(name='TF-IDF',\n",
" transformer=StepsTfidfTransformer(),\n",
" input_steps=[new_count_vec_step], \n",
" experiment_directory=EXPERIMENT_DIR_B,\n",
" cache_output=True,\n",
" is_trainable=True)\n",
" force_fitting=False,\n",
" is_fittable=True)\n",
"\n",
"new_logreg_step = Step(name='SparseLogReg',\n",
" transformer=SparseLogRegProbaTransformer(),\n",
Expand All @@ -639,7 +638,7 @@
" 'y': E('input', 'label')\n",
" }),\n",
" experiment_directory=EXPERIMENT_DIR_B,\n",
" is_trainable=True)\n",
" is_fittable=True)\n",
"\n",
"new_rf_step = Step(name='RF',\n",
" transformer=RfClfTransformer(n_estimators=200, max_depth=8),\n",
Expand All @@ -649,7 +648,7 @@
" 'y': E('input', 'label')\n",
" }),\n",
" experiment_directory=EXPERIMENT_DIR_B,\n",
" is_trainable=True)\n",
" is_fittable=True)\n",
"\n",
"new_ens_step = Step(name='Ensembler',\n",
" transformer=AvgTransformer(),\n",
Expand All @@ -658,7 +657,7 @@
" 'y_proba_2': E('RF', 'y_proba')\n",
" }),\n",
" experiment_directory=EXPERIMENT_DIR_B,\n",
" is_trainable=True)"
" is_fittable=True)"
]
},
{
Expand All @@ -676,9 +675,8 @@
"metadata": {},
"outputs": [],
"source": [
"new_ens_step.clean_cache()\n",
"new_preds_ens_fit = new_ens_step.fit_transform(data_fit)\n",
"new_ens_step.clean_cache()"
"new_ens_step.clean_cache_upstream()\n",
"new_preds_ens_fit = new_ens_step.fit_transform(data_fit)"
]
},
{
Expand All @@ -694,9 +692,8 @@
"metadata": {},
"outputs": [],
"source": [
"new_ens_step.clean_cache()\n",
"new_preds_ens_val = new_ens_step.transform(data_val)\n",
"new_ens_step.clean_cache()"
"new_ens_step.clean_cache_upstream()\n",
"new_preds_ens_val = new_ens_step.transform(data_val)"
]
},
{
Expand All @@ -705,9 +702,8 @@
"metadata": {},
"outputs": [],
"source": [
"new_ens_step.clean_cache()\n",
"new_preds_ens_test = new_ens_step.transform(data_test)\n",
"new_ens_step.clean_cache()"
"new_ens_step.clean_cache_upstream()\n",
"new_preds_ens_test = new_ens_step.transform(data_test)"
]
},
{
Expand Down