diff --git a/tutorials/4-caching-persistence.ipynb b/tutorials/4-caching-persistence.ipynb index 0632d43..ede97ed 100644 --- a/tutorials/4-caching-persistence.ipynb +++ b/tutorials/4-caching-persistence.ipynb @@ -271,7 +271,7 @@ "\n", "class SparseLogRegProbaTransformer(BaseTransformer):\n", " def __init__(self):\n", - " self.estimator = LogisticRegression(penalty='l1')\n", + " self.estimator = LogisticRegression(penalty='l1', multi_class='auto', solver='liblinear')\n", " \n", " def fit(self, X, y):\n", " self.estimator.fit(X, y)\n", @@ -300,7 +300,7 @@ " input_data=['input'],\n", " adapter=Adapter({'X': E('input', 'text')}),\n", " experiment_directory=EXPERIMENT_DIR_A,\n", - " is_trainable=True)\n", + " is_fittable=True)\n", "\n", "tfidf_step = Step(name='TF-IDF',\n", " transformer=StepsTfidfTransformer(),\n", @@ -308,7 +308,7 @@ " experiment_directory=EXPERIMENT_DIR_A,\n", " persist_output=True,\n", " load_persisted_output=True, # This breaks when switching from training data to val data or test data!\n", - " is_trainable=True\n", + " is_fittable=True\n", " )\n", "\n", "logreg_step = Step(name='SparseLogReg',\n", @@ -319,7 +319,7 @@ " 'y': E('input', 'label')\n", " }),\n", " experiment_directory=EXPERIMENT_DIR_A,\n", - " is_trainable=True)" + " is_fittable=True)" ] }, { @@ -366,7 +366,7 @@ "outputs": [], "source": [ "# Bug workaround: manually delete saved output when switching datasets\n", - "os.remove(os.path.join(EXPERIMENT_DIR_A, 'outputs', 'TF-IDF'))\n", + "os.remove(tfidf_step.experiment_directory_output_step)\n", "preds_linear_val = logreg_step.transform(data_val)" ] }, @@ -436,7 +436,7 @@ " 'y': E('input', 'label')\n", " }),\n", " experiment_directory=EXPERIMENT_DIR_A,\n", - " is_trainable=True)" + " is_fittable=True)" ] }, { @@ -462,8 +462,7 @@ "outputs": [], "source": [ "# Bug workaround: manually delete saved output when switching datasets\n", - "os.remove(os.path.join(EXPERIMENT_DIR_A, 'outputs', 'TF-IDF'))\n", - "\n", + "os.remove(tfidf_step.experiment_directory_output_step)\n", "preds_rf_fit = rf_step.fit_transform(data_fit)" ] }, @@ -484,8 +483,7 @@ "outputs": [], "source": [ "# Bug workaround: manually delete saved output when switching datasets\n", - "os.remove(os.path.join(EXPERIMENT_DIR_A, 'outputs', 'TF-IDF'))\n", - "\n", + "os.remove(tfidf_step.experiment_directory_output_step)\n", "preds_rf_val = rf_step.transform(data_val)" ] }, @@ -552,7 +550,7 @@ " 'y_proba_2': E('RF', 'y_proba'),\n", " }),\n", " experiment_directory=EXPERIMENT_DIR_A,\n", - " is_trainable=True)" + " is_fittable=True)" ] }, { @@ -577,10 +575,10 @@ "metadata": {}, "outputs": [], "source": [ - "os.remove(os.path.join(EXPERIMENT_DIR_A, 'outputs', 'TF-IDF')) # Bug workaround: manually delete saved output when switching datasets\n", + "os.remove(tfidf_step.experiment_directory_output_step) # Bug workaround: manually delete saved output when switching datasets\n", "preds_ens_val = ens_step.fit_transform(data_val)\n", "\n", - "os.remove(os.path.join(EXPERIMENT_DIR_A, 'outputs', 'TF-IDF')) # Bug workaround: manually delete saved output when switching datasets\n", + "os.remove(tfidf_step.experiment_directory_output_step) # Bug workaround: manually delete saved output when switching datasets\n", "preds_ens_test = ens_step.transform(data_test)" ] }, @@ -622,14 +620,15 @@ " input_data=['input'],\n", " adapter=Adapter({'X': E('input', 'text')}),\n", " experiment_directory=EXPERIMENT_DIR_B,\n", - " is_trainable=True)\n", + " is_fittable=True)\n", "\n", "new_tfidf_step = Step(name='TF-IDF',\n", " transformer=StepsTfidfTransformer(),\n", " input_steps=[new_count_vec_step], \n", " experiment_directory=EXPERIMENT_DIR_B,\n", " cache_output=True,\n", - " is_trainable=True)\n", + " force_fitting=False,\n", + " is_fittable=True)\n", "\n", "new_logreg_step = Step(name='SparseLogReg',\n", " transformer=SparseLogRegProbaTransformer(),\n", @@ -639,7 +638,7 @@ " 'y': E('input', 'label')\n", " }),\n", " experiment_directory=EXPERIMENT_DIR_B,\n", - " is_trainable=True)\n", + " is_fittable=True)\n", "\n", "new_rf_step = Step(name='RF',\n", " transformer=RfClfTransformer(n_estimators=200, max_depth=8),\n", @@ -649,7 +648,7 @@ " 'y': E('input', 'label')\n", " }),\n", " experiment_directory=EXPERIMENT_DIR_B,\n", - " is_trainable=True)\n", + " is_fittable=True)\n", "\n", "new_ens_step = Step(name='Ensembler',\n", " transformer=AvgTransformer(),\n", @@ -658,7 +657,7 @@ " 'y_proba_2': E('RF', 'y_proba')\n", " }),\n", " experiment_directory=EXPERIMENT_DIR_B,\n", - " is_trainable=True)" + " is_fittable=True)" ] }, { @@ -676,9 +675,8 @@ "metadata": {}, "outputs": [], "source": [ - "new_ens_step.clean_cache()\n", - "new_preds_ens_fit = new_ens_step.fit_transform(data_fit)\n", - "new_ens_step.clean_cache()" + "new_ens_step.clean_cache_upstream()\n", + "new_preds_ens_fit = new_ens_step.fit_transform(data_fit)" ] }, { @@ -694,9 +692,8 @@ "metadata": {}, "outputs": [], "source": [ - "new_ens_step.clean_cache()\n", - "new_preds_ens_val = new_ens_step.transform(data_val)\n", - "new_ens_step.clean_cache()" + "new_ens_step.clean_cache_upstream()\n", + "new_preds_ens_val = new_ens_step.transform(data_val)" ] }, { @@ -705,9 +702,8 @@ "metadata": {}, "outputs": [], "source": [ - "new_ens_step.clean_cache()\n", - "new_preds_ens_test = new_ens_step.transform(data_test)\n", - "new_ens_step.clean_cache()" + "new_ens_step.clean_cache_upstream()\n", + "new_preds_ens_test = new_ens_step.transform(data_test)" ] }, {