Skip to content

Commit

Permalink
Suggested solution
Browse files Browse the repository at this point in the history
  • Loading branch information
samnlindsay committed Jun 24, 2020
1 parent 1384fcb commit a3db597
Showing 1 changed file with 79 additions and 7 deletions.
86 changes: 79 additions & 7 deletions settings_with_m_u.ipynb
Expand Up @@ -167,7 +167,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1) \"_I'm setting up a new job and I want to use the results of another job as default where applicable_\"\n",
"## USE CASE 1) \"_I'm setting up a new job and I want to use the results of another job as default where applicable_\"\n",
"### Update input settings with saved `m` and `u` probabilities\n",
"\n",
"Potential gotchas:\n",
Expand All @@ -184,9 +184,7 @@
"source": [
"from splink.validate import _get_default_value\n",
"\n",
"def add_saved_m_and_u(settings, spark, json_path):\n",
" \n",
" #settings = complete_settings_dict(settings, spark)\n",
"def add_saved_m_and_u(settings, json_path):\n",
" saved_params = load_params_from_json(json_path)\n",
" \n",
" for comp in settings[\"comparison_columns\"]:\n",
Expand Down Expand Up @@ -222,7 +220,7 @@
"metadata": {},
"outputs": [],
"source": [
"new_settings = add_saved_m_and_u(input_settings2, spark, \"saved_params.json\")\n",
"new_settings = add_saved_m_and_u(input_settings2, \"saved_params.json\")\n",
"new_settings"
]
},
Expand All @@ -239,7 +237,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2) \"_I'm restarting/re-running a job and want to pick up where the parameters finished_\"\n",
"## USE CASE 2) \"_I'm restarting/re-running a job and want to pick up where the parameters finished_\"\n",
"### As above but `settings` also comes from \"saved_params.json\""
]
},
Expand All @@ -252,7 +250,81 @@
"# complete input settings (default m and u probs)\n",
"saved_settings = saved_params.settings\n",
"\n",
"add_saved_m_and_u(saved_settings, spark, \"saved_params.json\")"
"add_saved_m_and_u(saved_settings, \"saved_params.json\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Possible solution\n",
"A single function that covers both use cases above (naming and coding style are provisional)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_or_update_settings(json_path, settings=None):\n",
"    \"\"\"Fill a settings dict with saved m and u probabilities.\n",
"\n",
"    Saved parameters are loaded from `json_path`. When `settings` is falsy\n",
"    (for example, omitted), the settings stored with the saved parameters\n",
"    are used instead.\n",
"\n",
"    For every comparison column whose \"gamma_<name>\" label is present in the\n",
"    saved parameters with the same number of levels, `m_probabilities` and\n",
"    `u_probabilities` are overwritten in place. If the number of levels\n",
"    differs, a message is printed and the column is left untouched.\n",
"\n",
"    Returns the settings dict that was updated.\n",
"    \"\"\"\n",
"    loaded = load_params_from_json(json_path)\n",
"    settings = settings or loaded.settings\n",
"\n",
"    for column in settings[\"comparison_columns\"]:\n",
"        # Prefer the plain column name; fall back to the custom name\n",
"        name_key = \"col_name\" if \"col_name\" in column else \"custom_name\"\n",
"        label = \"gamma_\" + column[name_key]\n",
"\n",
"        # Only look up the default when the column does not specify its own\n",
"        if \"num_levels\" in column:\n",
"            num_levels = column[\"num_levels\"]\n",
"        else:\n",
"            num_levels = _get_default_value(\"num_levels\", is_column_setting=True)\n",
"\n",
"        pi_params = loaded.params[\"π\"]\n",
"        if label not in pi_params:\n",
"            # Nothing saved for this comparison column\n",
"            continue\n",
"\n",
"        stored = pi_params[label]\n",
"        if num_levels != stored[\"num_levels\"]:\n",
"            print(f\"{label}: Saved m and u probabilities do not match the specified number of levels ({num_levels}) - default probabilities will be used\")\n",
"            continue\n",
"\n",
"        match_probs = [entry['probability'] for entry in stored[\"prob_dist_match\"].values()]\n",
"        non_match_probs = [entry['probability'] for entry in stored[\"prob_dist_non_match\"].values()]\n",
"\n",
"        column[\"m_probabilities\"] = match_probs\n",
"        column[\"u_probabilities\"] = non_match_probs\n",
"\n",
"    return settings"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# update input settings with saved parameters\n",
"get_or_update_settings(\"saved_params.json\", input_settings2)\n",
"\n",
"# get previous settings and parameters\n",
"get_or_update_settings(\"saved_params.json\")"
]
},
{
Expand Down

0 comments on commit a3db597

Please sign in to comment.