diff --git a/examples/nli/scone/scone_with_MIPRO.ipynb b/examples/nli/scone/scone_with_MIPRO.ipynb index fc649c551f..4d004af333 100644 --- a/examples/nli/scone/scone_with_MIPRO.ipynb +++ b/examples/nli/scone/scone_with_MIPRO.ipynb @@ -108,7 +108,7 @@ "if not \"dspy-ai\" in {pkg.key for pkg in pkg_resources.working_set}:\n", " !pip install -U pip\n", " !pip install dspy-ai\n", - " !pip install openai~=0.28.1\n", + " !pip install openai~=1.12\n", " !pip install -e $repo_path\n", " !pip install --upgrade cloudpickle==3.0.0\n", "\n", @@ -124,6 +124,8 @@ "with zipfile.ZipFile(cache_file_path, 'r') as zip_ref:\n", " zip_ref.extractall(\".\")\n", "\n", + "os.environ[\"DSP_NOTEBOOK_CACHEDIR\"] = f\"{os.getcwd()}/MIPRO_notebook_cache\"\n", + "\n", "import dspy\n", "import pandas as pd\n", "import glob\n", @@ -343,7 +345,10 @@ "source": [ "def load_scone(dirname):\n", " dfs = []\n", - " for filename in glob.glob(dirname + \"/*.csv\"):\n", + " filenames = ['one_not_scoped.csv', 'one_scoped.csv', 'no_negation.csv', 'one_scoped_one_not_scoped.csv', 'two_scoped.csv', 'two_not_scoped.csv']\n", + "\n", + " for filename in filenames:\n", + " filename = os.path.join(dirname, filename)\n", " df = pd.read_csv(filename, index_col=0)\n", " df['category'] = os.path.basename(filename).replace(\".csv\", \"\")\n", " dfs.append(df)\n", @@ -368,6 +373,13 @@ " return list(data_df.apply(as_example, axis=1).values)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 6, @@ -425,9 +437,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 116 / 200 (58.0): 100%|██████████| 200/200 [00:00<00:00, 732.35it/s]\n", - "Average Metric: 99 / 200 (49.5): 100%|██████████| 200/200 [00:00<00:00, 899.25it/s]\n", - "Average Metric: 110 / 200 (55.0): 100%|██████████| 200/200 [00:00<00:00, 916.33it/s]\n" + "Average Metric: 116 / 200 (58.0): 100%|██████████| 200/200 [00:00<00:00, 907.05it/s]\n", + "Average Metric: 99 / 200 (49.5): 100%|██████████| 200/200 [00:00<00:00, 862.68it/s]\n", + "Average Metric: 110 / 200 (55.0): 100%|██████████| 200/200 [00:00<00:00, 647.97it/s]\n" ] } ], @@ -482,6 +494,8 @@ "name": "stdout", "output_type": "stream", "text": [ + "/Users/michaelryan/Documents/School/Stanford/Research/dspy_official/dspy/examples/nli/scone/MIPRO_notebook_cache/compiler\n", + "[Example({'context': 'The cowboy did not tell other people that he fall off a horse at the competition.', 'question': 'Can we logically conclude for sure that the cowboy did not tell other people that he fall off a bronco at the competition?', 'answer': 'No', 'category': 'one_not_scoped'}) (input_keys={'context', 'question'}), Example({'context': 'The woman not crying is not wearing rings.', 'question': 'Can we logically conclude for sure that the woman not crying is not wearing jewelry?', 'answer': 'No', 'category': 'one_scoped_one_not_scoped'}) (input_keys={'context', 'question'}), Example({'context': 'The people were not trying to keep their voices down so they woke a woman who is not indoors.', 'question': 'Can we logically conclude for sure that the people were not trying to keep their voices down so they woke a lady who is not indoors?', 'answer': 'No', 'category': 'two_not_scoped'}) (input_keys={'context', 'question'}), Example({'context': 'It is a lie that the man is not listening to bluegrass.', 'question': 'Can we logically conclude for sure that it is a lie that the man is not listening to music?', 'answer': 'Yes', 'category': 'two_scoped'}) (input_keys={'context', 'question'}), Example({'context': 'There is not a single person walking in the city.', 'question': 'Can we logically conclude for sure that there is not a single African walking in the city?', 'answer': 'Yes', 'category': 'one_scoped'}) (input_keys={'context', 'question'}), Example({'context': 'The man does not dress up when listening to music.', 'question': 'Can we logically conclude for sure that the man does not dress up when listening to bluegrass?', 'answer': 'No', 'category': 'one_not_scoped'}) (input_keys={'context', 'question'}), Example({'context': 'The man is not listening to opera.', 'question': 'Can we logically conclude for sure that the man is not listening to music?', 'answer': 'No', 'category': 'one_scoped'}) (input_keys={'context', 'question'}), Example({'context': \"There is a person walking in the city when it's not dark.\", 'question': \"Can we logically conclude for sure that there is a volunteer walking in the city when it's not dark?\", 'answer': 'No', 'category': 'one_not_scoped'}) (input_keys={'context', 'question'}), Example({'context': 'The girl who is not here is not wearing any jewelry at all.', 'question': 'Can we logically conclude for sure that the girl who is not here is not wearing any rings at all?', 'answer': 'Yes', 'category': 'one_scoped_one_not_scoped'}) (input_keys={'context', 'question'}), Example({'context': 'the boy, not girl, will play an piccolo, but not for another week', 'question': 'Can we logically conclude for sure that the boy, not girl, will play an instrument, but not for another week?', 'answer': 'Yes', 'category': 'two_not_scoped'}) (input_keys={'context', 'question'})]\n", "\u001b[93m\u001b[1mWARNING: Projected Language Model (LM) Calls\u001b[0m\n", "\n", "Please be advised that based on the parameters you have set, the maximum number of LM calls is projected as follows:\n", @@ -533,17 +547,17 @@ "name": "stderr", "output_type": "stream", "text": [ - " 0%| | 1/200 [00:00<00:00, 474.84it/s]\n", - " 2%|▏ | 3/200 [00:00<00:00, 613.74it/s]\n", - " 0%| | 1/200 [00:00<00:00, 707.18it/s]\n", - " 0%| | 1/200 [00:00<00:00, 309.31it/s]\n", - " 0%| | 1/200 [00:00<00:00, 579.72it/s]\n", - " 1%| | 2/200 [00:00<00:00, 816.73it/s]\n", - " 0%| | 1/200 [00:00<00:00, 602.37it/s]\n", + " 0%| | 1/200 [00:00<00:00, 650.99it/s]\n", + " 2%|▏ | 3/200 [00:00<00:00, 1005.35it/s]\n", " 0%| | 1/200 [00:00<00:00, 750.19it/s]\n", + " 0%| | 1/200 [00:00<00:00, 686.69it/s]\n", + " 0%| | 1/200 [00:00<00:00, 488.96it/s]\n", + " 1%| | 2/200 [00:00<00:00, 738.24it/s]\n", + " 0%| | 1/200 [00:00<00:00, 762.74it/s]\n", + " 0%| | 1/200 [00:00<00:00, 409.00it/s]\n", "/opt/miniconda3/envs/opt-prompt/lib/python3.11/site-packages/optuna/samplers/_tpe/sampler.py:295: ExperimentalWarning: ``multivariate`` option is an experimental feature. The interface can change in the future.\n", " warnings.warn(\n", - "[I 2024-06-20 22:06:46,767] A new study created in memory with name: no-name-1fe4456d-49ca-4355-9643-2e9660e5f63e\n" + "[I 2024-06-21 01:05:59,906] A new study created in memory with name: no-name-361c2f94-d354-463c-bbbe-c93d529a0192\n" ] }, { @@ -1267,7 +1281,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 13 / 25 (52.0): 100%|██████████| 25/25 [00:00<00:00, 1054.67it/s]\n" + "Average Metric: 13 / 25 (52.0): 100%|██████████| 25/25 [00:00<00:00, 654.31it/s]\n" ] }, { @@ -1281,7 +1295,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 834.69it/s]\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 1175.20it/s]" ] }, { @@ -1387,7 +1401,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 113 / 200 (56.5): 100%|██████████| 200/200 [00:00<00:00, 662.89it/s]\n" + "\n", + "Average Metric: 113 / 200 (56.5): 100%|██████████| 200/200 [00:00<00:00, 587.61it/s]\n" ] }, { @@ -1401,8 +1416,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 98 / 200 (49.0): 100%|██████████| 200/200 [00:00<00:00, 773.80it/s]\n", - "[I 2024-06-20 22:06:47,424] Trial 0 finished with value: 52.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 2}. Best is trial 0 with value: 52.0.\n" + "Average Metric: 98 / 200 (49.0): 100%|██████████| 200/200 [00:00<00:00, 660.76it/s]\n", + "[I 2024-06-21 01:06:00,667] Trial 0 finished with value: 52.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 2}. Best is trial 0 with value: 52.0.\n" ] }, { @@ -1422,7 +1437,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 11 / 25 (44.0): 100%|██████████| 25/25 [00:00<00:00, 1530.19it/s]\n" + "Average Metric: 11 / 25 (44.0): 100%|██████████| 25/25 [00:00<00:00, 712.57it/s]\n" ] }, { @@ -1436,8 +1451,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 441.13it/s]\n", - "[I 2024-06-20 22:06:47,470] Trial 1 finished with value: 44.0 and parameters: {'0_predictor_instruction': 6, '0_predictor_demos': 2}. Best is trial 0 with value: 52.0.\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 251.76it/s]\n", + "[I 2024-06-21 01:06:00,762] Trial 1 finished with value: 44.0 and parameters: {'0_predictor_instruction': 6, '0_predictor_demos': 2}. Best is trial 0 with value: 52.0.\n" ] }, { @@ -1549,7 +1564,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 14 / 25 (56.0): 100%|██████████| 25/25 [00:00<00:00, 2381.12it/s]\n" + "Average Metric: 14 / 25 (56.0): 100%|██████████| 25/25 [00:00<00:00, 500.19it/s]\n" ] }, { @@ -1563,8 +1578,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 590.66it/s]\n", - "[I 2024-06-20 22:06:47,520] Trial 2 finished with value: 56.0 and parameters: {'0_predictor_instruction': 8, '0_predictor_demos': 6}. Best is trial 2 with value: 56.0.\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 76.44it/s]\n", + "[I 2024-06-21 01:06:00,877] Trial 2 finished with value: 56.0 and parameters: {'0_predictor_instruction': 8, '0_predictor_demos': 6}. Best is trial 2 with value: 56.0.\n" ] }, { @@ -1676,7 +1691,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 17 / 25 (68.0): 100%|██████████| 25/25 [00:00<00:00, 382.65it/s] \n" + "Average Metric: 17 / 25 (68.0): 100%|██████████| 25/25 [00:00<00:00, 1802.73it/s]\n" ] }, { @@ -1690,8 +1705,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 181.20it/s]\n", - "[I 2024-06-20 22:06:47,625] Trial 3 finished with value: 68.0 and parameters: {'0_predictor_instruction': 4, '0_predictor_demos': 5}. Best is trial 3 with value: 68.0.\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 71.35it/s]\n", + "[I 2024-06-21 01:06:01,019] Trial 3 finished with value: 68.0 and parameters: {'0_predictor_instruction': 4, '0_predictor_demos': 5}. Best is trial 3 with value: 68.0.\n" ] }, { @@ -1803,7 +1818,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 14 / 25 (56.0): 100%|██████████| 25/25 [00:00<00:00, 1328.35it/s]\n" + "Average Metric: 14 / 25 (56.0): 100%|██████████| 25/25 [00:00<00:00, 1270.00it/s]\n" ] }, { @@ -1817,8 +1832,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 568.87it/s]\n", - "[I 2024-06-20 22:06:47,675] Trial 4 finished with value: 56.0 and parameters: {'0_predictor_instruction': 3, '0_predictor_demos': 8}. Best is trial 3 with value: 68.0.\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 280.50it/s]\n", + "[I 2024-06-21 01:06:01,124] Trial 4 finished with value: 56.0 and parameters: {'0_predictor_instruction': 3, '0_predictor_demos': 8}. Best is trial 3 with value: 68.0.\n" ] }, { @@ -1930,7 +1945,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 1372.19it/s]\n" + "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 903.04it/s]\n" ] }, { @@ -1944,8 +1959,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 293.27it/s]\n", - "[I 2024-06-20 22:06:47,724] Trial 5 finished with value: 72.0 and parameters: {'0_predictor_instruction': 2, '0_predictor_demos': 3}. Best is trial 5 with value: 72.0.\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 139.84it/s]\n", + "[I 2024-06-21 01:06:01,205] Trial 5 finished with value: 72.0 and parameters: {'0_predictor_instruction': 2, '0_predictor_demos': 3}. Best is trial 5 with value: 72.0.\n" ] }, { @@ -2057,7 +2072,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 14 / 25 (56.0): 100%|██████████| 25/25 [00:00<00:00, 778.21it/s]\n" + "Average Metric: 14 / 25 (56.0): 100%|██████████| 25/25 [00:00<00:00, 938.26it/s]\n" ] }, { @@ -2071,8 +2086,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 587.44it/s]\n", - "[I 2024-06-20 22:06:47,784] Trial 6 finished with value: 56.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 5}. Best is trial 5 with value: 72.0.\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 282.88it/s]\n", + "[I 2024-06-21 01:06:01,283] Trial 6 finished with value: 56.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 5}. Best is trial 5 with value: 72.0.\n" ] }, { @@ -2184,7 +2199,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 14 / 25 (56.0): 100%|██████████| 25/25 [00:00<00:00, 1888.78it/s]\n" + "Average Metric: 14 / 25 (56.0): 100%|██████████| 25/25 [00:00<00:00, 1269.37it/s]\n" ] }, { @@ -2198,8 +2213,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 319.69it/s]\n", - "[I 2024-06-20 22:06:47,831] Trial 7 finished with value: 56.0 and parameters: {'0_predictor_instruction': 7, '0_predictor_demos': 4}. Best is trial 5 with value: 72.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 326.99it/s]\n", + "[I 2024-06-21 01:06:01,363] Trial 7 finished with value: 56.0 and parameters: {'0_predictor_instruction': 7, '0_predictor_demos': 4}. Best is trial 5 with value: 72.0.\n" ] }, { @@ -2311,7 +2326,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 1093.36it/s]\n" + "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 907.74it/s]\n" ] }, { @@ -2325,8 +2340,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 405.17it/s]\n", - "[I 2024-06-20 22:06:47,882] Trial 8 finished with value: 72.0 and parameters: {'0_predictor_instruction': 0, '0_predictor_demos': 7}. Best is trial 5 with value: 72.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 448.83it/s]\n", + "[I 2024-06-21 01:06:01,431] Trial 8 finished with value: 72.0 and parameters: {'0_predictor_instruction': 0, '0_predictor_demos': 7}. Best is trial 5 with value: 72.0.\n" ] }, { @@ -2438,7 +2453,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 20 / 25 (80.0): 100%|██████████| 25/25 [00:00<00:00, 374.52it/s] \n" + "Average Metric: 20 / 25 (80.0): 100%|██████████| 25/25 [00:00<00:00, 846.48it/s] \n" ] }, { @@ -2452,8 +2467,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 257.40it/s]\n", - "[I 2024-06-20 22:06:47,997] Trial 9 finished with value: 80.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 116.36it/s]\n", + "[I 2024-06-21 01:06:01,509] Trial 9 finished with value: 80.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -2565,7 +2580,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 19 / 25 (76.0): 100%|██████████| 25/25 [00:00<00:00, 1396.18it/s]\n" + "Average Metric: 19 / 25 (76.0): 100%|██████████| 25/25 [00:00<00:00, 202.29it/s]\n" ] }, { @@ -2579,7 +2594,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 459.95it/s]\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 315.15it/s]\n" ] }, { @@ -2685,7 +2700,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 139 / 200 (69.5): 100%|██████████| 200/200 [00:00<00:00, 853.68it/s]\n" + "Average Metric: 139 / 200 (69.5): 100%|██████████| 200/200 [00:00<00:00, 691.92it/s]\n" ] }, { @@ -2699,8 +2714,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 128 / 200 (64.0): 100%|██████████| 200/200 [00:00<00:00, 738.31it/s]\n", - "[I 2024-06-20 22:06:48,603] Trial 10 finished with value: 76.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 9}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 128 / 200 (64.0): 100%|██████████| 200/200 [00:00<00:00, 643.31it/s]\n", + "[I 2024-06-21 01:06:02,325] Trial 10 finished with value: 76.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 9}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -2720,7 +2735,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 1535.25it/s]\n" + "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 1546.18it/s]\n" ] }, { @@ -2734,8 +2749,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 725.16it/s]\n", - "[I 2024-06-20 22:06:48,642] Trial 11 finished with value: 72.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 9}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 516.29it/s]\n", + "[I 2024-06-21 01:06:02,377] Trial 11 finished with value: 72.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 9}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -2847,7 +2862,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 20 / 25 (80.0): 100%|██████████| 25/25 [00:00<00:00, 1685.22it/s]\n" + "Average Metric: 20 / 25 (80.0): 100%|██████████| 25/25 [00:00<00:00, 1216.49it/s]\n" ] }, { @@ -2861,8 +2876,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 521.68it/s]\n", - "[I 2024-06-20 22:06:48,684] Trial 12 finished with value: 80.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 739.61it/s]\n", + "[I 2024-06-21 01:06:02,442] Trial 12 finished with value: 80.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -2974,7 +2989,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 15 / 25 (60.0): 100%|██████████| 25/25 [00:00<00:00, 3042.88it/s]\n" + "Average Metric: 15 / 25 (60.0): 100%|██████████| 25/25 [00:00<00:00, 1431.84it/s]\n" ] }, { @@ -2988,8 +3003,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 1492.63it/s]\n", - "[I 2024-06-20 22:06:48,780] Trial 13 finished with value: 60.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 896.79it/s]\n", + "[I 2024-06-21 01:06:02,490] Trial 13 finished with value: 60.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -3101,7 +3116,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 14 / 25 (56.0): 100%|██████████| 25/25 [00:00<00:00, 1288.13it/s]\n" + "Average Metric: 14 / 25 (56.0): 100%|██████████| 25/25 [00:00<00:00, 850.72it/s]\n" ] }, { @@ -3115,8 +3130,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 739.08it/s]\n", - "[I 2024-06-20 22:06:48,824] Trial 14 finished with value: 56.0 and parameters: {'0_predictor_instruction': 4, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 398.24it/s]\n", + "[I 2024-06-21 01:06:02,653] Trial 14 finished with value: 56.0 and parameters: {'0_predictor_instruction': 4, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -3228,7 +3243,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 14 / 25 (56.0): 100%|██████████| 25/25 [00:00<00:00, 1826.18it/s]\n" + "Average Metric: 14 / 25 (56.0): 100%|██████████| 25/25 [00:00<00:00, 743.10it/s]\n" ] }, { @@ -3242,8 +3257,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 388.18it/s]\n", - "[I 2024-06-20 22:06:48,866] Trial 15 finished with value: 56.0 and parameters: {'0_predictor_instruction': 5, '0_predictor_demos': 0}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 117.87it/s]\n", + "[I 2024-06-21 01:06:02,724] Trial 15 finished with value: 56.0 and parameters: {'0_predictor_instruction': 5, '0_predictor_demos': 0}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -3323,7 +3338,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 17 / 25 (68.0): 100%|██████████| 25/25 [00:00<00:00, 1738.99it/s]\n" + "Average Metric: 17 / 25 (68.0): 100%|██████████| 25/25 [00:00<00:00, 967.53it/s]\n" ] }, { @@ -3337,8 +3352,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 1104.93it/s]\n", - "[I 2024-06-20 22:06:48,912] Trial 16 finished with value: 68.0 and parameters: {'0_predictor_instruction': 7, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 608.22it/s]\n", + "[I 2024-06-21 01:06:02,793] Trial 16 finished with value: 68.0 and parameters: {'0_predictor_instruction': 7, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -3450,7 +3465,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 17 / 25 (68.0): 100%|██████████| 25/25 [00:00<00:00, 1189.51it/s]\n" + "Average Metric: 17 / 25 (68.0): 100%|██████████| 25/25 [00:00<00:00, 1159.33it/s]\n" ] }, { @@ -3464,8 +3479,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 444.03it/s]\n", - "[I 2024-06-20 22:06:48,962] Trial 17 finished with value: 68.0 and parameters: {'0_predictor_instruction': 8, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 528.52it/s]\n", + "[I 2024-06-21 01:06:02,842] Trial 17 finished with value: 68.0 and parameters: {'0_predictor_instruction': 8, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -3577,7 +3592,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 385.39it/s]\n" + "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 924.62it/s]\n" ] }, { @@ -3591,8 +3606,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 205.17it/s]\n", - "[I 2024-06-20 22:06:49,060] Trial 18 finished with value: 72.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 1}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 220.07it/s]\n", + "[I 2024-06-21 01:06:02,919] Trial 18 finished with value: 72.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 1}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -3692,7 +3707,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 17 / 25 (68.0): 100%|██████████| 25/25 [00:00<00:00, 2591.19it/s]\n" + "Average Metric: 17 / 25 (68.0): 100%|██████████| 25/25 [00:00<00:00, 2253.45it/s]\n" ] }, { @@ -3706,8 +3721,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 648.57it/s]\n", - "[I 2024-06-20 22:06:49,106] Trial 19 finished with value: 68.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 8}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 285.21it/s]\n", + "[I 2024-06-21 01:06:03,017] Trial 19 finished with value: 68.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 8}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -3819,7 +3834,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 2465.96it/s]\n" + "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 1312.95it/s]\n" ] }, { @@ -3833,7 +3848,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 361.11it/s]\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 624.25it/s]\n" ] }, { @@ -3939,7 +3954,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 142 / 200 (71.0): 100%|██████████| 200/200 [00:00<00:00, 965.14it/s]\n" + "Average Metric: 142 / 200 (71.0): 100%|██████████| 200/200 [00:00<00:00, 725.50it/s]\n" ] }, { @@ -3953,8 +3968,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 130 / 200 (65.0): 100%|██████████| 200/200 [00:00<00:00, 1004.12it/s]\n", - "[I 2024-06-20 22:06:49,669] Trial 20 finished with value: 72.0 and parameters: {'0_predictor_instruction': 6, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 130 / 200 (65.0): 100%|██████████| 200/200 [00:00<00:00, 738.21it/s]\n", + "[I 2024-06-21 01:06:03,753] Trial 20 finished with value: 72.0 and parameters: {'0_predictor_instruction': 6, '0_predictor_demos': 7}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -3974,7 +3989,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 19 / 25 (76.0): 100%|██████████| 25/25 [00:00<00:00, 1511.22it/s]\n" + "Average Metric: 19 / 25 (76.0): 100%|██████████| 25/25 [00:00<00:00, 677.20it/s] \n" ] }, { @@ -3988,8 +4003,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 500.69it/s]\n", - "[I 2024-06-20 22:06:49,733] Trial 21 finished with value: 76.0 and parameters: {'0_predictor_instruction': 5, '0_predictor_demos': 9}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 525.14it/s]\n", + "[I 2024-06-21 01:06:03,818] Trial 21 finished with value: 76.0 and parameters: {'0_predictor_instruction': 5, '0_predictor_demos': 9}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -4101,7 +4116,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 19 / 25 (76.0): 100%|██████████| 25/25 [00:00<00:00, 1637.73it/s]\n" + "Average Metric: 19 / 25 (76.0): 100%|██████████| 25/25 [00:00<00:00, 887.00it/s]\n" ] }, { @@ -4115,8 +4130,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 673.03it/s]\n", - "[I 2024-06-20 22:06:49,824] Trial 22 finished with value: 76.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 4}. Best is trial 9 with value: 80.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 590.00it/s]\n", + "[I 2024-06-21 01:06:03,871] Trial 22 finished with value: 76.0 and parameters: {'0_predictor_instruction': 9, '0_predictor_demos': 4}. Best is trial 9 with value: 80.0.\n" ] }, { @@ -4228,7 +4243,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 22 / 25 (88.0): 100%|██████████| 25/25 [00:00<00:00, 1825.07it/s]\n" + "Average Metric: 22 / 25 (88.0): 100%|██████████| 25/25 [00:00<00:00, 1023.58it/s]\n" ] }, { @@ -4242,8 +4257,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 344.95it/s]\n", - "[I 2024-06-20 22:06:49,870] Trial 23 finished with value: 88.0 and parameters: {'0_predictor_instruction': 0, '0_predictor_demos': 9}. Best is trial 23 with value: 88.0.\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 156.45it/s]\n", + "[I 2024-06-21 01:06:03,944] Trial 23 finished with value: 88.0 and parameters: {'0_predictor_instruction': 0, '0_predictor_demos': 9}. Best is trial 23 with value: 88.0.\n" ] }, { @@ -4355,7 +4370,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 1318.08it/s]\n" + "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 1211.71it/s]\n" ] }, { @@ -4369,8 +4384,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 705.64it/s]\n", - "[I 2024-06-20 22:06:49,915] Trial 24 finished with value: 72.0 and parameters: {'0_predictor_instruction': 0, '0_predictor_demos': 9}. Best is trial 23 with value: 88.0.\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 429.13it/s]\n", + "[I 2024-06-21 01:06:04,004] Trial 24 finished with value: 72.0 and parameters: {'0_predictor_instruction': 0, '0_predictor_demos': 9}. Best is trial 23 with value: 88.0.\n" ] }, { @@ -4482,7 +4497,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 1296.35it/s]\n" + "Average Metric: 18 / 25 (72.0): 100%|██████████| 25/25 [00:00<00:00, 1147.59it/s]\n" ] }, { @@ -4496,8 +4511,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 654.95it/s]\n", - "[I 2024-06-20 22:06:49,959] Trial 25 finished with value: 72.0 and parameters: {'0_predictor_instruction': 3, '0_predictor_demos': 7}. Best is trial 23 with value: 88.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 248.52it/s]\n", + "[I 2024-06-21 01:06:04,177] Trial 25 finished with value: 72.0 and parameters: {'0_predictor_instruction': 3, '0_predictor_demos': 7}. Best is trial 23 with value: 88.0.\n" ] }, { @@ -4609,7 +4624,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 17 / 25 (68.0): 100%|██████████| 25/25 [00:00<00:00, 1512.42it/s]\n" + "Average Metric: 17 / 25 (68.0): 100%|██████████| 25/25 [00:00<00:00, 1022.33it/s]\n" ] }, { @@ -4623,8 +4638,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 567.56it/s]\n", - "[I 2024-06-20 22:06:50,004] Trial 26 finished with value: 68.0 and parameters: {'0_predictor_instruction': 0, '0_predictor_demos': 3}. Best is trial 23 with value: 88.0.\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 216.73it/s]\n", + "[I 2024-06-21 01:06:04,245] Trial 26 finished with value: 68.0 and parameters: {'0_predictor_instruction': 0, '0_predictor_demos': 3}. Best is trial 23 with value: 88.0.\n" ] }, { @@ -4736,7 +4751,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 9 / 25 (36.0): 100%|██████████| 25/25 [00:00<00:00, 2533.40it/s]\n" + "Average Metric: 9 / 25 (36.0): 100%|██████████| 25/25 [00:00<00:00, 1336.67it/s]\n" ] }, { @@ -4750,8 +4765,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 376.37it/s]\n", - "[I 2024-06-20 22:06:50,108] Trial 27 finished with value: 36.0 and parameters: {'0_predictor_instruction': 0, '0_predictor_demos': 6}. Best is trial 23 with value: 88.0.\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 53.79it/s]\n", + "[I 2024-06-21 01:06:04,322] Trial 27 finished with value: 36.0 and parameters: {'0_predictor_instruction': 0, '0_predictor_demos': 6}. Best is trial 23 with value: 88.0.\n" ] }, { @@ -4863,7 +4878,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 16 / 25 (64.0): 100%|██████████| 25/25 [00:00<00:00, 1399.84it/s]\n" + "Average Metric: 16 / 25 (64.0): 100%|██████████| 25/25 [00:00<00:00, 1755.88it/s]\n" ] }, { @@ -4877,8 +4892,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 259.53it/s]\n", - "[I 2024-06-20 22:06:50,159] Trial 28 finished with value: 64.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 0}. Best is trial 23 with value: 88.0.\n" + "Average Metric: 0 / 1 (0.0): 100%|██████████| 1/1 [00:00<00:00, 164.77it/s]\n", + "[I 2024-06-21 01:06:04,385] Trial 28 finished with value: 64.0 and parameters: {'0_predictor_instruction': 1, '0_predictor_demos': 0}. Best is trial 23 with value: 88.0.\n" ] }, { @@ -4958,7 +4973,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 11 / 25 (44.0): 100%|██████████| 25/25 [00:00<00:00, 1300.74it/s]\n" + "Average Metric: 11 / 25 (44.0): 100%|██████████| 25/25 [00:00<00:00, 1164.42it/s]\n" ] }, { @@ -4972,8 +4987,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 155.67it/s]\n", - "[I 2024-06-20 22:06:50,219] Trial 29 finished with value: 44.0 and parameters: {'0_predictor_instruction': 0, '0_predictor_demos': 2}. Best is trial 23 with value: 88.0.\n" + "Average Metric: 1 / 1 (100.0): 100%|██████████| 1/1 [00:00<00:00, 499.80it/s]\n", + "[I 2024-06-21 01:06:04,442] Trial 29 finished with value: 44.0 and parameters: {'0_predictor_instruction': 0, '0_predictor_demos': 2}. Best is trial 23 with value: 88.0.\n" ] }, { @@ -5099,6 +5114,7 @@ " # Compile\n", " eval_kwargs = dict(num_threads=16, display_progress=True, display_table=0)\n", " teleprompter = MIPROv2(prompt_model=prompt_model, task_model=task_model, metric=metric, num_candidates=N, init_temperature=temperature, verbose=True)\n", + " print(trainset[:10])\n", " compiled_program = teleprompter.compile(program, trainset=trainset, valset=valset, num_batches=batches, max_bootstrapped_demos=1,max_labeled_demos=2, eval_kwargs=eval_kwargs)" ] }, @@ -5123,9 +5139,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 142 / 200 (71.0): 100%|██████████| 200/200 [00:00<00:00, 839.20it/s]\n", - "Average Metric: 130 / 200 (65.0): 100%|██████████| 200/200 [00:00<00:00, 720.08it/s]\n", - "Average Metric: 141 / 200 (70.5): 100%|██████████| 200/200 [00:00<00:00, 900.15it/s]\n" + "Average Metric: 142 / 200 (71.0): 100%|██████████| 200/200 [00:00<00:00, 792.69it/s]\n", + "Average Metric: 130 / 200 (65.0): 100%|██████████| 200/200 [00:00<00:00, 665.92it/s]\n", + "Average Metric: 141 / 200 (70.5): 100%|██████████| 200/200 [00:00<00:00, 818.67it/s]\n" ] } ], diff --git a/examples/qa/hotpot/hotpotqa_with_MIPRO.ipynb b/examples/qa/hotpot/hotpotqa_with_MIPRO.ipynb index fa6c32a155..883a5d8124 100644 --- a/examples/qa/hotpot/hotpotqa_with_MIPRO.ipynb +++ b/examples/qa/hotpot/hotpotqa_with_MIPRO.ipynb @@ -108,7 +108,7 @@ "if not \"dspy-ai\" in {pkg.key for pkg in pkg_resources.working_set}:\n", " !pip install -U pip\n", " !pip install dspy-ai\n", - " !pip install openai~=0.28.1\n", + " !pip install openai~=1.12\n", " !pip install -e $repo_path\n", " !pip install --upgrade cloudpickle==3.0.0\n", "\n", @@ -403,8 +403,12 @@ "name": "stderr", "output_type": "stream", "text": [ - "Average Metric: 177 / 500 (35.4): 100%|██████████| 500/500 [00:04<00:00, 108.21it/s]\n", - "Average Metric: 191 / 500 (38.2): 100%|██████████| 500/500 [00:04<00:00, 105.17it/s]\n" + "/opt/miniconda3/envs/opt-prompt/lib/python3.11/site-packages/joblib/memory.py:655: JobLibCollisionWarning: Possible name collisions between functions 'send_hfvllm_request_v01_wrapped' (/Users/michaelryan/Documents/School/Stanford/Research/dspy_official/dspy/dsp/modules/hf_client.py:-1) and 'send_hfvllm_request_v01_wrapped' (/Users/michaelryan/Documents/School/Stanford/Research/dspy_official/dspy/dsp/modules/hf_client.py:248)\n", + " return self._cached_call(args, kwargs)[0]\n", + " 0%| | 0/500 [00:00