{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":645643254,"defaultBranch":"master","name":"lm-evaluation-harness","ownerLogin":"polm-stability","currentUserCanPush":false,"isFork":true,"isEmpty":false,"createdAt":"2023-05-26T05:46:12.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/132827365?v=4","public":true,"private":false,"isOrgOwned":false},"refInfo":{"name":"","listCacheKey":"v0:1702889976.0","currentOid":""},"activityList":{"items":[{"before":"7b76d7dafe130edca64c20d006b98ce0f62b807d","after":"17e5b3221c3afc329f9e904d60eac8140feedf15","ref":"refs/heads/ja-tasks","pushedAt":"2024-01-09T04:49:42.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Fix marc-ja-fintan","shortMessageHtmlLink":"Fix marc-ja-fintan"}},{"before":"981eca226dcbedf054a9b1f0d895f5648f3a6a83","after":"7b76d7dafe130edca64c20d006b98ce0f62b807d","ref":"refs/heads/ja-tasks","pushedAt":"2024-01-04T05:54:07.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Add various minor fixes\n\n- add fewshot counts for tasks with configs\n- fix some comments\n- add other comments explaining some settings","shortMessageHtmlLink":"Add various minor fixes"}},{"before":"8b136b05120f689280908a2981e727fd5e3a70f4","after":"2ab54ff94375f799aa25ef515a8568ce2926a402","ref":"refs/heads/feature/task-errors","pushedAt":"2023-12-25T12:17:40.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Remove intentionally failing task","shortMessageHtmlLink":"Remove intentionally failing task"}},{"before":"936a434e1e934192805b77d15e70626904516515","after":"8b136b05120f689280908a2981e727fd5e3a70f4","ref":"refs/heads/feature/task-errors","pushedAt":"2023-12-25T12:15:05.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Change how task errors are logged\n\nThis makes the proposed changes from PR discussion.\n\n1. Exceptions not related to missing modules/imports are logged as\n warnings.\n\n2. module/import related exceptions are still logged at debug level, but\n if any of them happen there is a warning about it with instructions\n on how to show logs.","shortMessageHtmlLink":"Change how task errors are logged"}},{"before":null,"after":"936a434e1e934192805b77d15e70626904516515","ref":"refs/heads/feature/task-errors","pushedAt":"2023-12-18T08:59:36.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Do not ignore errors when loading tasks","shortMessageHtmlLink":"Do not ignore errors when loading tasks"}},{"before":null,"after":"9a7c650f7b3d1ed134ad6d921a6b01368d0bfa6b","ref":"refs/heads/docs/add-choice-metric","pushedAt":"2023-12-18T08:10:15.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Add docs on adding a multiple choice metric","shortMessageHtmlLink":"Add docs on adding a multiple choice metric"}},{"before":"24885af1495930ce9ae463ca240c73fc8ae8a3c1","after":"981eca226dcbedf054a9b1f0d895f5648f3a6a83","ref":"refs/heads/ja-tasks","pushedAt":"2023-12-14T06:42:29.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Add more alpaca tasks","shortMessageHtmlLink":"Add more alpaca tasks"}},{"before":null,"after":"24885af1495930ce9ae463ca240c73fc8ae8a3c1","ref":"refs/heads/ja-tasks","pushedAt":"2023-12-13T08:21:19.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Add set_tokenizer in jaquad","shortMessageHtmlLink":"Add set_tokenizer in jaquad"}},{"before":null,"after":"46041649b242cb8e440f04faa17545ee744f3eba","ref":"refs/heads/feature/registry-decorator","pushedAt":"2023-11-21T04:48:21.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Leave legacy registry code alone\n\nThis restores the manual creation of the registry for non-Japanese\ntasks. While it's possible to register them automatically, the mapping\nof the module or class name to the task name varies considerably, so it\nwould require a lot of special casing to get it right.","shortMessageHtmlLink":"Leave legacy registry code alone"}},{"before":"7b0b3eee1e5e14ac8aa446b5b139a34c81e35297","after":"a96bcca0bb15e4301d4be541a0647304cf82965f","ref":"refs/heads/feature/suite-runs","pushedAt":"2023-11-13T07:53:32.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Handle complicated prompts\n\nThis introduces a style for handling complex prompts and specifically\nhandles the case of JSLM Beta. This is handled by using a function that\ntakes the name of task as input. This allows for full customization\nwithout requiring details specification when actually running an eval\nsuite.\n\nThe style is simple - instead of mapping to a numeric version like 0.2,\na shortname for a prompt can map to a callable that takes the task name.\nThis allows for any kind of custom logic.\n\nThis may not be the simplest or best approach, but it required few\nchanges, keeps everything in one place, and touches nothing else in the\ncode base, so it should be easy to change later if necessary.","shortMessageHtmlLink":"Handle complicated prompts"}},{"before":"a233a8ca4c68adcfa0e064e39222b22c32727c11","after":"7b0b3eee1e5e14ac8aa446b5b139a34c81e35297","ref":"refs/heads/feature/suite-runs","pushedAt":"2023-11-13T03:19:01.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Fix link","shortMessageHtmlLink":"Fix link"}},{"before":"760b1d4e05d181ae25432bb85cc7edc117bca0e5","after":"a233a8ca4c68adcfa0e064e39222b22c32727c11","ref":"refs/heads/feature/suite-runs","pushedAt":"2023-11-10T07:53:03.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Update README, prompt docs","shortMessageHtmlLink":"Update README, prompt docs"}},{"before":null,"after":"ac901f2bb6d2a75cc33f2b6871be076e548e94d0","ref":"refs/heads/fix/ja-llama2-prompts","pushedAt":"2023-11-09T08:16:46.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Change default Llama2 prompt to Japananese\n\nCurrently the default Llama2 prompt is the default English one. However,\nsince all the tasks are in Japanese, that doesn't make a lot of sense,\nand can cause the model to answer in English.\n\nThis makes the default prompt Japanese and leaves the English prompt\ncommented out for reference.","shortMessageHtmlLink":"Change default Llama2 prompt to Japananese"}},{"before":"d194a908dac5cf9b0bc62385b31bfdffa9119e7c","after":"760b1d4e05d181ae25432bb85cc7edc117bca0e5","ref":"refs/heads/feature/suite-runs","pushedAt":"2023-11-09T08:16:20.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Rearrange files\n\nThis moves suite config handling code into the library proper instead of\nthe script, and creates a subdir for suite configs.","shortMessageHtmlLink":"Rearrange files"}},{"before":"7cbe6cf87c8fd2e3f973a422bd8f4aaebefb6282","after":"d194a908dac5cf9b0bc62385b31bfdffa9119e7c","ref":"refs/heads/feature/suite-runs","pushedAt":"2023-11-08T09:04:59.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Move prompt codes to main lib","shortMessageHtmlLink":"Move prompt codes to main lib"}},{"before":"c6b7bcbeba24633878b2b164d26d4ed50550e2e0","after":"7cbe6cf87c8fd2e3f973a422bd8f4aaebefb6282","ref":"refs/heads/feature/suite-runs","pushedAt":"2023-11-06T10:56:47.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Lint fix","shortMessageHtmlLink":"Lint fix"}},{"before":null,"after":"c6b7bcbeba24633878b2b164d26d4ed50550e2e0","ref":"refs/heads/feature/suite-runs","pushedAt":"2023-11-06T10:44:59.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"First steps to running suites\n\nThis is a barely functional wrapper for running \"test suites\", which are\njust a list of preconfigured tasks. You can specify prompt and model.\nThis needs more testing and UI cleanup.","shortMessageHtmlLink":"First steps to running suites"}},{"before":"7ddeeee9e2f8f420e63e01d998c23d36a2de06bf","after":"34e2a286d8412eb7648552ffde1f745c56c119e7","ref":"refs/heads/feature/argparse-refactor","pushedAt":"2023-11-02T06:08:46.000Z","pushType":"push","commitsCount":8,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Merge branch 'jp-stable' into feature/argparse-refactor","shortMessageHtmlLink":"Merge branch 'jp-stable' into feature/argparse-refactor"}},{"before":"25a906168ceb962ae4d7b04b50c96d095badd73b","after":"7ddeeee9e2f8f420e63e01d998c23d36a2de06bf","ref":"refs/heads/feature/argparse-refactor","pushedAt":"2023-10-25T08:38:27.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Add README section describing cluster usage","shortMessageHtmlLink":"Add README section describing cluster usage"}},{"before":"8324a7e043fbc7dc5e7499ca0c4cbdab92344687","after":"25a906168ceb962ae4d7b04b50c96d095badd73b","ref":"refs/heads/feature/argparse-refactor","pushedAt":"2023-10-24T06:17:42.000Z","pushType":"push","commitsCount":7,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Merge branch 'jp-stable' into feature/argparse-refactor","shortMessageHtmlLink":"Merge branch 'jp-stable' into feature/argparse-refactor"}},{"before":"49118739fe311438bf4ae6c4bdd60d8e4ae5cb4b","after":"8324a7e043fbc7dc5e7499ca0c4cbdab92344687","ref":"refs/heads/feature/argparse-refactor","pushedAt":"2023-10-11T08:32:31.000Z","pushType":"push","commitsCount":3,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Cleanup / linter fixes\n\nThere were some things related to the old shell script usage that\nweren't working, this should fix it.","shortMessageHtmlLink":"Cleanup / linter fixes"}},{"before":"24123d48216a9835dcc8d64a2efba33f2ca53614","after":"49118739fe311438bf4ae6c4bdd60d8e4ae5cb4b","ref":"refs/heads/feature/argparse-refactor","pushedAt":"2023-10-11T07:55:50.000Z","pushType":"push","commitsCount":16,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Merge branch 'jp-stable' into feature/argparse-refactor","shortMessageHtmlLink":"Merge branch 'jp-stable' into feature/argparse-refactor"}},{"before":"cfb1db7744444661fb6ed305306c873726df5b5a","after":"24123d48216a9835dcc8d64a2efba33f2ca53614","ref":"refs/heads/feature/argparse-refactor","pushedAt":"2023-10-11T07:55:04.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Linter fixes","shortMessageHtmlLink":"Linter fixes"}},{"before":"42491aefe32de8503562ff469100efac616dd289","after":"cfb1db7744444661fb6ed305306c873726df5b5a","ref":"refs/heads/feature/argparse-refactor","pushedAt":"2023-10-11T07:54:30.000Z","pushType":"push","commitsCount":14,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Add JCoLA task (#93)\n\n* WIP: need JCoLA\r\n\r\n* Update harness.jcola.sh\r\n\r\n* update prompt\r\n\r\n* update prompt\r\n\r\n* update prompt\r\n\r\n* update prompt\r\n\r\n* Revert \"update prompt\"\r\n\r\nThis reverts commit cd9a9148e849d22a174a8e15192989d1d4a1156d.\r\n\r\n* WIP: evaluate on JCoLA\r\n\r\n* Add new metrics to cola\r\n\r\nThis modifies cola, since jcola just inherits this part. It's not a\r\nproblem to modify the parent task because it just adds some output.\r\n\r\n* Linter edits\r\n\r\n* evaluate on JCoLA\r\n\r\n* need JCoLAWithLlama2\r\n\r\n* JCoLA's prompt version should be 0.0\r\n\r\nhttps://github.com/Stability-AI/lm-evaluation-harness/blob/jp-stable/docs/prompt_templates.md\r\n\r\n* documentation\r\n\r\njptasks.md and prompt_templates.md\r\n\r\n* won't need harness and result for JCoLA\r\n\r\n* fix linter related issue\r\n\r\n* Delete harness.jcola.sh\r\n\r\n---------\r\n\r\nCo-authored-by: Paul O'Leary McCann \r\nCo-authored-by: mkshing <33302880+mkshing@users.noreply.github.com>","shortMessageHtmlLink":"Add JCoLA task (Stability-AI#93)"}},{"before":"0dee2e6d8af24bc2f92dce27f08d99ab4c074eca","after":"e8b2ac75a213ee068afa8e28ffbee39fc7246a82","ref":"refs/heads/feature/multiple-choice-generations","pushedAt":"2023-10-11T07:39:44.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Linter fixes","shortMessageHtmlLink":"Linter fixes"}},{"before":"5f2f03ecdd87f732ab2080cd137eee3a5cba38a4","after":"0dee2e6d8af24bc2f92dce27f08d99ab4c074eca","ref":"refs/heads/feature/multiple-choice-generations","pushedAt":"2023-10-11T07:30:00.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Log multiple choice scores in details\n\nThis logs the individual log likelihood scores for basic debugging of\nmultiple choice questions. It's more useful for balanced multiple\nchoice, but it's easy to collect for general questions so I added it\nthere.","shortMessageHtmlLink":"Log multiple choice scores in details"}},{"before":"44a2b9ed2cd98b52e4228006f8a0342925622f40","after":"42491aefe32de8503562ff469100efac616dd289","ref":"refs/heads/feature/argparse-refactor","pushedAt":"2023-10-10T06:35:01.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Read webook url from env var","shortMessageHtmlLink":"Read webook url from env var"}},{"before":"b78efa1b979037365b4af59e4432b0becd1de581","after":"44a2b9ed2cd98b52e4228006f8a0342925622f40","ref":"refs/heads/feature/argparse-refactor","pushedAt":"2023-10-10T06:08:53.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Fix typo","shortMessageHtmlLink":"Fix typo"}},{"before":"a70d95f27a91a89d735674e29c18412afbfc132e","after":"b78efa1b979037365b4af59e4432b0becd1de581","ref":"refs/heads/feature/argparse-refactor","pushedAt":"2023-10-10T06:07:12.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"Add versions to tasks","shortMessageHtmlLink":"Add versions to tasks"}},{"before":"5a93648f39070ddffffe51f02ada90b3fc90499c","after":"a70d95f27a91a89d735674e29c18412afbfc132e","ref":"refs/heads/feature/argparse-refactor","pushedAt":"2023-10-10T06:02:37.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"polm-stability","name":"Paul McCann","path":"/polm-stability","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/132827365?s=80&v=4"},"commit":{"message":"task cleanup","shortMessageHtmlLink":"task cleanup"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAD2upN5wA","startCursor":null,"endCursor":null}},"title":"Activity ยท polm-stability/lm-evaluation-harness"}