{"payload":{"header_redesign_enabled":false,"results":[{"id":"730474014","archived":false,"color":"#3572A5","followers":23,"has_funding_file":false,"hl_name":"liziniu/policy_optimization","hl_trunc_description":"Code for Paper (Policy Optimization in RLHF: The Impact of Out-of-preference Data)","language":"Python","mirror":false,"owned_by_organization":false,"public":true,"repo":{"repository":{"id":730474014,"name":"policy_optimization","owner_id":33521885,"owner_login":"liziniu","updated_at":"2023-12-19T05:01:01.500Z","has_issues":true}},"sponsorable":false,"topics":["bandit","stochastic-approximation","policy-optimization","large-language-models","rlhf"],"type":"Public","help_wanted_issues_count":0,"good_first_issue_issues_count":0,"starred_by_current_user":false}],"type":"repositories","page":1,"page_count":1,"elapsed_millis":66,"errors":[],"result_count":1,"facets":[],"protected_org_logins":[],"topics":null,"query_id":"","logged_in":false,"sign_up_path":"/signup?source=code_search_results","sign_in_path":"/login?return_to=https%3A%2F%2Fgithub.com%2Fsearch%3Fq%3Drepo%253Aliziniu%252Fpolicy_optimization%2B%2Blanguage%253APython","metadata":null,"csrf_tokens":{"/liziniu/policy_optimization/star":{"post":"2flIu6sE3Z0fzju2-_AGjlAb9h720s2R1lwz53xBSGPhLHu93Ht_2yOFc7sSSIB0cKqZjdIfAQnBkZQLBQ5CHA"},"/liziniu/policy_optimization/unstar":{"post":"aFkom6veYTc4PNwmFm2gcNEDx542b_g3tbrgUvvKHFuybdnLEATqT8oSzKez0BkMO6BjfNWMIzsoq0hmbLlOwQ"},"/sponsors/batch_deferred_sponsor_buttons":{"post":"JU_qr511gBtXUx6k3QSI3VgcX8V8lGdkbxXXTJ3F8sRoTy47ZjkPZK7NVcdpG571IvUf4K-Uh_LFzFJz1YAFaQ"}}},"title":"Repository search results"}