{"payload":{"header_redesign_enabled":false,"results":[{"id":"762146531","archived":false,"color":"#3572A5","followers":1,"has_funding_file":false,"hl_name":"li-plus/nanoRLHF","hl_trunc_description":"Train a tiny LLaMA model from scratch to repeat your words using Reinforcement Learning from Human Feedback (RLHF)","language":"Python","mirror":false,"owned_by_organization":false,"public":true,"repo":{"repository":{"id":762146531,"name":"nanoRLHF","owner_id":39846316,"owner_login":"li-plus","updated_at":"2024-05-23T07:30:55.431Z","has_issues":true}},"sponsorable":false,"topics":["reinforcement-learning","deep-reinforcement-learning","llama","ppo","llm","rlhf"],"type":"Public","help_wanted_issues_count":0,"good_first_issue_issues_count":0,"starred_by_current_user":false}],"type":"repositories","page":1,"page_count":1,"elapsed_millis":93,"errors":[],"result_count":1,"facets":[],"protected_org_logins":[],"topics":null,"query_id":"","logged_in":false,"sign_up_path":"/signup?source=code_search_results","sign_in_path":"/login?return_to=https%3A%2F%2Fgithub.com%2Fsearch%3Fq%3Drepo%253Ali-plus%252FnanoRLHF%2B%2Blanguage%253APython","metadata":null,"csrf_tokens":{"/li-plus/nanoRLHF/star":{"post":"W0A1FycTRg1uFW_tEc99S-CjXukdzcA7baB_HUUufhL816LFJxktkKLo5CuZ7O9n2FguLr1DxaKkKCWAUyFSTA"},"/li-plus/nanoRLHF/unstar":{"post":"ulVVrsawsP-9Cbjz27OUaJnZCZy90exvrFp56_Fyuc839hcjjTHEmB-y6nzhsT7qz5SR3nhkuyizSygmY1totA"},"/sponsors/batch_deferred_sponsor_buttons":{"post":"kHGJtdb1R3tz9sv61aQcu3J3-RPKF-snjqUdIYD3Tks8diLl9V7LHniWCrNVFkEXRUoz2Hcg-dfLG5dDZbDZJA"}}},"title":"Repository search results"}