{"payload":{"header_redesign_enabled":false,"results":[{"id":"496950762","archived":false,"color":"#3572A5","followers":109,"has_funding_file":false,"hl_name":"shjwudp/c4-dataset-script","hl_trunc_description":"Inspired by google c4, here is a series of colossal clean data cleaning scripts focused on CommonCrawl data processing. Including Chinese…","language":"Python","mirror":false,"owned_by_organization":false,"public":true,"repo":{"repository":{"id":496950762,"name":"c4-dataset-script","owner_id":11439912,"owner_login":"shjwudp","updated_at":"2023-06-07T14:13:15.179Z","has_issues":true}},"sponsorable":false,"topics":["python","nlp","spark","dataset","commoncrawl","massivetext"],"type":"Public","help_wanted_issues_count":0,"good_first_issue_issues_count":0,"starred_by_current_user":false}],"type":"repositories","page":1,"page_count":1,"elapsed_millis":96,"errors":[],"result_count":1,"facets":[],"protected_org_logins":[],"topics":null,"query_id":"","logged_in":false,"sign_up_path":"/signup?source=code_search_results","sign_in_path":"/login?return_to=https%3A%2F%2Fgithub.com%2Fsearch%3Fq%3Drepo%253Ashjwudp%252Fc4-dataset-script%2B%2Blanguage%253APython","metadata":null,"csrf_tokens":{"/shjwudp/c4-dataset-script/star":{"post":"dk2tVOb2_8MwkcBW0NK918LcrygqURWdrEP1RG6izeYSfnxg3MxtF0x80F4pqgsvOR1KgYeuuVuS3qFakkHIKg"},"/shjwudp/c4-dataset-script/unstar":{"post":"cXCBtS-G4Xa850B8Ye7f9a5pbFBxQr13fjU4ndCcAZGXWUj66TOyPR8MRla62t4_LGb7L2hM2Sl-eiP6c8gEEA"},"/sponsors/batch_deferred_sponsor_buttons":{"post":"W1LPBQVqmPeVJL_thPFU5pkaLk8VW9A1RLI1MHLf7nVNnp_lT-bcCcKXS4RMaGzX9TKuPUNClmvFATCTPoVvzg"}}},"title":"Repository search results"}