{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"warc2text-runner","owner":"hplt-project","isFork":false,"description":"Scripts for parallelized extraction of plain texts from WARC archieves. Aiming at common and reproducible extraction approach.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":1,"issueCount":4,"starsCount":3,"forksCount":0,"license":null,"participation":[1,0,1,0,2,0,1,0,0,0,4,0,2,6,0,0,0,3,2,0,1,3,0,2,0,0,3,6,2,0,0,0,0,0,0,3,2,1,12,0,0,9,8,6,3,4,1,0,4,3,12,32],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-16T17:16:22.280Z"}},{"type":"Public","name":"HPLT-WP4","owner":"hplt-project","isFork":false,"description":"Information and pipelines on WP4: language models training","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":0,"license":"Creative Commons Zero v1.0 Universal","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-22T21:06:58.000Z"}},{"type":"Public","name":"data-analytics-tool","owner":"hplt-project","isFork":false,"description":"Data Analytics Tool","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":0,"issueCount":0,"starsCount":4,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-19T10:45:21.204Z"}},{"type":"Public","name":"monotextor-slurm","owner":"hplt-project","isFork":false,"description":"Set of scripts to run monotextor-like pipeline under slurm HPCs ","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Rust","color":"#dea584"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":0,"license":"GNU General Public License v3.0","participation":[0,4,0,16,5,0,2,1,0,3,0,1,0,0,1,0,0,0,0,0,1,0,0,2,1,0,17,6,0,0,1,0,0,0,1,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-20T12:44:44.938Z"}},{"type":"Public","name":"document-aligner","owner":"hplt-project","isFork":false,"description":"tf/idf-based document aligner from Bitextor","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":1,"issueCount":0,"starsCount":0,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-19T09:27:19.873Z"}},{"type":"Public","name":"OPUS-MT-dashboard","owner":"hplt-project","isFork":true,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"PHP","color":"#4F5D95"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-09T21:50:01.907Z"}},{"type":"Public","name":"HPLT-MT-Models","owner":"hplt-project","isFork":false,"description":"This contains the configuration and scripts for HPLT MT model releases.","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":4,"forksCount":0,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,1,0,3,6,26,4,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-06T23:38:00.414Z"}},{"type":"Public","name":"monolingual-multilingual-instruction-tuning","owner":"hplt-project","isFork":false,"description":"Monolingual or Multilingual Instruction Tuning: Which Makes a Better Alpaca","topicNames":["large-language-models","instruction-tuning","instruction-data"],"topicsNotShown":0,"allTopics":["large-language-models","instruction-tuning","instruction-data"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":7,"forksCount":0,"license":null,"participation":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,1,5,0,0,0,0,0,0,0,0,0,0],"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-03-06T11:50:13.419Z"}},{"type":"Public","name":"OpusCleaner","owner":"hplt-project","isFork":false,"description":"OpusCleaner is a web interface that helps you select, clean and schedule your data for training machine translation models.","topicNames":["machine-translation","data-cleaning"],"topicsNotShown":0,"allTopics":["machine-translation","data-cleaning"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":55,"starsCount":39,"forksCount":12,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-02-19T14:28:07.263Z"}},{"type":"Public","name":"OpusTrainer","owner":"hplt-project","isFork":false,"description":"Curriculum training","topicNames":["machine-translation","training-pipeline"],"topicsNotShown":0,"allTopics":["machine-translation","training-pipeline"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":17,"starsCount":15,"forksCount":4,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-02-08T15:31:13.050Z"}},{"type":"Public","name":"OpusFilter","owner":"hplt-project","isFork":true,"description":"OpusFilter - Parallel corpus processing toolkit","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":18,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-01-03T10:44:47.950Z"}},{"type":"Public","name":"clianer","owner":"hplt-project","isFork":true,"description":"A lightweight command-line frontend to OpusCleaner","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-11-27T15:05:43.288Z"}},{"type":"Public","name":"sacremoses","owner":"hplt-project","isFork":false,"description":"Python port of Moses tokenizer, truecaser and normalizer","topicNames":["nlp","tokenizer","machine-translation"],"topicsNotShown":0,"allTopics":["nlp","tokenizer","machine-translation"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":4,"issueCount":24,"starsCount":480,"forksCount":59,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-30T16:23:53.064Z"}},{"type":"Public","name":"paracrawl-dashboard","owner":"hplt-project","isFork":false,"description":"Make-shift interface for managing Paracrawl processing and exploring its outputs","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-10-10T21:10:51.357Z"}},{"type":"Public","name":"bitextor-mt-models","owner":"hplt-project","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Shell","color":"#89e051"},"pullRequestCount":0,"issueCount":3,"starsCount":1,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-09-27T10:39:26.493Z"}},{"type":"Public","name":"ia-download","owner":"hplt-project","isFork":false,"description":"Internet archive downloader","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":1,"starsCount":2,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-15T15:42:30.836Z"}},{"type":"Public","name":"MT-winterschool-2023","owner":"hplt-project","isFork":false,"description":"","topicNames":[],"topicsNotShown":0,"allTopics":[],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-02-07T14:23:55.897Z"}}],"repositoryCount":17,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}