{"payload":{"pageCount":1,"repositories":[{"type":"Public","name":"Flash-Attention-Softmax-N","owner":"softmax1","isFork":false,"description":"CUDA and Triton implementations of Flash Attention with SoftmaxN.","allTopics":["transformers","artificial-intelligence","attention-mechanism","deep-learning","pytorch"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":1,"issueCount":1,"starsCount":66,"forksCount":5,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-05-26T22:58:38.002Z"}},{"type":"Public","name":"llama2.c-tinystories","owner":"softmax1","isFork":true,"description":"Inference Llama 2 in one file of pure C","allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1943,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-12-20T06:08:19.869Z"}},{"type":"Public","name":"MosaicBERT-Softmax1","owner":"softmax1","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-09-23T19:41:13.440Z"}},{"type":"Public","name":"EsperBERTo","owner":"softmax1","isFork":false,"description":"A test of the Attention Is Off By One hypothesis","allTopics":["ai","llm","llm-training"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-09-16T13:30:41.885Z"}},{"type":"Public","name":"nanoGPT_softmax1","owner":"softmax1","isFork":false,"description":"An experiment using nanoGPT vs nanoGPT (softmax1) to see how it affects perplexity score","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-19T10:11:40.042Z"}},{"type":"Public","name":"nanoGPT_softmax1_reddit","owner":"softmax1","isFork":true,"description":"The simplest, fastest repository for training/finetuning medium-sized GPTs.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":5272,"license":"MIT License","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-19T10:09:37.522Z"}},{"type":"Public","name":"quietGPT","owner":"softmax1","isFork":false,"description":"A scaled down empirical study of \"Attention is Off by One\" on nanoGPT","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-08-09T09:39:48.136Z"}}],"repositoryCount":7,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}