Llm.cs - C# port of @karpathy llm.c

⭐ Please star this project if you like it. ⭐

Getting Started

Install the .NET SDK version specified in global.json.
Restore and build the project (from the directory containing Llm.sln):
```
dotnet restore
dotnet build -c Release
```
Run the project (this currently defaults to running the train "test"/verification):
```
dotnet run -c Release --project .\src\Llm\Llm.csproj
```
NOTE: The first time this is run, it will download binary files (e.g. weights and input tokens) from nietras/llm.bin on Hugging Face. This means there is no need to run any Python to get the data. Just clone and run ✅

The output should then look something like this:

[GPT-2]
max_seq_len: 1024
vocab_size: 50257
num_layers: 12
num_heads: 12
channels: 768
num_parameters: 124439808
[State]
batch_size: 4
seq_len: 64
num_activations: 73323776
Logits           TENSOR OK
dwte             TENSOR OK
dwpe             TENSOR OK
dln1w            TENSOR OK
dln1b            TENSOR OK
dqkvw            TENSOR OK
dqkvb            TENSOR OK
dattprojw        TENSOR OK
dattprojb        TENSOR OK
dln2w            TENSOR OK
dln2b            TENSOR OK
dfcw             TENSOR OK
dfcb             TENSOR OK
dfcprojw         TENSOR OK
dfcprojb         TENSOR OK
dlnfw            TENSOR OK
dlnfb            TENSOR OK
step 0: loss 5.269890 expected loss 5.270007 OK   (took 4219 ms)
step 1: loss 4.059388 expected loss 4.059707 OK   (took 4099 ms)
step 2: loss 3.374212 expected loss 3.375123 OK   (took 4050 ms)
step 3: loss 2.800128 expected loss 2.800783 OK   (took 4073 ms)
step 4: loss 2.315312 expected loss 2.315382 OK   (took 4089 ms)
step 5: loss 1.849347 expected loss 1.849029 OK   (took 4052 ms)
step 6: loss 1.395217 expected loss 1.394656 OK   (took 4071 ms)
step 7: loss 0.998616 expected loss 0.999147 OK   (took 4057 ms)
step 8: loss 0.625540 expected loss 0.624080 OK   (took 4073 ms)
step 9: loss 0.378012 expected loss 0.376511 OK   (took 4059 ms)
overall okay: True

Example

Public API Reference

[assembly: System.CLSCompliant(false)]
[assembly: System.Reflection.AssemblyMetadata("IsTrimmable", "True")]
[assembly: System.Reflection.AssemblyMetadata("RepositoryUrl", "https://github.com/nietras/Llm/")]
[assembly: System.Resources.NeutralResourcesLanguage("en")]
[assembly: System.Runtime.CompilerServices.InternalsVisibleTo("Llm.Benchmarks")]
[assembly: System.Runtime.CompilerServices.InternalsVisibleTo("Llm.Test")]
[assembly: System.Runtime.Versioning.TargetFramework(".NETCoreApp,Version=v8.0", FrameworkDisplayName=".NET 8.0")]
namespace nietras.LargeLanguageModel
{
    /// <summary>
    /// Public contract listing the forward and backward operations exposed by
    /// this library. Every member is <c>unsafe</c>, takes raw
    /// <c>float*</c>/<c>int*</c> buffers together with <c>int</c> size
    /// arguments, and returns <c>void</c>.
    /// </summary>
    /// <remarks>
    /// Because nothing is returned, results are presumably written through the
    /// trailing pointer parameters (TODO confirm against the implementation).
    /// Parameters prefixed with δ presumably hold gradients of the like-named
    /// forward values (TODO confirm). Required buffer sizes and memory layouts
    /// are not visible in this listing.
    /// </remarks>
    public interface ILlm
    {
        unsafe void AttentionBackward(float* δoutput, float* postAttention, float* input, int batchSize, int tokenCount, int channelCount, int headCount, float* δpreAttention, float* δpostAttention, float* δinput);
        unsafe void AttentionForward(float* input, int batchSize, int tokenCount, int channelCount, int headCount, float* preAttention, float* postAttention, float* output);
        unsafe void CrossEntropyForward(float* probabilities, int* targetTokenIndices, int batchSize, int tokenCount, int vocabularySize, float* losses);
        unsafe void CrossEntropySoftmaxBackward(float* δlosses, float* probabilities, int* targetTokenIndices, int batchSize, int tokenCount, int vocabularySize, float* δlogits);
        unsafe void EmbedBackward(float* δoutput, int* tokenIndices, int batchSize, int tokenCount, int channelCount, float* δtokenEmbeddings, float* δpositionEmbeddings);
        unsafe void EmbedForward(int* tokenIndices, float* tokenEmbeddings, float* positionEmbeddings, int batchSize, int tokenCount, int channelCount, float* output);
        unsafe void GeLUBackward(float* δoutput, float* input, int count, float* δinput);
        unsafe void GeLUForward(float* input, int count, float* output);
        unsafe void LayerNormBackward(float* δoutput, float* input, float* weight, float* mean, float* invStdDev, int batchSize, int tokenCount, int channelCount, float* δweight, float* δbias, float* δinput);
        unsafe void LayerNormForward(float* input, float* weight, float* bias, int batchSize, int tokenCount, int channelCount, float* mean, float* invStdDev, float* output);
        unsafe void MatMulBackward(float* δoutput, float* input, float* weight, int batchSize, int tokenCount, int inputChannelCount, int outputChannelCount, float* δweight, float* δbias, float* δinput);
        unsafe void MatMulForward(float* input, float* weight, float* bias, int batchSize, int tokenCount, int inputChannelCount, int outputChannelCount, float* output);
        unsafe void ResidualBackward(float* δoutput, int count, float* δleft, float* δright);
        unsafe void ResidualForward(float* left, float* right, int count, float* output);
        unsafe void SoftmaxForward(float* logits, int batchSize, int tokenCount, int vocabularySize, float* probabilities);
    }
    /// <summary>
    /// Public class that lists a <c>public static unsafe</c> method for each
    /// operation declared by <see cref="ILlm"/>, with the same names and
    /// parameter lists.
    /// </summary>
    /// <remarks>
    /// The empty <c>{ }</c> bodies suggest this is an API-surface listing and
    /// not the actual implementation (presumably generated; verify against the
    /// sources under src).
    /// NOTE(review): the members shown here are static, while
    /// <see cref="ILlm"/> declares instance members; static methods do not by
    /// themselves satisfy instance interface members in C#, so the real type
    /// presumably has additional non-public or explicit implementations that
    /// this listing omits — confirm.
    /// </remarks>
    public class Llm : nietras.LargeLanguageModel.ILlm
    {
        public static unsafe void AttentionBackward(float* δoutput, float* postAttention, float* input, int batchSize, int tokenCount, int channelCount, int headCount, float* δpreAttention, float* δpostAttention, float* δinput) { }
        public static unsafe void AttentionForward(float* input, int batchSize, int tokenCount, int channelCount, int headCount, float* preAttention, float* postAttention, float* output) { }
        public static unsafe void CrossEntropyForward(float* probabilities, int* targetTokenIndices, int batchSize, int tokenCount, int vocabularySize, float* losses) { }
        public static unsafe void CrossEntropySoftmaxBackward(float* δlosses, float* probabilities, int* targetTokenIndices, int batchSize, int tokenCount, int vocabularySize, float* δlogits) { }
        public static unsafe void EmbedBackward(float* δoutput, int* tokenIndices, int batchSize, int tokenCount, int channelCount, float* δtokenEmbeddings, float* δpositionEmbeddings) { }
        public static unsafe void EmbedForward(int* tokenIndices, float* tokenEmbeddings, float* positionEmbeddings, int batchSize, int tokenCount, int channelCount, float* output) { }
        public static unsafe void GeLUBackward(float* δoutput, float* input, int count, float* δinput) { }
        public static unsafe void GeLUForward(float* input, int count, float* output) { }
        public static unsafe void LayerNormBackward(float* δoutput, float* input, float* weight, float* mean, float* invStdDev, int batchSize, int tokenCount, int channelCount, float* δweight, float* δbias, float* δinput) { }
        public static unsafe void LayerNormForward(float* input, float* weight, float* bias, int batchSize, int tokenCount, int channelCount, float* mean, float* invStdDev, float* output) { }
        public static unsafe void MatMulBackward(float* δoutput, float* input, float* weight, int batchSize, int tokenCount, int inputChannelCount, int outputChannelCount, float* δweight, float* δbias, float* δinput) { }
        public static unsafe void MatMulForward(float* input, float* weight, float* bias, int batchSize, int tokenCount, int inputChannelCount, int outputChannelCount, float* output) { }
        public static unsafe void ResidualBackward(float* δoutput, int count, float* δleft, float* δright) { }
        public static unsafe void ResidualForward(float* left, float* right, int count, float* output) { }
        public static unsafe void SoftmaxForward(float* logits, int batchSize, int tokenCount, int vocabularySize, float* probabilities) { }
    }
}

Name		Name	Last commit message	Last commit date
Latest commit History 20 Commits
.github		.github
src		src
.editorconfig		.editorconfig
.gitattributes		.gitattributes
.gitignore		.gitignore
.markdownlint.json		.markdownlint.json
CODE_OF_CONDUCT.md		CODE_OF_CONDUCT.md
CONTRIBUTING.md		CONTRIBUTING.md
Icon.png		Icon.png
LICENSE		LICENSE
Llm.sln		Llm.sln
README.md		README.md
bench.ps1		bench.ps1
global.json		global.json
nuget.config		nuget.config
pack.ps1		pack.ps1
publish-tester.ps1		publish-tester.ps1
rename.ps1		rename.ps1

License

nietras/Llm.cs

Folders and files

Latest commit

History

Repository files navigation

Llm.cs - C# port of @karpathy llm.c

Getting Started

Example

Public API Reference

About

Resources

License

Code of conduct

Stars

Watchers

Forks

Languages