Skip to content

Commit

Permalink
ci(run-tests.yml): add GitHub Actions workflow for running tests
Browse files Browse the repository at this point in the history
refactor(Context.php): change Context class from final to non-final
feat(Context.php): add optional LLamaCPPFFI parameter to createWithParameter method
fix(TokenGeneratedEvent.php): fix namespace typo in TokenGeneratedEvent class

refactor(LLamaCPP.php): add support for multi-threaded token generation and evaluation

refactor(LLamaCPPFFI.php): remove comments for generated wrapper methods
feat(LLamaCPPFFI.php): add llama_init_from_file method to allocate memory for the model
feat(LLamaCPPFFI.php): add llama_tokenize method to convert text into tokens
feat(LLamaCPPFFI.php): add llama_eval method to run llama inference and obtain logits and probabilities
feat(LLamaCPPFFI.php): add llama_sample_top_p_top_k method to sample top-k and top-p from the logits

chore(LLamaCPPFFI.php): add docblocks to llama_token_to_str and llama_free methods

refactor(Locator.php): import class_exists function
feat(GenerationParameters.php): add noOfThreads parameter to GenerationParameters class
feat(ModelParameters.php): add parameter description to ModelParameters class

test(ContextTest.php): add unit tests for Context class methods

test(LLamaCPPTest.php): add tests for LLamaCPP class generate and generateAll methods

test(GenerationParametersTest.php): add tests for GenerationParameters class constructor and getters

test(ModelParametersTest.php): add test for ModelParameters constructor and getters
  • Loading branch information
kambo-1st committed Apr 22, 2023
1 parent 38cb321 commit 5977e98
Show file tree
Hide file tree
Showing 12 changed files with 443 additions and 14 deletions.
37 changes: 37 additions & 0 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
name: Tests

# Run the test suite on every push and on every pull request.
on: [push, pull_request]

jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      # Stop the whole matrix as soon as one combination fails.
      fail-fast: true
      matrix:
        os: [ubuntu-latest]
        php: [8.2, 8.1]
        # Exercise both the lowest and the highest installable
        # dependency versions allowed by composer.json.
        stability: [prefer-lowest, prefer-stable]

    name: P${{ matrix.php }} - ${{ matrix.stability }} - ${{ matrix.os }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Setup PHP
        uses: shivammathur/setup-php@v2
        with:
          php-version: ${{ matrix.php }}
          extensions: dom, curl, libxml, mbstring, zip, pcntl, pdo, sqlite, pdo_sqlite, bcmath, soap, intl, gd, exif, iconv, imagick, fileinfo
          coverage: none

      # Problem matchers surface PHP / PHPUnit errors as GitHub
      # annotations directly on the pull request.
      - name: Setup problem matchers
        run: |
          echo "::add-matcher::${{ runner.tool_cache }}/php.json"
          echo "::add-matcher::${{ runner.tool_cache }}/phpunit.json"

      - name: Install dependencies
        run: composer update --${{ matrix.stability }} --prefer-dist --no-interaction

      - name: Execute tests
        run: vendor/bin/phpunit
5 changes: 3 additions & 2 deletions src/Context.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
use Kambo\LLamaCPP\Native\LLamaCPPFFI;
use FFI\CData;

final class Context
class Context
{
private CData $ctx;

Expand All @@ -30,8 +30,9 @@ public function __construct(

public static function createWithParameter(
ModelParameters $modelParameters,
LLamaCPPFFI $ffi = null,
): self {
$ffi = LLamaCPPFFI::getInstance();
$ffi = $ffi ?? LLamaCPPFFI::getInstance();

return new self($ffi, $modelParameters);
}
Expand Down
2 changes: 1 addition & 1 deletion src/Events/TokenGeneratedEvent.php
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?php

namespace Kambo\LLamaCpp\Events;
namespace Kambo\LLamaCPP\Events;

use Symfony\Contracts\EventDispatcher\Event;

Expand Down
18 changes: 15 additions & 3 deletions src/LLamaCPP.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
use Kambo\LLamaCPP\Parameters\GenerationParameters;
use Symfony\Component\EventDispatcher\EventDispatcherInterface;
use Kambo\LLamaCPP\Native\LLamaCPPFFI;
use Kambo\LLamaCpp\Events\TokenGeneratedEvent;
use Kambo\LLamaCPP\Events\TokenGeneratedEvent;
use Generator;

use function strlen;
Expand Down Expand Up @@ -35,7 +35,13 @@ public function generate(string $prompt, ?GenerationParameters $generation = nul
$nOfTok = $this->ffi->llama_tokenize($this->context->getCtx(), $prompt, $input, strlen($prompt), true);

for ($i = 0; $i < $nOfTok; $i++) {
$this->ffi->llama_eval($this->context->getCtx(), $input + $i, 1, $i, 10);
$this->ffi->llama_eval(
$this->context->getCtx(),
$input + $i,
1,
$i,
$generation->getNoOfThreads()
);
}

$eosToken = $this->ffi->llama_token_eos();
Expand Down Expand Up @@ -68,7 +74,13 @@ public function generate(string $prompt, ?GenerationParameters $generation = nul
);

yield $prediction;
$this->ffi->llama_eval($this->context->getCtx(), $this->ffi->addr($token), 1, $nOfTok, 10);
$this->ffi->llama_eval(
$this->context->getCtx(),
$this->ffi->addr($token),
1,
$nOfTok,
$generation->getNoOfThreads()
);
}
}

Expand Down
67 changes: 61 additions & 6 deletions src/Native/LLamaCPPFFI.php
Original file line number Diff line number Diff line change
Expand Up @@ -101,30 +101,70 @@ public function addr(CData $ptr): CData
return FFI::addr($ptr);
}

// generate wrapper method for llama_context_default_params
public function llama_context_default_params(): CData
{
return $this->fii->llama_context_default_params();
}

// generate wrapper method for llama_init_from_file
/**
* Allocate (almost) all memory needed for the model.
*
* @param string $path
* @param CData $params
*
* @return CData Return NULL on failure
*/
public function llama_init_from_file(string $path, CData $params): CData
{
return $this->fii->llama_init_from_file($path, $params);
}

// generate wrapper method for llama_tokenize
/**
* Convert the provided text into tokens.
* The tokens pointer must be large enough to hold the resulting tokens.
*
* @param CData $ctx
* @param string $text
* @param CData $tokens
* @param int $maxTokens
* @param bool $addEOS
*
* @return int the number of tokens on success, no more than n_max_tokens, or -1 on error
*/
public function llama_tokenize(CData $ctx, string $text, CData $tokens, int $maxTokens, bool $addEOS): int
{
return $this->fii->llama_tokenize($ctx, $text, $tokens, $maxTokens, $addEOS);
}

// generate wrapper method for llama_eval
public function llama_eval(CData $ctx, CData $tokens, int $nOfTokens, int $position, int $nOfSamples): void
/**
* Run the llama inference to obtain the logits and probabilities for the next token.
*
* @param CData $ctx
* @param CData $tokens the provided batch of new tokens to process
* @param int $nOfTokens the provided batch of new tokens to process
* @param int $nOfPastTokens the number of tokens to use from previous eval calls
* @param int $nOfThreads The number of threads to use for the inference
*
* @return int
*/
public function llama_eval(CData $ctx, CData $tokens, int $nOfTokens, int $nOfPastTokens, int $nOfThreads): int
{
$this->fii->llama_eval($ctx, $tokens, $nOfTokens, $position, $nOfSamples);
return $this->fii->llama_eval($ctx, $tokens, $nOfTokens, $nOfPastTokens, $nOfThreads);
}

/**
* Sample top-k and top-p from the logits.
*
* @param CData $ctx
* @param CData|null $lastNTokens
* @param int $lastNTokensSize
* @param float $topP
* @param float $topK
* @param float $temperature
* @param float $repeatPenalty
*
* @return int
*/
public function llama_sample_top_p_top_k(
CData $ctx,
?CData $lastNTokens,
Expand All @@ -145,11 +185,26 @@ public function llama_sample_top_p_top_k(
);
}

/**
* Token Id -> String. Uses the vocabulary in the provided context
*
* @param CData $ctx
* @param int $id
*
* @return string
*/
public function llama_token_to_str(CData $ctx, int $id): string
{
return $this->fii->llama_token_to_str($ctx, $id);
}

/**
* Frees all allocated memory
*
* @param CData $ctx
*
* @return void
*/
public function llama_free(CData $ctx): void
{
$this->fii->llama_free($ctx);
Expand Down
2 changes: 2 additions & 0 deletions src/Native/Locator.php
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
use Kambo\LLamaCPP\Exception\NotImplementedException;
use Kambo\LLamaCPPLinuxLib\Info;

use function class_exists;

use const PHP_OS_FAMILY;

/**
Expand Down
8 changes: 7 additions & 1 deletion src/Parameters/GenerationParameters.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@

namespace Kambo\LLamaCpp\Parameters;

final class GenerationParameters
class GenerationParameters
{
public function __construct(
private int $predictLength = 128,
private float $topP = 0.9,
private float $topK = 40,
private float $temperature = 0.2,
private float $repeatPenalty = 1 / 0.85,
private int $noOfThreads = 10,
) {
}

Expand Down Expand Up @@ -37,4 +38,9 @@ public function getRepeatPenalty(): float
{
return $this->repeatPenalty;
}

public function getNoOfThreads(): int
{
return $this->noOfThreads;
}
}
2 changes: 1 addition & 1 deletion src/Parameters/ModelParameters.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

namespace Kambo\LLamaCpp\Parameters;

final class ModelParameters
class ModelParameters
{
/**
* @param string $modelPath path to used model
Expand Down
123 changes: 123 additions & 0 deletions tests/ContextTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
<?php

namespace Kambo\Tests\LLamaCPP;

use PHPUnit\Framework\TestCase;
use Kambo\LLamaCPP\Parameters\ModelParameters;
use Kambo\LLamaCPP\Native\LLamaCPPFFI;
use Kambo\LLamaCPP\Context;
use FFI\CData;
use FFI;

/**
 * Unit tests for the Context wrapper around the llama.cpp FFI bindings.
 *
 * The LLamaCPPFFI layer is mocked throughout; a minimal FFI cdef of
 * `struct llama_context_params` provides real CData instances where the
 * code under test expects them.
 */
class ContextTest extends TestCase
{
    private LLamaCPPFFI $ffiMock;
    private ModelParameters $modelParametersMock;
    private FFI $ffi;

    protected function setUp(): void
    {
        $this->ffiMock = $this->getMockBuilder(LLamaCPPFFI::class)
            ->disableOriginalConstructor()
            ->getMock();

        $this->modelParametersMock = $this->getMockBuilder(ModelParameters::class)
            ->disableOriginalConstructor()
            ->getMock();

        // Mirror of llama.cpp's llama_context_params so the mock can hand
        // back a structurally valid CData value.
        $this->ffi = FFI::cdef('struct llama_context; struct llama_context_params {
            int n_ctx;       // text context
            int n_parts;     // -1 for default
            int seed;        // RNG seed, 0 for random
            bool f16_kv;     // use fp16 for KV cache
            bool logits_all; // the llama_eval() call computes all logits, not just the last one
            bool vocab_only; // only load the vocabulary, no weights
            bool use_mlock;  // force system to keep model in RAM
            bool embedding;  // embedding mode only
        };');
    }

    /**
     * The constructor should pull every parameter from ModelParameters and
     * produce a usable Context instance.
     */
    public function testConstruct(): void
    {
        $this->ffiMock->method('llama_context_default_params')
            ->willReturn($this->ffi->new('struct llama_context_params'));

        $this->modelParametersMock->method('getNCtx')
            ->willReturn(512);
        $this->modelParametersMock->method('getNParts')
            ->willReturn(16);
        $this->modelParametersMock->method('getSeed')
            ->willReturn(42);
        $this->modelParametersMock->method('isF16KV')
            ->willReturn(false);
        $this->modelParametersMock->method('isLogitsAll')
            ->willReturn(true);
        $this->modelParametersMock->method('isVocabOnly')
            ->willReturn(false);
        $this->modelParametersMock->method('isUseMlock')
            ->willReturn(false);
        $this->modelParametersMock->method('isEmbedding')
            ->willReturn(true);
        $this->modelParametersMock->method('getModelPath')
            ->willReturn('/path/to/model');

        $this->ffiMock->method('llama_init_from_file')
            ->willReturn(FFI::new('int'));

        $context = new Context($this->ffiMock, $this->modelParametersMock);

        $this->assertInstanceOf(Context::class, $context);
    }

    /**
     * The static factory should accept an injected FFI instance instead of
     * falling back to the global singleton.
     */
    public function testCreateWithParameter(): void
    {
        $this->ffiMock->method('llama_context_default_params')
            ->willReturn($this->ffi->new('struct llama_context_params'));

        $this->modelParametersMock->method('getNCtx')
            ->willReturn(512);

        $this->ffiMock->method('llama_init_from_file')
            ->willReturn(FFI::new('int'));

        $context = Context::createWithParameter($this->modelParametersMock, $this->ffiMock);

        $this->assertInstanceOf(Context::class, $context);
    }

    /**
     * getCtx() must return the exact CData handle produced by
     * llama_init_from_file.
     */
    public function testGetCtx(): void
    {
        $this->ffiMock->method('llama_context_default_params')
            ->willReturn($this->ffi->new('struct llama_context_params'));

        $cdataStub = FFI::new('int');
        $this->ffiMock->method('llama_init_from_file')
            ->willReturn($cdataStub);

        $context = new Context($this->ffiMock, $this->modelParametersMock);

        $this->assertSame(
            $cdataStub,
            $context->getCtx()
        );
    }

    /**
     * Destroying the context must free the native handle exactly once.
     */
    public function testDestruct(): void
    {
        $this->ffiMock->method('llama_context_default_params')
            ->willReturn($this->ffi->new('struct llama_context_params'));

        // Use a single shared stub so the free expectation can assert on
        // the very same CData instance that llama_init_from_file returned;
        // comparing two independent FFI::new('int') handles through
        // PHPUnit's loose object comparator is unreliable for opaque CData.
        $cdataStub = FFI::new('int');
        $this->ffiMock->method('llama_init_from_file')
            ->willReturn($cdataStub);

        $this->ffiMock->expects($this->once())
            ->method('llama_free')
            ->with($this->identicalTo($cdataStub));

        $context = new Context($this->ffiMock, $this->modelParametersMock);

        // Trigger __destruct (and therefore llama_free) deterministically.
        unset($context);
    }
}

0 comments on commit 5977e98

Please sign in to comment.