Use the "tiktoken-php" package to encode a string into tokens, decode tokens back into a string, or calculate token usage for OpenAI models in Laravel.
You can install the package via Composer:
composer require mis3085/tiktoken-for-laravel
You can publish the config file with:
php artisan vendor:publish --tag="tiktoken-for-laravel-config"
This is the contents of the published config file:
return [
// Cache folder for vocab files
'cache_dir' => storage_path('framework/cache/tiktoken'),
/**
* The default encoder
* cl100k_base: gpt-4, gpt-3.5-turbo, text-embedding-ada-002
* p50k_base: Codex models, text-davinci-002, text-davinci-003
* r50k_base: text-davinci-001
*/
'default_encoder' => 'cl100k_base',
];
use Mis3085\Tiktoken\Facades\Tiktoken;
// or
use Tiktoken;
// Use the default encoder: cl100k_base
Tiktoken::encode('this is a test');
// [ 576, 374, 264, 1296 ]
Tiktoken::encode('測試');
// [ 35086, 105, 50520, 99 ]
// Count tokens
Tiktoken::count('測試');
// 4
// Truncate a string to the specified number of tokens
Tiktoken::limit('this is a test', 2);
// this is
Tiktoken::limit('測試', 2);
// 測
Tiktoken::limit('測試', 1);
// (empty string: a single token is not enough to form a complete character here)
// Decode
Tiktoken::decode([ 35086, 105, 50520, 99 ]);
// 測試
// Change the encoder at runtime
Tiktoken::setEncoder('p50k_base');
Tiktoken::encode('this is a test');
// [ 5661, 318, 257, 1332 ]
Tiktoken::setEncoder('p50k_base')->encode('測試');
// [ 162, 116, 105, 164, 102, 99 ]
Tiktoken::setEncoderForModel('text-davinci-003')->encode('測試');
// [ 162, 116, 105, 164, 102, 99 ]
composer test
The MIT License (MIT). Please see the license file for more information.