diff --git a/README.md b/README.md
index e012b5f..08e500b 100644
--- a/README.md
+++ b/README.md
@@ -9,9 +9,6 @@ It is compatible with Symfony and Laravel.
 We are working to expand the support of different LLMs. Right now, we are supporting [OpenAI](https://openai.com/blog/openai-api) and [Ollama](https://ollama.ai/) that can be used to run LLM locally such as [Llama 2](https://llama.meta.com/).
 
-If you want to use other LLMs, you can use [genossGPT](https://github.com/OpenGenenerativeAI/GenossGPT)
-as a proxy.
-
 We want to thank few amazing projects that we use here or inspired us:
 - the learnings from using [LangChain](https://www.langchain.com/) and [LLamaIndex](https://www.llamaindex.ai/)
 - the excellent work from the [OpenAI PHP SDK](https://github.com/openai-php/client).
diff --git a/docusaurus/docs/usage.md b/docusaurus/docs/usage.md
index eb0db8d..8a000cc 100644
--- a/docusaurus/docs/usage.md
+++ b/docusaurus/docs/usage.md
@@ -58,6 +58,85 @@ $messages = [
 $response = $chat->generateChat($messages);
 ```
+## Get the token usage
+
+When using OpenAI, it is important to know how many tokens you are using,
+since the [model pricing](https://openai.com/api/pricing/) is token based.
+
+You can retrieve the total token usage with the `OpenAIChat::getTotalTokens()`
+function, as follows:
+
+```php
+$chat = new OpenAIChat();
+
+$answer = $chat->generateText('what is one + one ?');
+printf("%s\n", $answer); # One plus one equals two
+printf("Total tokens usage: %d\n", $chat->getTotalTokens()); # 19
+
+$answer = $chat->generateText('And what is two + two ?');
+printf("%s\n", $answer); # Two plus two equals four
+printf("Total tokens usage: %d\n", $chat->getTotalTokens()); # 39
+```
+
+The value returned by `getTotalTokens()` is cumulative: it grows on each
+API call. In the example above, the first call (`what is one + one ?`)
+used 19 tokens and the second call 20 tokens, so the total is
+19 + 20 = 39 tokens.
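+
+If you need the tokens consumed by a single call rather than the running
+total, you can take the difference of the counter before and after the call.
+A minimal sketch (the variable names are just illustrative):
+
+```php
+$chat = new OpenAIChat();
+
+$before = $chat->getTotalTokens();
+$answer = $chat->generateText('what is one + one ?');
+
+# tokens consumed by this single call
+printf("Tokens for this call: %d\n", $chat->getTotalTokens() - $before);
+```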
+
+## Get the last response from OpenAI
+
+If you want to inspect the last API response from OpenAI, you can use the
+`OpenAIChat::getLastResponse()` function.
+
+This function returns an [OpenAI\Responses\Chat\CreateResponse](https://github.com/openai-php/client/blob/main/src/Responses/Chat/CreateResponse.php)
+object that contains the following properties:
+
+```php
+namespace OpenAI\Responses\Chat;
+
+class CreateResponse
+{
+    public readonly string $id;
+    public readonly string $object;
+    public readonly int $created;
+    public readonly string $model;
+    public readonly ?string $systemFingerprint;
+    public readonly array $choices;
+    public readonly CreateResponseUsage $usage;
+}
+```
+
+The `usage` property is an instance of the following [CreateResponseUsage](https://github.com/openai-php/client/blob/main/src/Responses/Chat/CreateResponseUsage.php)
+class:
+
+```php
+namespace OpenAI\Responses\Chat;
+
+final class CreateResponseUsage
+{
+    public readonly int $promptTokens;
+    public readonly ?int $completionTokens;
+    public readonly int $totalTokens;
+}
+```
+
+For instance, if you need fine-grained token-usage information, you can read
+the `usage` property and its sub-properties, as follows:
+
+```php
+$chat = new OpenAIChat();
+
+$answer = $chat->generateText('what is the capital of Italy ?');
+$response = $chat->getLastResponse();
+
+printf("Prompt tokens: %d\n", $response->usage->promptTokens);
+printf("Completion tokens: %d\n", $response->usage->completionTokens);
+printf("Total tokens: %d\n", $response->usage->totalTokens);
+```
+
+The value of the last `printf` is the total usage of the last response only
+(promptTokens + completionTokens). It should not be confused with
+`$chat->getTotalTokens()`, which sums the totalTokens of all calls made so
+far, including the last one (`what is the capital of Italy ?`).
 
 ## Tools
diff --git a/src/Chat/ChatInterface.php b/src/Chat/ChatInterface.php
index f600fb3..bfa7ed1 100644
--- a/src/Chat/ChatInterface.php
+++ b/src/Chat/ChatInterface.php
@@ -16,6 +16,9 @@ public function generateStreamOfText(string $prompt): StreamInterface;
     /** @param Message[] $messages */
     public function generateChat(array $messages): string;
 
+    /** @param Message[] $messages */
+    public function generateChatOrReturnFunctionCalled(array $messages): string|FunctionInfo;
+
     /** @param Message[] $messages */
     public function generateChatStream(array $messages): StreamInterface;
diff --git a/src/Chat/Enums/OpenAIChatModel.php b/src/Chat/Enums/OpenAIChatModel.php
index 7ecb2e8..e8ce8ff 100644
--- a/src/Chat/Enums/OpenAIChatModel.php
+++ b/src/Chat/Enums/OpenAIChatModel.php
@@ -7,6 +7,7 @@ enum OpenAIChatModel
     case Gpt35Turbo;
     case Gpt4;
     case Gpt4Turbo;
+    case Gpt4Omni;
 
     public function getModelName(): string
     {
@@ -14,6 +15,7 @@ public function getModelName(): string
             OpenAIChatModel::Gpt35Turbo => 'gpt-3.5-turbo',
             OpenAIChatModel::Gpt4 => 'gpt-4',
             OpenAIChatModel::Gpt4Turbo => 'gpt-4-1106-preview',
+            OpenAIChatModel::Gpt4Omni => 'gpt-4o',
         };
     }
 }
diff --git a/src/Chat/OllamaChat.php b/src/Chat/OllamaChat.php
index e675001..1001e1d 100644
--- a/src/Chat/OllamaChat.php
+++ b/src/Chat/OllamaChat.php
@@ -78,13 +78,31 @@ public function generateText(string $prompt): string
     }
 
     /**
-     * Ollama does not support (yet) functions, this is an alias of generateText
+     * Ollama does not support functions yet, so this is a plain alias of generateText
      */
     public function generateTextOrReturnFunctionCalled(string $prompt): string|FunctionInfo
     {
         return $this->generateText($prompt);
     }
 
+    /**
+     * Ollama does not support functions yet: this sends a plain chat request.
+     *
+     * @param Message[] $messages
+     */
+    public function generateChatOrReturnFunctionCalled(array $messages): string|FunctionInfo
+    {
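+        // no function is ever invoked here, so the return value is always a string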
+        $params = [
+            ...$this->modelOptions,
+            'model' => $this->config->model,
+            'messages' => $this->prepareMessages($messages),
+            'stream' => false,
+        ];
+        $response = $this->sendRequest(
+            'POST',
+            'chat',
+            $params
+        );
+        $json = Utility::decodeJson($response->getBody()->getContents());
+
+        return $json['message']['content'];
+    }
+
     public function generateStreamOfText(string $prompt): StreamInterface
     {
         $params = [
diff --git a/src/Chat/OpenAIChat.php b/src/Chat/OpenAIChat.php
index 4e33dd9..69960dd 100644
--- a/src/Chat/OpenAIChat.php
+++ b/src/Chat/OpenAIChat.php
@@ -25,6 +25,10 @@ class OpenAIChat implements ChatInterface
     public string $model;
 
+    private ?CreateResponse $lastResponse = null;
+
+    private int $totalTokens = 0;
+
     /** @var array */
     private array $modelOptions = [];
 
@@ -37,11 +41,8 @@ class OpenAIChat implements ChatInterface
     public ?FunctionInfo $requiredFunction = null;
 
-    public TokenUsage $usage;
-
     public function __construct(?OpenAIConfig $config = null)
     {
-        $this->usage = new TokenUsage();
         if ($config instanceof OpenAIConfig && $config->client instanceof Client) {
             $this->client = $config->client;
         } else {
@@ -59,14 +60,27 @@ public function __construct(?OpenAIConfig $config = null)
     public function generateText(string $prompt): string
     {
         $answer = $this->generate($prompt);
+
         $this->handleTools($answer);
 
         return $answer->choices[0]->message->content ?? '';
     }
 
+    public function getLastResponse(): ?CreateResponse
+    {
+        return $this->lastResponse;
+    }
+
+    public function getTotalTokens(): int
+    {
+        return $this->totalTokens;
+    }
+
     public function generateTextOrReturnFunctionCalled(string $prompt): string|FunctionInfo
     {
+        $this->lastFunctionCalled = null;
         $answer = $this->generate($prompt);
+
         $toolsToCall = $this->getToolsToCall($answer);
 
         foreach ($toolsToCall as $toolToCall) {
@@ -94,7 +108,26 @@ public function generateChat(array $messages): string
     {
         $openAiArgs = $this->getOpenAiArgs($messages);
         $answer = $this->client->chat()->create($openAiArgs);
-        $this->usage->logLastUsage($answer);
+        $this->lastResponse = $answer;
+        $this->totalTokens += $answer->usage->totalTokens ?? 0;
+
+        return $answer->choices[0]->message->content ?? '';
+    }
+
+    public function generateChatOrReturnFunctionCalled(array $messages): string|FunctionInfo
+    {
+        $this->lastFunctionCalled = null;
+        $openAiArgs = $this->getOpenAiArgs($messages);
+        $answer = $this->client->chat()->create($openAiArgs);
+        // track the raw response and the token usage, exactly as generateChat() does
+        $this->lastResponse = $answer;
+        $this->totalTokens += $answer->usage->totalTokens ?? 0;
+        $toolsToCall = $this->getToolsToCall($answer);
+
+        foreach ($toolsToCall as $toolToCall) {
+            $this->lastFunctionCalled = $toolToCall;
+        }
+
+        if ($this->lastFunctionCalled instanceof FunctionInfo) {
+            return $this->lastFunctionCalled;
+        }
 
         return $answer->choices[0]->message->content ?? '';
     }
@@ -159,10 +192,10 @@ private function generate(string $prompt): CreateResponse
         $messages = $this->createOpenAIMessagesFromPrompt($prompt);
         $openAiArgs = $this->getOpenAiArgs($messages);
-        $answer = $this->client->chat()->create($openAiArgs);
-        $this->usage->logLastUsage($answer);
+        $this->lastResponse = $this->client->chat()->create($openAiArgs);
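+        // accumulate the running total; usage can be absent on some responses, hence the ?? 0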
+        $this->totalTokens += $this->lastResponse->usage->totalTokens ?? 0;
 
-        return $answer;
+        return $this->lastResponse;
     }
 
     /**
diff --git a/src/Chat/TokenUsage.php b/src/Chat/TokenUsage.php
deleted file mode 100644
index b994016..0000000
--- a/src/Chat/TokenUsage.php
+++ /dev/null
@@ -1,34 +0,0 @@
-        if (isset($answer->usage->promptTokens)) {
-            $this->Prompt_Tokens = $answer->usage->promptTokens;
-        }
-        if (isset($answer->usage->completionTokens)) {
-            $this->Completion_Tokens = $answer->usage->completionTokens;
-        }
-        if (isset($answer->usage->totalTokens)) {
-            $this->Total_Tokens = $answer->usage->totalTokens;
-        }
-    }
-}
diff --git a/src/Embeddings/EmbeddingGenerator/Mistral/MistralEmbeddingGenerator.php b/src/Embeddings/EmbeddingGenerator/Mistral/MistralEmbeddingGenerator.php
index 26e6098..07598e3 100644
--- a/src/Embeddings/EmbeddingGenerator/Mistral/MistralEmbeddingGenerator.php
+++ b/src/Embeddings/EmbeddingGenerator/Mistral/MistralEmbeddingGenerator.php
@@ -5,87 +5,36 @@
 namespace LLPhant\Embeddings\EmbeddingGenerator\Mistral;
 
 use Exception;
-use GuzzleHttp\Client;
-use LLPhant\Embeddings\Document;
-use LLPhant\Embeddings\EmbeddingGenerator\EmbeddingGeneratorInterface;
+use LLPhant\Embeddings\EmbeddingGenerator\OpenAI\AbstractOpenAIEmbeddingGenerator;
 use LLPhant\OpenAIConfig;
+use OpenAI\Client;
 
 use function getenv;
-use function str_replace;
 
-class MistralEmbeddingGenerator implements EmbeddingGeneratorInterface
+class MistralEmbeddingGenerator extends AbstractOpenAIEmbeddingGenerator
 {
     public Client $client;
 
-    private readonly string $apiKey;
-
     /**
      * @throws Exception
     */
     public function __construct(?OpenAIConfig $config = null)
    {
-        $apiKey = $config->apiKey ?? getenv('MISTRAL_API_KEY');
-        if (! $apiKey) {
-            throw new Exception('You have to provide a MISTRAL_API_KEY env var to request Mistral .');
-        }
-        $this->apiKey = $apiKey;
-        $this->client = new Client();
-    }
-
-    /**
-     * Call out to OpenAI's embedding endpoint.
-     *
-     * @return float[]
-     */
-    public function embedText(string $text): array
-    {
-        $text = str_replace("\n", ' ', $text);
-
-        $response = $this->client->post('https://api.mistral.ai/v1/embeddings', [
-            'body' => json_encode([
-                'model' => $this->getModelName(),
-                'input' => [$text],
-            ], JSON_THROW_ON_ERROR),
-            'headers' => [
-                'Authorization' => 'Bearer '.$this->apiKey,
-                'Content-Type' => 'application/json',
-            ],
-        ]);
-
-        $searchResults = json_decode($response->getBody()->getContents(), true, 512, JSON_THROW_ON_ERROR);
-        if (! is_array($searchResults)) {
-            throw new Exception("Request to Mistral didn't returned an array: ".$response->getBody()->getContents());
-        }
+        if ($config instanceof OpenAIConfig && $config->client instanceof Client) {
+            $this->client = $config->client;
 
-        if (! isset($searchResults['data'][0]['embedding'])) {
-            throw new Exception("Request to Mistral didn't returned expected format: ".$response->getBody()->getContents());
+            return;
         }
 
-        return $searchResults['data'][0]['embedding'];
-    }
-
-    public function embedDocument(Document $document): Document
-    {
-        $text = $document->formattedContent ?? $document->content;
-        $document->embedding = $this->embedText($text);
-
-        return $document;
-    }
-
-    /**
-     * TODO: use the fact that we can send multiple texts to the embedding API
-     *
-     * @param Document[] $documents
-     * @return Document[]
-     */
-    public function embedDocuments(array $documents): array
-    {
-        $embedDocuments = [];
-        foreach ($documents as $document) {
-            $embedDocuments[] = $this->embedDocument($document);
+        $apiKey = $config->apiKey ?? getenv('MISTRAL_API_KEY');
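+        // fail fast when neither the config nor the environment provides a key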
+        if (! $apiKey) {
+            throw new Exception('You have to provide a MISTRAL_API_KEY env var to request Mistral.');
+        }
 
-        return $embedDocuments;
+        $this->client = \OpenAI::factory()
+            ->withApiKey($apiKey)
+            ->withBaseUri('api.mistral.ai/v1')
+            ->make();
     }
 
     public function getEmbeddingLength(): int
diff --git a/src/Query/SemanticSearch/QuestionAnswering.php b/src/Query/SemanticSearch/QuestionAnswering.php
index c4c2630..6790301 100644
--- a/src/Query/SemanticSearch/QuestionAnswering.php
+++ b/src/Query/SemanticSearch/QuestionAnswering.php
@@ -11,6 +11,9 @@ class QuestionAnswering
 {
+    /** @var Document[] */
+    protected array $retrievedDocs = [];
+
     public string $systemMessageTemplate = "Use the following pieces of context to answer the question of the user. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}.";
 
     public function __construct(public readonly VectorStoreBase $vectorStoreBase, public readonly EmbeddingGeneratorInterface $embeddingGenerator, public readonly ChatInterface $chat)
@@ -54,21 +57,28 @@ public function answerQuestionFromChat(array $messages, int $k = 4, array $addit
         return $this->chat->generateChatStream($messages);
     }
 
+    /**
+     * @return Document[]
+     */
+    public function getRetrievedDocuments(): array
+    {
+        return $this->retrievedDocs;
+    }
+
     /**
      * @param array|array $additionalArguments
     */
     private function searchDocumentAndCreateSystemMessage(string $question, int $k, array $additionalArguments): string
     {
         $embedding = $this->embeddingGenerator->embedText($question);
-        /** @var Document[] $documents */
-        $documents = $this->vectorStoreBase->similaritySearch($embedding, $k, $additionalArguments);
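+        // keep the retrieved documents so callers can inspect them via getRetrievedDocuments()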
+        $this->retrievedDocs = $this->vectorStoreBase->similaritySearch($embedding, $k, $additionalArguments);
 
-        if ($documents === []) {
+        if ($this->retrievedDocs === []) {
             return "I don't know. I didn't find any document to answer the question";
         }
 
         $context = '';
-        foreach ($documents as $document) {
+        foreach ($this->retrievedDocs as $document) {
             $context .= $document->content.' ';
         }
diff --git a/tests/Fixtures/OpenAI/chat-response.json b/tests/Fixtures/OpenAI/chat-response.json
new file mode 100644
index 0000000..b868e46
--- /dev/null
+++ b/tests/Fixtures/OpenAI/chat-response.json
@@ -0,0 +1,21 @@
+{
+    "id": "chatcmpl-123",
+    "object": "chat.completion",
+    "created": 1677652288,
+    "model": "gpt-3.5-turbo-0125",
+    "system_fingerprint": "fp_44709d6fcb",
+    "choices": [{
+        "index": 0,
+        "message": {
+            "role": "assistant",
+            "content": "\n\nHello there, how may I assist you today?"
+        },
+        "logprobs": null,
+        "finish_reason": "stop"
+    }],
+    "usage": {
+        "prompt_tokens": 9,
+        "completion_tokens": 12,
+        "total_tokens": 21
+    }
+}
\ No newline at end of file
diff --git a/tests/Pest.php b/tests/Pest.php
new file mode 100644
index 0000000..9000107
--- /dev/null
+++ b/tests/Pest.php
@@ -0,0 +1,14 @@
+<?php
+
+declare(strict_types=1);
+
+/**
+ * Load a JSON fixture from the tests/Fixtures directory.
+ */
+function fixture(string $name): array
+{
+    $contents = file_get_contents(__DIR__.'/Fixtures/'.$name.'.json');
+
+    return json_decode($contents, true, 512, JSON_THROW_ON_ERROR);
+}
 
     $response = $chat->generateChatStream([Message::user('here the question')]);
     expect($response)->toBeInstanceof(StreamInterface::class);
 });
+
+it('returns last response using generateText()', function () {
+    $response = TransporterResponse::from(
+        fixture('OpenAI/chat-response'),
+        ['x-request-id' => '1']
+    );
+    $transport = Mockery::mock(TransporterContract::class);
+    $transport->allows()->requestObject(anyArgs())->andReturns($response);
+
+    $config = new OpenAIConfig();
+    $config->client = new Client($transport);
+    $chat = new OpenAIChat($config);
+
+    $response = $chat->generateText('here the question');
+    $lastResponse = $chat->getLastResponse();
+
+    expect($lastResponse->id)->toBe('chatcmpl-123');
+    expect($lastResponse->object)->toBe('chat.completion');
+    expect($lastResponse->model)->toBe('gpt-3.5-turbo-0125');
+    expect($lastResponse->usage->promptTokens)->toBe(9);
+    expect($lastResponse->usage->completionTokens)->toBe(12);
+    expect($lastResponse->usage->totalTokens)->toBe(21);
+});
+
+it('returns last response using generateTextOrReturnFunctionCalled()', function () {
+    $response = TransporterResponse::from(
+        fixture('OpenAI/chat-response'),
+        ['x-request-id' => '1']
+    );
+    $transport = Mockery::mock(TransporterContract::class);
+    $transport->allows()->requestObject(anyArgs())->andReturns($response);
+
+    $config = new OpenAIConfig();
+    $config->client = new Client($transport);
+    $chat = new OpenAIChat($config);
+
+    $response = $chat->generateTextOrReturnFunctionCalled('here the question');
+    $lastResponse = $chat->getLastResponse();
+
+    expect($lastResponse->usage->promptTokens)->toBe(9);
+    expect($lastResponse->usage->completionTokens)->toBe(12);
+    expect($lastResponse->usage->totalTokens)->toBe(21);
+});
+
+it('returns null last response when no request has been made', function () {
+    $transport = Mockery::mock(TransporterContract::class);
+
+    $config = new OpenAIConfig();
+    $config->client = new Client($transport);
+    $chat = new OpenAIChat($config);
+
+    expect($chat->getLastResponse())->toBe(null);
+});
+
+it('returns total token usage for generateText() and generateTextOrReturnFunctionCalled()', function () {
+    $response = TransporterResponse::from(
+        fixture('OpenAI/chat-response'),
+        ['x-request-id' => '1']
+    );
+    $transport = Mockery::mock(TransporterContract::class);
+    $transport->allows()->requestObject(anyArgs())->andReturns($response);
+
+    $config = new OpenAIConfig();
+    $config->client = new Client($transport);
+    $chat = new OpenAIChat($config);
+
+    $response = $chat->generateText('here the question');
+    expect($chat->getTotalTokens())->toBe(21);
+
+    $response = $chat->generateTextOrReturnFunctionCalled('here the second question with function');
+    expect($chat->getTotalTokens())->toBe(42);
+});
diff --git a/tests/Unit/Query/SemanticSearch/QuestionAnsweringTest.php b/tests/Unit/Query/SemanticSearch/QuestionAnsweringTest.php
new file mode 100644
index 0000000..ed128af
--- /dev/null
+++ b/tests/Unit/Query/SemanticSearch/QuestionAnsweringTest.php
@@ -0,0 +1,70 @@
+<?php
+
+declare(strict_types=1);
+
+use LLPhant\Chat\ChatInterface;
+use LLPhant\Embeddings\Document;
+use LLPhant\Embeddings\EmbeddingGenerator\EmbeddingGeneratorInterface;
+use LLPhant\Embeddings\VectorStores\VectorStoreBase;
+use LLPhant\Query\SemanticSearch\QuestionAnswering;
+
+beforeEach(function () {
+    $this->question = 'What is the capital city of Italy?';
+    $this->answer = 'The capital city of Italy is Rome';
+
+    $this->docs = getDocuments();
+
+    $this->vectorStore = Mockery::mock(VectorStoreBase::class);
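+    // the vector store mock always returns the four fixture documents built in getDocuments()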
+    $this->vectorStore->allows([
+        'similaritySearch' => $this->docs,
+    ]);
+
+    $this->embedding = Mockery::mock(EmbeddingGeneratorInterface::class);
+    $this->embedding->allows([
+        'embedText' => [],
+    ]);
+
+    $this->chat = Mockery::mock(ChatInterface::class);
+    $this->chat->allows([
+        'setSystemMessage' => null,
+        'generateText' => $this->answer,
+    ]);
+});
+
+it('answers a question', function () {
+    $qa = new QuestionAnswering($this->vectorStore, $this->embedding, $this->chat);
+
+    $result = $qa->answerQuestion($this->question);
+
+    expect($result)->toBe($this->answer);
+});
+
+it('returns the retrieved documents', function () {
+    $qa = new QuestionAnswering($this->vectorStore, $this->embedding, $this->chat);
+
+    $result = $qa->answerQuestion($this->question);
+    $docs = $qa->getRetrievedDocuments();
+
+    expect($docs)->toBe($this->docs);
+});
+
+/**
+ * @return Document[]
+ */
+function getDocuments(): array
+{
+    $doc1 = new Document;
+    $doc1->content = 'Rome is the capital city of Italy';
+    $doc2 = new Document;
+    $doc2->content = 'Rome is also the capital of the Lazio region';
+    $doc3 = new Document;
+    $doc3->content = 'The Metropolitan City of Rome, with a population of 4,355,725 residents';
+    $doc4 = new Document;
+    $doc4->content = 'Rome is often referred to as the City of Seven Hills due to its geographic location, and also as the "Eternal City"';
+
+    return [$doc1, $doc2, $doc3, $doc4];
+}
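+
+// Additional sketch: since $retrievedDocs is initialised to [] in
+// QuestionAnswering, no documents are reported before the first question.
+it('returns no retrieved documents before a question is asked', function () {
+    $qa = new QuestionAnswering($this->vectorStore, $this->embedding, $this->chat);
+
+    expect($qa->getRetrievedDocuments())->toBe([]);
+});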