-
Notifications
You must be signed in to change notification settings - Fork 2.8k
Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
using System.Collections.Generic; | ||
using System.Threading.Tasks; | ||
using Microsoft.SemanticKernel.Text; | ||
using SharpToken; | ||
|
||
// ReSharper disable once InconsistentNaming | ||
public static class Example55_TextChunker | ||
|
@@ -69,6 +70,9 @@ private static void WriteParagraphsToConsole(List<string> paragraphs) | |
|
||
private static int CustomTokenCounter(string input) | ||
{ | ||
return input.Length / 4; | ||
var encoding = GptEncoding.GetEncoding("p50k_base"); | ||
This comment has been minimized.
Sorry, something went wrong.
This comment has been minimized.
Sorry, something went wrong.
dmytrostruk
Author
Member
|
||
var tokens = encoding.Encode(input); | ||
|
||
return tokens.Count; | ||
} | ||
} |
Can this be changed to use the cl100k_base encoding? I assume most consumers will need that, as it's what gpt-3.5-turbo, gpt-4, etc. all use (see #2334).