diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj index af9e7a6fe8f..a067873a0b9 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj/project.pbxproj @@ -25,15 +25,8 @@ 03729F0C2BB203B300152F2E /* util.h in Headers */ = {isa = PBXBuildFile; fileRef = 03729F092BB203B300152F2E /* util.h */; }; 03729F122BB2042B00152F2E /* sampler.h in Headers */ = {isa = PBXBuildFile; fileRef = 03729F102BB2042B00152F2E /* sampler.h */; }; 03729F132BB2042B00152F2E /* sampler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 03729F112BB2042B00152F2E /* sampler.cpp */; }; - 03729F162BB2043600152F2E /* bpe_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 03729F142BB2043600152F2E /* bpe_tokenizer.cpp */; }; - 03729F172BB2043600152F2E /* tokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = 03729F152BB2043600152F2E /* tokenizer.h */; }; 0372C3142C89418E00CD942A /* llava_runner.h in Headers */ = {isa = PBXBuildFile; fileRef = 0372C3122C89418E00CD942A /* llava_runner.h */; }; 0372C3152C89418E00CD942A /* llava_runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 0372C3132C89418E00CD942A /* llava_runner.cpp */; }; - 038D678C2C482C1E00B88CF2 /* llama_tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 038D678A2C482C1D00B88CF2 /* llama_tiktoken.cpp */; }; - 038D678D2C482C1E00B88CF2 /* llama_tiktoken.h in Headers */ = {isa = PBXBuildFile; fileRef = 038D678B2C482C1E00B88CF2 /* llama_tiktoken.h */; }; - 03BADE202BD2E88600DDFDC2 /* bpe_tokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = 03BADE1F2BD2E88600DDFDC2 /* bpe_tokenizer.h */; }; - 03BADE232BD2EB6700DDFDC2 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 03BADE212BD2EB6600DDFDC2 /* tiktoken.cpp */; }; - 03BADE242BD2EB6700DDFDC2 /* tiktoken.h in Headers */ = {isa = PBXBuildFile; fileRef = 03BADE222BD2EB6700DDFDC2 
/* tiktoken.h */; }; 03CF43962CEC5CEC00C7113B /* backend_coreml in Frameworks */ = {isa = PBXBuildFile; productRef = 03CF43952CEC5CEC00C7113B /* backend_coreml */; }; 03CF43982CEC5CEC00C7113B /* backend_coreml_debug in Frameworks */ = {isa = PBXBuildFile; productRef = 03CF43972CEC5CEC00C7113B /* backend_coreml_debug */; }; 03CF439A2CEC5CEC00C7113B /* backend_mps in Frameworks */ = {isa = PBXBuildFile; productRef = 03CF43992CEC5CEC00C7113B /* backend_mps */; }; @@ -53,8 +46,20 @@ 03D03DA82C7823620088D6A7 /* text_prefiller.h in Headers */ = {isa = PBXBuildFile; fileRef = 03D03DA62C7823620088D6A7 /* text_prefiller.h */; }; 03D03DAB2C7823830088D6A7 /* text_decoder_runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 03D03DA92C7823830088D6A7 /* text_decoder_runner.cpp */; }; 03D03DAC2C7823830088D6A7 /* text_decoder_runner.h in Headers */ = {isa = PBXBuildFile; fileRef = 03D03DAA2C7823830088D6A7 /* text_decoder_runner.h */; }; - 03DDA0FB2BD6368100D234B3 /* base64.h in Headers */ = {isa = PBXBuildFile; fileRef = 03DDA0FA2BD6368100D234B3 /* base64.h */; }; 26A6A4282C8A3769005A761E /* ImagePicker.swift in Sources */ = {isa = PBXBuildFile; fileRef = 26A6A4272C8A3769005A761E /* ImagePicker.swift */; }; + F292B0752D88B0C200BE6839 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B06F2D88B0C200BE6839 /* tiktoken.cpp */; }; + F292B0762D88B0C200BE6839 /* llama2c_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B06C2D88B0C200BE6839 /* llama2c_tokenizer.cpp */; }; + F292B0772D88B0C200BE6839 /* bpe_tokenizer_base.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B06A2D88B0C200BE6839 /* bpe_tokenizer_base.cpp */; }; + F292B0882D88B0D200BE6839 /* llama2c_tokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = F292B07C2D88B0D200BE6839 /* llama2c_tokenizer.h */; }; + F292B0892D88B0D200BE6839 /* tokenizer.h in Headers */ = {isa = PBXBuildFile; fileRef = F292B0832D88B0D200BE6839 /* tokenizer.h */; }; + F292B08B2D88B0D200BE6839 /* result.h in 
Headers */ = {isa = PBXBuildFile; fileRef = F292B07F2D88B0D200BE6839 /* result.h */; }; + F292B08D2D88B0D200BE6839 /* error.h in Headers */ = {isa = PBXBuildFile; fileRef = F292B07A2D88B0D200BE6839 /* error.h */; }; + F292B08E2D88B0D200BE6839 /* bpe_tokenizer_base.h in Headers */ = {isa = PBXBuildFile; fileRef = F292B0792D88B0D200BE6839 /* bpe_tokenizer_base.h */; }; + F292B08F2D88B0D200BE6839 /* log.h in Headers */ = {isa = PBXBuildFile; fileRef = F292B07D2D88B0D200BE6839 /* log.h */; }; + F292B0912D88B0D200BE6839 /* tiktoken.h in Headers */ = {isa = PBXBuildFile; fileRef = F292B0812D88B0D200BE6839 /* tiktoken.h */; }; + F292B0922D88B0D200BE6839 /* base64.h in Headers */ = {isa = PBXBuildFile; fileRef = F292B0782D88B0D200BE6839 /* base64.h */; }; + F292B1012D88B20C00BE6839 /* llama_tiktoken.h in Headers */ = {isa = PBXBuildFile; fileRef = F292B0FF2D88B20C00BE6839 /* llama_tiktoken.h */; }; + F292B1022D88B20C00BE6839 /* llama_tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B1002D88B20C00BE6839 /* llama_tiktoken.cpp */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -103,23 +108,28 @@ 03729F092BB203B300152F2E /* util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = util.h; sourceTree = ""; }; 03729F102BB2042B00152F2E /* sampler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sampler.h; sourceTree = ""; }; 03729F112BB2042B00152F2E /* sampler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sampler.cpp; sourceTree = ""; }; - 03729F142BB2043600152F2E /* bpe_tokenizer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = bpe_tokenizer.cpp; path = ../../../../extension/llm/tokenizer/bpe_tokenizer.cpp; sourceTree = ""; }; - 03729F152BB2043600152F2E /* tokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = 
sourcecode.c.h; name = tokenizer.h; path = ../../../../extension/llm/tokenizer/tokenizer.h; sourceTree = ""; }; 0372C3122C89418E00CD942A /* llava_runner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = llava_runner.h; path = ../../../examples/models/llava/runner/llava_runner.h; sourceTree = ""; }; 0372C3132C89418E00CD942A /* llava_runner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = llava_runner.cpp; path = ../../../examples/models/llava/runner/llava_runner.cpp; sourceTree = ""; }; - 038D678A2C482C1D00B88CF2 /* llama_tiktoken.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = llama_tiktoken.cpp; sourceTree = ""; }; - 038D678B2C482C1E00B88CF2 /* llama_tiktoken.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = llama_tiktoken.h; sourceTree = ""; }; - 03BADE1F2BD2E88600DDFDC2 /* bpe_tokenizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = bpe_tokenizer.h; path = ../../../../extension/llm/tokenizer/bpe_tokenizer.h; sourceTree = ""; }; - 03BADE212BD2EB6600DDFDC2 /* tiktoken.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = tiktoken.cpp; path = ../../../../extension/llm/tokenizer/tiktoken.cpp; sourceTree = ""; }; - 03BADE222BD2EB6700DDFDC2 /* tiktoken.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = tiktoken.h; path = ../../../../extension/llm/tokenizer/tiktoken.h; sourceTree = ""; }; 03C5F51C2CE7D35C00D6CE3F /* Release.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = Release.xcconfig; sourceTree = ""; }; 03C5F51D2CE7D37100D6CE3F /* Debug.xcconfig */ = {isa = PBXFileReference; lastKnownFileType = text.xcconfig; path = Debug.xcconfig; sourceTree = ""; }; 03D03DA52C7823620088D6A7 /* text_prefiller.cpp */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = text_prefiller.cpp; sourceTree = ""; }; 03D03DA62C7823620088D6A7 /* text_prefiller.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_prefiller.h; sourceTree = ""; }; 03D03DA92C7823830088D6A7 /* text_decoder_runner.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = text_decoder_runner.cpp; sourceTree = ""; }; 03D03DAA2C7823830088D6A7 /* text_decoder_runner.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = text_decoder_runner.h; sourceTree = ""; }; - 03DDA0FA2BD6368100D234B3 /* base64.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = base64.h; path = ../../../../extension/llm/tokenizer/base64.h; sourceTree = ""; }; 26A6A4272C8A3769005A761E /* ImagePicker.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ImagePicker.swift; sourceTree = ""; }; + F292B06A2D88B0C200BE6839 /* bpe_tokenizer_base.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = bpe_tokenizer_base.cpp; path = src/bpe_tokenizer_base.cpp; sourceTree = ""; }; + F292B06C2D88B0C200BE6839 /* llama2c_tokenizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = llama2c_tokenizer.cpp; path = src/llama2c_tokenizer.cpp; sourceTree = ""; }; + F292B06F2D88B0C200BE6839 /* tiktoken.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = tiktoken.cpp; path = src/tiktoken.cpp; sourceTree = ""; }; + F292B0782D88B0D200BE6839 /* base64.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = base64.h; sourceTree = ""; }; + F292B0792D88B0D200BE6839 /* bpe_tokenizer_base.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = bpe_tokenizer_base.h; sourceTree = ""; }; + F292B07A2D88B0D200BE6839 /* error.h */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.c.h; path = error.h; sourceTree = ""; }; + F292B07C2D88B0D200BE6839 /* llama2c_tokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = llama2c_tokenizer.h; sourceTree = ""; }; + F292B07D2D88B0D200BE6839 /* log.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = log.h; sourceTree = ""; }; + F292B07F2D88B0D200BE6839 /* result.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = result.h; sourceTree = ""; }; + F292B0812D88B0D200BE6839 /* tiktoken.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tiktoken.h; sourceTree = ""; }; + F292B0832D88B0D200BE6839 /* tokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tokenizer.h; sourceTree = ""; }; + F292B0FF2D88B20C00BE6839 /* llama_tiktoken.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = llama_tiktoken.h; path = ../../../models/llama/tokenizer/llama_tiktoken.h; sourceTree = SOURCE_ROOT; }; + F292B1002D88B20C00BE6839 /* llama_tiktoken.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = llama_tiktoken.cpp; path = ../../../models/llama/tokenizer/llama_tiktoken.cpp; sourceTree = SOURCE_ROOT; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -214,7 +224,7 @@ 0324D69B2BAACB7C00DEF36F /* Exported */, 03729F062BB2035900152F2E /* runner */, 03729F0F2BB203E100152F2E /* sampler */, - 03729F0E2BB203D700152F2E /* tokenizer */, + 03729F0E2BB203D700152F2E /* tokenizers */, ); path = LLaMARunner; sourceTree = ""; @@ -264,20 +274,18 @@ path = ../../../../../../extension/llm/runner; sourceTree = ""; }; - 03729F0E2BB203D700152F2E /* tokenizer */ = { + 03729F0E2BB203D700152F2E /* tokenizers */ = { isa = PBXGroup; children = ( - 03DDA0FA2BD6368100D234B3 /* base64.h */, - 03729F142BB2043600152F2E /* bpe_tokenizer.cpp 
*/, - 03BADE1F2BD2E88600DDFDC2 /* bpe_tokenizer.h */, - 038D678A2C482C1D00B88CF2 /* llama_tiktoken.cpp */, - 038D678B2C482C1E00B88CF2 /* llama_tiktoken.h */, - 03BADE212BD2EB6600DDFDC2 /* tiktoken.cpp */, - 03BADE222BD2EB6700DDFDC2 /* tiktoken.h */, - 03729F152BB2043600152F2E /* tokenizer.h */, - ); - name = tokenizer; - path = ../../../../../models/llama/tokenizer; + F292B0FF2D88B20C00BE6839 /* llama_tiktoken.h */, + F292B1002D88B20C00BE6839 /* llama_tiktoken.cpp */, + F292B0862D88B0D200BE6839 /* include */, + F292B06A2D88B0C200BE6839 /* bpe_tokenizer_base.cpp */, + F292B06C2D88B0C200BE6839 /* llama2c_tokenizer.cpp */, + F292B06F2D88B0C200BE6839 /* tiktoken.cpp */, + ); + name = tokenizers; + path = ../../../../../../extension/llm/tokenizers; sourceTree = ""; }; 03729F0F2BB203E100152F2E /* sampler */ = { @@ -290,6 +298,37 @@ path = ../../../../../../extension/llm/sampler; sourceTree = ""; }; + F292B0842D88B0D200BE6839 /* tokenizers */ = { + isa = PBXGroup; + children = ( + F292B0782D88B0D200BE6839 /* base64.h */, + F292B0792D88B0D200BE6839 /* bpe_tokenizer_base.h */, + F292B07A2D88B0D200BE6839 /* error.h */, + F292B07C2D88B0D200BE6839 /* llama2c_tokenizer.h */, + F292B07D2D88B0D200BE6839 /* log.h */, + F292B07F2D88B0D200BE6839 /* result.h */, + F292B0812D88B0D200BE6839 /* tiktoken.h */, + F292B0832D88B0D200BE6839 /* tokenizer.h */, + ); + path = tokenizers; + sourceTree = ""; + }; + F292B0852D88B0D200BE6839 /* pytorch */ = { + isa = PBXGroup; + children = ( + F292B0842D88B0D200BE6839 /* tokenizers */, + ); + path = pytorch; + sourceTree = ""; + }; + F292B0862D88B0D200BE6839 /* include */ = { + isa = PBXGroup; + children = ( + F292B0852D88B0D200BE6839 /* pytorch */, + ); + path = include; + sourceTree = ""; + }; /* End PBXGroup section */ /* Begin PBXHeadersBuildPhase section */ @@ -297,16 +336,20 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( - 03BADE202BD2E88600DDFDC2 /* bpe_tokenizer.h in Headers */, - 03729F172BB2043600152F2E /* 
tokenizer.h in Headers */, 03729EE22BB1F93E00152F2E /* LLaMARunner.h in Headers */, 03D03DA82C7823620088D6A7 /* text_prefiller.h in Headers */, + F292B0882D88B0D200BE6839 /* llama2c_tokenizer.h in Headers */, + F292B0892D88B0D200BE6839 /* tokenizer.h in Headers */, + F292B08B2D88B0D200BE6839 /* result.h in Headers */, + F292B08D2D88B0D200BE6839 /* error.h in Headers */, + F292B08E2D88B0D200BE6839 /* bpe_tokenizer_base.h in Headers */, + F292B08F2D88B0D200BE6839 /* log.h in Headers */, + F292B0912D88B0D200BE6839 /* tiktoken.h in Headers */, + F292B0922D88B0D200BE6839 /* base64.h in Headers */, 03D03DAC2C7823830088D6A7 /* text_decoder_runner.h in Headers */, - 03DDA0FB2BD6368100D234B3 /* base64.h in Headers */, - 03BADE242BD2EB6700DDFDC2 /* tiktoken.h in Headers */, 03729F122BB2042B00152F2E /* sampler.h in Headers */, - 038D678D2C482C1E00B88CF2 /* llama_tiktoken.h in Headers */, 03729F0C2BB203B300152F2E /* util.h in Headers */, + F292B1012D88B20C00BE6839 /* llama_tiktoken.h in Headers */, 03729F0B2BB203B300152F2E /* runner.h in Headers */, 0372C3142C89418E00CD942A /* llava_runner.h in Headers */, ); @@ -475,11 +518,12 @@ buildActionMask = 2147483647; files = ( 03729EE12BB1F93800152F2E /* LLaMARunner.mm in Sources */, - 03BADE232BD2EB6700DDFDC2 /* tiktoken.cpp in Sources */, - 038D678C2C482C1E00B88CF2 /* llama_tiktoken.cpp in Sources */, 0372C3152C89418E00CD942A /* llava_runner.cpp in Sources */, + F292B1022D88B20C00BE6839 /* llama_tiktoken.cpp in Sources */, 03D03DAB2C7823830088D6A7 /* text_decoder_runner.cpp in Sources */, - 03729F162BB2043600152F2E /* bpe_tokenizer.cpp in Sources */, + F292B0752D88B0C200BE6839 /* tiktoken.cpp in Sources */, + F292B0762D88B0C200BE6839 /* llama2c_tokenizer.cpp in Sources */, + F292B0772D88B0C200BE6839 /* bpe_tokenizer_base.cpp in Sources */, 03729F0A2BB203B300152F2E /* runner.cpp in Sources */, 03729F132BB2042B00152F2E /* sampler.cpp in Sources */, 03D03DA72C7823620088D6A7 /* text_prefiller.cpp in Sources */, diff --git 
a/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Debug.xcconfig b/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Debug.xcconfig index 2dddc6f1f7a..4db30506e82 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Debug.xcconfig +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Debug.xcconfig @@ -16,7 +16,8 @@ OTHER_LDFLAGS = $(inherited) \ // Include headers and libraries from $(TEMP_DIR)/cmake for it. HEADER_SEARCH_PATHS = $(inherited) \ $(SRCROOT)/../../../../.. \ - $(TEMP_DIR)/cmake/include + $(TEMP_DIR)/cmake/include \ + $(SRCROOT)/../../../../extension/llm/tokenizers/include LIBRARY_SEARCH_PATHS = $(inherited) \ $(TEMP_DIR)/cmake/lib diff --git a/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Release.xcconfig b/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Release.xcconfig index 2f860aa4d30..d30a2c7957b 100644 --- a/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Release.xcconfig +++ b/examples/demo-apps/apple_ios/LLaMA/LLaMA/SupportingFiles/Release.xcconfig @@ -18,7 +18,8 @@ OTHER_LDFLAGS = $(inherited) \ // Include headers and libraries from $(TEMP_DIR)/cmake for it. HEADER_SEARCH_PATHS = $(inherited) \ $(SRCROOT)/../../../../.. 
\ - $(TEMP_DIR)/cmake/include + $(TEMP_DIR)/cmake/include \ + $(SRCROOT)/../../../../extension/llm/tokenizers/include LIBRARY_SEARCH_PATHS = $(inherited) \ $(TEMP_DIR)/cmake/lib diff --git a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj index aab1c7dc0e8..9fc1d47cb22 100644 --- a/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj +++ b/extension/benchmark/apple/Benchmark/Benchmark.xcodeproj/project.pbxproj @@ -9,9 +9,7 @@ /* Begin PBXBuildFile section */ 032A73CA2CAFBA8600932D36 /* LLaMATests.mm in Sources */ = {isa = PBXBuildFile; fileRef = 032A73C82CAFBA8600932D36 /* LLaMATests.mm */; }; 032A74182CAFBB7800932D36 /* text_decoder_runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 032A73DB2CAFBB7800932D36 /* text_decoder_runner.cpp */; }; - 032A741A2CAFBB7800932D36 /* bpe_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 032A73FA2CAFBB7800932D36 /* bpe_tokenizer.cpp */; }; 032A741D2CAFBB7800932D36 /* text_prefiller.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 032A73DD2CAFBB7800932D36 /* text_prefiller.cpp */; }; - 032A741E2CAFBB7800932D36 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 032A73FE2CAFBB7800932D36 /* tiktoken.cpp */; }; 032A741F2CAFBB7800932D36 /* sampler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 032A73E62CAFBB7800932D36 /* sampler.cpp */; }; 032A74232CAFC1B300932D36 /* runner.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 032A74222CAFC1B300932D36 /* runner.cpp */; }; 032A74262CAFC34800932D36 /* llama_tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 032A74252CAFC34800932D36 /* llama_tiktoken.cpp */; }; @@ -29,6 +27,9 @@ 03F181582D7262FC0058BDF9 /* kernels_optimized in Frameworks */ = {isa = PBXBuildFile; productRef = 03F181572D7262FC0058BDF9 /* kernels_optimized */; }; 03F1815A2D7262FC0058BDF9 /* kernels_portable in Frameworks */ = {isa = PBXBuildFile; productRef = 
03F181592D7262FC0058BDF9 /* kernels_portable */; }; 03F1815C2D7262FC0058BDF9 /* kernels_quantized in Frameworks */ = {isa = PBXBuildFile; productRef = 03F1815B2D7262FC0058BDF9 /* kernels_quantized */; }; + F292B01D2D88AF3500BE6839 /* bpe_tokenizer_base.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */; }; + F292B0202D88AF3500BE6839 /* llama2c_tokenizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */; }; + F292B0212D88AF3500BE6839 /* tiktoken.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F292B01A2D88AF3500BE6839 /* tiktoken.cpp */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -55,12 +56,6 @@ 032A73DF2CAFBB7800932D36 /* util.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = util.h; sourceTree = ""; }; 032A73E52CAFBB7800932D36 /* sampler.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sampler.h; sourceTree = ""; }; 032A73E62CAFBB7800932D36 /* sampler.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = sampler.cpp; sourceTree = ""; }; - 032A73F82CAFBB7800932D36 /* base64.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = base64.h; sourceTree = ""; }; - 032A73F92CAFBB7800932D36 /* bpe_tokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = bpe_tokenizer.h; sourceTree = ""; }; - 032A73FA2CAFBB7800932D36 /* bpe_tokenizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = bpe_tokenizer.cpp; sourceTree = ""; }; - 032A73FD2CAFBB7800932D36 /* tiktoken.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tiktoken.h; sourceTree = ""; }; - 032A73FE2CAFBB7800932D36 /* tiktoken.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; path = tiktoken.cpp; sourceTree = ""; }; - 032A73FF2CAFBB7800932D36 /* tokenizer.h */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tokenizer.h; sourceTree = ""; }; 032A74212CAFC1B300932D36 /* runner.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = runner.h; path = ../../../../examples/models/llama/runner/runner.h; sourceTree = SOURCE_ROOT; }; 032A74222CAFC1B300932D36 /* runner.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = runner.cpp; path = ../../../../examples/models/llama/runner/runner.cpp; sourceTree = SOURCE_ROOT; }; 032A74242CAFC34800932D36 /* llama_tiktoken.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = llama_tiktoken.h; path = ../../../../examples/models/llama/tokenizer/llama_tiktoken.h; sourceTree = SOURCE_ROOT; }; @@ -78,6 +73,17 @@ 03B2D3792C8A515C0046936E /* GenericTests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = GenericTests.mm; sourceTree = ""; }; 03C7FA322C8AA24200E6E9AE /* Resources */ = {isa = PBXFileReference; lastKnownFileType = folder; path = Resources; sourceTree = SOURCE_ROOT; }; 03E7E6782CBDC1C900205E71 /* CoreMLTests.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = CoreMLTests.mm; sourceTree = ""; }; + F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = bpe_tokenizer_base.cpp; path = src/bpe_tokenizer_base.cpp; sourceTree = ""; }; + F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = llama2c_tokenizer.cpp; path = src/llama2c_tokenizer.cpp; sourceTree = ""; }; + F292B01A2D88AF3500BE6839 /* tiktoken.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = tiktoken.cpp; path = src/tiktoken.cpp; sourceTree = ""; }; + F292B0222D88AF4800BE6839 /* base64.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = base64.h; sourceTree = ""; }; + F292B0232D88AF4800BE6839 /* 
bpe_tokenizer_base.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = bpe_tokenizer_base.h; sourceTree = ""; }; + F292B0242D88AF4800BE6839 /* error.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = error.h; sourceTree = ""; }; + F292B0262D88AF4800BE6839 /* llama2c_tokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = llama2c_tokenizer.h; sourceTree = ""; }; + F292B0272D88AF4800BE6839 /* log.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = log.h; sourceTree = ""; }; + F292B0292D88AF4800BE6839 /* result.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = result.h; sourceTree = ""; }; + F292B02B2D88AF4800BE6839 /* tiktoken.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tiktoken.h; sourceTree = ""; }; + F292B02D2D88AF4800BE6839 /* tokenizer.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = tokenizer.h; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -111,7 +117,7 @@ children = ( 032A73E02CAFBB7800932D36 /* runner */, 032A73E92CAFBB7800932D36 /* sampler */, - 032A74022CAFBB7800932D36 /* tokenizer */, + 032A74022CAFBB7800932D36 /* tokenizers */, 032A73C82CAFBA8600932D36 /* LLaMATests.mm */, ); path = LLaMA; @@ -147,20 +153,18 @@ path = ../../../llm/sampler; sourceTree = SOURCE_ROOT; }; - 032A74022CAFBB7800932D36 /* tokenizer */ = { + 032A74022CAFBB7800932D36 /* tokenizers */ = { isa = PBXGroup; children = ( - 032A73F82CAFBB7800932D36 /* base64.h */, - 032A73F92CAFBB7800932D36 /* bpe_tokenizer.h */, - 032A73FA2CAFBB7800932D36 /* bpe_tokenizer.cpp */, + F292B0302D88AF4800BE6839 /* include */, + F292B0162D88AF3500BE6839 /* bpe_tokenizer_base.cpp */, + F292B0172D88AF3500BE6839 /* llama2c_tokenizer.cpp */, + F292B01A2D88AF3500BE6839 /* tiktoken.cpp */, 032A74242CAFC34800932D36 /* llama_tiktoken.h */, 032A74252CAFC34800932D36 /* 
llama_tiktoken.cpp */, - 032A73FD2CAFBB7800932D36 /* tiktoken.h */, - 032A73FE2CAFBB7800932D36 /* tiktoken.cpp */, - 032A73FF2CAFBB7800932D36 /* tokenizer.h */, ); - name = tokenizer; - path = ../../../llm/tokenizer; + name = tokenizers; + path = ../../../llm/tokenizers; sourceTree = SOURCE_ROOT; }; 03B0118D2CAC567900054791 /* TestUtils */ = { @@ -215,6 +219,37 @@ path = Tests; sourceTree = SOURCE_ROOT; }; + F292B02E2D88AF4800BE6839 /* tokenizers */ = { + isa = PBXGroup; + children = ( + F292B0222D88AF4800BE6839 /* base64.h */, + F292B0232D88AF4800BE6839 /* bpe_tokenizer_base.h */, + F292B0242D88AF4800BE6839 /* error.h */, + F292B0262D88AF4800BE6839 /* llama2c_tokenizer.h */, + F292B0272D88AF4800BE6839 /* log.h */, + F292B0292D88AF4800BE6839 /* result.h */, + F292B02B2D88AF4800BE6839 /* tiktoken.h */, + F292B02D2D88AF4800BE6839 /* tokenizer.h */, + ); + path = tokenizers; + sourceTree = ""; + }; + F292B02F2D88AF4800BE6839 /* pytorch */ = { + isa = PBXGroup; + children = ( + F292B02E2D88AF4800BE6839 /* tokenizers */, + ); + path = pytorch; + sourceTree = ""; + }; + F292B0302D88AF4800BE6839 /* include */ = { + isa = PBXGroup; + children = ( + F292B02F2D88AF4800BE6839 /* pytorch */, + ); + path = include; + sourceTree = ""; + }; /* End PBXGroup section */ /* Begin PBXNativeTarget section */ @@ -341,11 +376,12 @@ files = ( 03B0118E2CAC567900054791 /* DynamicTestCase.m in Sources */, 032A74182CAFBB7800932D36 /* text_decoder_runner.cpp in Sources */, - 032A741A2CAFBB7800932D36 /* bpe_tokenizer.cpp in Sources */, 032A741D2CAFBB7800932D36 /* text_prefiller.cpp in Sources */, - 032A741E2CAFBB7800932D36 /* tiktoken.cpp in Sources */, 032A741F2CAFBB7800932D36 /* sampler.cpp in Sources */, 03B011912CAD114E00054791 /* ResourceTestCase.m in Sources */, + F292B01D2D88AF3500BE6839 /* bpe_tokenizer_base.cpp in Sources */, + F292B0202D88AF3500BE6839 /* llama2c_tokenizer.cpp in Sources */, + F292B0212D88AF3500BE6839 /* tiktoken.cpp in Sources */, 03E7E6792CBDCAE900205E71 /* 
CoreMLTests.mm in Sources */, 032A74232CAFC1B300932D36 /* runner.cpp in Sources */, 03B2D37A2C8A515C0046936E /* GenericTests.mm in Sources */, diff --git a/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig b/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig index 9891a952069..0172f28b1bb 100644 --- a/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig +++ b/extension/benchmark/apple/Benchmark/Tests/Tests.xcconfig @@ -16,7 +16,8 @@ OTHER_LDFLAGS = $(inherited) \ // Include headers and libraries from $(TEMP_DIR)/cmake for it. HEADER_SEARCH_PATHS = $(inherited) \ $(SRCROOT)/../../../../.. \ - $(TEMP_DIR)/cmake/include + $(TEMP_DIR)/cmake/include \ + $(SRCROOT)/../../../../extension/llm/tokenizers/include LIBRARY_SEARCH_PATHS = $(inherited) \ $(TEMP_DIR)/cmake/lib diff --git a/extension/llm/tokenizers b/extension/llm/tokenizers index ec61ab1489e..d70f5a76055 160000 --- a/extension/llm/tokenizers +++ b/extension/llm/tokenizers @@ -1 +1 @@ -Subproject commit ec61ab1489e2d0fb6ac82b39288ce505bf8bdeca +Subproject commit d70f5a760552d8d3bb288cdd93eebde477bb6eb0