Fix abi #877

Workflow file for this run

.github/workflows/causal_lm_cpp.yml at 0532cf2

	# Workflow header: runs on pull requests that touch this workflow file, the
	# C++ causal LM samples, or the openvino_tokenizers submodule path.
	name: causal_lm_cpp
	on:
	  pull_request:
	    paths:
	      - .github/workflows/causal_lm_cpp.yml
	      - text_generation/causal_lm/cpp/*
	      - thirdparty/openvino_tokenizers
	      # Negated pattern: Markdown files are excluded from the path filter.
	      - '!**.md'
	# One concurrency group per workflow + ref; a newer run cancels the one in progress.
	concurrency:
	  group: ${{ github.workflow }}-${{ github.ref }}
	  cancel-in-progress: true

	jobs:
	# Greedy decoding smoke test: exports open_llama_3b_v2 to OpenVINO fp16,
	# builds the samples and runs greedy_causal_lm with the prompt "return 0".
	cpp-greedy_causal_lm-ubuntu:
	  runs-on: ubuntu-20.04-8-cores
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - uses: actions/setup-python@v4
	      with:
	        # Quoted so the version is read as a string, not a float.
	        python-version: '3.8'
	    - name: Install OpenVINO
	      # Streams the OpenVINO archive straight into tar and unpacks it under ./ov/.
	      run: |
	        mkdir ./ov/
	        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
	        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
	    - name: Download, convert and build
	      # Installs Python requirements, exports the model and builds in Release mode.
	      # apt-get uses -y so the install never waits for confirmation.
	      run: |
	        source ./ov/setupvars.sh
	        python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt
	        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
	        sudo apt-get install -y libtbb-dev
	        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model openlm-research/open_llama_3b_v2 open_llama_3b_v2
	        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
	        cmake --build ./build/ --config Release -j
	    - name: greedy_causal_lm
	      # Only the exit status of the sample is checked; its output is not compared.
	      run: |
	        source ./ov/setupvars.sh
	        ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./open_llama_3b_v2/ "return 0"

	# Beam search accuracy test on Ubuntu: runs beam_search_causal_lm on TinyLlama
	# for several prompts and checks that every beam produced by the Hugging Face
	# reference generation appears in the sample's output.
	cpp-beam_search_causal_lm-ubuntu:
	  runs-on: ubuntu-20.04
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - uses: actions/setup-python@v4
	      with:
	        # Quoted so the version is read as a string, not a float.
	        python-version: '3.8'
	    - name: Install OpenVINO
	      # Streams the OpenVINO archive straight into tar and unpacks it under ./ov/.
	      run: |
	        mkdir ./ov/
	        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
	        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
	    - name: Download, convert and build
	      # Installs Python requirements, exports the model and builds in Release mode.
	      # apt-get uses -y so the install never waits for confirmation.
	      run: |
	        source ./ov/setupvars.sh
	        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
	        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
	        sudo apt-get install -y libtbb-dev
	        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
	        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
	        cmake --build ./build/ --config Release -j
	    - name: Compare
	      # Each check writes the sample output to pred.txt, regenerates the beams with
	      # transformers, and removes every reference beam (prefixed with ': ') from the
	      # predictions text; a beam that cannot be found raises RuntimeError.
	      # The tokenizer output is unpacked into generate() as keyword arguments.
	      # The single-prompt Chinese run is the only one without a `timeout` wrapper.
	      run: |
	        source ./ov/setupvars.sh

	        timeout 25s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?" > ./pred.txt
	        python -c "
	        import transformers
	        with open('pred.txt', 'r') as file:
	            predictions = file.read()
	        tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
	        tokenized = tokenizer('Why is the Sun yellow?', return_tensors='pt')
	        for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=99, do_sample=False):
	            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
	            idx = predictions.find(ref)
	            if -1 == idx:
	                raise RuntimeError(f'Missing "{ref=}" from predictions')
	            predictions = predictions[:idx] + predictions[idx + len(ref):]
	        "
	        echo "Why is the Sun yellow?" passed

	        timeout 25s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt
	        python -c "
	        import transformers
	        with open('pred.txt', 'r') as file:
	            predictions = file.read()
	        tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
	        tokenized = tokenizer('69', return_tensors='pt')
	        for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=99, do_sample=False):
	            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
	            idx = predictions.find(ref)
	            if -1 == idx:
	                raise RuntimeError(f'Missing "{ref=}" from predictions')
	            predictions = predictions[:idx] + predictions[idx + len(ref):]
	        "
	        echo "69" passed

	        timeout 25s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ Hi > ./pred.txt
	        python -c "
	        import transformers
	        with open('pred.txt', 'r') as file:
	            predictions = file.read()
	        tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
	        tokenized = tokenizer('Hi', return_tensors='pt')
	        for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=99, do_sample=False):
	            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
	            idx = predictions.find(ref)
	            if -1 == idx:
	                raise RuntimeError(f'Missing "{ref=}" from predictions')
	            predictions = predictions[:idx] + predictions[idx + len(ref):]
	        "
	        echo "Hi" passed

	        timeout 25s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "return 0" > ./pred.txt
	        python -c "
	        import transformers
	        with open('pred.txt', 'r') as file:
	            predictions = file.read()
	        tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
	        tokenized = tokenizer('return 0', return_tensors='pt')
	        for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=99, do_sample=False):
	            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
	            idx = predictions.find(ref)
	            if -1 == idx:
	                raise RuntimeError(f'Missing "{ref=}" from predictions')
	            predictions = predictions[:idx] + predictions[idx + len(ref):]
	        "
	        echo "return 0" passed

	        ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "你好！你好嗎？" > ./pred.txt
	        python -c "
	        import transformers
	        with open('pred.txt', 'r') as file:
	            predictions = file.read()
	        tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
	        tokenized = tokenizer('你好！你好嗎？', return_tensors='pt')
	        for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=99, do_sample=False):
	            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
	            idx = predictions.find(ref)
	            if -1 == idx:
	                raise RuntimeError(f'Missing "{ref=}" from predictions')
	            predictions = predictions[:idx] + predictions[idx + len(ref):]
	        "
	        echo "你好！你好嗎？" passed

	        timeout 1m ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Alan Turing was a" "return 0" "你好！你好嗎？" > ./pred.txt
	        python -c "
	        import transformers
	        with open('pred.txt', 'r') as file:
	            predictions = file.read()
	        tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
	        prompts = [
	            'Alan Turing was a',
	            'return 0',
	            '你好！你好嗎？'
	        ]
	        for prompt in prompts:
	            tokenized = tokenizer(prompt, return_tensors='pt')
	            for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=99, do_sample=False):
	                ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
	                idx = predictions.find(ref)
	                if -1 == idx:
	                    raise RuntimeError(f'Missing "{ref=}" from predictions')
	                predictions = predictions[:idx] + predictions[idx + len(ref):]
	        "
	        echo "Multi prompt" passed
	# Beam search accuracy test on Windows: runs beam_search_causal_lm.exe on
	# TinyLlama with the prompt "69" and checks its output against the beams
	# generated by transformers.
	cpp-beam_search_causal_lm-windows:
	  runs-on: windows-latest
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - uses: actions/setup-python@v4
	      with:
	        # Quoted so the version is read as a string, not a float.
	        python-version: '3.8'
	    - name: Install OpenVINO
	      shell: bash
	      # Downloads the OpenVINO Windows archive and unpacks it in the workspace.
	      run: |
	        curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/windows/w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64.zip
	        unzip ov.zip
	    - name: Download, convert and build
	      shell: cmd
	      # Installs Python requirements, exports the model and builds in Release mode.
	      run: |
	        call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat
	        python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt
	        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
	        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
	        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
	        cmake --build ./build/ --config Release -j
	    - name: Compare
	      shell: cmd
	      # The reference script ref.py is assembled line by line with `echo`; the
	      # extra spaces after `echo` are the Python indentation of the loop body.
	      # The tokenizer output is unpacked into generate() as keyword arguments.
	      run: |
	        call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat
	        set PATH=.\build\openvino_genai\;%PATH%
	        .\build\text_generation\causal_lm\cpp\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt

	        echo import transformers > ref.py
	        echo predictions = open('pred.txt', 'r').read() >> ref.py
	        echo tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') >> ref.py
	        echo tokenized = tokenizer('69', return_tensors='pt') >> ref.py
	        echo for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=99, do_sample=False): >> ref.py
	        echo     ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) >> ref.py
	        echo     idx = predictions.find(ref) >> ref.py
	        echo     if -1 == idx: >> ref.py
	        echo         raise RuntimeError(f'Missing "{ref=}" from predictions') >> ref.py
	        echo     predictions = predictions[:idx] + predictions[idx + len(ref):] >> ref.py
	        python ref.py

	# Beam search smoke test for Qwen-7B-Chat: exports the model, builds the
	# samples and runs beam_search_causal_lm with the prompt "69".
	cpp-beam_search_causal_lm-Qwen-7B-Chat:
	  runs-on: ubuntu-20.04-16-cores
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - uses: actions/setup-python@v4
	      with:
	        # Quoted so the version is read as a string, not a float.
	        python-version: '3.8'
	    - name: Install OpenVINO
	      # Streams the OpenVINO archive straight into tar and unpacks it under ./ov/.
	      run: |
	        mkdir ./ov/
	        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
	        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
	    - name: Download, convert and build
	      # Installs Python requirements, exports the model and builds in Release mode.
	      # apt-get uses -y so the install never waits for confirmation.
	      run: |
	        source ./ov/setupvars.sh
	        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
	        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
	        sudo apt-get install -y libtbb-dev
	        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat
	        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
	        cmake --build ./build/ --config Release -j
	    - name: Compare
	      # No reference comparison happens here: the step only requires the sample
	      # to exit successfully within 50 seconds; output goes to pred.txt.
	      run: |
	        source ./ov/setupvars.sh
	        timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./Qwen-7B-Chat/ 69 > ./pred.txt

	# Beam search smoke test for Qwen1.5-7B-Chat: exports the model, builds the
	# samples and runs beam_search_causal_lm with a Chinese prompt.
	cpp-beam_search_causal_lm-Qwen1_5-7B-Chat:
	  runs-on: ubuntu-20.04-16-cores
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - uses: actions/setup-python@v4
	      with:
	        # Quoted so the version is read as a string, not a float.
	        python-version: '3.8'
	    - name: Install OpenVINO
	      # Streams the OpenVINO archive straight into tar and unpacks it under ./ov/.
	      run: |
	        mkdir ./ov/
	        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
	        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
	    - name: Download, convert and build
	      # Installs Python requirements, exports the model and builds in Release mode.
	      # apt-get uses -y so the install never waits for confirmation.
	      run: |
	        source ./ov/setupvars.sh
	        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
	        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
	        sudo apt-get install -y libtbb-dev
	        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen1.5-7B-Chat Qwen1.5-7B-Chat
	        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
	        cmake --build ./build/ --config Release -j
	    - name: Run
	      # Requires the sample to exit successfully within 50 seconds; the output
	      # is written to pred_qwen15.txt and not inspected further.
	      run: |
	        source ./ov/setupvars.sh
	        timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./Qwen1.5-7B-Chat/ "你好！" > ./pred_qwen15.txt

	# Beam search smoke test for phi-2: exports the model, builds the samples
	# and runs beam_search_causal_lm with the prompt "69".
	cpp-beam_search_causal_lm-Phi-2:
	  runs-on: ubuntu-20.04-16-cores
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - uses: actions/setup-python@v4
	      with:
	        # Quoted so the version is read as a string, not a float.
	        python-version: '3.8'
	    - name: Install OpenVINO
	      # Streams the OpenVINO archive straight into tar and unpacks it under ./ov/.
	      run: |
	        mkdir ./ov/
	        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
	        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
	    - name: Download, convert and build
	      # Installs Python requirements, exports the model and builds in Release mode
	      # with 15 parallel build jobs.
	      # apt-get uses -y so the install never waits for confirmation.
	      run: |
	        source ./ov/setupvars.sh
	        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
	        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
	        sudo apt-get install -y libtbb-dev
	        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-2 phi-2
	        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
	        cmake --build ./build/ --config Release -j 15
	    - name: Compare
	      # No reference comparison happens here: the step only requires the sample
	      # to exit successfully within 50 seconds; output goes to pred.txt.
	      run: |
	        source ./ov/setupvars.sh
	        timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./phi-2/ 69 > ./pred.txt

	# Beam search smoke test for notus-7b-v1: exports the model, builds the
	# samples and runs beam_search_causal_lm with the prompt "69".
	cpp-beam_search_causal_lm-notus-7b-v1:
	  runs-on: ubuntu-20.04-16-cores
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - uses: actions/setup-python@v4
	      with:
	        # Quoted so the version is read as a string, not a float.
	        python-version: '3.8'
	    - name: Install OpenVINO
	      # Streams the OpenVINO archive straight into tar and unpacks it under ./ov/.
	      run: |
	        mkdir ./ov/
	        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
	        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
	    - name: Download, convert and build
	      # Installs Python requirements, exports the model and builds in Release mode.
	      # apt-get uses -y so the install never waits for confirmation.
	      run: |
	        source ./ov/setupvars.sh
	        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
	        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
	        sudo apt-get install -y libtbb-dev
	        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model argilla/notus-7b-v1 notus-7b-v1
	        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
	        cmake --build ./build/ --config Release -j
	    - name: Compare
	      # No reference comparison happens here: the step only requires the sample
	      # to exit successfully within 50 seconds; output goes to pred.txt.
	      run: |
	        source ./ov/setupvars.sh
	        timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./notus-7b-v1/ 69 > ./pred.txt

	# Speculative decoding test: runs speculative_decoding_lm with dolly-v2-3b and
	# dolly-v2-7b and checks that the first output line equals the first output
	# line of greedy_causal_lm run on dolly-v2-7b alone.
	cpp-speculative_decoding_lm-ubuntu:
	  runs-on: ubuntu-20.04-16-cores
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - uses: actions/setup-python@v4
	      with:
	        # Quoted so the version is read as a string, not a float.
	        python-version: '3.8'
	    - name: Install OpenVINO
	      # Streams the OpenVINO archive straight into tar and unpacks it under ./ov/.
	      run: |
	        mkdir ./ov/
	        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
	        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
	    - name: Download, convert and build
	      # Installs Python requirements, exports both models and builds in Release mode.
	      # apt-get uses -y so the install never waits for confirmation.
	      run: |
	        source ./ov/setupvars.sh
	        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
	        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
	        sudo apt-get install -y libtbb-dev
	        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b
	        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b
	        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
	        cmake --build ./build/ --config Release -j
	    - name: run and compare
	      # Only the first line of each prediction file is compared (readline()).
	      run: |
	        source ./ov/setupvars.sh
	        ./build/text_generation/causal_lm/cpp/speculative_decoding_lm ./dolly-v2-3b/ ./dolly-v2-7b/ "Alan Turing was a" > predictions_speculative.txt
	        ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./dolly-v2-7b/ "Alan Turing was a" > predictions_greedy.txt
	        python -c "
	        with open('predictions_greedy.txt', 'r') as f:
	            predicted_greedy = f.readline()
	        with open('predictions_speculative.txt', 'r') as f:
	            predicted_speculative = f.readline()
	        assert predicted_greedy == predicted_speculative
	        "
	        echo "Alan Turing was a" passed


	# Prompt lookup decoding test: runs prompt_lookup_decoding_lm and
	# greedy_causal_lm on TinyLlama with the same prompt file and checks that the
	# first line of both outputs is identical.
	cpp-prompt_lookup_decoding_lm-ubuntu:
	  runs-on: ubuntu-20.04-16-cores
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - uses: actions/setup-python@v4
	      with:
	        # Quoted so the version is read as a string, not a float.
	        python-version: '3.8'
	    - name: Install OpenVINO
	      # Streams the OpenVINO archive straight into tar and unpacks it under ./ov/.
	      run: |
	        mkdir ./ov/
	        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
	        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
	    - name: Download, convert and build
	      # Installs Python requirements, exports the model and builds in Release mode.
	      # apt-get uses -y so the install never waits for confirmation.
	      run: |
	        source ./ov/setupvars.sh
	        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
	        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
	        sudo apt-get install -y libtbb-dev
	        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
	        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
	        cmake --build ./build/ --config Release -j
	    - name: run and compare
	      # The multi-line prompt is written to prompt.txt and passed to both samples
	      # via "$(<prompt.txt)"; only the first output line of each is compared.
	      run: |
	        source ./ov/setupvars.sh

	        echo 'Code:```python
	        def add(a, b):
	            return a + b
	        ```
	        Question: Can you please add 2 and 3
	        A:' > ./prompt.txt

	        ./build/text_generation/causal_lm/cpp/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
	        ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_greedy.txt
	        python -c "
	        with open('predictions_greedy.txt', 'r') as f:
	            predicted_greedy = f.readline()
	        with open('predictions_prompt_lookup.txt', 'r') as f:
	            predicted_prompt_lookup = f.readline()
	        assert predicted_greedy == predicted_prompt_lookup
	        "
	        echo "Prompt lookup" passed

	# phi-1_5 test: runs greedy_causal_lm and beam_search_causal_lm on phi-1_5 and
	# checks the greedy output against the transformers reference generation.
	cpp-Phi-1_5:
	  runs-on: ubuntu-20.04-16-cores
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - uses: actions/setup-python@v4
	      with:
	        # Quoted so the version is read as a string, not a float.
	        python-version: '3.8'
	    - name: Install OpenVINO
	      # Streams the OpenVINO archive straight into tar and unpacks it under ./ov/.
	      run: |
	        mkdir ./ov/
	        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
	        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
	    - name: Download, convert and build
	      # Installs Python requirements, exports the model and builds in Release mode
	      # with 15 parallel build jobs.
	      # apt-get uses -y so the install never waits for confirmation.
	      run: |
	        source ./ov/setupvars.sh
	        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
	        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
	        sudo apt-get install -y libtbb-dev
	        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-1_5 phi-1_5
	        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
	        cmake --build ./build/ --config Release -j 15
	    - name: Run Generation
	      # Both samples must finish within 50 seconds each.
	      run: |
	        source ./ov/setupvars.sh
	        timeout 50s ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_greedy.txt
	        timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_beam.txt
	    - name: Compare
	      # Only pred_greedy.txt is checked against the reference; pred_beam.txt is
	      # produced by the previous step but not read here.
	      run: |
	        python -c "
	        import transformers
	        with open('pred_greedy.txt', 'r') as file:
	            predictions = file.read()
	        tokenizer = transformers.AutoTokenizer.from_pretrained('microsoft/phi-1_5')
	        tokenized = tokenizer('Alan Turing was a', return_tensors='pt')
	        for output in transformers.AutoModelForCausalLM.from_pretrained('microsoft/phi-1_5').generate(**tokenized, max_length=100, do_sample=False):
	            ref = tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True)
	            idx = predictions.find(ref)
	            if -1 == idx:
	                raise RuntimeError(f'Missing "{ref=}" from predictions')
	            predictions = predictions[:idx] + predictions[idx + len(ref):]
	        "
	        echo Phi-1_5 passed

	# redpajama-3b-chat test: runs greedy_causal_lm on the exported model and
	# checks its output against the transformers reference generation.
	cpp-greedy_causal_lm-redpajama-3b-chat:
	  runs-on: ubuntu-20.04-4-cores
	  steps:
	    - uses: actions/checkout@v4
	      with:
	        submodules: recursive
	    - uses: actions/setup-python@v4
	      with:
	        # Quoted so the version is read as a string, not a float.
	        python-version: '3.8'
	    - name: Install OpenVINO
	      # Streams the OpenVINO archive straight into tar and unpacks it under ./ov/.
	      run: |
	        mkdir ./ov/
	        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
	        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
	    - name: Download, convert and build
	      # Installs Python requirements, exports the model and builds in Release mode.
	      # apt-get uses -y so the install never waits for confirmation.
	      run: |
	        source ./ov/setupvars.sh
	        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
	        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
	        sudo apt-get install -y libtbb-dev
	        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model ikala/redpajama-3b-chat redpajama-3b-chat
	        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
	        cmake --build ./build/ --config Release -j
	    # Runs convert_tokenizer with --with-detokenizer, writing into the model directory.
	    - run: source ./ov/setupvars.sh && convert_tokenizer ./redpajama-3b-chat/ --output ./redpajama-3b-chat/ --with-detokenizer --trust-remote-code
	    - name: Run Generation
	      # The sample must finish within 50 seconds.
	      run: |
	        source ./ov/setupvars.sh
	        timeout 50s ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./redpajama-3b-chat/ "Alan Turing was a" > ./pred_greedy.txt
	    - name: Compare
	      # The reference continuation must be found in pred_greedy.txt, otherwise
	      # RuntimeError is raised.
	      run: |
	        python -c "
	        import transformers
	        with open('pred_greedy.txt', 'r') as file:
	            predictions = file.read()
	        tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
	        tokenized = tokenizer('Alan Turing was a', return_tensors='pt')
	        for output in transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, max_length=100, do_sample=False):
	            ref = tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True)
	            idx = predictions.find(ref)
	            if -1 == idx:
	                raise RuntimeError(f'Missing "{ref}" from predictions')
	            predictions = predictions[:idx] + predictions[idx + len(ref):]
	        "
	        echo "Alan Turing was a" passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Fix abi #877

Workflow file

Fix abi #877

Jobs

Run details

Workflow file for this run