### requirements.txt :

accelerate<br>
cchardet<br>
einops<br>
evaluate<br>
fastapi<br>
fschat==0.2.35<br>
huggingface_hub<br>
intel_extension_for_pytorch==2.3.0<br>
lm-eval<br>
neural-compressor<br>
neural_speed==1.0a0<br>
numpy==1.23.5<br>
onnx>=1.15.0<br>
optimum<br>
optimum-intel<br>
peft==0.6.2<br>
pydantic==1.10.13<br>
python-dotenv<br>
python-multipart<br>
rouge_score<br>
sacremoses<br>
shortuuid<br>
starlette<br>
tensorflow>=2.13.0<br>
torch==2.3.0<br>
torchaudio==2.3.0<br>
transformers>=4.35.2<br>
transformers_stream_generator<br>
uvicorn<br>
vllm<br>
yacs<br>


### requirements_cpu.txt :

--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ <br>
accelerate==0.28.0<br>
cchardet<br>
einops<br>
evaluate<br>
fastapi<br>
fschat==0.2.32<br>
huggingface_hub<br>
intel_extension_for_pytorch==2.3.0<br>
lm-eval<br>
neural-compressor<br>
neural_speed==1.0a0<br>
numpy==1.23.5<br>
oneccl_bind_pt<br>
optimum<br>
optimum-intel<br>
peft==0.6.2<br>
pydantic==1.10.13<br>
python-dotenv<br>
python-multipart<br>
rouge_score<br>
sacremoses<br>
shortuuid<br>
starlette<br>
tiktoken==0.4.0<br>
torch==2.3.0<br>
torchaudio==2.3.0<br>
transformers>=4.35.2<br>
transformers_stream_generator<br>
uvicorn<br>
yacs<br>


### Building the chatbot on SPR (Intel Xeon "Sapphire Rapids")


In [None]:
# BF16 Optimization
!pip install intel_extension_for_transformers
from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig
from intel_extension_for_transformers.transformers import MixedPrecisionConfig
config = PipelineConfig(optimization_config=MixedPrecisionConfig())
chatbot = build_chatbot(config)

In [None]:
# Response generation: send each sample query to the chatbot built above the
# cell boundary and print the reply. `response` keeps the last reply afterwards.
queries = (
    "Tell me about Intel Xe processors.",
    "What cooling technology is used for new Intel CPUs?",
    "What are the newer processors provided by Intel?",
)
for query in queries:
    response = chatbot.predict(query=query)
    print(response)

### Single-node finetuning on SPR (Intel Xeon "Sapphire Rapids")


# Normalize inconsistent field types in the dataset before finetuning

import json


def normalize_column(records, column="column_name"):
    """Make ``column`` hold a list in every record, editing ``records`` in place.

    A value that is already a list or tuple is left untouched, a dict value is
    wrapped in a one-element list, and anything else (including a missing key,
    which reads as ``None``) is reported on stdout and left unchanged.

    Args:
        records: Iterable of dict rows; each row must support ``.get`` and
            item assignment.
        column: Key to normalize. NOTE(review): "column_name" looks like a
            placeholder -- confirm the real key used by the dataset.

    Returns:
        The indices of the rows whose value was neither list/tuple nor dict.
    """
    inconsistent_rows = []
    for idx, record in enumerate(records):
        value = record.get(column)
        if isinstance(value, (list, tuple)):
            # Already an array: nothing to fix.
            continue
        if isinstance(value, dict):
            # A lone object becomes a one-element array.
            record[column] = [value]
        else:
            print(f"Inconsistent data type at row {idx}: {value}")
            inconsistent_rows.append(idx)
    return inconsistent_rows


# Guarded so the helper can be imported without touching the filesystem; a
# notebook cell or a direct script run still executes this part, because
# __name__ is "__main__" in both cases.
if __name__ == "__main__":
    # Load the JSON file
    file_path = './reduced_alpaca_data.json'

    # Explicit UTF-8 so the result does not depend on the platform's
    # default text encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Inspect the data for inconsistencies and repair what can be repaired
    normalize_column(data)

    # Save the corrected JSON file
    corrected_file_path = './reduced_alpaca_data_corrected.json'
    with open(corrected_file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=2)

    print(f"Corrected JSON file saved to {corrected_file_path}")

# Finetuning: assemble model, data and training settings into one
# TextGenerationFinetuningConfig and hand it to finetune_model.

from transformers import TrainingArguments
from intel_extension_for_transformers.neural_chat.config import (
    ModelArguments,
    DataArguments,
    FinetuningArguments,
    TextGenerationFinetuningConfig,
)
from intel_extension_for_transformers.neural_chat.chatbot import finetune_model

# Base model to finetune.
model_args = ModelArguments(
    model_name_or_path="meta-llama/Meta-Llama-3-8B",
)

# Training data is the corrected JSON file written by the normalization step.
# NOTE(review): validation_split_percentage=1 presumably holds out 1% of the
# training file for evaluation -- confirm against DataArguments.
data_args = DataArguments(
    train_file="reduced_alpaca_data_corrected.json",
    validation_split_percentage=1,
)

training_args = TrainingArguments(
    # Where results go; an existing directory is overwritten.
    output_dir="./tmp",
    overwrite_output_dir=True,
    # Run both training and evaluation.
    do_train=True,
    do_eval=True,
    # Schedule and batch sizing.
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    # NOTE(review): save_strategy is "no", so save_total_limit presumably has
    # no effect here -- confirm whether checkpoints were meant to be kept.
    save_strategy="no",
    save_total_limit=2,
    log_level="info",
    # Train in bfloat16.
    bf16=True,
)

# Library defaults for the finetuning-specific options.
finetune_args = FinetuningArguments()

finetune_cfg = TextGenerationFinetuningConfig(
    model_args=model_args,
    data_args=data_args,
    training_args=training_args,
    finetune_args=finetune_args,
)
finetune_model(finetune_cfg)
