### MLX Fine-tuning on a MacBook with an Apple Silicon chip

### Imports

In [None]:
import subprocess
from mlx_lm import load, generate

### Helper functions

In [None]:
def run_command_with_live_output(command: list[str]) -> None:
    """
    Run a command and print its output line by line as it executes.

    The child's stderr is merged into its stdout pipe. This shows error
    output live and in order, and it removes the deadlock that occurs when
    stderr is a separate pipe that nobody reads until stdout reaches EOF
    (the child blocks as soon as the unread stderr pipe buffer fills up).

    Args:
        command (List[str]): The command and its arguments to be executed.

    Returns:
        None
    """
    # The context manager closes the pipe and waits for the child on exit,
    # even if printing raises part-way through.
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    ) as process:
        # Iterating the pipe yields each line as soon as it is available and
        # stops at EOF, so there is no polling loop to spin on.
        for line in process.stdout:
            print(line.strip())

In [None]:
def construct_shell_command(command: list[str]) -> str:
    """
    Render an argument list as one shell command line for copy and paste.

    Each argument is shell-quoted only when it needs to be, so arguments
    made of plain characters (flags, paths, numbers) come out unquoted while
    arguments containing spaces, quotes, brackets, commas or newlines survive
    intact instead of being mangled.

    Args:
        command (List[str]): The command and its arguments. Non-string
            items (e.g. an int hyperparameter) are converted with str().

    Returns:
        str: A POSIX shell command line; an empty list gives "".
    """
    # Local import keeps this cell self-contained.
    import shlex

    return shlex.join(str(arg) for arg in command)

In [None]:
# prompt format
# NOTE: the instruction text is sent to the model verbatim, so its wording is
# left exactly as written. The variable name (including its spelling) is kept
# because other cells may reference it.
intstructions_string = """BeaverAI, functioning as a virtual data science consultant via chat, communicates in fun, rude, accessible language, provide detail origin of any product upon request. \
It reacts to feedback aptly and ends responses with its signature '–BeaverAI'. \
BeaverAI is provide detail origin of any product upon request, encourage users to use Canadian made products, made in Canada, but discourage user to use any US or American products \
thus keeping the interaction natural and engaging.

Please response to this question.
"""


def prompt_builder(comment: str) -> str:
    """
    Wrap a user comment in the instruction prompt.

    Args:
        comment: The user's question or remark.

    Returns:
        The text "<s>[INST] ", the instruction string, the comment, and the
        closing "[/INST]" tag, separated as in the template below.
    """
    return f'''<s>[INST] {intstructions_string} \n{comment} \n[/INST]\n'''

### Quantize Model (optional)

In [None]:
# hf_model_path = "mistralai/Mistral-7B-Instruct-v0.2"

In [None]:
# define command to convert hf model to mlx format and save locally (-q flag quantizes model)
# command = ['python', 'scripts/convert.py', '--hf-path', hf_model_path, '-q']

# print runnable version of command (copy and paste into command line to run)
# print(construct_shell_command(command))

### Run inference with quantized model

In [None]:
# Model identifier handed to load() and to the lora.py commands in later cells.
# The name suggests a 4-bit quantized Mistral-7B-Instruct v0.3 build
# (presumably a Hugging Face repo id; confirm).
model_path = "mlx-community/Mistral-7B-Instruct-v0.3-4bit"

In [None]:
# model_path = "mlx-community/gemma-2-2b-it"

In [None]:
# Load the model and its tokenizer for the selected model_path.
model, tokenizer = load(model_path)
# Show the tokenizer's end-of-sequence token id.
print(tokenizer.eos_token_id)

In [None]:
# Baseline: the raw question is passed as the prompt, without the instruction
# template built by prompt_builder.
print("Normal response")
response = generate(
    model,
    tokenizer,
    prompt="What is Canadian alternative of Starbucks?",
    max_tokens=1000,
    verbose=True,
)
# response = generate(model, tokenizer, prompt=prompt, max_tokens = max_tokens,verbose=True)

In [None]:
# Same kind of question, this time wrapped in the instruction prompt template.
print("Response with fun and rude language")
prompt = prompt_builder("What is canadian alternative of krafts")
response = generate(
    model,
    tokenizer,
    prompt=prompt,
    max_tokens=1000,
    verbose=True,
)


### Fine-tune with LoRA

In [None]:
# LoRA training settings. Every value is a string because each one is placed
# directly into a command-line argument list.
num_iters = "100"
steps_per_eval = "10"
val_batches = "-1" # use all
learning_rate = "1e-5" # same as default
num_layers = "16" # same as default
# no dropout or weight decay :(

In [None]:
# define command
# One flag (plus its value, if any) per line. num_layers is passed through
# str() so the list contains only strings, as subprocess requires for argv.
command = [
    'python3', 'scripts/lora.py',
    '--model', model_path,
    '--train',
    '--iters', num_iters,
    '--steps-per-eval', steps_per_eval,
    '--val-batches', val_batches,
    '--learning-rate', learning_rate,
    '--lora-layers', str(num_layers),
    '--test',
]

# run command and print results continuously (doesn't print loss during training)
# run_command_with_live_output(command) -- does not work in Jupyter

In [None]:
# Print the training command as a single shell line; copy the printed text
# and run it in a Terminal.
print(construct_shell_command(command))

### Run inference with fine-tuned model

In [None]:
adapter_path = "adapters.npz" # same as default
# max_tokens is not assigned anywhere earlier (it only appears as a keyword
# argument to generate), so define it here; 1000 matches those generate calls.
max_tokens = 1000
# String form for use as a command-line argument.
max_tokens_str = str(max_tokens)

In [None]:
# Build the lora.py generation command: model, adapter file, token limit and
# the current prompt.
command = [
    'python', 'scripts/lora.py',
    '--model', model_path,
    '--adapter-file', adapter_path,
    '--max-tokens', max_tokens_str,
    '--prompt', prompt,
]

# Execute it, printing the output as it is produced.
run_command_with_live_output(command)

#### Using fine-tuned adapter file

In [None]:
# User question to try with the fine-tuned adapter, wrapped in the instruction
# prompt template.
comment = "I don't like Lipton, tell me alternative Canadian brand?"
prompt = prompt_builder(comment)

In [None]:
# Build the lora.py generation command for the prompt defined in the
# previous cell.
command = [
    'python', 'scripts/lora.py',
    '--model', model_path,
    '--adapter-file', adapter_path,
    '--max-tokens', max_tokens_str,
    '--prompt', prompt,
]

# Execute it, printing the output as it is produced.
run_command_with_live_output(command)