In [None]:
import os
from google.colab import userdata

# Copy the Kaggle credentials out of Colab's `userdata` store into environment
# variables of the same name. `userdata.get` is a Colab-only API: when running
# elsewhere, set KAGGLE_USERNAME and KAGGLE_KEY as appropriate for your system.
for _kaggle_var in ("KAGGLE_USERNAME", "KAGGLE_KEY"):
    os.environ[_kaggle_var] = userdata.get(_kaggle_var)

### Install dependencies

Install Keras, KerasNLP, and other dependencies.

In [None]:
# Install Keras 3 last. See https://keras.io/getting_started/ for more details.
!pip install -q -U keras-nlp
!pip install -q -U keras>=3

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m571.8/571.8 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m601.3/601.3 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m102.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m90.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m37.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m76.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m347.7/347.7 kB[0m [31m36.1 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency re

### Select a backend

Keras is a high-level, multi-framework deep learning API designed for simplicity and ease of use. Using Keras 3, you can run workflows on one of three backends: TensorFlow, JAX, or PyTorch.

For this tutorial, configure the backend for JAX.

In [None]:
# Backend selection and XLA client memory setting, applied in a single update.
os.environ.update(
    {
        # Keras backend: "jax" here; "torch" or "tensorflow" are the alternatives.
        "KERAS_BACKEND": "jax",
        # Avoid memory fragmentation on JAX backend.
        "XLA_PYTHON_CLIENT_MEM_FRACTION": "1.00",
    }
)

### Import packages

Import Keras and KerasNLP.

In [None]:
import keras
import keras_nlp

## Load Dataset

In [None]:
# !pip install datasets
!pip install huggingface_hub
from huggingface_hub import hf_hub_download



In [None]:
import json
import os

import pandas as pd
from huggingface_hub import hf_hub_download

# Hugging Face dataset repository and the parquet shard that is read below.
repo_id = "iamtarun/python_code_instructions_18k_alpaca"
PARQUET_NAME = "train-00000-of-00001-8b6e212f3e1ece96.parquet"
# Number of formatted examples kept for fine-tuning.
MAX_EXAMPLES = 1000

# Use the copy in /content when it has been uploaded there; otherwise fetch the
# shard from the Hub so the notebook also runs on a fresh runtime.
parquet_path = os.path.join("/content", PARQUET_NAME)
if not os.path.exists(parquet_path):
    # NOTE(review): assumes the shard is stored under `data/` in the dataset
    # repository — confirm against the repo's file listing.
    parquet_path = hf_hub_download(
        repo_id=repo_id,
        filename=f"data/{PARQUET_NAME}",
        repo_type="dataset",
    )
df = pd.read_parquet(parquet_path)

# Prompt layout; the inference cells further down reuse `template` as well.
template = "Instruction:\n{instruction}\n\nResponse:\n{response}"

# Slice first, then format: only the first MAX_EXAMPLES rows are ever used, so
# there is no need to build prompts for the whole frame.
subset = df.head(MAX_EXAMPLES)
data = [
    template.format(instruction=instruction, response=response)
    for instruction, response in zip(subset["instruction"], subset["output"])
]

In [None]:

# Serialise every prompt as one JSON object per line, stored under the "text" key.
jsonl_lines = (json.dumps({"text": item}) + "\n" for item in data)
with open("transformed_dataset.jsonl", "w") as file:
    file.writelines(jsonl_lines)

# Preview the first five formatted examples.
print(data[:5])

['Instruction:\nCreate a function to calculate the sum of a sequence of integers.\n\nResponse:\n# Python code\ndef sum_sequence(sequence):\n  sum = 0\n  for num in sequence:\n    sum += num\n  return sum', "Instruction:\nGenerate a Python code for crawling a website for a specific type of data.\n\nResponse:\nimport requests\nimport re\n\ndef crawl_website_for_phone_numbers(website):\n    response = requests.get(website)\n    phone_numbers = re.findall('\\d{3}-\\d{3}-\\d{4}', response.text)\n    return phone_numbers\n    \nif __name__ == '__main__':\n    print(crawl_website_for_phone_numbers('www.example.com'))", 'Instruction:\nCreate a Python list comprehension to get the squared values of a list [1, 2, 3, 5, 8, 13].\n\nResponse:\n[x*x for x in [1, 2, 3, 5, 8, 13]]', 'Instruction:\nGenerate a python script to perform this action.\n\nResponse:\ndef remove_duplicates(string): \n    result = "" \n    prev = \'\' \n\n    for char in string:\n        if char != prev: \n            result +=

## Load Model

KerasNLP provides implementations of many popular [model architectures](https://keras.io/api/keras_nlp/models/). In this tutorial, you'll create a model using `GemmaCausalLM`, an end-to-end Gemma model for causal language modeling. A causal language model predicts the next token based on previous tokens.

Create the model using the `from_preset` method:

In [None]:
# Build the causal LM from the "code_gemma_2b_en" preset, then print its summary.
# API reference: https://keras.io/api/keras_nlp/models/gemma/gemma_causal_lm/
GemmaCausalLM = keras_nlp.models.GemmaCausalLM
gemma_lm = GemmaCausalLM.from_preset("code_gemma_2b_en")
gemma_lm.summary()

Downloading from https://www.kaggle.com/api/v1/models/keras/codegemma/keras/code_gemma_2b_en/1/download/metadata.json...


100%|██████████| 142/142 [00:00<00:00, 408kB/s]


Downloading from https://www.kaggle.com/api/v1/models/keras/codegemma/keras/code_gemma_2b_en/1/download/config.json...


100%|██████████| 554/554 [00:00<00:00, 525kB/s]


Downloading from https://www.kaggle.com/api/v1/models/keras/codegemma/keras/code_gemma_2b_en/1/download/model.weights.h5...


100%|██████████| 4.67G/4.67G [02:32<00:00, 32.8MB/s]


Downloading from https://www.kaggle.com/api/v1/models/keras/codegemma/keras/code_gemma_2b_en/1/download/tokenizer.json...


100%|██████████| 401/401 [00:00<00:00, 491kB/s]


Downloading from https://www.kaggle.com/api/v1/models/keras/codegemma/keras/code_gemma_2b_en/1/download/assets/tokenizer/vocabulary.spm...


100%|██████████| 4.04M/4.04M [00:00<00:00, 9.25MB/s]


The `from_preset` method instantiates the model from a preset architecture and weights. In the code above, the string "code_gemma_2b_en" specifies the preset architecture — a CodeGemma model with 2 billion parameters.

NOTE: A Gemma model with 7 billion parameters is also available. To run the larger model in Colab, you need access to the premium GPUs available in paid plans. Alternatively, you can perform [distributed tuning on a Gemma 7B model](https://ai.google.dev/gemma/docs/distributed_tuning) on Kaggle or Google Cloud.

## Inference before fine tuning

In this section, you will query the model with various prompts to see how it responds.

In [None]:
# Fill in the instruction only; the response slot is left empty so the model's
# generation continues from "Response:".
prompt = template.format(
    response="",
    instruction="write me  code to print fibonacci series",
)
# Top-K sampler (k=5) with a fixed seed, attached to the model via compile().
sampler = keras_nlp.samplers.TopKSampler(k=5, seed=2)
gemma_lm.compile(sampler=sampler)
completion = gemma_lm.generate(prompt, max_length=256)
print(completion)

Instruction:
write me  code to print fibonacci series

Response:
function fibo(x)
{
    let a=0;
    let b=1;
    let c;
    for i = 0; i < x; i++
    {
        c=a+b;
        a=b;
        b=c;
        console.log("fibonacci series:",c);
    }

}

fibo(10);<|file_separator|>


In [None]:
# Second prompt, generated with the sampler the model was already compiled with.
prompt = template.format(
    response="",
    instruction="write me  code to fibonacii series using recrusive functions",
)
completion = gemma_lm.generate(prompt, max_length=256)
print(completion)

Instruction:
write me  code to fibonacii series using recrusive functions

Response:
<|file_separator|>


Before fine-tuning, the model answers the first prompt with JavaScript-style code instead of Python, and produces no code at all for the second prompt.

## LoRA Fine-tuning

To get better responses from the model, fine-tune the model with Low Rank Adaptation (LoRA) using the first 1,000 examples of the `iamtarun/python_code_instructions_18k_alpaca` dataset prepared above.

The LoRA rank determines the dimensionality of the trainable matrices that are added to the original weights of the LLM. It controls the expressiveness and precision of the fine-tuning adjustments.

A higher rank means more detailed changes are possible, but also means more trainable parameters. A lower rank means less computational overhead, but potentially less precise adaptation.

This tutorial uses a LoRA rank of 4. In practice, begin with a relatively small rank (such as 4, 8, 16). This is computationally efficient for experimentation. Train your model with this rank and evaluate the performance improvement on your task. Gradually increase the rank in subsequent trials and see if that further boosts performance.

In [None]:
# Enable LoRA for the model and set the LoRA rank to 4.
gemma_lm.backbone.enable_lora(rank=4)
gemma_lm.summary()

Note that enabling LoRA reduces the number of trainable parameters significantly (from 2.5 billion to 1.3 million).

In [None]:
# Cap the input sequence length at 512 (to control memory usage).
gemma_lm.preprocessor.sequence_length = 512

# AdamW is a common optimizer for transformer models.
optimizer = keras.optimizers.AdamW(learning_rate=5e-5, weight_decay=0.01)
# Layernorm and bias terms are excluded from weight decay.
optimizer.exclude_from_weight_decay(var_names=["bias", "scale"])

# Build the loss and metric up front, then hand everything to compile().
train_loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
train_metrics = [keras.metrics.SparseCategoricalAccuracy()]
gemma_lm.compile(
    optimizer=optimizer,
    loss=train_loss,
    weighted_metrics=train_metrics,
)

# One epoch over the formatted examples with a batch size of 1.
gemma_lm.fit(data, batch_size=1, epochs=1)

[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1404s[0m 1s/step - loss: 0.2607 - sparse_categorical_accuracy: 0.7913


<keras.src.callbacks.history.History at 0x7ec2b057bf40>

### Note on mixed precision fine-tuning on NVIDIA GPUs

Full precision is recommended for fine-tuning. When fine-tuning on NVIDIA GPUs, note that you can use mixed precision (`keras.mixed_precision.set_global_policy('mixed_bfloat16')`) to speed up training with minimal effect on training quality. Mixed precision fine-tuning does consume more memory so is useful only on larger GPUs.


For inference, half-precision (`keras.config.set_floatx("bfloat16")`) will work and save memory while mixed precision is not applicable.

In [None]:
# Uncomment the line below if you want to enable mixed precision training on GPUs
# keras.mixed_precision.set_global_policy('mixed_bfloat16')

## Inference after fine-tuning
After fine-tuning, responses follow the instruction provided in the prompt.

### Fibonacci Prompt

In [None]:
# Same prompt as before fine-tuning; the response slot is left empty for the model.
prompt = template.format(
    response="",
    instruction="write me  code to print fibonacci series",
)
# Re-attach a Top-K sampler (k=5, fixed seed) by compiling the model again.
sampler = keras_nlp.samplers.TopKSampler(k=5, seed=2)
gemma_lm.compile(sampler=sampler)
completion = gemma_lm.generate(prompt, max_length=256)
print(completion)

Instruction:
write me  code to print fibonacci series

Response:
a=0
b=1
print(a)
print(b)
for i in range(0,10):
    print(a+b)
    a=b
    b=a+b


In [None]:
# Second prompt, generated with the sampler the model was already compiled with.
prompt = template.format(
    response="",
    instruction="write me  code to fibonacii series using recrusive functions",
)
completion = gemma_lm.generate(prompt, max_length=256)
print(completion)

Instruction:
write me  code to fibonacii series using recrusive functions

Response:
def fib_iterative(n):
    a = 0
    b = 1

    for i in range(n):
        c = a
        a = b
        b += c 

    return a

print(fib_iterative(5))
<|file_separator|>


Note that for demonstration purposes, this tutorial fine-tunes the model on a small subset of the dataset for just one epoch and with a low LoRA rank value. To get better responses from the fine-tuned model, you can experiment with:

1. Increasing the size of the fine-tuning dataset
2. Training for more steps (epochs)
3. Setting a higher LoRA rank
4. Modifying the hyperparameter values such as `learning_rate` and `weight_decay`.