In [1]:
import pandas as pd 

# Load the dataset; each row's `data` column holds one "Instruction: ..." text example.
# NOTE(review): the "Unnamed: 0" column shown in the output looks like a row index that was
# written into the CSV when it was saved; index_col=0 would likely absorb it — confirm.
df = pd.read_csv('python_50k.csv')
df.head()

Unnamed: 0,data
0,Instruction:\nHelp me set up my daily to-do li...
1,Instruction:\nCreate a shopping list based on ...
2,Instruction:\nCalculate how much time I spend ...
3,Instruction:\nHelp me split the bill among my ...
4,Instruction:\nOrganize my movie list into genr...


In [2]:
import random

# Flatten the `data` column into a plain Python list, one text example per element.
data = df.loc[:, 'data'].tolist()

# Print one randomly chosen example to inspect the Instruction/Response layout.
# No seed is set, so a different example appears on every run.
print(random.choice(data))

Instruction:

```
class First(object):
    def __init__(self):
        print("first")

class Second(First):
    def __init__(self):
        print("second")

class Third(First, Second):
    def __init__(self):
        print("third")

```

[Source](https://stackoverflow.com/a/3277407)


Why can't Python create a consistent MRO? It seems to me it's pretty clear:


1. Search in First if method does not exist in Third
2. Search in Second if method does not exist in First


But if you try it out:



```
TypeError: Error when calling the metaclass bases
    Cannot create a consistent method resolution
order (MRO) for bases First, Second

```


Response:
The reason why Python cannot create a consistent method resolution order (MRO) for this example is because the diamond-shaped inheritance relationship between First, Second, and Third violates the C3 linearization algorithm, which is used to determine the MRO.

In this case, since Third inherits from both First and Second, and Second also inhe

In [3]:
import os

# Both settings are written to the process environment in one call, ahead of the
# keras / keras_nlp imports in the following cells.
os.environ.update({
    "KERAS_BACKEND": "jax",  # or torch or tensorflow
    # Fraction of accelerator memory the XLA client may preallocate (1.00 = all of it).
    "XLA_PYTHON_CLIENT_MEM_FRACTION": "1.00",
})

In [None]:
import keras_nlp

# Instantiate the Gemma causal language model from the "gemma_2b_en" preset.
# NOTE(review): presumably this downloads the preset weights on first use — confirm
# that the environment has access to them.
gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset("gemma_2b_en")

In [None]:
# Show the model summary before LoRA is enabled (LoRA is switched on in the next cell).
gemma_lm.summary()

In [None]:
# Enable LoRA on the backbone with rank 4, so fine-tuning trains the low-rank adapters.
gemma_lm.backbone.enable_lora(rank=4)

In [None]:
def split_data(data, num_splits: int, split_size: int):
    """
    Cut `data` into `num_splits` consecutive slices of `split_size` elements.

    Slice ``i`` (0-based) is ``data[i*split_size:(i+1)*split_size]``. Slicing
    never raises on short input, so when `data` holds fewer than
    ``num_splits * split_size`` elements the trailing slices are shorter or
    empty. When it holds more, the surplus elements belong to no split and a
    warning line is printed. The length of every split is printed as well.

    Args:
        data: Sliceable, sized sequence (e.g. a list) to be split.
        num_splits (int): Number of slices to produce; must be >= 0.
        split_size (int): Maximum number of elements per slice; must be >= 1.

    Returns:
        list: `num_splits` slices of `data`, in their original order.

    Raises:
        ValueError: If `num_splits` is negative or `split_size` is not positive.

    Example usage:
        splits = split_data(list(range(1, 50001)), num_splits=5, split_size=10000)
        # -> 5 lists with 10000 elements each.
    """
    # A zero or negative split_size would turn the slice bounds into nonsense
    # (e.g. data[0:-1]) instead of failing, so reject it up front.
    if num_splits < 0:
        raise ValueError(f'num_splits must be non-negative, got {num_splits}')
    if split_size < 1:
        raise ValueError(f'split_size must be positive, got {split_size}')

    splits = [data[i*split_size:(i+1)*split_size] for i in range(num_splits)]

    for i, split in enumerate(splits, start=1):
        print(f'Length from split {i}: {len(split)}')

    # Make silently dropped data visible to the caller.
    leftover = len(data) - num_splits * split_size
    if leftover > 0:
        print(f'Warning: {leftover} trailing elements are not part of any split')

    return splits

In [None]:
# Cut the examples into 5 consecutive chunks of 10000 (5 x 10000 = 50000 elements covered).
# NOTE(review): presumably this covers the whole dataset, going by the "50k" file name —
# check the printed split lengths to confirm.
splits = split_data(data=data, num_splits=5, split_size=10000)

In [None]:
# Drop the name for the full list now that `splits` exists. The slices in `splits` are
# separate lists that still reference the same elements (and `df` is still alive), so
# this releases only the original list object.
del data

In [None]:
import keras

# Limit the input sequence length to 256 (to control memory usage).
gemma_lm.preprocessor.sequence_length = 256
# Use AdamW (a common optimizer for transformer models).
optimizer = keras.optimizers.AdamW(
    learning_rate=5e-6,
    weight_decay=0.01,
)
# Exclude layernorm and bias terms from decay.
optimizer.exclude_from_weight_decay(var_names=["bias", "scale"])

# from_logits=True: the loss is told that the model outputs are raw logits.
gemma_lm.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=optimizer,
    weighted_metrics=[keras.metrics.SparseCategoricalAccuracy()],
)
# Fine-tune on the first split only, for a single epoch, one example per batch.
gemma_lm.fit(splits[0], epochs=1, batch_size=1)

In [None]:
# Print one random example from the first split.
# NOTE(review): despite the name, this is drawn from splits[0] — the same split passed to
# fit() — so it is not held-out data; use a later split if unseen data is intended.
test_example = random.choice(splits[0])
print(test_example)

In [None]:
# Build a prompt in the same "Instruction: / Response:" layout as the dataset examples,
# leaving the response part empty so the model continues from there.
instruction = "Write a Python script to check whether a given number is in between two given values num = 10, lowerBound = 9, upperBound = 11"
response = ""
prompt = f"Instruction:\n{instruction}\n\nResponse:\n{response}"

# Generate with max_length=512, then print the result.
completion = gemma_lm.generate(prompt, max_length=512)
print(completion)

In [None]:
# Save the fine-tuned model to "version_finetuned.keras" (a relative path, so it is
# written to the current working directory).
gemma_lm.save("version_finetuned.keras")