In [23]:
!pip install gradio
!pip install transformers
!pip install keras-nlp
!pip install --upgrade kagglehub


Collecting kagglehub
  Downloading kagglehub-0.3.4-py3-none-any.whl.metadata (22 kB)
Downloading kagglehub-0.3.4-py3-none-any.whl (43 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 43.0/43.0 kB 2.3 MB/s eta 0:00:00
Installing collected packages: kagglehub
  Attempting uninstall: kagglehub
    Found existing installation: kagglehub 0.3.3
    Uninstalling kagglehub-0.3.3:
      Successfully uninstalled kagglehub-0.3.3
Successfully installed kagglehub-0.3.4


In [24]:
import kagglehub

# Kaggle Models handle of the fine-tuned checkpoint to fetch
model_handle = "oluidiakhoa/bert/keras/finetuneolu_gemma2_c10"

# Fetch the model files through kagglehub; `path` receives the local directory
# that model_download() returns
path = kagglehub.model_download(model_handle)

# Show where the files ended up
print("Path to model files:", path)



Path to model files: /root/.cache/kagglehub/models/oluidiakhoa/bert/keras/finetuneolu_gemma2_c10/2


In [1]:
import gradio as gr
import keras_nlp
from keras_nlp.models import GemmaCausalLM
from keras_nlp.samplers import TopKSampler
from tensorflow.keras import mixed_precision
import os

# Optional: Disable GPU if necessary due to memory constraints
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# Enable mixed precision to save memory
mixed_precision.set_global_policy("mixed_float16")

# Prompt template: the instruction slot is filled with the user's question and
# the response slot is left empty so the model continues after "Response:"
template = "Instruction:\n{instruction}\n\nResponse:\n{response}"

# Local directory holding the fine-tuned Gemma preset.
# NOTE(review): this is a hard-coded kagglehub cache location including a
# version suffix ("/2"); it must match what the download step produced.
model_path = "/root/.cache/kagglehub/models/oluidiakhoa/bert/keras/finetuneolu_gemma2_c10/2"

# Load the Gemma model
try:
    finetuned_model = GemmaCausalLM.from_preset(model_path)
except Exception as e:
    print("Error loading model:", str(e))
    # Re-raise instead of calling exit(): inside a notebook kernel exit() asks
    # the kernel to shut down and hides the traceback, whereas re-raising stops
    # this cell with the full error and keeps the session alive for debugging.
    raise

# Set up a Top-K Sampler to control the model's output diversity
# (k=5 candidate tokens per step; fixed seed for repeatable sampling)
sampler = TopKSampler(k=5, seed=2)

# Compile the model with the sampler so generate() uses it
finetuned_model.compile(sampler=sampler)

# Function to generate a response from the model
def generate_response(instruction):
    """Generate the model's answer for a single user instruction.

    Args:
        instruction: The question/instruction text entered by the user.

    Returns:
        The generated answer as a string. If the instruction is empty or
        whitespace-only, a short message asking for input is returned. If
        generation raises, a string starting with "Error generating response:"
        is returned instead of propagating the exception.
    """
    # Nothing to answer: avoid running a generation on an empty prompt
    if not instruction or not instruction.strip():
        return "Please enter a question."

    # Format the prompt using the template (response slot left empty)
    prompt = template.format(instruction=instruction, response="")
    try:
        # max_length is kept small for memory efficiency
        response = finetuned_model.generate(prompt, max_length=128)
    except Exception as e:
        return f"Error generating response: {str(e)}"

    # generate() returns the prompt followed by the continuation; drop the
    # echoed prompt so the user only sees the answer. If the returned text does
    # not start with the exact prompt, fall back to returning it unchanged.
    if isinstance(response, str) and response.startswith(prompt):
        response = response[len(prompt):].strip()
    return response

# Text shown under the title of the demo page
ui_description = (
    "Ask a medical question and receive a response based on a fine-tuned Gemma model. "
    "This model is optimized for medical text generation."
)

# Clickable sample questions; each inner list is one example for the single text input
example_questions = [
    ["What are the symptoms of diabetes?"],
    ["Explain the medical definition of 'myelodysplastic syndrome'."],
    ["What are the treatment options for hypertension?"],
]

# Build the Gradio UI: one text box in, one text box out, backed by generate_response
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Medical Language Model",
    description=ui_description,
    examples=example_questions,
)

# Start serving the interface when this code runs as the main module
if __name__ == "__main__":
    interface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f278b77cf680b62d0d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
