In [None]:
# Uninstall existing libraries to ensure clean installation
#!pip uninstall -y transformers tensorflow keras scikit-learn numpy

# Install necessary libraries, allowing compatible versions to be installed
!pip install transformers scikit-learn numpy tensorflow-intel tensorflow --upgrade

Collecting scikit-learn
  Downloading scikit_learn-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (17 kB)
Collecting numpy
  Downloading numpy-2.3.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow-intel
  Downloading tensorflow_intel-0.0.1-py3-none-any.whl.metadata (582 bytes)
Collecting tensorflow
  Downloading tensorflow-2.19.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting pyinstaller (from tensorflow-intel)
  Downloading pyinstaller-6.14.1-py3-none-manylinux2014_x86_64.whl.metadata (8.3 kB)
Collecting twine (from tensorflow-intel)
  Downloading twine-6.1.0-py3-none-any.whl.metadata (3.7 kB)
Collecting tensorboard~=2.19.0 (from tensorflow)
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting numpy
  Downloading numpy-2.1.3-cp311-cp311-man

In [None]:
import os

import kagglehub

# Download the latest version of the Symptom2Disease dataset.
path = kagglehub.dataset_download("niyarrbarman/symptom2disease")
print("Path to dataset files:", path)

# List the downloaded files. Walking the directory returned by kagglehub
# (instead of a hard-coded '/kaggle/input') keeps the listing correct on any
# runtime and limits it to this dataset.
for dirname, _, filenames in os.walk(path):
    for filename in filenames:
        print(os.path.join(dirname, filename))

Path to dataset files: /kaggle/input/symptom2disease
/kaggle/input/symptom2disease/Symptom2Disease.csv


In [None]:
import os

import pandas as pd

# Build the CSV location from the directory returned by kagglehub so the path
# is written once and is not tied to a specific runtime layout.
csv_path = os.path.join(path, 'Symptom2Disease.csv')

try:
    # Try UTF-8 first: it raises on bytes it cannot decode, so the fallback
    # below is actually reachable. (latin1 accepts every byte value and never
    # raises, which would silently turn UTF-8 text into mojibake.)
    df = pd.read_csv(csv_path, encoding='utf-8', sep=',')
except UnicodeDecodeError:
    # Legacy single-byte export: latin1 can decode any byte sequence.
    df = pd.read_csv(csv_path, encoding='latin1', sep=',')

# Use task-oriented column names; 'label' is re-created later as an integer id.
df = df.rename(columns={'text': 'symptom', 'label': 'diagnosis'})
df.head()

Unnamed: 0.1,Unnamed: 0,diagnosis,symptom
0,0,Psoriasis,I have been experiencing a skin rash on my arm...
1,1,Psoriasis,"My skin has been peeling, especially on my kne..."
2,2,Psoriasis,I have been experiencing joint pain in my fing...
3,3,Psoriasis,"There is a silver like dusting on my skin, esp..."
4,4,Psoriasis,"My nails have small dents or pits in them, and..."


In [None]:
# Import libraries
## Setting up the environment

import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification, TFBertModel
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np

In [None]:
# Check TensorFlow version
print("TensorFlow version:", tf.__version__)

## Label Encoding
import kagglehub

# `df` is the DataFrame loaded earlier in the notebook; it must provide a
# 'symptom' text column and a 'diagnosis' column. Fail early with a clear
# message instead of producing NaN labels that only break during training.
missing_columns = {'symptom', 'diagnosis'} - set(df.columns)
if missing_columns:
    raise KeyError(f"df is missing required column(s): {sorted(missing_columns)}")
if df['diagnosis'].isna().any():
    raise ValueError("df['diagnosis'] contains missing values; drop or fill them before encoding labels.")

# Map diagnoses to numerical labels (ids follow first appearance in the data)
unique_diagnoses = df['diagnosis'].unique().tolist()
diagnosis_to_label = {diagnosis: i for i, diagnosis in enumerate(unique_diagnoses)}
df['label'] = df['diagnosis'].map(diagnosis_to_label)
num_labels = len(unique_diagnoses)

print("\nSample Data:")
print(df.head())
print("\nDiagnosis to Label Mapping:")
print(diagnosis_to_label)

TensorFlow version: 2.19.0

Sample Data:
   Unnamed: 0  diagnosis                                            symptom  \
0           0  Psoriasis  I have been experiencing a skin rash on my arm...   
1           1  Psoriasis  My skin has been peeling, especially on my kne...   
2           2  Psoriasis  I have been experiencing joint pain in my fing...   
3           3  Psoriasis  There is a silver like dusting on my skin, esp...   
4           4  Psoriasis  My nails have small dents or pits in them, and...   

   label  
0      0  
1      0  
2      0  
3      0  
4      0  

Diagnosis to Label Mapping:
{'Psoriasis': 0, 'Varicose Veins': 1, 'Typhoid': 2, 'Chicken pox': 3, 'Impetigo': 4, 'Dengue': 5, 'Fungal infection': 6, 'Common Cold': 7, 'Pneumonia': 8, 'Dimorphic Hemorrhoids': 9, 'Arthritis': 10, 'Acne': 11, 'Bronchial Asthma': 12, 'Hypertension': 13, 'Migraine': 14, 'Cervical spondylosis': 15, 'Jaundice': 16, 'Malaria': 17, 'urinary tract infection': 18, 'allergy': 19, 'gastroesoph

In [None]:
## Data Preprocessing
# Split data into training and testing sets.
# `stratify` keeps each diagnosis in the same proportion in both splits, so
# per-class test support is even and no class is under-represented by chance.
# (It requires at least two samples per diagnosis; scikit-learn raises a
# ValueError otherwise.)
X_train, X_test, y_train, y_test = train_test_split(
    df['symptom'],
    df['label'],
    test_size=0.2,
    random_state=42,
    stratify=df['label'],
)

# Load BERT tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize and encode the text data
def encode_text(tokenizer, text_list, max_length=128):
    """Tokenize a batch of texts into fixed-length TensorFlow tensors.

    Args:
        tokenizer: Callable tokenizer, invoked as ``tokenizer(text_list, **options)``.
        text_list: The texts to encode; passed to the tokenizer unchanged.
        max_length: Length every sequence is padded or truncated to.

    Returns:
        The tokenizer's result for the options below: special tokens added,
        padding to ``max_length``, truncation enabled, attention mask and
        token type ids requested, tensors in TensorFlow format.
    """
    tokenizer_options = dict(
        add_special_tokens=True,
        max_length=max_length,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_token_type_ids=True,
        return_tensors='tf',
    )
    return tokenizer(text_list, **tokenizer_options)

max_length = 128 # You can adjust this based on your data
batch_size = 16 # You can adjust this


def _build_dataset(encodings, labels):
    """Pair the three BERT input tensors with their labels in a tf.data.Dataset.

    Args:
        encodings: Mapping returned by `encode_text`, indexed by
            'input_ids', 'token_type_ids' and 'attention_mask'.
        labels: Object exposing `.values` (here a pandas Series of label ids).

    Returns:
        An unbatched dataset of (features_dict, label) pairs.
    """
    features = {
        name: encodings[name]
        for name in ('input_ids', 'token_type_ids', 'attention_mask')
    }
    return tf.data.Dataset.from_tensor_slices((features, tf.constant(labels.values)))


# Tokenize both splits to the same fixed length
train_encodings = encode_text(tokenizer, X_train.tolist(), max_length=max_length)
test_encodings = encode_text(tokenizer, X_test.tolist(), max_length=max_length)

# Create and batch the TensorFlow Datasets; only the training data is shuffled
# (with a buffer covering the whole split).
train_dataset = _build_dataset(train_encodings, y_train).shuffle(len(X_train)).batch(batch_size)
test_dataset = _build_dataset(test_encodings, y_test).batch(batch_size)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.


In [None]:
## Model Building
# Load the pre-trained BERT model for sequence classification.
# Passing id2label/label2id stores the diagnosis names in the model config, so
# a checkpoint written with save_pretrained() carries its own label mapping
# instead of depending on this notebook's in-memory dictionaries.
id2label = {label_id: diagnosis for diagnosis, label_id in diagnosis_to_label.items()}
model = TFBertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=num_labels,
    id2label=id2label,
    label2id=dict(diagnosis_to_label),
)

# Compile the model
# The classifier head outputs raw logits, hence from_logits=True; labels are
# integer ids, hence the "sparse" loss and metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')

model.compile(optimizer=optimizer, loss=loss, metrics=[metric])

print("\nModel Summary:")
model.summary()

TensorFlow and JAX classes are deprecated and will be removed in Transformers v5. We recommend migrating to PyTorch classes or pinning your version of Transformers.
All PyTorch model weights were used when initializing TFBertForSequenceClassification.

Some weights or buffers of the TF 2.0 model TFBertForSequenceClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Model Summary:
Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bert (TFBertMainLayer)      multiple                  109482240 
                                                                 
 dropout_37 (Dropout)        multiple                  0 (unused)
                                                                 
 classifier (Dense)          multiple                  18456     
                                                                 
Total params: 109500696 (417.71 MB)
Trainable params: 109500696 (417.71 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
## Model Training
# Train the model
epochs = 3 # You can adjust this based on your data and resources
# NOTE(review): the test split is also used as validation data here, so the
# test metrics below are not an independent estimate if these curves are used
# to tune hyperparameters. Consider a separate validation split.
history = model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=test_dataset
)

print("\nTraining History:")
print(history.history)

## Model Evaluation
# Evaluate the model on the test set.
# The scalar results get their own names: unpacking into `loss` would replace
# the SparseCategoricalCrossentropy object created when the model was
# compiled, breaking any later `model.compile(loss=loss, ...)`.
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"\nTest Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# Predict on the test set (test_dataset is not shuffled, so the prediction
# order matches y_test)
predictions = model.predict(test_dataset)
predicted_labels = tf.argmax(predictions.logits, axis=1).numpy()

# Convert numerical labels back to diagnoses
label_to_diagnosis = {i: diagnosis for diagnosis, i in diagnosis_to_label.items()}
predicted_diagnoses = [label_to_diagnosis[label] for label in predicted_labels]
true_diagnoses = [label_to_diagnosis[label] for label in y_test.values]

# Print classification report
print("\nClassification Report:")
print(classification_report(true_diagnoses, predicted_diagnoses))

Epoch 1/3
Epoch 2/3
Epoch 3/3

Training History:
{'loss': [2.8678576946258545, 1.5528892278671265, 0.5939411520957947], 'accuracy': [0.21562500298023224, 0.7885416746139526, 0.9708333611488342], 'val_loss': [2.2514634132385254, 0.883381187915802, 0.3483802378177643], 'val_accuracy': [0.574999988079071, 0.9291666746139526, 0.9708333611488342]}

Test Loss: 0.3484
Test Accuracy: 0.9708

Classification Report:
                                 precision    recall  f1-score   support

                           Acne       1.00      1.00      1.00         7
                      Arthritis       1.00      1.00      1.00        10
               Bronchial Asthma       1.00      1.00      1.00        11
           Cervical spondylosis       1.00      1.00      1.00         7
                    Chicken pox       1.00      0.83      0.91        12
                    Common Cold       1.00      1.00      1.00        12
                         Dengue       0.86      1.00      0.92        12
     

In [None]:

def predict_diagnosis(symptom_text, model, tokenizer, diagnosis_map, max_length=128):
    """Return the diagnosis the model predicts for one symptom description.

    Args:
        symptom_text: Free-text symptom description.
        model: Model whose `predict` result exposes a `.logits` attribute.
        tokenizer: Tokenizer handed to `encode_text`.
        diagnosis_map: Mapping from predicted class index to diagnosis name.
        max_length: Token length the text is padded or truncated to.

    Returns:
        The `diagnosis_map` entry for the highest-scoring class.
    """
    # Encode as a one-element batch and keep only the inputs the model is fed.
    encoded = encode_text(tokenizer, [symptom_text], max_length=max_length)
    features = {
        name: encoded[name]
        for name in ('input_ids', 'token_type_ids', 'attention_mask')
    }
    logits = model.predict(features).logits
    # argmax over the class axis; [0] selects the single example in the batch.
    best_class = tf.argmax(logits, axis=1).numpy()[0]
    return diagnosis_map[best_class]

## Using the Health AI Agent for Prediction
# Example usage of the trained agent
example_symptoms = [
    "I have a terrible headache and feel dizzy.",
    "My nose is running and I keep sneezing.",
]

for symptom in example_symptoms:
    # Pass the notebook's `max_length` so inference pads/truncates exactly like
    # training did, even if that setting is changed from its default.
    predicted = predict_diagnosis(
        symptom, model, tokenizer, label_to_diagnosis, max_length=max_length
    )
    print(f"\nSymptom: '{symptom}'")
    print(f"Predicted Diagnosis: {predicted}")

# Save the trained model (optional)
model.save_pretrained('./health_ai_agent_model')

# Load the model (optional)
loaded_model = TFBertForSequenceClassification.from_pretrained('./health_ai_agent_model')


Symptom: 'I have a terrible headache and feel dizzy.'
Predicted Diagnosis: drug reaction

Symptom: 'My nose is running and I keep sneezing.'
Predicted Diagnosis: Common Cold


Some layers from the model checkpoint at ./health_ai_agent_model were not used when initializing TFBertForSequenceClassification: ['dropout_37']
- This IS expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at ./health_ai_agent_model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


In [None]:
## Further Fine-Tuning
# Continue training the already fine-tuned `model` with a smaller learning
# rate and more epochs. Re-compiling installs the new optimizer on the same
# model object.
optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5) # Smaller learning rate
# A fresh loss object is created here rather than reusing a name from an
# earlier cell, so this cell does not depend on what that name currently holds.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')

model.compile(optimizer=optimizer, loss=loss_fn, metrics=[metric])

epochs = 5  # Increased epochs
# Stored under its own name (like the other *_optimized results below) so the
# history of the first training run stays available for comparison.
history_optimized = model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=test_dataset
)

print("\nTraining History (Optimized):")
print(history_optimized.history)

# Evaluate the model on the test set after further training
loss_optimized, accuracy_optimized = model.evaluate(test_dataset)
print(f"\nTest Loss (Optimized): {loss_optimized:.4f}")
print(f"Test Accuracy (Optimized): {accuracy_optimized:.4f}")

# Predict and report again
predictions_optimized = model.predict(test_dataset)
predicted_labels_optimized = tf.argmax(predictions_optimized.logits, axis=1).numpy()
predicted_diagnoses_optimized = [label_to_diagnosis[label] for label in predicted_labels_optimized]

print("\nClassification Report (Optimized):")
print(classification_report(true_diagnoses, predicted_diagnoses_optimized))

# You can explore other optimizations like:
# - Adjusting max_length
# - Trying different batch sizes
# - Implementing early stopping
# - Using learning rate schedules
# - Exploring different pre-trained BERT models (e.g., `bert-large-uncased`)
# - Data augmentation (if applicable to text data)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5

Training History (Optimized):
{'loss': [0.25416648387908936, 0.13200989365577698, 0.08231466263532639, 0.05662664398550987, 0.041300274431705475], 'accuracy': [0.9947916865348816, 1.0, 1.0, 1.0, 1.0], 'val_loss': [0.1974584013223648, 0.1361314207315445, 0.09995925426483154, 0.08217423409223557, 0.07323849946260452], 'val_accuracy': [0.987500011920929, 0.9833333492279053, 0.987500011920929, 0.987500011920929, 0.987500011920929]}

Test Loss (Optimized): 0.0732
Test Accuracy (Optimized): 0.9875

Classification Report (Optimized):
                                 precision    recall  f1-score   support

                           Acne       1.00      1.00      1.00         7
                      Arthritis       1.00      1.00      1.00        10
               Bronchial Asthma       1.00      1.00      1.00        11
           Cervical spondylosis       1.00      1.00      1.00         7
                    Chicken pox       1.00      0.


**Explanation:**

1.  **Setup and Imports:**
    *   Installs necessary libraries: `transformers` for BERT, `tensorflow` and `keras` for building and training the model, and `scikit-learn` for data splitting and evaluation.
    *   Imports the required modules.
    *   Prints the TensorFlow version for verification.

2.  **Data Loading and Label Mapping:**
    *   Uses the Symptom2Disease dataset downloaded from Kaggle and loaded into the pandas DataFrame `df`, with its columns renamed to `symptom` and `diagnosis`. **To train on a different healthcare dataset, load it into `df` with these two columns.**
    *   Maps the text diagnoses to numerical labels for model training.
    *   Prints a sample of the data and the diagnosis-to-label mapping.

3.  **Data Preprocessing:**
    *   Splits the data into training and testing sets using `train_test_split`.
    *   Loads the `BertTokenizer` from the pre-trained `bert-base-uncased` model. The tokenizer is crucial for converting text into a format that BERT understands (token IDs, attention masks, and token type IDs).
    *   Defines the `encode_text` function to tokenize and pad/truncate the symptom text to a fixed `max_length`.
    *   Encodes the training and testing text data.
    *   Creates TensorFlow `Dataset` objects from the encoded data and labels. This is a more efficient way to handle data during training in TensorFlow.
    *   Batches and shuffles the training dataset for better training performance.

4.  **Model Building:**
    *   Loads the `TFBertForSequenceClassification` model from the pre-trained `bert-base-uncased` weights. This model is specifically designed for classification tasks using BERT. `num_labels` is set to the number of unique diagnoses.
    *   Compiles the model using the Adam optimizer, Sparse Categorical Crossentropy loss (suitable for multi-class classification with integer labels), and Sparse Categorical Accuracy as the evaluation metric.
    *   Prints the model summary to see its architecture.

5.  **Model Training:**
    *   Trains the model using the `fit` method, providing the training dataset and the number of `epochs`.
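    *   Includes `validation_data` (the test dataset) to monitor the model's performance on held-out data during training. Because this same split is also used for the final evaluation, the reported test metrics are not a fully independent estimate; a separate validation split would provide one.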
    *   Prints the training history, which includes loss and accuracy for both training and validation sets over epochs.

6.  **Model Evaluation:**
    *   Evaluates the trained model on the test dataset using the `evaluate` method to get the final loss and accuracy on the test set.
    *   Uses the `predict` method to get predictions for the test set.
    *   Converts the predicted numerical labels back to their corresponding diagnoses using the `label_to_diagnosis` mapping.
    *   Prints a `classification_report` which provides detailed metrics like precision, recall, F1-score, and support for each diagnosis class.

7.  **Using the Health AI Agent for Prediction:**
    *   Defines the `predict_diagnosis` function to take a symptom text as input and return the predicted diagnosis using the trained model and tokenizer.
    *   Demonstrates how to use the function with new symptom examples.

8.  **Saving and Loading the Model (Optional):**
    *   Saves the trained model's weights and configuration with `save_pretrained` and reloads them with `from_pretrained`. This is useful for deploying the model later without retraining.

**To use this code effectively:**

1.  **Use Your Own Data:** The most crucial step is to replace the example Symptom2Disease dataset in `df` with your actual healthcare dataset. This dataset should contain symptom descriptions and their corresponding diagnoses. The quality and quantity of your data will significantly impact the agent's performance.
2.  **Data Cleaning and Preprocessing:** Real-world healthcare data often requires extensive cleaning and preprocessing. This might involve handling missing values, standardizing text, removing noise, etc.
3.  **Hyperparameter Tuning:** Experiment with different `max_length`, `batch_size`, `epochs`, and optimizer learning rates to find the best configuration for your dataset.
4.  **Model Architecture:** While BERT is a powerful model, you might explore other transformer models or even simpler models depending on your data size and complexity.
5.  **Domain Adaptation:** For better performance on medical text, consider using a BERT model pre-trained specifically on medical corpora (e.g., BioBERT, ClinicalBERT).
6.  **Validation:** Implement more robust validation strategies (e.g., cross-validation) to ensure the model generalizes well.
7.  **Deployment:** For production use, you would typically deploy the trained model using frameworks like TensorFlow Serving or cloud platforms.
8.  **Ethical Considerations:** Be mindful of the ethical implications of using AI in healthcare, including bias in data, transparency of predictions, and the importance of human oversight. This model is a simplified example and should not be used for real-world medical diagnoses without rigorous validation and regulatory approval.

In [None]:
import tensorflow as tf
import numpy as np
import os
from sklearn.metrics import classification_report

# Calibration data for the TFLite converter.
# This walks the whole training split; for faster conversion a smaller but
# still diverse subset of the training data can be used instead.
def representative_dataset_gen():
    """Yield one calibration sample per training text.

    Each text in `X_train` is encoded on its own with `encode_text` and yielded
    as a list of three int32 tensors in the order
    [input_ids, token_type_ids, attention_mask].

    NOTE(review): a list is matched to the model inputs by position — confirm
    that the converted model's input order is the same as this order.
    """
    for symptom_text in X_train:
        # Encode a single-element batch so each yielded tensor has batch size 1.
        encoded = encode_text(tokenizer, [symptom_text], max_length=max_length)
        yield [
            tf.constant(encoded[name], dtype=tf.int32)
            for name in ('input_ids', 'token_type_ids', 'attention_mask')
        ]

# The in-memory, fine-tuned `model` is converted directly. The checkpoint is
# not reloaded here: a second copy of the network would only cost memory and
# load time, since the converter below never used it.

# Convert the Keras model to a TensorFlow Lite model with quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Specify the representative dataset used to calibrate activation ranges
converter.representative_dataset = representative_dataset_gen

# Ensure that ops that don't have a quantized implementation are allowed
# to fall back to floating-point. This is often necessary for models
# with complex operations.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8, tf.lite.OpsSet.TFLITE_BUILTINS]

# Convert the model (returns the serialized model as bytes)
tflite_model_quant = converter.convert()

# Save the quantized model (optional)
with open('health_ai_agent_quantized.tflite', 'wb') as f:
    f.write(tflite_model_quant)

print("Quantized TFLite model created and saved.")

# --- Evaluate the Quantized TFLite Model ---

# Collect the already-tokenized test set as NumPy arrays: the TFLite
# interpreter is fed NumPy inputs, one sample at a time.
test_input_ids = []
test_token_type_ids = []
test_attention_mask = []
test_labels = []

for inputs, labels in test_dataset:
    test_input_ids.extend(inputs['input_ids'].numpy())
    test_token_type_ids.extend(inputs['token_type_ids'].numpy())
    test_attention_mask.extend(inputs['attention_mask'].numpy())
    test_labels.extend(labels.numpy())

test_input_ids = np.array(test_input_ids, dtype=np.int32)
test_token_type_ids = np.array(test_token_type_ids, dtype=np.int32)
test_attention_mask = np.array(test_attention_mask, dtype=np.int32)
test_labels = np.array(test_labels)

# Load the TFLite model
interpreter = tf.lite.Interpreter(model_content=tflite_model_quant)
input_details = interpreter.get_input_details()


def _input_detail_for(feature_name, fallback_position):
    """Return the interpreter input whose tensor name contains `feature_name`.

    The order of `input_details` is not guaranteed to match the order the
    features were written in, so inputs are bound by name. If the name cannot
    be matched to exactly one input, the positional entry is used instead.
    """
    matches = [detail for detail in input_details if feature_name in detail['name']]
    if len(matches) == 1:
        return matches[0]
    return input_details[fallback_position]


# feature name -> (interpreter input description, test array for that feature)
tflite_inputs = {
    'input_ids': (_input_detail_for('input_ids', 0), test_input_ids),
    'token_type_ids': (_input_detail_for('token_type_ids', 1), test_token_type_ids),
    'attention_mask': (_input_detail_for('attention_mask', 2), test_attention_mask),
}

# Models converted with dynamic input shapes must be resized to the concrete
# (1, sequence_length) shape before tensors are allocated; for inputs that
# already have this shape the resize changes nothing.
for detail, values in tflite_inputs.values():
    interpreter.resize_tensor_input(detail['index'], [1, values.shape[1]])
interpreter.allocate_tensors()

output_details = interpreter.get_output_details()

# Run inference on the TFLite model
tflite_predictions = []
for i in range(len(test_input_ids)):
    # Feed one example (batch size 1) to each named input
    for detail, values in tflite_inputs.values():
        interpreter.set_tensor(detail['index'], values[i:i + 1].astype(detail['dtype']))

    # Run inference
    interpreter.invoke()

    # Get the output tensor and append to results
    output_data = interpreter.get_tensor(output_details[0]['index'])
    tflite_predictions.append(output_data[0]) # Get the logits

# Convert TFLite predictions (logits) to predicted labels
tflite_predicted_labels = np.argmax(tflite_predictions, axis=1)

# Convert numerical labels back to diagnoses
label_to_diagnosis = {i: diagnosis for diagnosis, i in diagnosis_to_label.items()}
tflite_predicted_diagnoses = [label_to_diagnosis[label] for label in tflite_predicted_labels]

# Print classification report for the quantized model.
# y_true and y_pred must use the same label type: both are integer ids here,
# and `target_names` supplies the readable diagnosis for each id.
label_ids = sorted(label_to_diagnosis)
print("\nClassification Report (Quantized TFLite Model):")
print(classification_report(
    test_labels,
    tflite_predicted_labels,
    labels=label_ids,
    target_names=[label_to_diagnosis[label_id] for label_id in label_ids],
))

# Compare model sizes
keras_model_dir = './health_ai_agent_model'

# Best effort: if the Keras model has not been saved yet, try to save it now so
# there is something to measure; a failure is reported and the size stays 0.
if not os.path.exists(keras_model_dir):
    try:
        model.save_pretrained(keras_model_dir)
        print(f"Keras model saved to {keras_model_dir}")
    except Exception as e:
        print(f"Could not save Keras model: {e}")

# Total size in bytes of every file below the saved-model directory
# (0 when the directory does not exist).
original_model_size = sum(
    os.path.getsize(os.path.join(root, file))
    for root, dirs, files in os.walk(keras_model_dir)
    for file in files
)

quantized_model_path = 'health_ai_agent_quantized.tflite'
if os.path.exists(quantized_model_path):
    quantized_model_size = os.path.getsize(quantized_model_path)
else:
    quantized_model_size = 0

print(f"\nOriginal Keras Model Size (approx): {original_model_size / 1024**2:.2f} MB")
print(f"Quantized TFLite Model Size: {quantized_model_size / 1024**2:.2f} MB")

### Explanation of Model Quantization Steps

Model quantization is essentially a technique to reduce the size of your trained model and make it run faster, especially on devices with limited resources. It does this by reducing the precision of the numbers (weights and activations) in the model, often from 32-bit floating-point numbers to 8-bit integers.

Here are the steps taken in the code cell above to apply post-training integer quantization:

1.  **Defining a Representative Dataset (`representative_dataset_gen` function):**
    *   **Purpose:** For **full integer quantization** (quantizing both weights and activations), the TensorFlow Lite converter needs a small, representative sample of your *training* data. It uses this data to determine the range (minimum and maximum values) for the activations in each layer of the model. This range is then used to map the floating-point values to fixed-point integers during quantization.
    *   **Implementation:** The `representative_dataset_gen` function is a generator that iterates through your training data (`X_train`), preprocesses each symptom text using the same tokenizer as before, and yields the input tensors (`input_ids`, `token_type_ids`, `attention_mask`) in the format expected by the converter.

2.  **Creating a TFLiteConverter (`tf.lite.TFLiteConverter.from_keras_model`):**
    *   **Purpose:** This object is the core tool for converting your trained TensorFlow Keras model into the TensorFlow Lite format (`.tflite`).
    *   **Implementation:** We create a converter instance and pass our trained Keras `model` to it.

3.  **Setting Optimization Options (`converter.optimizations = [tf.lite.Optimize.DEFAULT]`):**
    *   **Purpose:** This line tells the converter to apply default optimizations during conversion. This includes quantization. `tf.lite.Optimize.DEFAULT` currently enables post-training quantization.
    *   **Implementation:** We set the `optimizations` attribute of the converter.

4.  **Specifying the Representative Dataset (`converter.representative_dataset = representative_dataset_gen`):**
    *   **Purpose:** This is where we provide the converter with the representative dataset generator we defined earlier. This is necessary for the converter to perform full integer quantization.
    *   **Implementation:** We assign our `representative_dataset_gen` function to the `representative_dataset` attribute.

5.  **Setting Target Specification (`converter.target_spec.supported_ops = [...]`):**
    *   **Purpose:** This line is important for handling operations in your model that might not have a direct integer implementation in TensorFlow Lite. It tells the converter that it's okay to "fall back" to using floating-point operations for those specific parts of the model. This prevents conversion errors but results in a hybrid model (partially integer, partially floating-point).
    *   **Implementation:** We set the `supported_ops` attribute to include both `TFLITE_BUILTINS_INT8` (for integer operations) and `TFLITE_BUILTINS` (for floating-point operations).

6.  **Converting the Model (`tflite_model_quant = converter.convert()`):**
    *   **Purpose:** This is the step where the actual conversion and quantization happen. The converter uses the model, optimization settings, and representative dataset to produce the quantized TFLite model.
    *   **Implementation:** We call the `convert()` method on the converter object.

7.  **Saving the Quantized Model:**
    *   **Purpose:** To save the resulting quantized TFLite model to a file (`.tflite`) so you can load and use it later without reconverting.
    *   **Implementation:** The converted model is a byte string, which is written to a file named `health_ai_agent_quantized.tflite`.

8.  **Evaluating the Quantized Model:**
    *   **Purpose:** To see how the quantization affected the model's performance (accuracy). Quantization can sometimes lead to a small drop in accuracy.
    *   **Implementation:**
        *   A `tf.lite.Interpreter` is created to load and run the TFLite model.
        *   Input and output details of the TFLite model are retrieved.
        *   The test dataset, which was originally in TensorFlow Dataset format, is converted into NumPy arrays because the TFLite interpreter works with NumPy arrays.
        *   Inference is run sample by sample: the input tensors for each test example are set, the interpreter is invoked, and the output (logits) is retrieved.
        *   The collected logits are converted back to predicted labels and then to diagnoses.
        *   Finally, a classification report is printed for the quantized model, and its size is compared to the original model size.

This process allows you to reduce the model size and potentially improve inference speed, which is particularly useful for deployment on devices with limited resources.