In [None]:
# Toy training set: each entry pairs one clinical sentence with its category
# ("symptom", "test" or "treatment"). Keeping text and label side by side makes
# the alignment between the two lists visible at a glance.
labeled_examples = [
    ("Patient has a fever and headache", "symptom"),
    ("X-ray shows a fracture in the left leg", "test"),
    ("Prescribed 500mg paracetamol twice daily", "treatment"),
    ("Blood test indicates high sugar levels", "test"),
    ("MRI reveals a tumor in the brain", "test"),
    ("Take two tablets of aspirin every morning", "treatment"),
    ("Surgery scheduled for next Monday", "treatment"),
    ("Blood pressure is elevated", "symptom"),
    ("Symptoms include cough and fatigue", "symptom"),
    ("CT scan confirms internal bleeding", "test"),
]

# Split the pairs back into the two parallel lists used by the later cells.
texts = [sentence for sentence, _ in labeled_examples]
labels = [category for _, category in labeled_examples]


In [None]:
#Preprocessing
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Turn the string categories into integer class ids.
label_encoder = LabelEncoder().fit(labels)
encoded_labels = label_encoder.transform(labels)
num_classes = label_encoder.classes_.shape[0]

# Settings for the text pipeline, named so they are easy to spot and tune.
vocab_size = 1000   # passed to the tokenizer as `num_words`
max_len = 20        # every sequence is padded/truncated to this many tokens

# Build the vocabulary from the training sentences; unseen words map to "<OOV>".
tokenizer = Tokenizer(oov_token="<OOV>", num_words=vocab_size)
tokenizer.fit_on_texts(texts)

# Convert sentences to integer sequences, then pad at the end to a fixed length.
sequences = tokenizer.texts_to_sequences(texts)
padded = pad_sequences(sequences, maxlen=max_len, padding='post')


In [None]:
#Build the model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense

# Seed Python, NumPy and TensorFlow before any layer weights are created so a
# fresh "Restart & Run All" starts from the same initialisation.
# NOTE(review): bit-exact reproducibility on GPU may need further settings.
tf.keras.utils.set_random_seed(42)

# Small bag-of-embeddings classifier:
#   token ids -> 16-d embeddings -> mean over the sequence -> dense -> softmax.
# The sequence length and vocabulary size are read from the preprocessing
# objects instead of being repeated as literals, so they cannot drift apart.
model = Sequential([
    # Explicit input spec replaces Embedding's `input_length` argument,
    # which is deprecated in Keras 3.
    tf.keras.Input(shape=(padded.shape[1],), dtype="int32"),
    Embedding(input_dim=tokenizer.num_words, output_dim=16),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(num_classes, activation='softmax')
])

# Labels are integer class ids, hence the sparse categorical loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])




In [None]:
#Train the model
# Keep the returned History so the learning curve can be inspected later, and
# silence the per-epoch progress bars (200 of them would flood the notebook).
history = model.fit(padded, np.asarray(encoded_labels), epochs=200, verbose=0)

# Report only the metrics of the last epoch.
final_loss = history.history['loss'][-1]
final_accuracy = history.history['accuracy'][-1]
print(f"Final training loss: {final_loss:.4f}, accuracy: {final_accuracy:.2f}")


Epoch 1/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.4000 - loss: 1.0695
Epoch 2/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step - accuracy: 0.4000 - loss: 1.0682
Epoch 3/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step - accuracy: 0.4000 - loss: 1.0667
Epoch 4/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 0.4000 - loss: 1.0653
Epoch 5/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 0.4000 - loss: 1.0639
Epoch 6/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step - accuracy: 0.4000 - loss: 1.0624
Epoch 7/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.4000 - loss: 1.0609
Epoch 8/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 55ms/step - accuracy: 0.4000 - loss: 1.0594
Epoch 9/200
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

<keras.src.callbacks.history.History at 0x7d3334551f10>

In [None]:
# Write the trained network to disk.
# NOTE(review): the ".h5" suffix selects Keras' legacy HDF5 format; the native
# ".keras" format is the recommended one - confirm before renaming, since any
# loader elsewhere would need the new path.
# NOTE(review): `tokenizer` and `label_encoder` are not saved in this notebook,
# so the file alone may not be enough to run inference in another session.
model_path = "medical_text_model.h5"
model.save(model_path)




In [None]:
#Use for testing
def classify_text(text):
    """Predict the category of a single piece of text.

    The text is tokenised with the fitted ``tokenizer``, padded to the same
    length as the training matrix ``padded`` and passed through ``model``.

    Parameters
    ----------
    text : str
        Sentence to classify.

    Returns
    -------
    tuple
        ``(label, confidence)`` where ``label`` is the class name recovered
        through ``label_encoder`` and ``confidence`` is the predicted
        probability of that class as a Python float.

    Raises
    ------
    ValueError
        If the text yields no tokens (e.g. empty or punctuation only); the
        model would otherwise be asked to classify a row of pure padding.
    """
    seq = tokenizer.texts_to_sequences([text])
    if not seq[0]:
        raise ValueError("text must contain at least one token to classify")

    # Pad to the training sequence length rather than repeating a literal.
    padded_seq = pad_sequences(seq, maxlen=padded.shape[1], padding='post')

    # verbose=0: no progress bar for a single-sample prediction.
    pred = model.predict(padded_seq, verbose=0)[0]
    label_idx = int(np.argmax(pred))
    label = label_encoder.inverse_transform([label_idx])[0]
    confidence = float(pred[label_idx])
    return label, confidence


In [None]:
# Quick smoke test on an unseen sentence. "ECG", "heart" and "condition" do not
# occur in the training texts, so they are mapped to the "<OOV>" token.
text = "ECG confirms heart condition"
label, conf = classify_text(text)
print(f"Prediction: {label}, Confidence: {conf:.2f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Prediction: test, Confidence: 0.45
