In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense

In [2]:
data = pd.read_csv('C:\\Contributions\Programs\\ML_DL\\Medical_diagnoser\\Dataset\\medical_data.csv')
data.tail()

  data = pd.read_csv('C:\\Contributions\Programs\\ML_DL\\Medical_diagnoser\\Dataset\\medical_data.csv')


Unnamed: 0,Patient_Problem,Disease,Prescription
402,Noticeable thinning of the hair on the top of ...,Androgenetic Alopecia,"Minoxidil, finasteride for males."
403,"Greenish discharge from the eyes, accompanied ...",Conjunctivitis,Antibiotic or antihistamine eye drops.
404,"Experiencing confusion, difficulty speaking, a...",Stroke,"Immediate medical attention, clot-busting drugs."
405,Constant feeling of fullness in the ears and h...,Eustachian Tube Dysfunction,"Nasal steroids, autoinflation exercises."
406,"Blood in stool, along with a change in bowel m...",Colorectal Cancer,"Colonoscopy, potentially surgery, chemotherapy."


In [7]:
tokenizer = Tokenizer(num_words=5000, oov_token="<OOV>")
tokenizer.fit_on_texts(data['Patient_Problem'])

sequences = tokenizer.texts_to_sequences(data['Patient_Problem'])

In [8]:
max_length = max(len(x) for x in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

In [9]:
# Encoding the labels
label_encoder_disease = LabelEncoder()
label_encoder_prescription = LabelEncoder()

disease_labels = label_encoder_disease.fit_transform(data['Disease'])
prescription_labels = label_encoder_prescription.fit_transform(data['Prescription'])

# Converting labels to categorical
disease_labels_categorical = to_categorical(disease_labels)
prescription_labels_categorical = to_categorical(prescription_labels)


In [10]:
Y = np.hstack((disease_labels_categorical, prescription_labels_categorical))


In [11]:
input_layer = Input(shape=(max_length,))

embedding = Embedding(input_dim=5000, output_dim=64)(input_layer)
lstm_layer = LSTM(64)(embedding)

disease_output = Dense(len(label_encoder_disease.classes_), activation='softmax', 
name='disease_output')(lstm_layer)

prescription_output = Dense(len(label_encoder_prescription.classes_), 
activation='softmax', name='prescription_output')(lstm_layer)


In [12]:
model = Model(inputs=input_layer, outputs=[disease_output, prescription_output])

model.compile(
    loss={'disease_output': 'categorical_crossentropy', 
    'prescription_output': 'categorical_crossentropy'},
    optimizer='adam',
    metrics={'disease_output': ['accuracy'], 'prescription_output': ['accuracy']}
)

model.summary()


In [13]:
model.fit(padded_sequences, {'disease_output': disease_labels_categorical, 'prescription_output':
      prescription_labels_categorical}, epochs=100, batch_size=32)


Epoch 1/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - disease_output_accuracy: 0.0000e+00 - loss: 11.1431 - prescription_output_accuracy: 0.0039 
Epoch 2/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - disease_output_accuracy: 0.0260 - loss: 11.1243 - prescription_output_accuracy: 0.0058        
Epoch 3/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - disease_output_accuracy: 0.0259 - loss: 11.0825 - prescription_output_accuracy: 0.0164    
Epoch 4/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - disease_output_accuracy: 0.0434 - loss: 10.9096 - prescription_output_accuracy: 0.0162
Epoch 5/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - disease_output_accuracy: 0.0321 - loss: 10.8354 - prescription_output_accuracy: 0.0168
Epoch 6/100
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - disease_output_accuracy

<keras.src.callbacks.history.History at 0x1aa23f47e60>

In [14]:
def make_prediction(patient_problem):
    # Preprocessing the input
    sequence = tokenizer.texts_to_sequences([patient_problem])
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post')
    
    # Making prediction
    prediction = model.predict(padded_sequence)
    
    # Decoding the prediction
    disease_index = np.argmax(prediction[0], axis=1)[0]
    prescription_index = np.argmax(prediction[1], axis=1)[0]
    
    disease_predicted = label_encoder_disease.inverse_transform([disease_index])[0]
    prescription_predicted = label_encoder_prescription.inverse_transform([prescription_index])[0]
    
    print(f"Predicted Disease: {disease_predicted}")
    print(f"Suggested Prescription: {prescription_predicted}")


patient_input = "I played a lot and i cannot walk properly now."
make_prediction(patient_input)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step
Predicted Disease: Hyperthyroidism
Suggested Prescription: Dietary iodine; consider thyroid hormone replacement therapy.
