In [3]:
import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize
from collections import Counter
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential

# Load the data from the provided database
data = pd.read_csv(r'D:\\Code\\Projects\\medbuddyAPI\\datasets\\symptom_checker\\disease_sympts_prec_full.csv')

# Preprocess the data
symptoms = data['symptoms'].apply(lambda x: ' '.join(x.split(',')))
diseases = data['disease']

# Tokenize the symptoms
all_symptoms = ' '.join(symptoms).split()
symptom_counts = Counter(all_symptoms)
symptom_vocab = sorted(symptom_counts, key=symptom_counts.get, reverse=True)
symptom_to_idx = {symptom: idx for idx, symptom in enumerate(symptom_vocab)}

# Convert symptoms to sequences
sequences = [[symptom_to_idx[token] for token in word_tokenize(symptom.lower()) if token in symptom_to_idx] for symptom in symptoms]
max_length = max([len(seq) for seq in sequences])
padded_sequences = pad_sequences(sequences, maxlen=max_length)

# Encode diseases as integers
disease_labels = diseases.unique().tolist()
disease_encoder = {label: idx for idx, label in enumerate(disease_labels)}
encoded_diseases = [disease_encoder[disease] for disease in diseases]

# Create text embeddings
vocab_size = len(symptom_vocab)
embedding_dim = 100
embedding_matrix = np.zeros((vocab_size, embedding_dim))

# Build the model
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_length),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(len(disease_labels), activation='softmax')
])

# Compile and train the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(padded_sequences, np.array(encoded_diseases), epochs=5, batch_size=32)

# Symptom checker function
def symptom_checker(symptoms):
    tokenized_symptoms = [symptom_to_idx.get(token, 0) for token in word_tokenize(' '.join(symptoms).lower())]
    padded_symptoms = pad_sequences([tokenized_symptoms], maxlen=max_length)
    predictions = model.predict(padded_symptoms)
    predicted_index = np.argmax(predictions[0])
    predicted_disease = disease_labels[predicted_index]
    precautions = data[data['disease'] == predicted_disease]['precautions'].iloc[0]
    print(f"Predicted disease: {predicted_disease}")
    print(f"Precautions: {precautions}")

# Example usage
symptom_checker(['fatigue','weight_loss','restlessness','lethargy','irregular_sugar_level','blurred_and_distorted_vision','obesity','increased_appetite','polyuria'])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Predicted disease: Diabetes 
Precautions: have balanced diet, exercise, consult doctor, follow up


In [4]:
# Convert the Keras model to a TensorFlow Lite model file
converter= tf.lite.TFLiteConverter.from_keras_model(model)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
tflite_model = converter.convert()


# Save the TensorFlow Lite model to a file
with open('symptom_checker_model.tflite', 'wb') as f:
    f.write(tflite_model)

INFO:tensorflow:Assets written to: C:\Users\Rohan\AppData\Local\Temp\tmpyy_682so\assets


In [22]:
import pickle

with open('symptom_varsV1.pickle', 'wb') as f:
    pickle.dump((disease_labels, symptom_to_idx, max_length), f)

In [14]:
interpreter = tf.lite.Interpreter(model_path="symptom_checker_model.tflite")
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Function to predict disease based on symptoms
def predict_disease(symptom_sentence):
  symptoms = word_tokenize(symptom_sentence.lower())
  tokenized_symptoms = tokenizer.texts_to_sequences([' '.join(symptoms)])
  padded_symptoms = pad_sequences(tokenized_symptoms, maxlen=max_length)

  interpreter.resize_tensor_input(input_details[0]['index'], [1, max_length])
  interpreter.allocate_tensors()
  input_data = np.array([padded_symptoms], dtype=np.float32)
  input_data = np.squeeze(input_data, axis=1)
  interpreter.set_tensor(input_details[0]['index'], input_data)
  interpreter.invoke()
  
  output_data = interpreter.get_tensor(output_details[0]['index'])
  predicted_index = np.argmax(output_data[0])
  predicted_disease = disease_labels[predicted_index]

  precautions = data[data['disease'] == predicted_disease]['precautions'].iloc[0]
  
  return predicted_disease, precautions

In [20]:
symptom_sentence = "I have fatigue, weight loss, restlessness, lethargy, irregular sugar level, blurred and distorted vision, obesity, increased appetite, and polyuria."
predicted_disease, precautions = predict_disease(symptom_sentence)
print(f"Predicted disease: {predicted_disease}")
print(f"Precautions: {precautions}")

NameError: name 'tokenizer' is not defined

In [21]:
import tensorflow as tf

# Load the TensorFlow Lite model
interpreter = tf.lite.Interpreter(model_path="symptom_checker_model.tflite")
interpreter.allocate_tensors()

# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Prepare input data
symptom_sentence = "I have fatigue, weight loss, restlessness, lethargy, irregular sugar level, blurred and distorted vision, obesity, increased appetite, and polyuria."
symptoms = word_tokenize(symptom_sentence.lower())
tokenized_symptoms = [symptom_to_idx.get(token, 0) for token in word_tokenize(' '.join(symptoms).lower())]
padded_symptoms = pad_sequences([tokenized_symptoms], maxlen=max_length)
input_tensor = np.array(padded_symptoms, dtype=np.float32)


# Set the input tensor
interpreter.set_tensor(input_details[0]['index'], input_tensor)

# Run the inference
interpreter.invoke()

# Get the output tensor
output_data = interpreter.get_tensor(output_details[0]['index'])
predicted_index = np.argmax(output_data[0])
predicted_disease = disease_labels[predicted_index]

# Print the predicted disease
print(f"Predicted disease: {predicted_disease}")
precautions = data[data['disease'] == predicted_disease]['precautions'].iloc[0]
print(f"Precautions: {precautions}")

Predicted disease: Varicose veins
Precautions: lie down flat and raise the leg high, use oinments, use vein compression, dont stand still for long
