<a href="https://colab.research.google.com/github/shahbashajahan/disease_predictionn/blob/main/disease_prediction_using_one_hot_encodig.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import regex as re
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import MultiLabelBinarizer
import warnings
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [3]:
main_data = pd.read_csv("/content/dataset.csv")

In [10]:
main_data.head()

Unnamed: 0,disease,symptoms,cures,doctor,risk level
0,flu,"fever,cough,sore throat,runny or stuffy nose,m...","over-the-counter medications,rest,fluids","family doctor,urgent care",low (0.1%
1,bronchitis,"cough,mucus production,shortness of breath,che...","antibiotics,over-the-counter medications,rest,...","family doctor,pulmonologist",low (0.5%
2,pneumonia,"fever,cough,shortness of breath,chest pain,fat...","antibiotics,over-the-counter medications,rest,...","family doctor,pulmonologist",moderate (1%)
3,heart attack,"chest pain,shortness of breath,nausea,vomiting...",emergency medical services,cardiologist,high (20%)
4,stroke,"sudden weakness,numbness on one side of the bo...",emergency medical services,neurologist,high (15%)


In [7]:
main_data.sample(10)

Unnamed: 0,disease,symptoms,cures,doctor,risk level
94,epilepsy,seizures,"medication,surgery",neurologist,varies
20,bacterial meningitis,"fever,headache,stiff neck,nausea,vomiting,sens...",antibiotics,"neurologist,neurosurgeon",high (20%)
43,hepatitis,inflammation of the liver,treatment for hepatitis may include medication...,gastroenterologist,varies depending on the type of hepatitis
60,warts,"raised,rough growths on the skin","over-the-counter medications,prescription medi...",dermatologist,low (0.1%)
37,epilepsy,seizures,"medication,surgery",neurologist,varies depending on the type of epilepsy
24,bronchitis,"cough,mucus production,shortness of breath,che...","antibiotics,over-the-counter medications,rest,...","family doctor,pulmonologist",low (0.5%)
51,meningitis,"fever,headache,stiff neck,nausea,vomiting,sens...",antibiotics,"neurologist,neurosurgeon",high (20%)
21,bipolar disorder,"extreme mood swings,from manito depression",treatment for bipolar disorder may include med...,"psychologist,psychiatrist",varies
34,diabetes,"increased thirst,frequent urination,unexplaine...","diet,exercise,medication",endocrinologist,high (25%)
16,appendicitis,"pain in the lower right abdomen,nausea,vomitin...",appendectomy,surgeon,high (20%)


In [8]:
main_data.shape

(99, 5)

In [14]:
list(main_data.columns)

['disease', 'symptoms', 'cures', 'doctor', 'risk level']

In [15]:
'disease' in main_data.columns

True

In [16]:
main_data.disease.value_counts()

pneumonia                                       3
alzheimer's disease                             3
bipolar disorder                                2
anemia                                          2
stroke                                          2
cancer                                          2
diabetes                                        2
arthritis                                       2
bronchitis                                      2
bacterial meningitis                            1
cervical cancer                                 1
colorectal cancer                               1
flu                                             1
dementia                                        1
erectile dysfunction                            1
aortic aneurysm                                 1
glaucoma                                        1
fibromyalgi                                     1
yeast infection                                 1
acute kidney injury (aki)                       1


In [18]:
df = main_data.copy() # We take a copy of the original data incase we needed the original data later
df.dropna(axis=1, how='all', inplace=True) # Dropping rows which are all NaN
df.fillna(0, inplace=True)                 # Replacing the NaN with 0

# Creating a custom label encoder so we can specify which number the encoding starts from
class CustomLabelEncoder(LabelEncoder):
    def __init__(self, start=0):
        self.start = start
        super().__init__()

    def fit_transform(self, y):
        encoded = super().fit_transform(y)
        encoded += self.start
        return encoded

# Flatten the 'Disease' column into a single Series
flattened_series = df['disease'].astype(str)

# Create and fit label encoder for the 'Disease' column
encoder = CustomLabelEncoder(start=200) # Here we tell the label encoder to start encoding from 200

In [19]:
encoded_values = encoder.fit_transform(flattened_series)
df['disease'] = encoded_values

mapping_data = {'label_encoder': encoder}

# Saving the mapping of the label column "Disease" to use later
label_mapping = {k: v for k, v in zip(mapping_data['label_encoder'].classes_, range(200, 200+len(mapping_data['label_encoder'].classes_)))}

df.head()

Unnamed: 0,disease,symptoms,cures,doctor,risk level
0,246,"fever,cough,sore throat,runny or stuffy nose,m...","over-the-counter medications,rest,fluids","family doctor,urgent care",low (0.1%
1,221,"cough,mucus production,shortness of breath,che...","antibiotics,over-the-counter medications,rest,...","family doctor,pulmonologist",low (0.5%
2,272,"fever,cough,shortness of breath,chest pain,fat...","antibiotics,over-the-counter medications,rest,...","family doctor,pulmonologist",moderate (1%)
3,253,"chest pain,shortness of breath,nausea,vomiting...",emergency medical services,cardiologist,high (20%)
4,279,"sudden weakness,numbness on one side of the bo...",emergency medical services,neurologist,high (15%)


In [20]:
label_mapping

{'acanthamoebkeratitis': 200,
 'acute appendicitis': 201,
 'acute kidney injury (aki) ': 202,
 'adenovirus': 203,
 'aflatoxicosis': 204,
 'alcoholism': 205,
 'allergic rhinitis': 206,
 "alzheimer's disease": 207,
 'anemia': 208,
 'anxiety disorder': 209,
 'aortic aneurysm': 210,
 'appendicitis': 211,
 'arthritis': 212,
 'asperger syndrome': 213,
 'asthma': 214,
 'atopic dermatitis': 215,
 'bacterial meningitis': 216,
 'bacterial meningitis ': 217,
 'bipolar disorder': 218,
 'bladder cancer': 219,
 'blood clot': 220,
 'bronchitis': 221,
 'bursitis': 222,
 'cancer': 223,
 'celiac disease': 224,
 'cervical cancer': 225,
 'cervical cancer ': 226,
 'chickenpox': 227,
 'cholesterol': 228,
 'chronic kidney disease': 229,
 'chronic obstructive pulmonary disease (copd)': 230,
 'colorectal cancer': 231,
 'common cold': 232,
 'conjunctivitis': 233,
 "crohn's disease": 234,
 'dementia': 235,
 'depression ': 236,
 'diabetes': 237,
 'diarrhea': 238,
 'eating disorder': 239,
 'epilepsy': 240,
 'epile

In [22]:
# Stack the entire data into a single Series.
# We are stacking the entire data because there're similar values in different columns. **REMEMBER THIS**
encode_df = df.copy() # Again, taking a copy because we might need the original later.
encode_df = encode_df.drop(["disease"], axis = 1)
flattened_series = encode_df.stack().astype(str)

# Create and fit label encoder.
encoder = LabelEncoder()
encoded_values = encoder.fit_transform(flattened_series)

# Reshape the encoded values back to the original DataFrame shape.
F_encoded_df = pd.DataFrame(encoded_values.reshape(encode_df.shape), columns=encode_df.columns,
                            index=encode_df.index)

# Store the mapping data for future use
Fmapping_data = {'label_encoder': encoder}
feature_mapping = {k: v for k, v in zip(Fmapping_data['label_encoder'].classes_,
                                        Fmapping_data['label_encoder'].\
                                        transform(Fmapping_data['label_encoder'].classes_))}
F_encoded_df.head(3)

Unnamed: 0,symptoms,cures,doctor,risk level
0,52,107,45,77
1,24,6,44,79
2,51,6,44,95


In [23]:
feature_mapping

{' ': 0,
 'abdominal pain,bloating,constipation,diarrhea': 1,
 'abnormal pap test results,abnormal vaginal bleeding,pain during sex': 2,
 'allergist': 3,
 'antibiotics': 4,
 'antibiotics,antifungals,surgery': 5,
 'antibiotics,over-the-counter medications,rest,fluids': 6,
 'antimalarial drugs': 7,
 'antiviral medication': 8,
 'antiviral medication,over-the-counter medications,rest,fluids': 9,
 'appendectomy': 10,
 'blood in the urine,pain or burning when urinating,frequent urination,urgency to urinate,incontinence': 11,
 'bulge in the wall of the aorta,the largest artery in the body': 12,
 'cardiologist': 13,
 'cardiologist,hematologist': 14,
 'change in bowel habits,such as diarrhea,constipation,narrowing of the stool,blood in the stool,abdominal pain,unexplained weight loss': 15,
 'chest pain,shortness of breath,fatigue,dizziness,fainting': 16,
 'chest pain,shortness of breath,nausea,vomiting,lightheadedness,sweating': 17,
 'chronic autoimmune disease that can affect any part of the b

In [24]:
label_encoded_df = pd.concat([df['disease'], F_encoded_df], axis = 1)
label_encoded_df.head()

Unnamed: 0,disease,symptoms,cures,doctor,risk level
0,246,52,107,45,77
1,221,24,6,44,79
2,272,51,6,44,95
3,253,17,37,13,65
4,279,137,37,100,64


In [25]:
# Creating X and y
model_features = label_encoded_df.columns.tolist()
model_features.remove("disease")
X = label_encoded_df[model_features]
y = label_encoded_df["disease"]

In [26]:
# One_hot_encoding the y column to use it as a multicalss in the model output layer
y_encoded = pd.get_dummies(y)
y_encoded.shape

(99, 88)

In [27]:
# The column names are the mapping of the target column. **REMEMBER THIS**
y_encoded.head()

Unnamed: 0,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0


In [28]:
# Reshape the data
X_reshaped = X.values.reshape(-1, 1)
scaler = StandardScaler().fit(X_reshaped)
X_scaled_reshaped = scaler.transform(X_reshaped)
# Reshape back to original shape
X_scaled = X_scaled_reshaped.reshape(X.shape)
X_df = pd.DataFrame(X_scaled)
X_df.head()

Unnamed: 0,0,1,2,3
0,-0.802233,0.200041,-0.929795,-0.346654
1,-1.312482,-1.640499,-0.948019,-0.310208
2,-0.820456,-1.640499,-0.948019,-0.018637
3,-1.440044,-1.075581,-1.512937,-0.565332
4,0.746735,-1.075581,0.072478,-0.583555


In [29]:
X_train, X_test, y_train, y_test = train_test_split(X_df, y_encoded, test_size = 0.25, random_state=42)
X_eval, X_test, y_eval, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)

In [30]:
X_train_tensor = tf.convert_to_tensor(X_train.values, dtype=tf.float32)
X_test_tensor = tf.convert_to_tensor(X_test.values, dtype=tf.float32)
X_eval_tensor = tf.convert_to_tensor(X_eval.values, dtype=tf.float32)
y_train_tensor = tf.convert_to_tensor(y_train, dtype=tf.float64)
y_test_tensor = tf.convert_to_tensor(y_test, dtype=tf.float64)
y_eval_tensor = tf.convert_to_tensor(y_eval, dtype=tf.float64)

In [31]:
X_train_tensor

<tf.Tensor: shape=(74, 4), dtype=float32, numpy=
array([[ 0.6738428 ,  1.1658682 ,  0.49161118,  1.384546  ],
       [-0.7293406 , -1.6404986 , -0.9297954 , -0.32843107],
       [ 0.32760274, -0.16442262,  1.5850009 ,  1.384546  ],
       [ 0.47338802,  1.8401251 ,  1.7854556 , -0.547109  ],
       [-0.05508365,  0.9654134 ,  0.05425532, -0.52888584],
       [ 0.69206595, -0.09152997,  0.05425532,  1.384546  ],
       [-0.5106627 , -1.2031428 , -1.5129366 ,  1.4209924 ],
       [ 0.56450385, -1.5858291 , -0.9297954 , -0.32843107],
       [ 0.5462807 ,  0.18181744,  1.6761167 , -0.32843107],
       [-0.25553843,  0.8560744 ,  0.10892481,  1.493885  ],
       [-0.8022333 ,  0.20004061, -0.9297954 , -0.32843107],
       [-1.0209112 ,  1.1111987 ,  0.49161118,  1.384546  ],
       [-0.36487737,  0.20004061, -0.9297954 , -0.32843107],
       [ 0.2547101 , -1.567606  ,  0.80140495, -0.5653322 ],
       [-0.05508365,  1.0018597 ,  0.07247848,  1.384546  ],
       [-0.43777004, -1.2031428 , -1

In [32]:
y_train_tensor

<tf.Tensor: shape=(74, 88), dtype=float64, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])>

In [33]:
with tf.device('/GPU:0'):
    model_1 = keras.Sequential([
        layers.Input(shape=(X_train_tensor.shape[1],)),
        layers.Dense(32, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.1),
        layers.Dense(128, activation='tanh'),
        layers.BatchNormalization(),
        layers.Dense(128, activation='tanh'),
        layers.Dropout(0.1),
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(y_train_tensor.shape[1], activation='softmax')])

    model_1.compile(optimizer= 'adam', loss='categorical_crossentropy', metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_accuracy', patience=4, mode='max')
    history = model_1.fit(X_train_tensor, y_train_tensor, epochs=500, callbacks=[early_stopping],
                batch_size=16, validation_data=(X_eval_tensor, y_eval_tensor))

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500


In [34]:
model_1.evaluate(X_test_tensor, y_test_tensor)



[4.489377975463867, 0.0]

In [35]:
def encode_user_input(user_input, mapping=feature_mapping):
    '''
    This function takes user input and transform it to the same encoding
    the original data, which the model was trained on, has.

    Args:
        user_input (str): The user input.
        mapping (dict): The mapping the label_encoder used earlier.

    Returns:
        str: encoded user input.
    '''
    encoded_input = []
    for symptom in user_input:
        for key in mapping.keys():
            if symptom.strip().lower() == key.strip().lower():
                encoded_input.append(mapping[key])
                break  # Break out of inner loop if a match is found
    return encoded_input


In [36]:
# let's take a random row from the original data.
user_input = ['itching','skin_rash','nodal_skin_eruptions','dischromic _patches']
# This row should result in "Fungal infection".
encoded_input = encode_user_input(user_input)
encoded_input

[]

In [37]:
# Transforming the encoded user input to a tensor.
input_tensor = tf.cast(encoded_input, tf.float32)
input_tensor

<tf.Tensor: shape=(0,), dtype=float32, numpy=array([], dtype=float32)>

In [38]:
# Checking the number of dimensions.
input_tensor.ndim == X_train_tensor[1].ndim

True

In [39]:
label_encoded_df.iloc[0][1:5]

symptoms       52
cures         107
doctor         45
risk level     77
Name: 0, dtype: int64

In [41]:
label_encoded_df.head()

Unnamed: 0,disease,symptoms,cures,doctor,risk level
0,246,52,107,45,77
1,221,24,6,44,79
2,272,51,6,44,95
3,253,17,37,13,65
4,279,137,37,100,64


In [42]:
padding_value = tf.constant(130, dtype=tf.float32)
desired_length = X_train_tensor[1].shape[0]
padding_length = desired_length - tf.shape(input_tensor)[0]
padding_tensor = tf.fill((padding_length,), padding_value)
final_input = tf.concat([input_tensor, padding_tensor], axis=0)
final_input


<tf.Tensor: shape=(4,), dtype=float32, numpy=array([130., 130., 130., 130.], dtype=float32)>

In [43]:
target_index = y_encoded.columns.tolist() # If you remember, the column names after the one-hot-encoding ARE the mapping of the target values.


In [44]:
final_array = final_input.numpy()
final_reshaped = final_array.reshape(-1, 1)
X_scaled = scaler.transform(final_reshaped)
final_tensor = tf.convert_to_tensor(X_scaled)
final_tensor = tf.squeeze(final_tensor)
final_tensor

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0.6191733, 0.6191733, 0.6191733, 0.6191733], dtype=float32)>

In [46]:
X_df.head()

Unnamed: 0,0,1,2,3
0,-0.802233,0.200041,-0.929795,-0.346654
1,-1.312482,-1.640499,-0.948019,-0.310208
2,-0.820456,-1.640499,-0.948019,-0.018637
3,-1.440044,-1.075581,-1.512937,-0.565332
4,0.746735,-1.075581,0.072478,-0.583555


In [47]:
import numpy as np
predict_proba = model_1.predict(tf.expand_dims(final_input, axis = 0)) # Expanding dims to get (1,17)
predicted_class_index = np.argmax(predict_proba) # Getting the 'index' of our prediction
prediction_encode = target_index[predicted_class_index] # Getting to mapping of that 'index' using y column names
inverse_label_encoding = {v: k for k, v in label_mapping.items()} # Inverse the label encoding
prediction = inverse_label_encoding[prediction_encode]
prediction



"parkinson's disease"

In [48]:
main_data.head()

Unnamed: 0,disease,symptoms,cures,doctor,risk level
0,flu,"fever,cough,sore throat,runny or stuffy nose,m...","over-the-counter medications,rest,fluids","family doctor,urgent care",low (0.1%
1,bronchitis,"cough,mucus production,shortness of breath,che...","antibiotics,over-the-counter medications,rest,...","family doctor,pulmonologist",low (0.5%
2,pneumonia,"fever,cough,shortness of breath,chest pain,fat...","antibiotics,over-the-counter medications,rest,...","family doctor,pulmonologist",moderate (1%)
3,heart attack,"chest pain,shortness of breath,nausea,vomiting...",emergency medical services,cardiologist,high (20%)
4,stroke,"sudden weakness,numbness on one side of the bo...",emergency medical services,neurologist,high (15%)


In [49]:
df = main_data.copy() # As usual, taking a copy from that data incase we needed the original later
# Combine all symptom columns into a single column
df['All Symptoms'] = df.apply(lambda row: ','.join(row.dropna()), axis=1)
# Drop duplicate symptoms within each cell
df['All Symptoms'] = df['All Symptoms'].apply(lambda x: ','.join(sorted(set(x.split(','))) if x else ''))
stay_cols= ['disease', 'All Symptoms']
df = df[stay_cols]
df.head()

Unnamed: 0,disease,All Symptoms
0,flu,"cough,family doctor,fatigue,fever,flu,fluids,h..."
1,bronchitis,"antibiotics,bronchitis,chest pain,cough,family..."
2,pneumonia,"antibiotics,chest pain,cough,family doctor,fat..."
3,heart attack,"cardiologist,chest pain,emergency medical serv..."
4,stroke,"confusion,difficulty speaking,emergency medica..."


In [53]:
df['All Symptoms'][0]

'cough,family doctor,fatigue,fever,flu,fluids,headache,low (0.1%,muscle aches,over-the-counter medications,rest,runny or stuffy nose,sore throat,urgent care'

In [54]:
def strip_to_basic_tokens(text):
    # Remove doble spaces and underscores
    text = re.sub(r'[_\s]+', ' ', text)
    # Split by commas and lowercase the tokens
    tokens = [token.strip().lower() for token in text.split(',')]
    return tokens

# Apply the function to 'All Symptoms' column
df['Basic Tokens'] = df['All Symptoms'].apply(strip_to_basic_tokens)
df['Basic Tokens'] = df['Basic Tokens'].apply(lambda x: ', '.join(x))
df = df.drop(['All Symptoms'], axis = 1)
df.head()

Unnamed: 0,disease,Basic Tokens
0,flu,"cough, family doctor, fatigue, fever, flu, flu..."
1,bronchitis,"antibiotics, bronchitis, chest pain, cough, fa..."
2,pneumonia,"antibiotics, chest pain, cough, family doctor,..."
3,heart attack,"cardiologist, chest pain, emergency medical se..."
4,stroke,"confusion, difficulty speaking, emergency medi..."


In [55]:
df['Basic Tokens'][0]

'cough, family doctor, fatigue, fever, flu, fluids, headache, low (0.1%, muscle aches, over-the-counter medications, rest, runny or stuffy nose, sore throat, urgent care'

In [56]:
dfE = df.copy() # Taking a copy because we never know what might happen
dfE['Basic Tokens'] = dfE['Basic Tokens'].apply(lambda x: x.split(', '))

mlb = MultiLabelBinarizer()
# Fit and transform the 'Basic Tokens' column
one_hot_encoded = pd.DataFrame(mlb.fit_transform(dfE['Basic Tokens']), columns=mlb.classes_, index=df.index)

# Concatenate the one-hot encoded DataFrame with the original DataFrame
df_encoded = pd.concat([dfE, one_hot_encoded], axis=1)

# Drop the 'Basic Tokens' column
df_encoded = df_encoded.drop(columns=['Basic Tokens'])
df_encoded.head()

Unnamed: 0,disease,Unnamed: 2,abdominal cramps,abdominal pain,abnormal pap test results,abnormal vaginal bleeding,acanthamoebkeratitis,acute appendicitis,acute kidney injury (aki),addiction specialist,adenovirus,aerophobia,aflatoxicosis,alcoholism,allergic rhinitis,allergist,allergy shots,alzheimer's disease,an infection,anemia,antibiotics,antifungals,antimalarial drugs,antiviral medication,anxiety disorder,anxiety that interfere with daily life,aortic aneurysm,appendectomy,appendicitis,arthritis,asperger syndrome,asthma,atopic dermatitis,bacterial meningitis,bipolar disorder,bladder cancer,bladder habits,bloating,blood clot,blood in the sputum,blood in the stool,blood in the urine,blood transfusions,bloody stool,blurred vision,bronchitis,bulge in the wall of the aorta,burning,bursitis,but may include medication,but symptoms can be treated with rest,but there are treatments that can help manage the symptoms,but treatment may include supportive care,can make it difficult to breathe,cancer,cardiologist,celiac disease,cervical cancer,change in bowel habits,changes in appetite,changes in bowel,changes in personality or behavior,changes in urination,chemotherapy,chest pain,chest tightness,chickenpox,chills,cholesterol,chronic autoimmune disease that can affect any part of the body,chronic bronchitis,chronic kidney disease,chronic obstructive pulmonary disease (copd),colorectal cancer,combination of both,common cold,compression,compression stockings,condition in which the heart cannot pump blood as well as it should,condition that affects the lungs,confusion,congestion,conjunctivitis,constipation,coordination,cough,cough that does not go away,coughing,coughing up blood,crohn's disease,cryotherapy,cuts that are slow to heal,damage to the kidneys that can lead to kidney failure,death,decreased kidney function,dementia,depression,dermatologist,diabetes,diarrhea,diet,difficulty breathing,difficulty getting,difficulty speaking,difficulty swallowing,difficulty thinking,difficulty urinating,difficulty with balance,difficulty with social interaction,discharge from the penis or vagina,discharge from the vaginor penis,disturbance in eating behavior that can lead to serious health problems,dizziness,doctor,eating disorder,elevated body temperature,elevation,emergency medical services,emphysema,endocrinologist,energy levels,epilepsy,erectile dysfunction,excessive worry,exercise,extreme mood swings,fainting,family doctor,fatigue,fear,fever,fibromyalgi,fibromyalgia,flu,fluids,food poisoning,frequent urination,from manito depression,gallstones,gastroenterologist,glaucoma,gluten-free diet,gonorrhea,graves' disease,gynecologist,hashimoto's thyroiditis,headache,heart attack,heart disease,heart failure,hematologist,hepatitis,herpes,high (15%),high (20%),high (25%),high (70%),high blood pressure,high levels of cholesterol in the blood,hoarseness,hopelessness,hormone therapy,hydrophobia,hyperthyroidism,hypothyroidism,ice,impaired judgment,incontinence,increased pressure in the eye that can damage the optic nerve,increased thirst,indigestion,infection,inflamed skin,inflammation in bursa,inflammation in the eye,inflammation in the joints,inflammation of the liver,influenza,irritable bowel syndrome (ibs),itching,itchy,itchy eyes,itchy nose,jaundice,keeping an erection,kidney disease,kidney stones,laser therapy,lead to vision loss,lifestyle changes,lightheadedness,liver damage,liver transplantation,loose,loss of interest in activities,low (0.1%,low (0.1%),low (0.5%,low (0.5%),lump,lump in the neck,lung cancer,lung disease,lupus,malaria,measles,measles vaccine,medication,medication side effect,memory loss,memory problems,meningitis,moderate (1%,moderate (1%),mononucleosis,mouth,mucus production,muscle aches,narrowing of the stool,nausea,nephrologist,neurologist,neurosurgeon,night sweats,numbness,numbness on one side of the body,oncologist,ophthalmologist,orthopedist,other parts of the body,over-the-counter medications,oxygen therapy,pain,pain during sex,pain in the back or hips,pain in the lower abdomen,pain in the lower back or side,pain in the lower right abdomen,pain in the upper abdomen,pain in the upper right abdomen,pain or burning when urinating,painful rash that follows nerve path,painful urination,palate,pale skin,parkinson's disease,peptic ulcer disease,peripheral artery disease,persistent cough,physical dependence on alcohol,physical therapy,pink eye,pneumonia,prescription medications,prostate cancer,psychiatrist,psychologist,pulmonologist,rabies,rabies vaccine,radiation therapy,raised,rash,red,red eyes,redness,repetitive behaviors,rest,restricted interests,rheumatoid arthritis,rheumatologist,rough growths on the skin,runny nose,runny or stuffy nose,sadness,salmonella,seizures,sensitivity to cold,sensitivity to light,severe headache,sexually transmitted infection that can cause blisters on the genitals,sexually transmitted infection that can cause burning during urination,shingles,shortness of breath,sleep,slow movement,sneezing,sore throat,stiff neck,stiffness,stiffness in the joints,stomach cancer,stroke,such as asthma,such as clot-busting medication,such as dehydration,such as diarrhea,such as high blood pressure,sudden weakness,support groups,surgeon,surgery,sweating,swelling,swollen,swollen lymph nodes,tenderness,tenderness in the affected area,the largest artery in the body,therapy,there is no cure for aflatoxicosis,there is no cure for alzheimer's disease,there is no cure for asperger syndrome,there is no cure for dementia,there is no cure for herpes,there is no cure for mononucleosis,throat,thyroid cancer,tingling at the bite site,tingling in the arms or legs,tolerance to alcohol,treatment for alcoholism may include counseling,treatment for anemidepends on the type of anemia,treatment for anxiety disorders may include therapy,treatment for asthmmay include medication,treatment for bipolar disorder may include medication,treatment for blood clot may include medication,treatment for copd may include medication,treatment for depression may include medication,treatment for eating disorders may include therapy,treatment for heart disease may include medication,treatment for hepatitis may include medication,treatment for ibs may include diet,treatment for the underlying cause,tremors,trouble seeing in one eye,tuberculosis (tb),twisted veins,ulcerative colitis,unexplained bleeding or discharge,unexplained heat intolerance,unexplained weight loss,urgency to urinate,urgent care,urinary tract infection (uti),urologist,usually in the legs,varicose veins,varies,varies depending on the bursa,varies depending on the level of cholesterol,varies depending on the type of anemia,varies depending on the type of anxiety disorder,varies depending on the type of arthritis,varies depending on the type of cancer,varies depending on the type of copd,varies depending on the type of eating disorder,varies depending on the type of epilepsy,varies depending on the type of hepatitis,vascular surgeon,vomiting,warm compresses,warmth,warts,watery eyes,watery stools,weight loss,wheezing,which is an overactive thyroid,which is an underactive thyroid,which is small sac of fluid that cushions joints,widespread pain,withdrawal symptoms,yeast infection
0,flu,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,bronchitis,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,pneumonia,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,heart attack,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,stroke,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [57]:
df_encoded.shape

(99, 374)

In [58]:
disease_names = [key for key in label_mapping.keys()]
diseases = [strip_to_basic_tokens(disease) for disease in disease_names]
diseases_cleaned = [item[0] if isinstance(item, list) else item for item in diseases]
df_encoded = df_encoded.drop(diseases_cleaned, axis = 1)
df_encoded.shape

(99, 290)

In [60]:
model_features = df_encoded.columns.tolist()
model_features.remove("disease")
X = df_encoded[model_features]
y = df_encoded["disease"]

In [61]:
y_encoded = pd.get_dummies(y)
y_encoded.shape

(99, 88)

In [62]:
y_encoded.head()

Unnamed: 0,acanthamoebkeratitis,acute appendicitis,acute kidney injury (aki),adenovirus,aflatoxicosis,alcoholism,allergic rhinitis,alzheimer's disease,anemia,anxiety disorder,aortic aneurysm,appendicitis,arthritis,asperger syndrome,asthma,atopic dermatitis,bacterial meningitis,bacterial meningitis.1,bipolar disorder,bladder cancer,blood clot,bronchitis,bursitis,cancer,celiac disease,cervical cancer,cervical cancer.1,chickenpox,cholesterol,chronic kidney disease,chronic obstructive pulmonary disease (copd),colorectal cancer,common cold,conjunctivitis,crohn's disease,dementia,depression,diabetes,diarrhea,eating disorder,epilepsy,epilepsy.1,erectile dysfunction,fever,fibromyalgi,fibromyalgia,flu,food poisoning,gallstones,glaucoma,gonorrhea,graves' disease,hashimoto's thyroiditis,heart attack,heart disease,heart failure,hepatitis,herpes,influenza,irritable bowel syndrome (ibs),kidney disease,kidney stones,lung cancer,lung disease,lupus,malaria,measles,meningitis,mononucleosis,parkinson's disease,peptic ulcer disease,peripheral artery disease,pneumonia,prostate cancer,rabies,rheumatoid arthritis,salmonella,shingles,stomach cancer,stroke,thyroid cancer,tuberculosis (tb),ulcerative colitis,urinary tract infection (uti),varicose veins,warts,warts.1,yeast infection
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0


In [63]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size = 0.25, random_state=42)
X_eval, X_test, y_eval, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42)


In [64]:
X_train_tensor = tf.convert_to_tensor(X_train.values, dtype=tf.float32)
X_test_tensor = tf.convert_to_tensor(X_test.values, dtype=tf.float32)
X_eval_tensor = tf.convert_to_tensor(X_eval.values, dtype=tf.float32)
y_train_tensor = tf.convert_to_tensor(y_train, dtype=tf.float64)
y_test_tensor = tf.convert_to_tensor(y_test, dtype=tf.float64)
y_eval_tensor = tf.convert_to_tensor(y_eval, dtype=tf.float64)

In [65]:
X_train_tensor

<tf.Tensor: shape=(74, 289), dtype=float32, numpy=
array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)>

In [66]:
with tf.device('/GPU:0'):
    model_2 = keras.Sequential([
        layers.Input(shape=(X_train_tensor.shape[1],)),
        layers.Dense(160, activation='relu'),
        layers.Dropout(0.1),
        layers.Dense(200, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(240, activation='tanh'),
        layers.BatchNormalization(),
        layers.Dense(240, activation='tanh'),
        layers.Dropout(0.2),
        layers.Dense(200, activation='relu'),
        layers.Dropout(0.1),
        layers.Dense(160, activation='relu'),
        layers.Dense(y_train_tensor.shape[1], activation='softmax')])

    model_2.compile(optimizer= 'adam', loss='categorical_crossentropy', metrics=['accuracy'])
    early_stopping = EarlyStopping(monitor='val_accuracy', patience=4, mode='max')
    history = model_2.fit(X_train_tensor, y_train_tensor, epochs=500, callbacks=[early_stopping],
                batch_size=16, validation_data=(X_eval_tensor, y_eval_tensor))

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500


In [67]:
model_2.evaluate(X_test_tensor, y_test_tensor)



[4.554289817810059, 0.07692307978868484]

In [69]:
# If you remember in the first model, we took a row from the origial data to test the model
# We aren't going to do this here, let's REALLY test it
user_input = ['stomach_pain','acidity','chest_pain'] # This should be GERD

original_data = df_encoded.copy()

# We will change the strip_to_basic_tokens function just a little bit to be able to deal with the user input
def strip_to_basic_tokens(symptoms):
    symptoms = [symptom.strip().lower().replace(' ', '_').replace('_', ' ') for symptom in symptoms]
    return [re.sub(r'\s+', ' ', symptom) for symptom in symptoms]
# Apply strip_to_basic_tokens function to user input
user_input_stripped = strip_to_basic_tokens(user_input)

# Initialize MultiLabelBinarizer with all symptoms
mlb = MultiLabelBinarizer(classes=df_encoded.columns)

# Fit and transform user input
user_input_encoded = pd.DataFrame(mlb.fit_transform([user_input_stripped]), columns=mlb.classes_)

# Concatenate user input with original data
final_user_input = pd.concat([pd.DataFrame(columns=original_data.columns), user_input_encoded], axis=0)
final_user_input = final_user_input.drop(['disease'],axis = 1)
# Print the final user input shape
final_user_input.head()

Unnamed: 0,Unnamed: 1,abdominal cramps,abdominal pain,abnormal pap test results,abnormal vaginal bleeding,addiction specialist,aerophobia,allergist,allergy shots,an infection,antibiotics,antifungals,antimalarial drugs,antiviral medication,anxiety that interfere with daily life,appendectomy,bladder habits,bloating,blood in the sputum,blood in the stool,blood in the urine,blood transfusions,bloody stool,blurred vision,bulge in the wall of the aorta,burning,but may include medication,but symptoms can be treated with rest,but there are treatments that can help manage the symptoms,but treatment may include supportive care,can make it difficult to breathe,cardiologist,change in bowel habits,changes in appetite,changes in bowel,changes in personality or behavior,changes in urination,chemotherapy,chest pain,chest tightness,chills,chronic autoimmune disease that can affect any part of the body,chronic bronchitis,combination of both,compression,compression stockings,condition in which the heart cannot pump blood as well as it should,condition that affects the lungs,confusion,congestion,constipation,coordination,cough,cough that does not go away,coughing,coughing up blood,cryotherapy,cuts that are slow to heal,damage to the kidneys that can lead to kidney failure,death,decreased kidney function,dermatologist,diet,difficulty breathing,difficulty getting,difficulty speaking,difficulty swallowing,difficulty thinking,difficulty urinating,difficulty with balance,difficulty with social interaction,discharge from the penis or vagina,discharge from the vaginor penis,disturbance in eating behavior that can lead to serious health problems,dizziness,doctor,elevated body temperature,elevation,emergency medical services,emphysema,endocrinologist,energy levels,excessive worry,exercise,extreme mood swings,fainting,family doctor,fatigue,fear,fluids,frequent urination,from manito depression,gastroenterologist,gluten-free diet,gynecologist,headache,hematologist,high (15%),high (20%),high (25%),high (70%),high blood pressure,high levels of cholesterol in the blood,hoarseness,hopelessness,hormone therapy,hydrophobia,hyperthyroidism,hypothyroidism,ice,impaired judgment,incontinence,increased pressure in the eye that can damage the optic nerve,increased thirst,indigestion,infection,inflamed skin,inflammation in bursa,inflammation in the eye,inflammation in the joints,inflammation of the liver,itching,itchy,itchy eyes,itchy nose,jaundice,keeping an erection,laser therapy,lead to vision loss,lifestyle changes,lightheadedness,liver damage,liver transplantation,loose,loss of interest in activities,low (0.1%,low (0.1%),low (0.5%,low (0.5%),lump,lump in the neck,measles vaccine,medication,medication side effect,memory loss,memory problems,moderate (1%,moderate (1%),mouth,mucus production,muscle aches,narrowing of the stool,nausea,nephrologist,neurologist,neurosurgeon,night sweats,numbness,numbness on one side of the body,oncologist,ophthalmologist,orthopedist,other parts of the body,over-the-counter medications,oxygen therapy,pain,pain during sex,pain in the back or hips,pain in the lower abdomen,pain in the lower back or side,pain in the lower right abdomen,pain in the upper abdomen,pain in the upper right abdomen,pain or burning when urinating,painful rash that follows nerve path,painful urination,palate,pale skin,persistent cough,physical dependence on alcohol,physical therapy,pink eye,prescription medications,psychiatrist,psychologist,pulmonologist,rabies vaccine,radiation therapy,raised,rash,red,red eyes,redness,repetitive behaviors,rest,restricted interests,rheumatologist,rough growths on the skin,runny nose,runny or stuffy nose,sadness,seizures,sensitivity to cold,sensitivity to light,severe headache,sexually transmitted infection that can cause blisters on the genitals,sexually transmitted infection that can cause burning during urination,shortness of breath,sleep,slow movement,sneezing,sore throat,stiff neck,stiffness,stiffness in the joints,such as asthma,such as clot-busting medication,such as dehydration,such as diarrhea,such as high blood pressure,sudden weakness,support groups,surgeon,surgery,sweating,swelling,swollen,swollen lymph nodes,tenderness,tenderness in the affected area,the largest artery in the body,therapy,there is no cure for aflatoxicosis,there is no cure for alzheimer's disease,there is no cure for asperger syndrome,there is no cure for dementia,there is no cure for herpes,there is no cure for mononucleosis,throat,tingling at the bite site,tingling in the arms or legs,tolerance to alcohol,treatment for alcoholism may include counseling,treatment for anemidepends on the type of anemia,treatment for anxiety disorders may include therapy,treatment for asthmmay include medication,treatment for bipolar disorder may include medication,treatment for blood clot may include medication,treatment for copd may include medication,treatment for depression may include medication,treatment for eating disorders may include therapy,treatment for heart disease may include medication,treatment for hepatitis may include medication,treatment for ibs may include diet,treatment for the underlying cause,tremors,trouble seeing in one eye,twisted veins,unexplained bleeding or discharge,unexplained heat intolerance,unexplained weight loss,urgency to urinate,urgent care,urologist,usually in the legs,varies,varies depending on the bursa,varies depending on the level of cholesterol,varies depending on the type of anemia,varies depending on the type of anxiety disorder,varies depending on the type of arthritis,varies depending on the type of cancer,varies depending on the type of copd,varies depending on the type of eating disorder,varies depending on the type of epilepsy,varies depending on the type of hepatitis,vascular surgeon,vomiting,warm compresses,warmth,watery eyes,watery stools,weight loss,wheezing,which is an overactive thyroid,which is an underactive thyroid,which is small sac of fluid that cushions joints,widespread pain,withdrawal symptoms
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [70]:
user_tensor = tf.convert_to_tensor(final_user_input.values, dtype=tf.float32)
user_tensor[0]

<tf.Tensor: shape=(289,), dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0

In [71]:
predict_proba = model_2.predict(user_tensor)
predicted_class_index = np.argmax(predict_proba)
prediction_encode = target_index[predicted_class_index]
inverse_label_encoding = {v: k for k, v in label_mapping.items()}
prediction = inverse_label_encoding[prediction_encode]
prediction



'pneumonia'

In [73]:
user_input = ['continuous_sneezing','watering_from_eyes'] # This should be Allergy

original_data = df_encoded.copy()

# Apply strip_to_basic_tokens function to user input
user_input_stripped = strip_to_basic_tokens(user_input)

# Fit and transform user input
user_input_encoded = pd.DataFrame(mlb.fit_transform([user_input_stripped]), columns=mlb.classes_)

# Concatenate user input with original data
final_user_input = pd.concat([pd.DataFrame(columns=original_data.columns), user_input_encoded], axis=0)
final_user_input = final_user_input.drop(['disease'],axis = 1)
# Print the final user input shape
final_user_input.head()

Unnamed: 0,Unnamed: 1,abdominal cramps,abdominal pain,abnormal pap test results,abnormal vaginal bleeding,addiction specialist,aerophobia,allergist,allergy shots,an infection,antibiotics,antifungals,antimalarial drugs,antiviral medication,anxiety that interfere with daily life,appendectomy,bladder habits,bloating,blood in the sputum,blood in the stool,blood in the urine,blood transfusions,bloody stool,blurred vision,bulge in the wall of the aorta,burning,but may include medication,but symptoms can be treated with rest,but there are treatments that can help manage the symptoms,but treatment may include supportive care,can make it difficult to breathe,cardiologist,change in bowel habits,changes in appetite,changes in bowel,changes in personality or behavior,changes in urination,chemotherapy,chest pain,chest tightness,chills,chronic autoimmune disease that can affect any part of the body,chronic bronchitis,combination of both,compression,compression stockings,condition in which the heart cannot pump blood as well as it should,condition that affects the lungs,confusion,congestion,constipation,coordination,cough,cough that does not go away,coughing,coughing up blood,cryotherapy,cuts that are slow to heal,damage to the kidneys that can lead to kidney failure,death,decreased kidney function,dermatologist,diet,difficulty breathing,difficulty getting,difficulty speaking,difficulty swallowing,difficulty thinking,difficulty urinating,difficulty with balance,difficulty with social interaction,discharge from the penis or vagina,discharge from the vaginor penis,disturbance in eating behavior that can lead to serious health problems,dizziness,doctor,elevated body temperature,elevation,emergency medical services,emphysema,endocrinologist,energy levels,excessive worry,exercise,extreme mood swings,fainting,family doctor,fatigue,fear,fluids,frequent urination,from manito depression,gastroenterologist,gluten-free diet,gynecologist,headache,hematologist,high (15%),high (20%),high (25%),high (70%),high blood pressure,high levels of cholesterol in the blood,hoarseness,hopelessness,hormone therapy,hydrophobia,hyperthyroidism,hypothyroidism,ice,impaired judgment,incontinence,increased pressure in the eye that can damage the optic nerve,increased thirst,indigestion,infection,inflamed skin,inflammation in bursa,inflammation in the eye,inflammation in the joints,inflammation of the liver,itching,itchy,itchy eyes,itchy nose,jaundice,keeping an erection,laser therapy,lead to vision loss,lifestyle changes,lightheadedness,liver damage,liver transplantation,loose,loss of interest in activities,low (0.1%,low (0.1%),low (0.5%,low (0.5%),lump,lump in the neck,measles vaccine,medication,medication side effect,memory loss,memory problems,moderate (1%,moderate (1%),mouth,mucus production,muscle aches,narrowing of the stool,nausea,nephrologist,neurologist,neurosurgeon,night sweats,numbness,numbness on one side of the body,oncologist,ophthalmologist,orthopedist,other parts of the body,over-the-counter medications,oxygen therapy,pain,pain during sex,pain in the back or hips,pain in the lower abdomen,pain in the lower back or side,pain in the lower right abdomen,pain in the upper abdomen,pain in the upper right abdomen,pain or burning when urinating,painful rash that follows nerve path,painful urination,palate,pale skin,persistent cough,physical dependence on alcohol,physical therapy,pink eye,prescription medications,psychiatrist,psychologist,pulmonologist,rabies vaccine,radiation therapy,raised,rash,red,red eyes,redness,repetitive behaviors,rest,restricted interests,rheumatologist,rough growths on the skin,runny nose,runny or stuffy nose,sadness,seizures,sensitivity to cold,sensitivity to light,severe headache,sexually transmitted infection that can cause blisters on the genitals,sexually transmitted infection that can cause burning during urination,shortness of breath,sleep,slow movement,sneezing,sore throat,stiff neck,stiffness,stiffness in the joints,such as asthma,such as clot-busting medication,such as dehydration,such as diarrhea,such as high blood pressure,sudden weakness,support groups,surgeon,surgery,sweating,swelling,swollen,swollen lymph nodes,tenderness,tenderness in the affected area,the largest artery in the body,therapy,there is no cure for aflatoxicosis,there is no cure for alzheimer's disease,there is no cure for asperger syndrome,there is no cure for dementia,there is no cure for herpes,there is no cure for mononucleosis,throat,tingling at the bite site,tingling in the arms or legs,tolerance to alcohol,treatment for alcoholism may include counseling,treatment for anemidepends on the type of anemia,treatment for anxiety disorders may include therapy,treatment for asthmmay include medication,treatment for bipolar disorder may include medication,treatment for blood clot may include medication,treatment for copd may include medication,treatment for depression may include medication,treatment for eating disorders may include therapy,treatment for heart disease may include medication,treatment for hepatitis may include medication,treatment for ibs may include diet,treatment for the underlying cause,tremors,trouble seeing in one eye,twisted veins,unexplained bleeding or discharge,unexplained heat intolerance,unexplained weight loss,urgency to urinate,urgent care,urologist,usually in the legs,varies,varies depending on the bursa,varies depending on the level of cholesterol,varies depending on the type of anemia,varies depending on the type of anxiety disorder,varies depending on the type of arthritis,varies depending on the type of cancer,varies depending on the type of copd,varies depending on the type of eating disorder,varies depending on the type of epilepsy,varies depending on the type of hepatitis,vascular surgeon,vomiting,warm compresses,warmth,watery eyes,watery stools,weight loss,wheezing,which is an overactive thyroid,which is an underactive thyroid,which is small sac of fluid that cushions joints,widespread pain,withdrawal symptoms
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [74]:
user_tensor = tf.convert_to_tensor(final_user_input.values, dtype=tf.float32)
user_tensor[0]

<tf.Tensor: shape=(289,), dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0

In [75]:
predict_proba = model_2.predict(user_tensor)
predicted_class_index = np.argmax(predict_proba)
prediction_encode = target_index[predicted_class_index]
inverse_label_encoding = {v: k for k, v in label_mapping.items()}
prediction = inverse_label_encoding[prediction_encode]
prediction



"alzheimer's disease"