## Disease Detection from symptoms

Source

https://www.kaggle.com/datasets/niyarrbarman/symptom2disease

Libraries

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [15]:
df=pd.read_csv('/content/Symptom2Disease.csv', index_col=0)
df.head(20)

Unnamed: 0,label,text
0,Psoriasis,I have been experiencing a skin rash on my arm...
1,Psoriasis,"My skin has been peeling, especially on my kne..."
2,Psoriasis,I have been experiencing joint pain in my fing...
3,Psoriasis,"There is a silver like dusting on my skin, esp..."
4,Psoriasis,"My nails have small dents or pits in them, and..."
5,Psoriasis,The skin on my palms and soles is thickened an...
6,Psoriasis,"The skin around my mouth, nose, and eyes is re..."
7,Psoriasis,My skin is very sensitive and reacts easily to...
8,Psoriasis,I have noticed a sudden peeling of skin at dif...
9,Psoriasis,The skin on my genitals is red and inflamed. I...


In [16]:
df.label.unique()

array(['Psoriasis', 'Varicose Veins', 'Typhoid', 'Chicken pox',
       'Impetigo', 'Dengue', 'Fungal infection', 'Common Cold',
       'Pneumonia', 'Dimorphic Hemorrhoids', 'Arthritis', 'Acne',
       'Bronchial Asthma', 'Hypertension', 'Migraine',
       'Cervical spondylosis', 'Jaundice', 'Malaria',
       'urinary tract infection', 'allergy',
       'gastroesophageal reflux disease', 'drug reaction',
       'peptic ulcer disease', 'diabetes'], dtype=object)

In [18]:
df['label_num']=df.label.map(dict(zip(df.label.unique(),pd.Series([i for i in range(24)]))))
df.head(20)

Unnamed: 0,label,text,label_num
0,Psoriasis,I have been experiencing a skin rash on my arm...,0
1,Psoriasis,"My skin has been peeling, especially on my kne...",0
2,Psoriasis,I have been experiencing joint pain in my fing...,0
3,Psoriasis,"There is a silver like dusting on my skin, esp...",0
4,Psoriasis,"My nails have small dents or pits in them, and...",0
5,Psoriasis,The skin on my palms and soles is thickened an...,0
6,Psoriasis,"The skin around my mouth, nose, and eyes is re...",0
7,Psoriasis,My skin is very sensitive and reacts easily to...,0
8,Psoriasis,I have noticed a sudden peeling of skin at dif...,0
9,Psoriasis,The skin on my genitals is red and inflamed. I...,0


In [19]:
lookup=dict(zip(df.label.unique(),pd.Series([i for i in range(24)])))
lookup

{'Psoriasis': 0,
 'Varicose Veins': 1,
 'Typhoid': 2,
 'Chicken pox': 3,
 'Impetigo': 4,
 'Dengue': 5,
 'Fungal infection': 6,
 'Common Cold': 7,
 'Pneumonia': 8,
 'Dimorphic Hemorrhoids': 9,
 'Arthritis': 10,
 'Acne': 11,
 'Bronchial Asthma': 12,
 'Hypertension': 13,
 'Migraine': 14,
 'Cervical spondylosis': 15,
 'Jaundice': 16,
 'Malaria': 17,
 'urinary tract infection': 18,
 'allergy': 19,
 'gastroesophageal reflux disease': 20,
 'drug reaction': 21,
 'peptic ulcer disease': 22,
 'diabetes': 23}

In [24]:
import spacy.cli
spacy.cli.download("en_core_web_lg")
import en_core_web_lg
nlp = en_core_web_lg.load()

[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_lg')


In [32]:
import spacy

nlp=spacy.load('en_core_web_lg')

def preprocess(text):
  list=[]
  for token in nlp(text):
    if token.is_punct or token.is_space:
      continue
    list.append(token.lemma_)
  return ' '.join(list)

In [33]:
df['preprocess']=df['text'].apply(preprocess)
df.head(10)

Unnamed: 0,label,text,label_num,preprocess
0,Psoriasis,I have been experiencing a skin rash on my arm...,0,I have be experience a skin rash on my arm leg...
1,Psoriasis,"My skin has been peeling, especially on my kne...",0,my skin have be peel especially on my knee elb...
2,Psoriasis,I have been experiencing joint pain in my fing...,0,I have be experience joint pain in my finger w...
3,Psoriasis,"There is a silver like dusting on my skin, esp...",0,there be a silver like dust on my skin especia...
4,Psoriasis,"My nails have small dents or pits in them, and...",0,my nail have small dent or pit in they and the...
5,Psoriasis,The skin on my palms and soles is thickened an...,0,the skin on my palm and sol be thicken and hav...
6,Psoriasis,"The skin around my mouth, nose, and eyes is re...",0,the skin around my mouth nose and eye be red a...
7,Psoriasis,My skin is very sensitive and reacts easily to...,0,my skin be very sensitive and react easily to ...
8,Psoriasis,I have noticed a sudden peeling of skin at dif...,0,I have notice a sudden peeling of skin at diff...
9,Psoriasis,The skin on my genitals is red and inflamed. I...,0,the skin on my genital be red and inflame it b...


In [34]:
df['vector']=df.preprocess.apply(lambda text: nlp(text).vector)
df.head()

Unnamed: 0,label,text,label_num,preprocess,vector
0,Psoriasis,I have been experiencing a skin rash on my arm...,0,I have be experience a skin rash on my arm leg...,"[-1.0520097, 2.0068107, -3.1425354, 1.3052415,..."
1,Psoriasis,"My skin has been peeling, especially on my kne...",0,my skin have be peel especially on my knee elb...,"[-1.3448839, 1.2253065, -4.0693727, 0.5828706,..."
2,Psoriasis,I have been experiencing joint pain in my fing...,0,I have be experience joint pain in my finger w...,"[-0.039949566, 1.4880179, -3.136055, 0.0100825..."
3,Psoriasis,"There is a silver like dusting on my skin, esp...",0,there be a silver like dust on my skin especia...,"[-1.5876127, 1.5015007, -3.8816297, 1.3765275,..."
4,Psoriasis,"My nails have small dents or pits in them, and...",0,my nail have small dent or pit in they and the...,"[-0.80246216, 2.5354614, -4.1618123, -0.066844..."


In [36]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train, y_test=train_test_split(
  df['vector'].values, df['label_num'], test_size=0.2, stratify=df['label_num'], random_state=122
)

In [40]:
X_train_d=np.stack(X_train)
X_test_d=np.stack(X_test)

In [41]:
from sklearn.preprocessing import MinMaxScaler
scaler =MinMaxScaler()
X_train_scaled=scaler.fit_transform(X_train_d)
X_test_scaled=scaler.transform(X_test_d)



In [42]:
from sklearn.svm import SVC

model=SVC(C=1, kernel='poly', degree=3, gamma='scale')
model.fit(X_train_scaled, y_train)
y_pred=model.predict(X_test_scaled)

In [45]:
from sklearn.metrics import classification_report
print('\n\n\n\n Classification Raport :\n\n\n', classification_report(y_test,y_pred))





 Classification Raport :


               precision    recall  f1-score   support

           0       0.89      0.80      0.84        10
           1       0.83      1.00      0.91        10
           2       0.75      0.60      0.67        10
           3       0.90      0.90      0.90        10
           4       1.00      0.90      0.95        10
           5       0.78      0.70      0.74        10
           6       1.00      1.00      1.00        10
           7       0.83      1.00      0.91        10
           8       1.00      1.00      1.00        10
           9       0.91      1.00      0.95        10
          10       1.00      1.00      1.00        10
          11       1.00      1.00      1.00        10
          12       1.00      0.90      0.95        10
          13       1.00      1.00      1.00        10
          14       1.00      1.00      1.00        10
          15       1.00      1.00      1.00        10
          16       1.00      1.00      1.00      

Testing of  prediction

In [46]:
testing_txt="There is a silver like dusting on my skin"
tp=preprocess(testing_txt)

In [48]:
tp=nlp(tp).vector

In [49]:
tp=tp.reshape(1,-1)

In [50]:
tp=scaler.transform(tp)

In [52]:
pred=model.predict(tp)
print(pred)

[11]


In [53]:
value=[i for i in lookup if lookup[i]==pred[0]]
print(value[0])


Acne


In [54]:
import pickle

with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)


with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)
