In [1]:
import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize 
from pattern.en import suggest
import re
from nltk.corpus import stopwords

In [2]:
#Step 1 : Tokenize
def tokenize(text):
    words = word_tokenize(text)
    return words

#Step 2 : Correct words where alphabets are repeated unncessarily
#amazzzzing => amazzing
def reduce_lengthening(text):
    pattern = re.compile(r"(.)\1{2,}")
    return pattern.sub(r"\1\1", text)

#Step 3: Remove punctuation
def remove_punctuation(text):
    #without_punct = text.translate(str.maketrans("",""), str.punctuation)
    without_punct = re.sub(r'\W',' ',text)#remove non word characters
    return without_punct

#Step 4: Remove URLs and user mentions
def remove_user_mentions_urls(text):
    text = re.sub(r"(?:\@|https?\://)\S+", "", text)
    return text

#Step 5: Remove numbers
def remove_numbers(text):
    text = re.sub(r'\d',' ',text)
    return text

#Step 6: Remove stop words
def remove_stop_words(text):
    words = tokenize(text)
    new =[]
    for word in words:
        if word not in stopwords.words('english'):
            new.append(word)
    text = ' '.join(new)
    return text

#Text cleaning pipeline
def clean_text(text):
    text = reduce_lengthening(text)
    text = remove_user_mentions_urls(text)
    text = remove_punctuation(text)
    text = remove_stop_words(text)
    return text




In [3]:
df1 = pd.read_csv("df_diseases.csv")
df1.shape

(1182, 10)

In [4]:
df1_clean = df1.dropna(subset=['symptoms'])
df1_clean.shape

(1098, 10)

<h3>Clean symptoms</h3>

In [5]:
df1_clean['clean_symptoms'] = df1_clean.apply(lambda x: clean_text(x["symptoms"]),axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [6]:
df1_clean.head()

Unnamed: 0.1,Unnamed: 0,name,link,symptoms,causes,risk_factor,overview,treatment,medication,home_remedies,clean_symptoms
0,0,Acanthosis nigricans,https://www.mayoclinic.org/diseases-conditions...,"[""Skin changes are the only signs of acanthosi...",['Acanthosis nigricans has been associated wit...,"['Acanthosis nigricans risk factors include:',...",['Acanthosis nigricans is a skin condition tha...,"['In many situations, treating the underlying ...",,,Skin changes signs acanthosis nigricans You no...
2,2,Achilles tendon rupture,https://www.mayoclinic.org/diseases-conditions...,"[""Although it's possible to have no signs or s...",['Your Achilles tendon helps you point your fo...,['Factors that may increase your risk of Achil...,['The Achilles tendon is a strong fibrous cord...,['Treatment for a ruptured Achilles tendon oft...,,,Although possible signs symptoms Achilles tend...
3,3,Acute coronary syndrome,https://www.mayoclinic.org/diseases-conditions...,['The signs and symptoms of acute coronary syn...,['Acute coronary syndrome usually results from...,['The risk factors for acute coronary syndrome...,['Acute coronary syndrome is a term used to de...,['The immediate goals of treatment for acute c...,"['Depending on your diagnosis, medications for...",['Heart healthy lifestyle changes are an impor...,The signs symptoms acute coronary syndrome usu...
4,4,Adenomyosis,https://www.mayoclinic.org/diseases-conditions...,"['Sometimes, adenomyosis causes no signs or sy...","[""The cause of adenomyosis isn't known. There ...","['Risk factors for adenomyosis include:', 'Mos...","['With adenomyosis, the same tissue that lines...","['Adenomyosis often goes away after menopause,...",,['To ease pelvic pain and cramping related to ...,Sometimes adenomyosis causes signs symptoms mi...
5,5,Adjustment disorders,https://www.mayoclinic.org/diseases-conditions...,['Signs and symptoms depend on the type of adj...,['Adjustment disorders are caused by significa...,['Some things may make you more likely to have...,['Adjustment disorders are stress-related cond...,['Many people with adjustment disorders find t...,['Medications such as antidepressants and anti...,['Here are some steps you can take to care for...,Signs symptoms depend type adjustment disorder...


In [7]:
df1_clean.to_csv("diseases_dataset.csv")

<h3>Re-evaluate for each query</h3>

In [37]:
df1_clean = pd.read_csv("diseases_dataset.csv")
#clean query
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

query = "ringing in ears, decreased hearing and giddiness"

clean_query = clean_text(query)

def get_similarity(clean_query,x):
    train_set = [clean_query,x['clean_symptoms']]
    tfidf_matrix_train   = TfidfVectorizer().fit_transform(train_set)
    score = cosine_similarity(tfidf_matrix_train[0:1], tfidf_matrix_train)[0][1]
    return score


In [38]:
df1_clean['score'] = df1_clean.apply(lambda x:get_similarity(clean_query,x),axis=1 )

In [39]:
ans = df1_clean.nlargest(10, ['score'])

In [40]:
ans


Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,name,link,symptoms,causes,risk_factor,overview,treatment,medication,home_remedies,clean_symptoms,score
196,222,222,Earwax blockage,https://www.mayoclinic.org/diseases-conditions...,['Signs and symptoms of earwax blockage may in...,['The wax in your ears is secreted by glands i...,,['Earwax blockage occurs when earwax (cerumen)...,['\n Earwax removal by a doctorEarwax r...,,"[""If your eardrum doesn't contain a tube or ha...",Signs symptoms earwax blockage may include If ...,0.137905
58,68,68,Barotrauma (See: Airplane ear),https://www.mayoclinic.org/diseases-conditions...,['Airplane ear can occur in one or both ears. ...,['Airplane ear happens when there is an imbala...,['Any condition that blocks the eustachian tub...,"[""Airplane ear (ear barotrauma) is the stress ...","['For most people, airplane ear usually heals ...","['Your doctor might suggest you take:', 'To ea...",,Airplane ear occur one ears Common signs sympt...,0.137785
955,1031,1031,Tinnitus,https://www.mayoclinic.org/diseases-conditions...,['Tinnitus involves the sensation of hearing s...,['A number of health conditions can cause or w...,"['Anyone can experience tinnitus, but these fa...",['Tinnitus can be caused by broken or damaged ...,[],"[""Drugs can't cure tinnitus, but in some cases...","[""Often, tinnitus can't be treated. Some peopl...",Tinnitus involves sensation hearing sound exte...,0.134487
663,726,726,Presbycusis (See: Hearing loss),https://www.mayoclinic.org/diseases-conditions...,['Signs and symptoms of hearing loss may inclu...,"['To understand how hearing loss occurs, it ca...",['Factors that may damage or lead to loss of t...,['Hearing loss that occurs gradually as you ag...,['\n Hearing aid partsHearing aid parts...,,,Signs symptoms hearing loss may include If sud...,0.132771
466,512,512,Meniere's disease,https://www.mayoclinic.org/diseases-conditions...,"[""Signs and symptoms of Meniere's disease incl...",['Semicircular canals and otolith organs — the...,,"[""Meniere's disease is a disorder of the inner...","[""No cure exists for Meniere's disease. A numb...",['Your doctor may prescribe medications to tak...,"[""Certain self-care tactics can help reduce th...",Signs symptoms Meniere disease include After e...,0.094296
846,918,918,Spinal headaches,https://www.mayoclinic.org/diseases-conditions...,"['Spinal headache symptoms include:', 'Spinal ...",['Spinal headaches are caused by leakage of sp...,"['Risk factors for spinal headaches include:',...",['Spinal headaches occur in up to 40 percent o...,['Treatment for spinal headaches begins conser...,,,Spinal headache symptoms include Spinal headac...,0.091957
1053,1135,1135,Vestibular schwannoma (See: Acoustic neuroma),https://www.mayoclinic.org/diseases-conditions...,"[""Signs and symptoms of acoustic neuroma are o...",['The cause of acoustic neuromas appears to be...,"['In an autosomal dominant disorder, the mutat...",['An acoustic neuroma (vestibular schwannoma) ...,['Surgical removal of the tumor by an experien...,,,Signs symptoms acoustic neuroma often subtle m...,0.091586
517,567,567,Nasopharyngeal carcinoma,https://www.mayoclinic.org/diseases-conditions...,"['In its early stages, nasopharyngeal carcinom...",['Cancer begins when one or more genetic mutat...,['Researchers have identified some factors tha...,['The throat (pharynx) is a muscular tube that...,"[""You and your doctor work together to devise ...",,['Radiation therapy for nasopharyngeal carcino...,In early stages nasopharyngeal carcinoma may c...,0.085572
748,816,816,Ruptured eardrum (perforated eardrum),https://www.mayoclinic.org/diseases-conditions...,['Signs and symptoms of a ruptured eardrum may...,['The middle ear includes three small bones — ...,,['A ruptured (perforated) eardrum prevents the...,['\n TympanoplastyTympanoplastyIn some ...,,['A ruptured (perforated) eardrum usually heal...,Signs symptoms ruptured eardrum may include Ca...,0.084173
467,513,513,Meningioma,https://www.mayoclinic.org/diseases-conditions...,['Signs and symptoms of a meningioma typically...,"[""It isn't clear what causes a meningioma. Doc...","['Risk factors for a meningioma include:', 'Ra...",['Three layers of membranes known as meninges ...,['The treatment you receive for a meningioma d...,,,Signs symptoms meningioma typically begin grad...,0.059124
