In [1]:
import pandas as pd
import numpy as np
import string
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.corpus import wordnet
from nltk import pos_tag
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity



In [2]:
# Load the labelled symptom descriptions. The path is relative, so it is
# resolved against the kernel's current working directory.
df1 = pd.read_csv("train.csv")

In [3]:
# Preview the first rows: `Desc` holds the free-text symptom description and
# `Sub_Department` the department label.
df1.head()

Unnamed: 0,Desc,Sub_Department
0,"It all started subtly, a little itch here and ...",Infectious Disease Department
1,"My skin has become a battleground, erupting in...",Infectious Disease Department
2,"It started subtly, a slight itch at the corner...",Infectious Disease Department
3,"It started with an itch, a subtle irritation o...",Infectious Disease Department
4,This relentless itching beneath my skin has tu...,Infectious Disease Department


In [4]:
# Look at the description column on its own for the first few rows.
df1["Desc"].head()

0    It all started subtly, a little itch here and ...
1    My skin has become a battleground, erupting in...
2    It started subtly, a slight itch at the corner...
3    It started with an itch, a subtle irritation o...
4    This relentless itching beneath my skin has tu...
Name: Desc, dtype: object

In [5]:
# Keep only the first occurrence of every description so the retrieval corpus
# contains each text once. Row labels are NOT reset here; later cells look rows
# up by position (`iloc`), which is what stays aligned with the TF-IDF matrix.
dfp = df1.drop_duplicates(subset=['Desc'], keep='first')
dfp.head()

Unnamed: 0,Desc,Sub_Department
0,"It all started subtly, a little itch here and ...",Infectious Disease Department
1,"My skin has become a battleground, erupting in...",Infectious Disease Department
2,"It started subtly, a slight itch at the corner...",Infectious Disease Department
3,"It started with an itch, a subtle irritation o...",Infectious Disease Department
4,This relentless itching beneath my skin has tu...,Infectious Disease Department


In [6]:
# (rows, columns) remaining after de-duplication.
dfp.shape

(1712, 2)

In [7]:
# Discard every row that has a missing value in any column, then confirm how
# many rows are left.
dfp = dfp.dropna(axis=0, how='any')
dfp.shape

(1712, 2)

In [8]:
# English stop words from the NLTK corpus; my_tokenizer drops any token whose
# lowercase form appears in this list.
# NOTE(review): presumably the NLTK corpora/models used in this notebook must be
# downloaded beforehand; no nltk.download(...) call is visible here.
stopwords_list = stopwords.words('english')

# Single lemmatizer instance shared by every my_tokenizer call.
lemmatizer = WordNetLemmatizer()

def my_tokenizer(doc):
    """Split ``doc`` into lemmatized content words.

    Used as the ``tokenizer`` callable of the TF-IDF vectorizer. The text is
    word-tokenized and POS-tagged as a whole (so the tagger sees full
    sentences); stop words and punctuation tokens are then dropped and each
    remaining token is lemmatized with the WordNet part of speech derived
    from its tag.

    Parameters
    ----------
    doc : str
        Raw text of one document.

    Returns
    -------
    list of str
        Lemmas in their original order.
    """
    tagged_tokens = pos_tag(word_tokenize(doc))

    # Set lookup instead of scanning the stop-word list once per token.
    stop_words = set(stopwords_list)

    # First letter of the tag -> WordNet POS; anything else is treated as a noun.
    wordnet_pos_by_prefix = {
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'N': wordnet.NOUN,
        'R': wordnet.ADV,
    }

    lemmas = []
    for token, tag in tagged_tokens:
        if token.lower() in stop_words:
            continue
        # A token is punctuation when *every* character is punctuation. The
        # previous `token in string.punctuation` was a substring test, so
        # multi-character tokens such as "...", "--", "``" and "''" slipped
        # through and became vocabulary entries.
        if all(ch in string.punctuation for ch in token):
            continue
        pos = wordnet_pos_by_prefix.get(tag[:1], wordnet.NOUN)
        lemmas.append(lemmatizer.lemmatize(token, pos))

    return lemmas

In [9]:
# Fit TF-IDF on the de-duplicated descriptions using the custom lemmatizing
# tokenizer. token_pattern=None states explicitly that the default regex is
# unused when a tokenizer is supplied (otherwise scikit-learn reports the
# parameter as ignored).
tfidf_vectorizer = TfidfVectorizer(tokenizer=my_tokenizer, token_pattern=None)

# Row i of the matrix corresponds to dfp.iloc[i]; later lookups rely on this
# positional alignment. The Series is iterated directly, no tuple copy needed.
tfidf_matrix = tfidf_vectorizer.fit_transform(dfp['Desc'])
print(tfidf_matrix.shape)

(1712, 6241)


In [10]:
import joblib

# File names for the persisted artefacts; later cells hand these to load_models.
tfidf_vectorizer_filename = 'tfidf_vectorizer_model.joblib'
tfidf_matrix_filename = 'tfidf_matrix_model.joblib'

# Persist the fitted vectorizer (vocabulary + IDF weights).
# NOTE(review): the vectorizer holds a reference to my_tokenizer; presumably the
# function must be defined/importable under the same name wherever this file is
# loaded again -- confirm before using it outside this notebook.
joblib.dump(tfidf_vectorizer, tfidf_vectorizer_filename)

# Persist the TF-IDF document-term matrix of the corpus. This is the matrix
# queries are compared against, not a separate similarity "model".
joblib.dump(tfidf_matrix, tfidf_matrix_filename)

['tfidf_matrix_model.joblib']

In [11]:
# Function to use the saved models
def load_models(tfidf_vectorizer_filename, tfidf_matrix_filename):
    """Read the persisted vectorizer and TF-IDF matrix back from disk.

    Parameters
    ----------
    tfidf_vectorizer_filename : path of the file holding the fitted vectorizer.
    tfidf_matrix_filename : path of the file holding the TF-IDF matrix.

    Returns
    -------
    tuple
        ``(vectorizer, matrix)`` in that order.

    Both files are read with ``joblib.load``; only load files you trust.
    """
    return (
        joblib.load(tfidf_vectorizer_filename),
        joblib.load(tfidf_matrix_filename),
    )

In [12]:
# Reload both artefacts from the files written above; the query helpers below
# read these two module-level names.
loaded_tfidf_vectorizer, loaded_tfidf_matrix = load_models(tfidf_vectorizer_filename, tfidf_matrix_filename)

def ask_question_using_saved_models(Desc):
    """Print the stored description closest to ``Desc`` and its department.

    The query is vectorized with the reloaded TF-IDF vectorizer and compared
    by cosine similarity against every row of the reloaded TF-IDF matrix. The
    best-scoring row is looked up in ``dfp`` by position.

    Parameters
    ----------
    Desc : str
        Free-text description of the symptoms.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    query_vect = loaded_tfidf_vectorizer.transform([Desc])
    similarity = cosine_similarity(query_vect, loaded_tfidf_matrix)

    # similarity has a single row, so the flat argmax is the column index,
    # i.e. the position of the best-matching corpus row.
    best_row = int(np.argmax(similarity, axis=None))
    best_score = similarity[0, best_row]

    print('Your Description of symptoms:', Desc)

    # If the query shares no vocabulary with the corpus every score is 0 and
    # argmax falls back to row 0; reporting that row's department would be an
    # arbitrary recommendation, so say so instead.
    if best_score <= 0:
        print('No similar description found; cannot recommend a department.')
        return

    match = dfp.iloc[best_row]
    print('Closest Description found:', match['Desc'])
    print('Similarity: {:.2%}'.format(best_score))
    print('Department name:', match['Sub_Department'])

In [13]:
# Smoke test: query with a description worded like one of the corpus rows shown
# by df1.head(); the closest match and its department are printed below.
ask_question_using_saved_models("It started subtly, a slight itch at the corner of my nail, barely noticeable. I figured it was just dry skin, nothing to worry about. But over the next few days, the itch intensified, spreading to other nails, turning into a burning sensation. I looked closer and saw the telltale signs - my nails were thickening, yellowing, and starting to crumble. Panic began to set in. The discomfort soon spread beyond my nails, with a persistent cough racking my chest and a strange fatigue dragging me down. My breath became shallow, each inhale a struggle. My skin erupted in a fiery rash, spreading across my body like wildfire. Every swallow felt like sandpaper scraping my throat. Even tasting my favorite foods became an ordeal. The once vibrant world around me dimmed, overshadowed by the relentless march of the infection within.")

Your Description of symptoms: It started subtly, a slight itch at the corner of my nail, barely noticeable. I figured it was just dry skin, nothing to worry about. But over the next few days, the itch intensified, spreading to other nails, turning into a burning sensation. I looked closer and saw the telltale signs - my nails were thickening, yellowing, and starting to crumble. Panic began to set in. The discomfort soon spread beyond my nails, with a persistent cough racking my chest and a strange fatigue dragging me down. My breath became shallow, each inhale a struggle. My skin erupted in a fiery rash, spreading across my body like wildfire. Every swallow felt like sandpaper scraping my throat. Even tasting my favorite foods became an ordeal. The once vibrant world around me dimmed, overshadowed by the relentless march of the infection within.
Closest Description found: It started subtly, a slight itch at the corner of my nail, barely noticeable. I figured it was just dry skin, nothi

In [15]:
import tkinter as tk
from tkinter import ttk, scrolledtext
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Assuming loaded_tfidf_vectorizer, loaded_tfidf_matrix, dfp are already defined

class QuestionAnswerGUI:
    """Tkinter window that recommends a hospital department for a symptom text.

    The user types a description, presses the button, and the closest stored
    description (by TF-IDF cosine similarity) is shown together with its
    department. Relies on the module-level ``loaded_tfidf_vectorizer``,
    ``loaded_tfidf_matrix`` and ``dfp`` defined in earlier cells.
    """

    def __init__(self, root):
        """Build the widgets inside ``root`` (a ``tk.Tk`` window)."""
        self.root = root
        self.root.title("Hospital Department Recommendation System")

        # Configure a style for colorful elements
        style = ttk.Style()
        style.configure("TButton", foreground="black", background="blue")
        style.configure("TLabel", foreground="green")

        # Create and set up GUI elements
        self.label = ttk.Label(root, text="Enter your Description of symptoms:", style="TLabel")
        self.label.pack(pady=10)
        custom_font = ('Arial', 22)

        # Use scrolledtext for scrollable entry
        self.question_entry = scrolledtext.ScrolledText(root, font=custom_font, width=100, height=10)
        self.question_entry.pack(pady=50, padx=10)

        self.ask_button = ttk.Button(root, text="Ask for Department Recommendation", command=self.ask_question, style="TButton")
        self.ask_button.pack(pady=10)

        # Use scrolledtext for scrollable output
        self.output_text = scrolledtext.ScrolledText(root, font=custom_font, width=100, height=10)
        self.output_text.pack(pady=10, padx=10)
        # Read-only from the start; _show_output re-enables it only while writing.
        self.output_text.config(state=tk.DISABLED)

    def _show_output(self, message):
        """Replace the contents of the read-only output box with ``message``."""
        self.output_text.config(state=tk.NORMAL)  # Enable editing
        self.output_text.delete("1.0", tk.END)  # Clear previous text
        self.output_text.insert(tk.END, message)  # Insert new text
        self.output_text.config(state=tk.DISABLED)  # Disable editing

    def ask_question(self):
        """Button callback: look up the closest description and display it."""
        Desc = self.question_entry.get("1.0", tk.END).strip()  # Get text from the scrolledtext widget

        if not Desc:
            self._show_output("Please enter a Description of the symptoms.")
            return

        query_vect = loaded_tfidf_vectorizer.transform([Desc])
        similarity = cosine_similarity(query_vect, loaded_tfidf_matrix)

        # similarity has one row, so the flat argmax is the corpus row position.
        best_row = int(np.argmax(similarity, axis=None))
        best_score = similarity[0, best_row]

        # All-zero scores mean the query shares no vocabulary with the corpus;
        # argmax would then pick row 0 arbitrarily, so do not recommend anything.
        if best_score <= 0:
            self._show_output("No similar description found; cannot recommend a department.")
            return

        match = dfp.iloc[best_row]
        self._show_output(
            f'Closest Description found: {match["Desc"]}\n'
            f'Similarity: {best_score:.2%}\n'
            f'Department Recommended: {match["Sub_Department"]}'
        )

# Create the top-level Tk window and build the recommendation GUI inside it.
# `app` keeps a reference to the GUI object for the lifetime of the window.
root = tk.Tk()
app = QuestionAnswerGUI(root)

# Enter the Tk event loop; this call blocks the cell until the window is closed.
root.mainloop()
