# **disease model**

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [3]:
disease_data_file = '/content/Final_Augmented_dataset_Diseases_and_Symptoms.csv (1).zip'
disease_data = pd.read_csv(disease_data_file)
disease_data.describe()

disease_data.head()

Unnamed: 0,diseases,anxiety and nervousness,depression,shortness of breath,depressive or psychotic symptoms,sharp chest pain,dizziness,insomnia,abnormal involuntary movements,chest tightness,...,stuttering or stammering,problems with orgasm,nose deformity,lump over jaw,sore in nose,hip weakness,back swelling,ankle stiffness or tightness,ankle weakness,neck weakness
0,panic disorder,1,0,1,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,panic disorder,0,0,1,1,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,panic disorder,1,1,1,1,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,panic disorder,1,0,0,1,0,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
4,panic disorder,1,1,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0


In [6]:
import pandas as pd
import numpy as np
import json
from sklearn.preprocessing import LabelEncoder

# ===== MAIN FUNCTION: ENCODE DISEASES AND CREATE COMPLETE RECORD =====
def encode_diseases_complete(df, disease_column='diseases', save_files=True):

    print("Starting disease encoding process...")
    print(f"Original data shape: {df.shape}")
    print(f"Disease column: '{disease_column}'")

    # Check if disease column exists
    if disease_column not in df.columns:
        raise ValueError(f"Column '{disease_column}' not found in DataFrame")

    # Create label encoder
    encoder = LabelEncoder()

    # Create copy of DataFrame
    encoded_df = df.copy()

    # Get unique diseases before encoding
    unique_diseases = df[disease_column].unique()
    print(f"Found {len(unique_diseases)} unique diseases: {list(unique_diseases)}")

    # Encode diseases
    encoded_df[f'{disease_column}_encoded'] = encoder.fit_transform(encoded_df[disease_column])

    # Create comprehensive mapping dictionaries
    disease_to_number = {disease: int(code) for code, disease in enumerate(encoder.classes_)}
    number_to_disease = {int(code): disease for code, disease in enumerate(encoder.classes_)}

    # Create complete mapping record
    mappings = {
        'disease_to_number': disease_to_number,
        'number_to_disease': number_to_disease,
        'encoder_classes': encoder.classes_.tolist(),
        'total_diseases': len(unique_diseases),
        'encoding_info': {
            'method': 'LabelEncoder',
            'range': f'0 to {len(unique_diseases)-1}',
            'total_records': len(df)
        }
    }

    # Display mapping information
    print("\n" + "="*50)
    print("DISEASE ENCODING MAPPING")
    print("="*50)
    for disease, code in disease_to_number.items():
        print(f"  '{disease}' → {code}")

    # Show distribution
    print("\n" + "="*50)
    print("DISEASE DISTRIBUTION")
    print("="*50)
    distribution = encoded_df[f'{disease_column}_encoded'].value_counts().sort_index()
    for code, count in distribution.items():
        disease_name = number_to_disease[code]
        percentage = (count / len(encoded_df)) * 100
        print(f"  Code {code} ({disease_name}): {count} records ({percentage:.1f}%)")

    # Save files if requested
    if save_files:
        # Save mapping as JSON
        with open('disease_encoding_mapping.json', 'w') as f:
            json.dump(mappings, f, indent=2)

        # Save mapping as CSV
        mapping_df = pd.DataFrame({
            'Disease_Name': list(disease_to_number.keys()),
            'Disease_Code': list(disease_to_number.values())
        })
        mapping_df.to_csv('disease_encoding_mapping.csv', index=False)

        # Save encoded dataframe
        encoded_df.to_csv('data_with_encoded_diseases.csv', index=False)

        print("\n✓ Files saved:")
        print("  - disease_encoding_mapping.json")
        print("  - disease_encoding_mapping.csv")
        print("  - data_with_encoded_diseases.csv")

    return encoded_df, mappings

# ===== GROUPING FUNCTION WITH ENCODED DISEASES =====
def group_by_encoded_diseases(encoded_df, mappings, disease_column='diseases'):
    """
    Group data by encoded diseases and calculate averages
    """
    print("\n" + "="*50)
    print("GROUPING BY ENCODED DISEASES")
    print("="*50)

    # Select only numeric columns for grouping
    numeric_columns = encoded_df.select_dtypes(include=[np.number]).columns.tolist()

    # Remove the encoded disease column from the grouping calculation
    symptom_columns = [col for col in numeric_columns if not col.endswith('_encoded')]

    print(f"Grouping by: {disease_column}_encoded")
    print(f"Calculating averages for: {symptom_columns}")

    # Group by encoded diseases
    grouped_data = encoded_df[symptom_columns].groupby(encoded_df[f'{disease_column}_encoded']).mean().round(3)

    # Create version with disease names for reference
    grouped_with_names = grouped_data.copy()
    grouped_with_names.index = [f"Code_{idx}_({mappings['number_to_disease'][idx]})"
                               for idx in grouped_with_names.index]

    print("\nGrouped data by encoded diseases:")
    print(grouped_data)

    # Save grouped data
    grouped_data.to_csv('grouped_data_by_encoded_diseases.csv')
    grouped_with_names.to_csv('grouped_data_with_disease_names.csv')

    print("\n✓ Grouped data saved:")
    print("  - grouped_data_by_encoded_diseases.csv")
    print("  - grouped_data_with_disease_names.csv")

    return grouped_data, grouped_with_names

# ===== UTILITY FUNCTIONS =====
def load_disease_mapping(json_file='disease_encoding_mapping.json'):
    """Load disease mapping from saved JSON file"""
    with open(json_file, 'r') as f:
        return json.load(f)

def disease_name_to_code(disease_name, mappings):
    """Convert disease name to code"""
    return mappings['disease_to_number'].get(disease_name, None)

def disease_code_to_name(code, mappings):
    """Convert disease code to name"""
    return mappings['number_to_disease'].get(code, None)

def show_complete_mapping_info(mappings):
    """Display complete mapping information"""
    print("="*50)
    print("COMPLETE DISEASE MAPPING INFORMATION")
    print("="*50)
    print(f"Total diseases: {mappings['total_diseases']}")
    print(f"Encoding method: {mappings['encoding_info']['method']}")
    print(f"Code range: {mappings['encoding_info']['range']}")
    print(f"Total records: {mappings['encoding_info']['total_records']}")
    print("\nComplete mapping:")
    for disease, code in mappings['disease_to_number'].items():
        print(f"  '{disease}' ↔ {code}")
df, mappings = encode_diseases_complete(disease_data)
grouped_data, grouped_with_names= group_by_encoded_diseases(df, mappings)
code = disease_name_to_code('panic disorder', mappings)
name = disease_code_to_name(0, mappings)
show_complete_mapping_info(mappings)

df.head()


Starting disease encoding process...
Original data shape: (246945, 378)
Disease column: 'diseases'
Found 773 unique diseases: ['panic disorder', 'vocal cord polyp', 'turner syndrome', 'cryptorchidism', 'poisoning due to ethylene glycol', 'atrophic vaginitis', 'fracture of the hand', 'cellulitis or abscess of mouth', 'eye alignment disorder', 'headache after lumbar puncture', 'pyloric stenosis', 'salivary gland disorder', 'osteochondrosis', 'injury to the knee', 'metabolic disorder', 'vaginitis', 'sick sinus syndrome', 'tinnitus of unknown cause', 'glaucoma', 'eating disorder', 'transient ischemic attack', 'pyelonephritis', 'rotator cuff injury', 'chronic pain disorder', 'problem during pregnancy', 'liver cancer', 'atelectasis', 'injury to the hand', 'choledocholithiasis', 'injury to the hip', 'cirrhosis', 'thoracic aortic aneurysm', 'subdural hemorrhage', 'diabetic retinopathy', 'fibromyalgia', 'ischemia of the bowel', 'fetal alcohol syndrome', 'peritonitis', 'injury to the abdomen', '

Unnamed: 0,diseases,anxiety and nervousness,depression,shortness of breath,depressive or psychotic symptoms,sharp chest pain,dizziness,insomnia,abnormal involuntary movements,chest tightness,...,problems with orgasm,nose deformity,lump over jaw,sore in nose,hip weakness,back swelling,ankle stiffness or tightness,ankle weakness,neck weakness,diseases_encoded
0,panic disorder,1,0,1,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,531
1,panic disorder,0,0,1,1,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,531
2,panic disorder,1,1,1,1,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,531
3,panic disorder,1,0,0,1,0,1,1,1,0,...,0,0,0,0,0,0,0,0,0,531
4,panic disorder,1,1,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,531


In [8]:

# Split the data into training and testing sets
X = df.drop(["diseases", 'diseases_encoded'],axis=1)
y = df["diseases_encoded"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
from sklearn.tree import DecisionTreeRegressor


disease_model = DecisionTreeRegressor(random_state=1)

# Fit model
disease_model.fit(X_train, y_train)
#model fitting abhi work nhi kar rhi hai to ab ek different approach se solve karenge

In [13]:
#rather than calculting rmse we need to calculate efficiency of our program so for that ill make a function for efficiency kyuyi error values
# ka hai but values to ek disease bata rhi hai
def efficiency_cal(y_test,predicted_disease):
  lst=list(y_test)
  predicted_disease=list(predicted_disease)
  count=0
  for i in range(len(lst)):
    if lst[i]==predicted_disease[i]:
      count+=1
  return count*100/len(lst)


In [14]:
from sklearn.metrics import mean_absolute_error
predicted_disease = disease_model.predict(X_test)
efficiency_cal(y_test,predicted_disease)



76.65067120208954

abhi 76% efficiency a rhi hai which is not very good considering we want a better model

In [15]:
def get_efficiency(max_leaf_nodes, train_X, val_X, train_y, val_y):
    model = DecisionTreeRegressor(max_leaf_nodes=max_leaf_nodes, random_state=1)
    model.fit(train_X, train_y)
    preds_val = model.predict(val_X)
    efficiency=efficiency_cal(val_y, preds_val)
    return(efficiency)


In [None]:
import numpy as np
for max_leaf_nodes in np.logspace(4.3,4.6 , 20, dtype=int): # Using a more realistic range for max_leaf_nodes
    if max_leaf_nodes > 1: # Ensure max_leaf_nodes is at least 2
        my_efficiency = get_efficiency(max_leaf_nodes, X_train, X_test, y_train, y_test)
        print(f"Max leaf nodes: {max_leaf_nodes}, Efficiency: {my_efficiency:.8f}%")

Max leaf nodes: 19952, Efficiency: 50.87165158%
Max leaf nodes: 20691, Efficiency: 51.59448460%
Max leaf nodes: 21457, Efficiency: 52.73846403%
Max leaf nodes: 22251, Efficiency: 53.86219604%
Max leaf nodes: 23075, Efficiency: 54.90291360%
Max leaf nodes: 23930, Efficiency: 56.00842293%
Max leaf nodes: 24816, Efficiency: 57.30628278%
Max leaf nodes: 25735, Efficiency: 58.79851789%
Max leaf nodes: 26687, Efficiency: 60.11257567%
Max leaf nodes: 27676, Efficiency: 61.76274069%
Max leaf nodes: 28700, Efficiency: 63.20233250%


In [18]:
final_model=DecisionTreeRegressor(random_state=1)
final_model.fit(X,y)

curve fitting also suggests that maximum efficiency can only go till 76% idk why but ab jo hai so hai

### **now we will start making the text understanding model and in the end we will combine both of the models**

**our ppt involves urgency classifier as well. toh koi aisa model bhi dhund sako toh dhund lo jo yeh decide kar sake , nhi toh koi simple rule ke according hi kar de? Aur haa final output ko presentable bhi banana hai aur haa upar vale ki efficiency badhane ka toh sochna hi hai**

In [19]:
symptom_list=list(X.columns)
symptom_list

['anxiety and nervousness',
 'depression',
 'shortness of breath',
 'depressive or psychotic symptoms',
 'sharp chest pain',
 'dizziness',
 'insomnia',
 'abnormal involuntary movements',
 'chest tightness',
 'palpitations',
 'irregular heartbeat',
 'breathing fast',
 'hoarse voice',
 'sore throat',
 'difficulty speaking',
 'cough',
 'nasal congestion',
 'throat swelling',
 'diminished hearing',
 'lump in throat',
 'throat feels tight',
 'difficulty in swallowing',
 'skin swelling',
 'retention of urine',
 'groin mass',
 'leg pain',
 'hip pain',
 'suprapubic pain',
 'blood in stool',
 'lack of growth',
 'emotional symptoms',
 'elbow weakness',
 'back weakness',
 'pus in sputum',
 'symptoms of the scrotum and testes',
 'swelling of scrotum',
 'pain in testicles',
 'flatulence',
 'pus draining from ear',
 'jaundice',
 'mass in scrotum',
 'white discharge from eye',
 'irritable infant',
 'abusing alcohol',
 'fainting',
 'hostile behavior',
 'drug abuse',
 'sharp abdominal pain',
 'feeling 

In [20]:
!pip install fuzzywuzzy python-Levenshtein

Collecting fuzzywuzzy
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl.metadata (4.9 kB)
Collecting python-Levenshtein
  Downloading python_levenshtein-0.27.1-py3-none-any.whl.metadata (3.7 kB)
Collecting Levenshtein==0.27.1 (from python-Levenshtein)
  Downloading levenshtein-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting rapidfuzz<4.0.0,>=3.9.0 (from Levenshtein==0.27.1->python-Levenshtein)
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Downloading python_levenshtein-0.27.1-py3-none-any.whl (9.4 kB)
Downloading levenshtein-0.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (161 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m161.7/161.7 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)


In [21]:
import re
import numpy as np
import pandas as pd
from fuzzywuzzy import fuzz, process
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import warnings
warnings.filterwarnings('ignore')

class AdvancedSymptomExtractor:
    """
    Advanced Symptom Extraction Model that uses multiple NLP techniques
    to identify symptoms from complex sentences and generate binary vectors.
    """

    def __init__(self, symptoms_list, similarity_threshold=0.6, fuzzy_threshold=75):
        """
        Initialize the symptom extractor

        Args:
            symptoms_list: List of symptom strings to detect
            similarity_threshold: Threshold for TF-IDF similarity (0-1)
            fuzzy_threshold: Threshold for fuzzy matching (0-100)
        """
        self.symptoms_list = symptoms_list
        self.n_symptoms = len(symptoms_list)
        self.similarity_threshold = similarity_threshold
        self.fuzzy_threshold = fuzzy_threshold

        # Initialize TF-IDF vectorizer for semantic similarity
        self.tfidf_vectorizer = TfidfVectorizer(
            lowercase=True,
            stop_words='english',
            ngram_range=(1, 3),
            max_features=5000
        )

        # Create preprocessed symptoms and anatomical context
        self._create_symptom_data()
        self._create_anatomical_context()

        print(f"✓ SymptomExtractor initialized with {self.n_symptoms} symptoms")

    def _simple_tokenize(self, text):
        """Simple tokenization without external dependencies"""
        text = text.lower()
        text = re.sub(r'[^\w\s]', ' ', text)
        text = re.sub(r'\s+', ' ', text).strip()

        tokens = text.split()
        stop_words = {'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'the', 'is', 'are', 'was', 'were'}
        tokens = [token for token in tokens if len(token) > 2 and token not in stop_words]

        return tokens

    def _create_symptom_data(self):
        """Create processed symptoms and variations"""
        self.processed_symptoms = []
        self.symptom_keywords = []
        self.symptom_variations = {}

        for i, symptom in enumerate(self.symptoms_list):
            # Process symptom
            tokens = self._simple_tokenize(symptom)
            processed = ' '.join(tokens)
            self.processed_symptoms.append(processed)
            self.symptom_keywords.append(tokens)

            # Create variations
            variations = [symptom.lower(), processed]

            # Add medical variations
            if 'pain' in symptom.lower():
                variations.extend([
                    symptom.lower().replace('pain', 'ache'),
                    symptom.lower().replace('pain', 'hurt'),
                    symptom.lower().replace('pain', 'discomfort')
                ])

            if 'difficulty' in symptom.lower():
                variations.extend([
                    symptom.lower().replace('difficulty', 'trouble'),
                    symptom.lower().replace('difficulty', 'problem')
                ])

            self.symptom_variations[i] = list(set(variations))

        # Fit TF-IDF on all symptoms
        all_symptom_text = self.processed_symptoms + [var for vars_list in self.symptom_variations.values() for var in vars_list]
        self.tfidf_matrix = self.tfidf_vectorizer.fit_transform(all_symptom_text)

    def _create_anatomical_context(self):
        """Create anatomical context for better specificity"""
        self.anatomical_regions = {
            'chest': ['chest', 'breast', 'thorax', 'heart', 'lung'],
            'head': ['head', 'skull', 'brain', 'face', 'forehead'],
            'throat': ['throat', 'neck', 'pharynx', 'larynx'],
            'abdomen': ['abdomen', 'belly', 'stomach', 'gut'],
            'back': ['back', 'spine', 'vertebra'],
            'leg': ['leg', 'thigh', 'knee', 'calf', 'foot', 'ankle'],
            'arm': ['arm', 'elbow', 'wrist', 'hand', 'finger', 'shoulder'],
            'eye': ['eye', 'vision', 'sight', 'pupil'],
            'ear': ['ear', 'hearing', 'auditory'],
            'urinary': ['urine', 'bladder', 'kidney', 'urinary'],
            'reproductive': ['testicle', 'scrotum', 'vagina', 'reproductive']
        }

    def _check_anatomical_context(self, user_tokens, pain_symptom):
        """Check if pain mention has appropriate anatomical context"""
        pain_symptom_lower = pain_symptom.lower()

        # Map pain symptoms to their anatomical regions
        anatomical_map = {
            'chest': ['sharp chest pain', 'chest tightness'],
            'leg': ['leg pain'],
            'head': ['headache'],
            'back': ['back pain'],
            'arm': ['arm pain', 'elbow pain', 'wrist pain', 'hand pain', 'finger pain'],
            'abdomen': ['sharp abdominal pain', 'abdominal pain', 'lower abdominal pain'],
            'reproductive': ['pain in testicles', 'testicular pain'],
            'hip': ['hip pain'],
            'urinary': ['suprapubic pain']
        }

        # Find which anatomical region this pain belongs to
        target_region = None
        for region, symptoms in anatomical_map.items():
            if any(pain_symptom_lower in s or s in pain_symptom_lower for s in symptoms):
                target_region = region
                break

        if target_region is None:
            return False

        # Check if user mentions words related to this anatomical region
        region_words = self.anatomical_regions.get(target_region, [])
        user_has_region_context = bool(user_tokens.intersection(set(region_words)))

        return user_has_region_context

    def extract_symptoms(self, user_input):
        """
        Extract symptoms from user input text using contextual matching

        Args:
            user_input: String containing user's description

        Returns:
            Binary numpy array where 1 indicates symptom present, 0 indicates absent
        """
        binary_vector = np.zeros(self.n_symptoms, dtype=int)
        user_lower = user_input.lower()
        user_tokens = set(self._simple_tokenize(user_input))

        for i, symptom in enumerate(self.symptoms_list):
            symptom_lower = symptom.lower()

            # For pain symptoms, check anatomical context
            if 'pain' in symptom_lower:
                if self._check_anatomical_context(user_tokens, symptom):
                    binary_vector[i] = 1
                    continue

            # 1. Exact phrase matching (high confidence)
            if symptom_lower in user_lower:
                binary_vector[i] = 1
                continue

            # 2. High fuzzy match
            if fuzz.partial_ratio(user_lower, symptom_lower) >= 85:
                binary_vector[i] = 1
                continue

            # 3. Strong keyword overlap for multi-word symptoms
            symptom_tokens = set(self.symptom_keywords[i])
            if len(symptom_tokens) > 1:
                overlap = len(user_tokens.intersection(symptom_tokens))
                if overlap >= len(symptom_tokens) * 0.8:  # 80% of keywords must match
                    binary_vector[i] = 1
                    continue

            # 4. Single word exact match for single-word symptoms
            elif len(symptom_tokens) == 1:
                if symptom_tokens.issubset(user_tokens):
                    binary_vector[i] = 1

        return binary_vector

    def get_detected_symptoms(self, user_input):
        """
        Get list of detected symptoms with their confidence scores

        Args:
            user_input: String containing user's description

        Returns:
            tuple: (list of detected symptoms with metadata, binary vector)
        """
        binary_vector = self.extract_symptoms(user_input)
        detected_symptoms = []

        for i, is_present in enumerate(binary_vector):
            if is_present:
                detected_symptoms.append({
                    'index': i,
                    'symptom': self.symptoms_list[i],
                    'confidence': self._calculate_confidence(user_input, i)
                })

        return detected_symptoms, binary_vector

    def _calculate_confidence(self, user_input, symptom_index):
        """Calculate confidence score for detected symptom"""
        symptom = self.symptoms_list[symptom_index]

        # Fuzzy matching confidence
        fuzzy_score = fuzz.partial_ratio(user_input.lower(), symptom.lower()) / 100.0

        # Keyword matching confidence
        user_tokens = set(self._simple_tokenize(user_input))
        symptom_tokens = set(self.symptom_keywords[symptom_index])
        keyword_score = len(user_tokens.intersection(symptom_tokens)) / len(symptom_tokens)

        # TF-IDF confidence
        user_processed = ' '.join(self._simple_tokenize(user_input))
        user_tfidf = self.tfidf_vectorizer.transform([user_processed])
        symptom_tfidf = self.tfidf_vectorizer.transform([self.processed_symptoms[symptom_index]])
        tfidf_score = cosine_similarity(user_tfidf, symptom_tfidf)[0][0]

        # Combine scores (weighted average)
        confidence = (0.3 * fuzzy_score + 0.4 * keyword_score + 0.3 * tfidf_score)
        return min(confidence, 1.0)  # Cap at 1.0

In [28]:
symptom_detector = AdvancedSymptomExtractor(symptom_list)


user_input = input('please mention about your symptoms')
binary_vector = symptom_detector.extract_symptoms(user_input)
print(f"Binary vector: {binary_vector}")

detected_symptoms, binary_vector = symptom_detector.get_detected_symptoms(user_input)

print(f"Input: '{user_input}'")
print(f"Detected {len(detected_symptoms)} symptoms:")
for symptom in detected_symptoms:
  print(f"  - {symptom['symptom']} (confidence: {symptom['confidence']:.3f})")

✓ SymptomExtractor initialized with 377 symptoms
please mention about your symptomsdiarrhea stomach ache fever head ache nausea
Binary vector: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0]
Input: 'diarrhea stomach ache fever head ache nausea'
Detected 8 symptoms:
  - sharp abdomi

In [29]:
final_vector=[binary_vector]
final_val=final_model.predict(final_vector)
final_val


array([49.])