# 6. Model Deployment & API

This notebook implements the model deployment and API for symptom-based disease prediction.

## Objectives
1. Load trained model and components
2. Implement prediction pipeline
3. Create FastAPI application
4. Test API endpoints
5. Start the API server (FastAPI generates the interactive API documentation automatically)

In [None]:
import json
import logging
import warnings
from pathlib import Path
from typing import List, Dict

import joblib
import numpy as np
import pandas as pd
import spacy
import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from rapidfuzz import fuzz
from sentence_transformers import SentenceTransformer
from symspellpy import SymSpell, Verbosity

warnings.filterwarnings('ignore')

## 1. Load Model & Components

In [None]:
def load_components(models_dir='../models', data_dir='../data/processed'):
    """Load every artifact the prediction pipeline needs.

    Args:
        models_dir: Directory holding ``best_model.joblib`` and
            ``model_metadata.json``. Defaults to the notebook-relative
            ``../models``.
        data_dir: Directory holding ``symptom_vocab.json`` and
            ``symptom_embeddings.npy``. Defaults to the notebook-relative
            ``../data/processed``.

    Returns:
        A dict with the keys ``'model'``, ``'metadata'``, ``'symptom_vocab'``,
        ``'symptom_embeddings'``, ``'nlp'`` and ``'sentence_transformer'``.

    Raises:
        Whatever the underlying loaders raise when an artifact is missing or
        unreadable (for example ``FileNotFoundError`` from ``open``).
    """
    models_dir = Path(models_dir)
    data_dir = Path(data_dir)

    # Load model and metadata.
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code -- only point this at model files you trust.
    model = joblib.load(models_dir / 'best_model.joblib')

    # JSON is read as UTF-8 explicitly so the result does not depend on the
    # platform's default encoding.
    with open(models_dir / 'model_metadata.json', 'r', encoding='utf-8') as f:
        model_metadata = json.load(f)

    # Load NLP components
    with open(data_dir / 'symptom_vocab.json', 'r', encoding='utf-8') as f:
        symptom_vocab = json.load(f)

    symptom_embeddings = np.load(data_dir / 'symptom_embeddings.npy')

    # Load NLP models
    nlp = spacy.load('en_core_web_md')
    model_st = SentenceTransformer('all-MiniLM-L6-v2')

    return {
        'model': model,
        'metadata': model_metadata,
        'symptom_vocab': symptom_vocab,
        'symptom_embeddings': symptom_embeddings,
        'nlp': nlp,
        'sentence_transformer': model_st
    }

# Load everything once at module level, then list what was loaded
components = load_components()
print("Loaded components:")
for component_name in components:
    print(f"- {component_name}")

## 2. Implement Prediction Pipeline

In [None]:
class SymptomPredictor:
    """Pipeline that turns a free-text symptom description into disease predictions.

    The stages, in the order ``predict`` runs them, are: text preprocessing
    (spaCy), spelling correction (SymSpell), symptom extraction (fuzzy and
    embedding matching against the symptom vocabulary), feature construction,
    and classification with the loaded model.
    """

    def __init__(self, components):
        """Store the loaded components and build the spelling dictionary.

        Args:
            components: Mapping with the keys ``'model'``, ``'metadata'``,
                ``'symptom_vocab'``, ``'symptom_embeddings'``, ``'nlp'`` and
                ``'sentence_transformer'``. ``'symptom_vocab'`` maps each
                symptom key to a dict; this class reads its ``'clean_name'``,
                ``'embedding'`` and ``'frequency'`` entries.
        """
        self.model = components['model']
        self.metadata = components['metadata']
        self.symptom_vocab = components['symptom_vocab']
        self.symptom_embeddings = components['symptom_embeddings']
        self.nlp = components['nlp']
        self.sentence_transformer = components['sentence_transformer']

        # The spell checker only knows the words that occur in symptom names,
        # so corrections always pull input words towards the vocabulary.
        self.spell = SymSpell(max_dictionary_edit_distance=2)
        for entry in self.symptom_vocab.values():
            for word in entry['clean_name'].split():
                self.spell.create_dictionary_entry(word, 1)

    def preprocess_text(self, text):
        """Lowercase ``text`` and return its lemmas joined by single spaces.

        Stop words and punctuation tokens (as flagged by the spaCy pipeline)
        are dropped.
        """
        # Lowercase
        text = text.lower()

        # Process with spaCy
        doc = self.nlp(text)

        # Tokenize and lemmatize
        tokens = [token.lemma_ for token in doc
                  if not token.is_stop and not token.is_punct]

        return ' '.join(tokens)

    def correct_spelling(self, text):
        """Replace each whitespace-separated word with its closest dictionary term.

        Words for which SymSpell returns no suggestion within edit distance 2
        are kept unchanged.
        """
        corrected = []

        for word in text.split():
            suggestions = self.spell.lookup(word, Verbosity.CLOSEST, max_edit_distance=2)
            # The first suggestion is used as the replacement when any exist.
            corrected.append(suggestions[0].term if suggestions else word)

        return ' '.join(corrected)

    def extract_symptoms(self, text, threshold_fuzzy=85, threshold_semantic=0.72):
        """Match ``text`` against every vocabulary symptom.

        Each symptom is first tested with a fuzzy partial-ratio match on its
        clean name; only if that fails is the embedding dot product tested.

        Args:
            text: Preprocessed (and spell-corrected) input text.
            threshold_fuzzy: Minimum ``fuzz.partial_ratio`` score for a fuzzy
                match.
            threshold_semantic: Minimum dot product between the text embedding
                and the symptom's stored embedding for a semantic match.

        Returns:
            A ``(matched_symptoms, binary_vector)`` tuple. ``matched_symptoms``
            is a list of dicts with the keys ``'symptom'``, ``'match_type'``
            (``'fuzzy'`` or ``'semantic'``) and ``'score'``; note the two match
            types report scores on different scales. ``binary_vector`` has one
            entry per vocabulary symptom, in vocabulary iteration order, set to
            1 where the symptom matched.
        """
        matched_symptoms = []
        text_embedding = self.sentence_transformer.encode([text])[0]

        # Create binary vector
        binary_vector = np.zeros(len(self.symptom_vocab))

        for idx, (symptom, data) in enumerate(self.symptom_vocab.items()):
            clean_name = data['clean_name']

            # Try fuzzy matching
            fuzzy_score = fuzz.partial_ratio(clean_name, text)
            if fuzzy_score >= threshold_fuzzy:
                matched_symptoms.append({
                    'symptom': symptom,
                    'match_type': 'fuzzy',
                    'score': fuzzy_score
                })
                binary_vector[idx] = 1
                continue

            # Try semantic matching.
            # NOTE(review): a plain dot product equals cosine similarity only
            # if both embeddings are unit-length -- confirm how the stored
            # 'embedding' values were produced.
            symptom_emb = np.array(data['embedding'])
            semantic_score = np.dot(text_embedding, symptom_emb)
            if semantic_score >= threshold_semantic:
                matched_symptoms.append({
                    'symptom': symptom,
                    'match_type': 'semantic',
                    'score': float(semantic_score)
                })
                binary_vector[idx] = 1

        return matched_symptoms, binary_vector

    def prepare_features(self, binary_vector):
        """Build the model's input row from a symptom indicator vector.

        The feature layout is: the indicator values, the number of active
        symptoms, the frequency-weighted sum of active symptoms, then the mean
        of the active symptoms' rows in ``self.symptom_embeddings`` (all zeros
        when nothing is active).

        Args:
            binary_vector: 1-D NumPy array with one entry per vocabulary
                symptom, as returned by ``extract_symptoms``.

        Returns:
            A 2-D NumPy array with a single row.
        """
        # Add derived features as in training
        # (presumably mirrors the training-time feature engineering; keep the
        # order in sync with it)
        features = list(binary_vector)

        # Symptom count
        features.append(binary_vector.sum())

        # Weighted symptoms: frequencies normalised to sum to 1
        weights = np.array([entry['frequency']
                            for entry in self.symptom_vocab.values()])
        weights = weights / weights.sum()
        features.append(float(np.dot(binary_vector, weights)))

        # Embedding features: rows are selected by position, so row i of
        # symptom_embeddings must belong to the i-th vocabulary entry.
        active_embeddings = self.symptom_embeddings[binary_vector.astype(bool)]
        if len(active_embeddings) > 0:
            agg_embedding = active_embeddings.mean(axis=0)
        else:
            agg_embedding = np.zeros(self.symptom_embeddings.shape[1])

        features.extend(agg_embedding.tolist())

        return np.array(features).reshape(1, -1)

    def predict(self, text, top_k=3):
        """Run the full pipeline on raw text and return the top predictions.

        Args:
            text: Raw symptom description.
            top_k: Maximum number of predictions to return, most probable
                first. Defaults to 3.

        Returns:
            A dict with ``'detected_symptoms'`` (the match dicts from
            ``extract_symptoms``) and ``'predictions'`` (a list of
            ``{'disease': str, 'probability': float}`` dicts).

        Raises:
            ValueError: If ``top_k`` is smaller than 1, or if no symptom is
                detected in ``text``.
        """
        if top_k < 1:
            raise ValueError("top_k must be a positive integer")

        # Preprocess
        processed = self.preprocess_text(text)
        corrected = self.correct_spelling(processed)

        # Extract symptoms
        matched_symptoms, binary_vector = self.extract_symptoms(corrected)

        if not matched_symptoms:
            raise ValueError("No symptoms detected in input text")

        # Prepare features
        features = self.prepare_features(binary_vector)

        # Only the class probabilities are needed; a separate model.predict()
        # call would repeat the inference just to produce an unused value.
        probabilities = self.model.predict_proba(features)[0]

        # Indices of the top_k most probable classes, most probable first
        top_indices = np.argsort(probabilities)[::-1][:top_k]
        predictions = [
            {
                # Cast to a plain str so the label is JSON-serialisable and
                # matches the response schema even when classes_ holds NumPy
                # scalars or non-string labels.
                'disease': str(self.model.classes_[idx]),
                'probability': float(probabilities[idx])
            }
            for idx in top_indices
        ]

        return {
            'detected_symptoms': matched_symptoms,
            'predictions': predictions
        }

# Initialize predictor
# Single module-level instance built from the loaded components; the
# /predict endpoint and test_api() both call predictor.predict().
predictor = SymptomPredictor(components)

## 3. Create FastAPI Application

In [None]:
# Define request/response models
# Pydantic schemas for the body accepted and returned by the /predict endpoint.

# Request body: the raw symptom description.
class PredictionRequest(BaseModel):
    text: str  # free-text description handed to predictor.predict()
    
# One detected symptom; mirrors the dicts built in SymptomPredictor.extract_symptoms.
class SymptomMatch(BaseModel):
    symptom: str  # key of the matched entry in the symptom vocabulary
    match_type: str  # 'fuzzy' or 'semantic'
    score: float  # fuzzy ratio or embedding dot product, depending on match_type
    
# One candidate disease with its predicted probability.
class Prediction(BaseModel):
    disease: str  # class label taken from the model's classes_
    probability: float  # value from the model's predict_proba output
    
# Response body: same top-level keys as the dict returned by SymptomPredictor.predict.
class PredictionResponse(BaseModel):
    detected_symptoms: List[SymptomMatch]
    predictions: List[Prediction]

# Create FastAPI app
# Module-level application object; the /predict route is registered on it and
# it is the object handed to uvicorn.run() at the end of the notebook.
app = FastAPI(
    title="Disease Prediction API",
    description="API for predicting diseases from symptom descriptions",
    version="1.0.0"
)

# Module-level logger used to record unexpected failures in request handlers.
logger = logging.getLogger(__name__)


@app.post("/predict", response_model=PredictionResponse)
async def predict(request: PredictionRequest):
    """Predict disease from symptom description.

    Args:
        request: Request body carrying the free-text symptom description.

    Returns:
        The dict produced by ``predictor.predict``, validated against
        ``PredictionResponse``.

    Raises:
        HTTPException: 400 when the predictor raises ``ValueError`` (for
            example when no symptoms are detected); 500 for any other failure.
    """
    # NOTE(review): predictor.predict() is synchronous, so it runs on the
    # event loop for the duration of the call -- consider a plain `def`
    # handler or a threadpool if concurrent requests matter.
    try:
        return predictor.predict(request.text)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        # Keep the client-facing message generic, but record the traceback so
        # the failure is not silently lost.
        logger.exception("Unhandled error while serving /predict")
        raise HTTPException(status_code=500, detail="Internal server error") from e

## 4. Test API

In [None]:
def test_api():
    """Run a handful of sample descriptions through the predictor and print the results.

    Each sample is printed, followed either by the detected symptoms and
    predictions or by the error message if the prediction raised.
    """
    sample_inputs = (
        "I have a severe headache and high fever",
        "experiencing chest pain and difficulty breathing",
        "feeling dizzy and nautious with blurred vision",  # Misspelled
        "",  # Empty input
    )
    divider = "\n" + "-"*50 + "\n"

    print("Testing API endpoints:\n")
    for sample in sample_inputs:
        print(f"Input: {sample}")
        try:
            outcome = predictor.predict(sample)

            print("\nDetected Symptoms:")
            for match in outcome['detected_symptoms']:
                print(f"- {match['symptom']} ({match['match_type']}, {match['score']:.2f})")

            print("\nPredictions:")
            for candidate in outcome['predictions']:
                print(f"- {candidate['disease']}: {candidate['probability']:.2%}")
        except Exception as exc:
            # Any failure is reported inline so the remaining samples still run.
            print(f"Error: {str(exc)}")
        print(divider)


test_api()

## 5. Start API Server

In [None]:
# Only start the server when this file is the entry point, not when imported.
if __name__ == "__main__":
    # Start API server
    # Host 0.0.0.0 listens on all network interfaces, port 8000.
    # NOTE(review): inside a notebook kernel an event loop is normally already
    # running, so this call may fail there -- confirm how the server is meant
    # to be launched.
    uvicorn.run(app, host="0.0.0.0", port=8000)