<a href="https://colab.research.google.com/github/DevLaukey/-Party_With_me/blob/main/Defribilators.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""
# Medical Device AI System
This notebook implements an AI system for processing medical device queries, specifically for defibrillators.

## Setup Instructions:
1. Run the dependency installation cell
2. Restart the runtime when prompted
3. Upload your defibrillator data file when prompted
4. Run the remaining cells in order


In [1]:
!pip install torch transformers pandas numpy openpyxl




In [2]:
# Import Libraries and Setup
import torch
import transformers
from transformers import AutoTokenizer, AutoModel
import pandas as pd
import numpy as np
from typing import List, Dict, Tuple, Optional
import torch.nn as nn
from google.colab import files
import io
import json

# Clear GPU memory if needed
if torch.cuda.is_available():
    torch.cuda.empty_cache()

In [3]:
# Define AI Model Classes
class MedicalDeviceAI:
    def __init__(self, model_name: str = "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract"):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.base_model = AutoModel.from_pretrained(model_name).to(self.device)
        self.setup_model_heads()

    ENTITY_TYPES = {
        "DEVICE_ATTRIBUTES": [
            "MANUFACTURER",
            "BRAND",
            "MODEL",
            "MANUFACTURER_CODE",
            "COUNTRY_ORIGIN"
        ],
        "TECHNICAL_SPECS": [
            "CHARGE_TIME",
            "ENERGY_OUTPUT",
            "POWER_SOURCE",
            "WEIGHT",
            "DIMENSIONS",
            "DISPLAY_TYPE",
            "CONTACT_TYPE"
        ],
        "COMMERCIAL": [
            "PRICE_RANGE",
            "PRICE_EXACT",
            "QUANTITY",
            "UNSPSC_CODE",
            "HCPCS_CODE"
        ],
        "USAGE": [
            "APPLICATION",
            "OPERATION_TYPE",
            "PROMPT_TYPE"
        ]
    }

    INTENTS = [
        "TECHNICAL_INQUIRY",
        "PRICING_INQUIRY",
        "COMPATIBILITY_CHECK",
        "REGULATORY_INQUIRY",
        "COMPARISON_REQUEST",
        "AVAILABILITY_CHECK",
        "SUPPORT_REQUEST",
        "PURCHASE_INTENT",
        "SPECIFICATION_MATCH",
        "URGENT_ORDER"
    ]

    def setup_model_heads(self):
        self.flat_entities = [
            entity for category in self.ENTITY_TYPES.values()
            for entity in category
        ]

        self.entity_extractor = nn.ModuleDict({
            category: nn.Sequential(
                nn.Linear(768, 256),
                nn.ReLU(),
                nn.Dropout(0.1),
                nn.Linear(256, len(entities) * 2)
            ).to(self.device)
            for category, entities in self.ENTITY_TYPES.items()
        })

        self.intent_classifier = nn.Sequential(
            nn.Linear(768, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, len(self.INTENTS))
        ).to(self.device)

        self.spec_matcher = nn.Sequential(
            nn.Linear(768, 384),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(384, len(self.ENTITY_TYPES["TECHNICAL_SPECS"]))
        ).to(self.device)

    def preprocess_text(self, text: str) -> torch.Tensor:
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512
        ).to(self.device)
        return inputs

    def process_device_query(self, text: str) -> Dict:
        inputs = self.preprocess_text(text)

        with torch.no_grad():
            outputs = self.base_model(**inputs)
            sequence_output = outputs.last_hidden_state
            pooled_output = sequence_output[:, 0, :]

            intent_logits = self.intent_classifier(pooled_output)
            intent_probs = torch.softmax(intent_logits, dim=1)
            intent_idx = torch.argmax(intent_probs, dim=1)

            entities = {}
            for category, extractor in self.entity_extractor.items():
                category_outputs = extractor(sequence_output)
                category_outputs = category_outputs.view(
                    -1, len(self.ENTITY_TYPES[category]), 2
                )

                category_entities = {}
                for idx, entity_type in enumerate(self.ENTITY_TYPES[category]):
                    start_pos = category_outputs[0, idx, 0].argmax().item()
                    end_pos = category_outputs[0, idx, 1].argmax().item()

                    if start_pos < end_pos:
                        entity_text = self.tokenizer.decode(
                            inputs.input_ids[0][start_pos:end_pos]
                        )
                        category_entities[entity_type] = entity_text

                entities[category] = category_entities

        return {
            "intent": self.INTENTS[intent_idx.item()],
            "confidence": intent_probs.max().item(),
            "entities": entities,
            "raw_text": text
        }


In [18]:

class DefibrillatorMatcher:
    def __init__(self, product_data: List[Dict]):
        self.product_data = product_data
        self.energy_index = {}  # Initialize this attribute
        self.weight_index = {}  # Initialize this attribute
        self.dimensions_index = {}  # Initialize this attribute
        self.setup_technical_indices()  # Call this in __init__

    def setup_technical_indices(self):
        for product in self.product_data:
            # Energy Output Indexing
            energy_output = product.get("Energy Output")
            if energy_output:
                # Extract only numeric values, handling various formats
                energy_values = []
                for x in str(energy_output).split(","):
                    # Extract numeric values, filter out empty results
                    numeric_val = ''.join(filter(lambda c: c.isdigit() or c == '.', str(x)))
                    if numeric_val:  # Only convert if there's a non-empty string
                        try:
                            energy_values.append(float(numeric_val))
                        except ValueError:
                            continue  # Skip values that can't be converted

                # Only create index if we have valid energy values
                if energy_values:
                    self.energy_index.setdefault(
                        (min(energy_values), max(energy_values)),
                        []
                    ).append(product)

            # Weight Indexing
            weight = product.get("Weight")
            if weight:
                try:
                    # Remove 'lbs.' and convert to float
                    weight_val = float(str(weight).replace("lbs.", "").strip())
                    weight_range = (weight_val - 1, weight_val + 1)
                    self.weight_index.setdefault(weight_range, []).append(product)
                except ValueError:
                    continue

            # Dimensions Indexing
            dimensions = product.get("Dimensions")
            if dimensions:
                try:
                    # Extract numeric values from dimensions
                    dim_values = [
                        float(x) for x in
                        ''.join(filter(lambda c: c.isdigit() or c == '.' or c == ' ', str(dimensions))).split()
                    ]
                    if dim_values:
                        self.dimensions_index.setdefault(
                            (min(dim_values), max(dim_values)),
                            []
                        ).append(product)
                except ValueError:
                    continue

    def match_products(self, query_results: Dict) -> List[Dict]:
        matches = []
        tech_specs = query_results["entities"].get("TECHNICAL_SPECS", {})

        candidates = self.product_data.copy()

        if "ENERGY_OUTPUT" in tech_specs:
            energy_req = tech_specs["ENERGY_OUTPUT"]
            candidates = [
                product for product in candidates
                if self._matches_energy_requirement(product, energy_req)
            ]

        if "WEIGHT" in tech_specs:
            weight_req = tech_specs["WEIGHT"]
            candidates = [
                product for product in candidates
                if self._matches_weight_requirement(product, weight_req)
            ]

        for product in candidates:
            score = self._calculate_match_score(product, query_results)
            if score > 0.5:
                matches.append({
                    "product": product,
                    "score": score,
                    "matching_features": self._get_matching_features(
                        product, query_results
                    )
                })

        matches.sort(key=lambda x: x["score"], reverse=True)
        return matches[:5]

    def _matches_energy_requirement(self, product: Dict, requirement: str) -> bool:
      if not product.get("Energy Output"):
          return False

      try:
          # Extract numeric values from both product and requirement
          prod_energy = [
              float(''.join(filter(lambda c: c.isdigit() or c == '.', str(x))))
              for x in str(product["Energy Output"]).split(",")
          ]

          # Extract numeric value from requirement
          req_energy = float(''.join(filter(lambda c: c.isdigit() or c == '.', requirement)))

          return min(prod_energy) <= req_energy <= max(prod_energy)
      except (ValueError, IndexError):
          return False

    def _calculate_match_score(self, product: Dict, query: Dict) -> float:
        score = 0.0
        weights = {
            "TECHNICAL_SPECS": 0.4,
            "COMMERCIAL": 0.3,
            "DEVICE_ATTRIBUTES": 0.2,
            "USAGE": 0.1
        }

        for category, weight in weights.items():
            if category in query["entities"]:
                category_score = self._calculate_category_score(
                    product,
                    query["entities"][category]
                )
                score += category_score * weight

        return score

    def _calculate_category_score(self, product: Dict, requirements: Dict) -> float:
        if not requirements:
            return 1.0

        matches = 0
        total = len(requirements)

        for attr, value in requirements.items():
            if attr in product and self._values_match(product[attr], value):
                matches += 1

        return matches / total if total > 0 else 1.0

    def _values_match(self, prod_value: str, req_value: str) -> bool:
        if not prod_value or not req_value:
            return False

        prod_value = str(prod_value).lower()
        req_value = str(req_value).lower()

        if any(x in req_value for x in ["to", "-", "between"]):
            return self._check_range_match(prod_value, req_value)

        return prod_value == req_value or req_value in prod_value

    def _check_range_match(self, prod_value: str, req_value: str) -> bool:
        try:
            prod_num = float(''.join(
                c for c in prod_value if c.isdigit() or c == '.'
            ))

            if "to" in req_value:
                min_val, max_val = map(
                    float,
                    req_value.split("to")
                )
            elif "-" in req_value:
                min_val, max_val = map(
                    float,
                    req_value.split("-")
                )
            else:
                parts = req_value.split()
                min_val = float(parts[1])
                max_val = float(parts[3])

            return min_val <= prod_num <= max_val

        except (ValueError, IndexError):
            return False

    def _get_matching_features(self, product: Dict, query: Dict) -> Dict[str, List[str]]:
        matches = {}

        for category, requirements in query["entities"].items():
            category_matches = []
            for attr, value in requirements.items():
                if attr in product and self._values_match(product[attr], value):
                    category_matches.append(attr)
            if category_matches:
                matches[category] = category_matches

        return matches

def inference_pipeline(model: MedicalDeviceAI, matcher: DefibrillatorMatcher, query: str) -> Dict:
    query_results = model.process_device_query(query)

    if query_results["intent"] in [
        "TECHNICAL_INQUIRY",
        "PRICING_INQUIRY",
        "SPECIFICATION_MATCH",
        "PURCHASE_INTENT"
    ]:
        product_matches = matcher.match_products(query_results)
        query_results["product_matches"] = product_matches

    return query_results



"""
## Upload Defibrillator Data
Please upload your DefribilatorsOnly.xlsx file when prompted
"""


In [14]:

# Cell 6: Load Data
print("Please upload your DefribilatorsOnly.xlsx file")
uploaded = files.upload()

def load_defibrillator_data():
    filename = list(uploaded.keys())[0]
    df = pd.read_excel(io.BytesIO(uploaded[filename]))
    print(f"Loaded {len(df)} defibrillator records")
    print("\nColumns found:", df.columns.tolist())
    return df.to_dict('records')

products = load_defibrillator_data()


Please upload your DefribilatorsOnly.xlsx file


Saving Defribilators-Only.xlsx to Defribilators-Only (2).xlsx
Loaded 82 defibrillator records

Columns found: ['Product Title', 'Price', 'Manufacturer_code', 'Model', 'Brand', 'Manufacturer_code.1', 'Brand.1', 'Manufacturer', 'Country of Origin', 'Application', 'Charge Time', 'Contact Type', 'Dimensions', 'Display Type', 'Energy Output', 'Operation Type', 'Power Source', 'Prompt Type', 'UNSPSC Code', 'Weight', 'images', 'features', 'HCPCS', 'FSA Eligible - Sell UOM', 'product_options_display_types', 'product_options_operation_types', 'product_options_weights', 'product_options_dimensions', 'product_options_contact_types']


In [19]:

# Cell 7: Initialize Model
model = MedicalDeviceAI()
matcher = DefibrillatorMatcher(products)


In [20]:

# Cell 8: Query Function
def query_model(user_query: str):
    """Process a user query and display results"""
    print(f"Processing query: {user_query}\n")

    results = inference_pipeline(model, matcher, user_query)

    print("Query Analysis:")
    print(f"Intent: {results['intent']}")
    print(f"Confidence: {results['confidence']:.2f}")

    print("\nDetected Specifications:")
    for category, entities in results['entities'].items():
        if entities:
            print(f"\n{category}:")
            for entity_type, value in entities.items():
                print(f"  - {entity_type}: {value}")

    if 'product_matches' in results and results['product_matches']:
        print("\nMatching Products:")
        for idx, match in enumerate(results['product_matches'], 1):
            print(f"\n{idx}. {match['product'].get('Product Title', 'Unknown Product')}")
            print(f"   Match Score: {match['score']:.2f}")
            print("   Matching Features:")
            for category, features in match['matching_features'].items():
                print(f"   - {category}: {', '.join(features)}")
            print(f"   Price: ${match['product'].get('Price', 'N/A')}")
    else:
        print("\nNo product matches found for this query.")


In [21]:

# Cell 9: Test Queries
# Example usage:
query_model("I need a defibrillator with LCD display and fast charge time")
query_model("Show me Zoll defibrillators under $30000")
query_model("What's the lightest automatic defibrillator available?")

Processing query: I need a defibrillator with LCD display and fast charge time

Query Analysis:
Intent: SPECIFICATION_MATCH
Confidence: 0.15

Detected Specifications:

Matching Products:

1. Defibrillator Auotmatic ZOLL® X Series 
   Match Score: 1.00
   Matching Features:
   Price: $27850.0

2. Defibrillator Unit Automatic Zoll® X Series Electrode Pads Contact
   Match Score: 1.00
   Matching Features:
   Price: $27850.0

3. Defibrillator Unit Manual Operation Zoll® R Series with, Nellcor, OneStep Pacing, NIBP
   Match Score: 1.00
   Matching Features:
   Price: $15750.0

4. Defibrillator with OneStep Pacing, SpO2 Manual Operation Zoll® R Series
   Match Score: 1.00
   Matching Features:
   Price: $16100.0

5. Refurbished Defibrillator Unit Semi-Automatic Zoll® X Series
   Match Score: 1.00
   Matching Features:
   Price: $19500.0
Processing query: Show me Zoll defibrillators under $30000

Query Analysis:
Intent: SPECIFICATION_MATCH
Confidence: 0.18

Detected Specifications:

Matching