In [None]:
#Intent classification using a machine learning model

In [1]:
#1. Setup and Dependencies


import numpy as np
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
import spacy
import json
from typing import Dict, List, Tuple, Optional
import random

# Load English language model for spaCy
nlp = spacy.load("en_core_web_sm")

In [3]:
#2. Data Preparation
# Sample training data for intent recognition
training_data = [
    {"text": "I want to book a flight to New York", "intent": "flight_booking", "entities": {"destination": "New York"}},
    {"text": "Find me a hotel in Paris for next week", "intent": "hotel_booking", "entities": {"location": "Paris", "date": "next week"}},
    {"text": "Book a flight from London to Tokyo on June 15th", "intent": "flight_booking", "entities": {"origin": "London", "destination": "Tokyo", "date": "June 15th"}},
    {"text": "I need a hotel room in Berlin from May 20 to May 25", "intent": "hotel_booking", "entities": {"location": "Berlin", "check_in": "May 20", "check_out": "May 25"}},
    {"text": "Show me flights to Dubai under $500", "intent": "flight_booking", "entities": {"destination": "Dubai", "price": "under $500"}},
    {"text": "Find a 5-star hotel in Rome with a pool", "intent": "hotel_booking", "entities": {"location": "Rome", "stars": "5", "amenities": "pool"}},
    {"text": "What's the cheapest flight to Sydney next month?", "intent": "flight_booking", "entities": {"destination": "Sydney", "date": "next month", "price": "cheapest"}},
    {"text": "I'd like to reserve a business class flight to Singapore", "intent": "flight_booking", "entities": {"destination": "Singapore", "class": "business"}},
    {"text": "Book me a hotel near the beach in Miami", "intent": "hotel_booking", "entities": {"location": "Miami", "near": "beach"}},
    {"text": "Find flights from Chicago to Los Angeles tomorrow", "intent": "flight_booking", "entities": {"origin": "Chicago", "destination": "Los Angeles", "date": "tomorrow"}},
    {"text": "I need a hotel with free wifi in Tokyo", "intent": "hotel_booking", "entities": {"location": "Tokyo", "amenities": "free wifi"}},
    {"text": "What are the flight options to London this weekend?", "intent": "flight_booking", "entities": {"destination": "London", "date": "this weekend"}},
    {"text": "Show me pet-friendly hotels in San Francisco", "intent": "hotel_booking", "entities": {"location": "San Francisco", "amenities": "pet-friendly"}},
    {"text": "Book a round-trip flight to Paris in July", "intent": "flight_booking", "entities": {"destination": "Paris", "date": "July", "trip_type": "round-trip"}},
    {"text": "I want to cancel my hotel reservation", "intent": "hotel_cancellation", "entities": {}},
    {"text": "Cancel my flight to Berlin", "intent": "flight_cancellation", "entities": {"destination": "Berlin"}},
    {"text": "What's the status of my flight?", "intent": "flight_status", "entities": {}},
    {"text": "Modify my hotel booking", "intent": "hotel_modification", "entities": {}},
    {"text": "Change my flight to an earlier date", "intent": "flight_modification", "entities": {"date": "earlier"}},
    {"text": "What amenities does the hotel offer?", "intent": "hotel_inquiry", "entities": {}},
    {"text": "What's the baggage allowance for my flight?", "intent": "flight_inquiry", "entities": {}}
]

# Convert to DataFrame for easier processing
df = pd.DataFrame(training_data)
df.head()

Unnamed: 0,text,intent,entities
0,I want to book a flight to New York,flight_booking,{'destination': 'New York'}
1,Find me a hotel in Paris for next week,hotel_booking,"{'location': 'Paris', 'date': 'next week'}"
2,Book a flight from London to Tokyo on June 15th,flight_booking,"{'origin': 'London', 'destination': 'Tokyo', '..."
3,I need a hotel room in Berlin from May 20 to M...,hotel_booking,"{'location': 'Berlin', 'check_in': 'May 20', '..."
4,Show me flights to Dubai under $500,flight_booking,"{'destination': 'Dubai', 'price': 'under $500'}"


In [4]:
#3. Intent Classification Model

class IntentClassifier:
    def __init__(self):
        self.model = Pipeline([
            ('tfidf', TfidfVectorizer()),
            ('clf', RandomForestClassifier(n_estimators=100))
        ])
        self.intent_labels = None
    
    def train(self, texts: List[str], intents: List[str]):
        """Train the intent classification model"""
        self.intent_labels = list(set(intents))
        self.model.fit(texts, intents)
    
    def predict(self, text: str) -> str:
        """Predict intent for a given text"""
        if not self.model:
            raise ValueError("Model not trained yet")
        return self.model.predict([text])[0]
    
    def evaluate(self, X_test, y_test):
        """Evaluate model performance"""
        y_pred = self.model.predict(X_test)
        print(classification_report(y_test, y_pred))

# Train the intent classifier
intent_classifier = IntentClassifier()
X_train, X_test, y_train, y_test = train_test_split(
    df['text'], df['intent'], test_size=0.2, random_state=42
)
intent_classifier.train(X_train, y_train)

# Evaluate the model
print("Intent Classification Evaluation:")
intent_classifier.evaluate(X_test, y_test)

Intent Classification Evaluation:
                     precision    recall  f1-score   support

     flight_booking       0.50      1.00      0.67         1
flight_cancellation       0.00      0.00      0.00         1
      hotel_booking       0.67      1.00      0.80         2
 hotel_modification       0.00      0.00      0.00         1

           accuracy                           0.60         5
          macro avg       0.29      0.50      0.37         5
       weighted avg       0.37      0.60      0.45         5



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [5]:
#4. Entity Recognition System

class EntityExtractor:
    def __init__(self):
        self.flight_entities = ['origin', 'destination', 'date', 'time', 'airline', 
                              'flight_number', 'price', 'class', 'passengers', 'trip_type']
        self.hotel_entities = ['location', 'check_in', 'check_out', 'guests', 
                              'rooms', 'price', 'stars', 'amenities', 'hotel_name']
        self.common_entities = ['date', 'time', 'price']
    
    def extract_entities(self, text: str, intent: str) -> Dict[str, str]:
        """Extract entities based on the detected intent"""
        doc = nlp(text)
        entities = {}
        
        if intent == 'flight_booking':
            entities.update(self._extract_flight_entities(doc))
        elif intent == 'hotel_booking':
            entities.update(self._extract_hotel_entities(doc))
        elif intent in ['flight_cancellation', 'flight_modification', 'flight_inquiry']:
            entities.update(self._extract_flight_entities(doc))
        elif intent in ['hotel_cancellation', 'hotel_modification', 'hotel_inquiry']:
            entities.update(self._extract_hotel_entities(doc))
        
        # Extract common entities
        entities.update(self._extract_dates(doc))
        entities.update(self._extract_prices(doc))
        
        return entities
    
    def _extract_flight_entities(self, doc) -> Dict[str, str]:
        """Extract flight-specific entities"""
        entities = {}
        
        # Extract origin and destination (cities after "from" and "to")
        for i, token in enumerate(doc):
            if token.text.lower() == 'from' and i+1 < len(doc):
                entities['origin'] = doc[i+1].text
            elif token.text.lower() == 'to' and i+1 < len(doc):
                entities['destination'] = doc[i+1].text
        
        # Extract flight class
        for token in doc:
            if token.text.lower() in ['economy', 'business', 'first', 'premium']:
                entities['class'] = token.text.lower()
        
        return entities
    
    def _extract_hotel_entities(self, doc) -> Dict[str, str]:
        """Extract hotel-specific entities"""
        entities = {}
        
        # Extract location (city after "in" or "at")
        for i, token in enumerate(doc):
            if token.text.lower() in ['in', 'at', 'near'] and i+1 < len(doc):
                entities['location'] = doc[i+1].text
        
        # Extract star rating
        for token in doc:
            if token.like_num and 'star' in token.text.lower():
                entities['stars'] = token.text
        
        # Extract amenities
        amenities = []
        for token in doc:
            if token.text.lower() in ['pool', 'wifi', 'gym', 'spa', 'breakfast', 'parking']:
                amenities.append(token.text.lower())
        if amenities:
            entities['amenities'] = ', '.join(amenities)
        
        return entities
    
    def _extract_dates(self, doc) -> Dict[str, str]:
        """Extract date-related entities"""
        entities = {}
        date_phrases = ['today', 'tomorrow', 'next week', 'next month', 'this weekend']
        
        # Check for specific date phrases
        for chunk in doc.noun_chunks:
            if chunk.text.lower() in date_phrases:
                entities['date'] = chunk.text
                return entities
        
        # Use spaCy's entity recognition for dates
        for ent in doc.ents:
            if ent.label_ == 'DATE':
                entities['date'] = ent.text
            elif ent.label_ == 'TIME':
                entities['time'] = ent.text
        
        return entities
    
    def _extract_prices(self, doc) -> Dict[str, str]:
        """Extract price-related entities"""
        entities = {}
        
        # Look for price patterns
        for token in doc:
            if token.text.startswith('$'):
                entities['price'] = token.text
            elif token.text.lower() in ['cheap', 'cheapest', 'expensive', 'affordable']:
                entities['price'] = token.text.lower()
        
        # Look for price ranges
        for i, token in enumerate(doc):
            if token.text.lower() == 'under' and i+1 < len(doc) and doc[i+1].text.startswith('$'):
                entities['price'] = f"under {doc[i+1].text}"
            elif token.text.lower() == 'over' and i+1 < len(doc) and doc[i+1].text.startswith('$'):
                entities['price'] = f"over {doc[i+1].text}"
        
        return entities

entity_extractor = EntityExtractor()