In [12]:
import json
import pandas as pd
import random
import os
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
import joblib
from rapidfuzz import process, fuzz


In [13]:
def clean_text(text):
    """Normalize ``text`` for matching: lowercase it, then trim surrounding whitespace."""
    lowered = text.lower()
    return lowered.strip()

def extract_package_type(user_input):
    """Return the first known package type mentioned in ``user_input``.

    The check is a case-insensitive substring test, tried in the fixed order
    beach, adventure, honeymoon, family, budget. ``None`` is returned when
    none of those keywords occurs in the text.
    """
    lowered = user_input.lower()
    known_types = ('beach', 'adventure', 'honeymoon', 'family', 'budget')
    # The first keyword (in list order, not text order) found in the input wins.
    return next((kind for kind in known_types if kind in lowered), None)


In [14]:
MODEL_FILE = "intent_clf.pkl"

# Load intents.json and flatten it into two parallel lists: every example
# pattern paired with the name of the intent it belongs to.
with open("intents.json", "r", encoding="utf-8") as f:
    intents_data = json.load(f)

patterns, labels = [], []
for intent in intents_data['intents']:
    for pattern in intent['patterns']:
        patterns.append(pattern)
        labels.append(intent['intent'])

if os.path.exists(MODEL_FILE):
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only load a MODEL_FILE that this notebook produced.
    # NOTE: the cached model is reused even if intents.json has changed since
    # it was trained; delete MODEL_FILE to force retraining.
    intent_clf = joblib.load(MODEL_FILE)
    print("Loaded intent classifier from disk.")
else:
    X_train, X_test, y_train, y_test = train_test_split(patterns, labels, test_size=0.2, random_state=42)
    # TF-IDF features over the raw pattern text feeding a logistic-regression classifier.
    intent_clf = Pipeline([
        ('tfidf', TfidfVectorizer()),
        ('clf', LogisticRegression(max_iter=500))
    ])
    intent_clf.fit(X_train, y_train)
    # The held-out 20% was previously split off and then ignored; score it so
    # the quality of the saved model is visible before it is persisted.
    test_accuracy = intent_clf.score(X_test, y_test)
    print(f"Held-out accuracy on {len(X_test)} patterns: {test_accuracy:.3f}")
    joblib.dump(intent_clf, MODEL_FILE)
    print(f"Trained intent classifier and saved to {MODEL_FILE}.")


Trained intent classifier and saved to intent_clf.pkl.


In [15]:
# Package catalogue: lowercase the two columns used for keyword matching.
packages = pd.read_csv("packages.csv")
packages['type'] = packages['type'].str.lower()
packages['destination'] = packages['destination'].str.lower()

# FAQ table: only the question text is normalized; answers are left untouched.
faqs = pd.read_csv("faq.csv")
faqs['question'] = faqs['question'].str.lower()
if 'answer' not in faqs.columns:
    # Replaces a no-op `faqs['answer'] = faqs['answer']` self-assignment whose
    # only effect was to fail here, at load time, when the column is missing.
    raise KeyError('answer')


In [16]:
def find_best_faq(user_input):
    """Return the answer of the FAQ whose question best matches ``user_input``.

    The query is compared against every entry of ``faqs['question']`` with
    rapidfuzz's ``token_sort_ratio`` scorer, and the answer from the
    best-scoring row is returned.

    Args:
        user_input: Free-text question from the user.

    Returns:
        The ``answer`` value of the best-matching FAQ row, or ``None`` when
        there is no FAQ question to match against.
    """
    question_list = faqs['question'].tolist()
    # The FAQ questions are lowercased at load time, and rapidfuzz 3.x applies
    # no preprocessing by default, so normalize the query the same way to keep
    # the comparison case-insensitive.
    query = user_input.lower().strip()
    match = process.extractOne(query, question_list, scorer=fuzz.token_sort_ratio)
    if match is None:
        # extractOne yields None when it has no choice to score (e.g. empty FAQ table).
        return None
    # For a list of choices the third element is the position in that list,
    # which lines up with the positional row index of `faqs`.
    _, _, idx = match
    return faqs.iloc[idx]['answer']

def find_best_packages(user_input, top_k=3):
    """Return up to ``top_k`` package rows that best match ``user_input``.

    If the text mentions a known package type (see ``extract_package_type``),
    only packages whose ``type`` contains that keyword are considered;
    otherwise the whole catalogue is searched. Candidates are ranked by
    rapidfuzz ``token_sort_ratio`` between the query and the concatenated
    "type destination description" text of each package.

    Args:
        user_input: Free-text request from the user.
        top_k: Maximum number of packages to return.

    Returns:
        A list of pandas Series (rows of ``packages``), best match first.
        The list is empty when no package passes the type filter.
    """
    package_type = extract_package_type(user_input)
    if package_type:
        # na=False: rows with a missing type simply do not match; without it
        # the mask contains NaN and boolean indexing raises.
        type_mask = packages['type'].str.contains(package_type, case=False, na=False)
        filtered = packages[type_mask]
    else:
        filtered = packages.copy()

    if filtered.empty:
        return []

    # Blank out missing values first so one empty field does not turn the
    # whole concatenated text of a row into NaN.
    text_columns = filtered[['type', 'destination', 'description']].fillna('').astype(str)
    combined_text = text_columns['type'] + ' ' + text_columns['destination'] + ' ' + text_columns['description']
    # Lowercase both sides: type and destination are already lowercased at
    # load time, but descriptions and the raw query are not.
    package_list = combined_text.str.lower().tolist()
    query = user_input.lower().strip()
    matches = process.extract(query, package_list, scorer=fuzz.token_sort_ratio, limit=top_k)

    # Each match carries the position in package_list, which equals the
    # positional row index in `filtered`.
    return [filtered.iloc[idx] for _, _, idx in matches]


In [17]:
# Final status line for the notebook run; it only prints a message and does
# not touch the data or model objects created in the earlier cells.
print("Data and model ready. You can now use intent_clf in main.py.")


Data and model ready. You can now use intent_clf in main.py.
