In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from nltk.corpus import stopwords
import nltk

# Download the NLTK stop-word corpus, then collect its English entries into a
# set (the vectorizer further down is configured with these words).
nltk.download("stopwords")
stop_words = {*stopwords.words("english")}

# **Dataset: frequently asked questions and their intent labels**
# Questions are grouped under the intent they belong to, and the parallel
# "question"/"intent" columns are derived from this single mapping.  Deriving
# the labels (instead of hard-coding a count per intent) keeps the two columns
# the same length and correctly aligned whenever a question is added or removed.
questions_by_intent = {
    "baggage_policy": [
        "What is the baggage allowance?",
        "How much luggage can I bring?",
        "What are the baggage restrictions?",
        "Can I bring extra luggage?",
        "What is the maximum weight for checked luggage?",
        "Are there size restrictions for hand luggage?",
        "Can I take a carry-on bag?",
        "How much does extra baggage cost?",
        "Can I bring sports equipment?",
        "Are musical instruments allowed on board?",
        "Is there a fee for overweight luggage?",
        "Can I bring liquids in my carry-on?",
        "What is the policy for fragile items?",
        "Can I carry electronics in my hand luggage?",
        "Are there any prohibited items in baggage?",
        "How do I label my luggage?",
        "What happens if my luggage is lost?",
        "What should I do if my luggage is damaged?",
        "Can I bring my pet as part of my luggage?",
        "How many bags can I check in for free?",
    ],
    "cancellation_policy": [
        "Can I cancel my flight?",
        "What is the cancellation policy?",
        "How much does it cost to cancel a flight?",
        "Can I get a refund if I cancel my flight?",
        "What is the deadline for cancellations?",
        "Are cancellations allowed for all tickets?",
        "How long does it take to get a refund?",
        "Can I cancel only part of my booking?",
        "What happens if I cancel my flight last minute?",
        "Do I get a full refund if I cancel within 24 hours?",
        "Can I cancel my flight online?",
        "Is there a fee for flight cancellations?",
        "What is the refund policy for cancellations?",
        "Can I cancel an international flight?",
        "What documents do I need to cancel a booking?",
        "Can I transfer my ticket to someone else instead of canceling?",
        "Is there a difference between economy and business class cancellations?",
        "Can I cancel a group booking?",
        "What happens if the airline cancels my flight?",
        "Can I cancel my flight due to medical reasons?",
    ],
    "upgrade_cost": [
        "How much does it cost to upgrade?",
        "What is the upgrade fee for business class?",
        "Can I upgrade my seat after booking?",
        "Is it cheaper to upgrade at the airport?",
        "What are the benefits of upgrading?",
        "Can I upgrade using frequent flyer points?",
        "How do I request an upgrade?",
        "What is the upgrade policy?",
        "Can I upgrade only one leg of my trip?",
        "Are upgrades available for all flights?",
        "How do I check if upgrades are available?",
        "What is the cost difference between economy and business class?",
        "Can I upgrade with cash at check-in?",
        "Is there a bidding system for upgrades?",
        "Are there discounts for upgrades?",
        "Can I upgrade my flight online?",
        "How long before the flight can I request an upgrade?",
        "Is there a waiting list for upgrades?",
        "Are upgrades guaranteed after payment?",
        "What happens if I upgrade and the flight is canceled?",
    ],
}

# Flatten the mapping into two parallel columns: one row per question, with the
# intent repeated once for each question in its group (insertion order kept).
data = {
    "question": [
        question
        for questions in questions_by_intent.values()
        for question in questions
    ],
    "intent": [
        intent
        for intent, questions in questions_by_intent.items()
        for _ in questions
    ],
}

# **Mapping Intent to Predefined Responses**
# One canned reply per intent label; the keys match the labels used in the
# dataset's "intent" column.
responses = dict(
    baggage_policy="Each passenger can bring 23kg of checked luggage.",
    cancellation_policy="You can cancel your flight up to 24 hours before departure.",
    upgrade_cost="The upgrade cost depends on the destination. Contact support for details.",
)

# **Convert to DataFrame**
# Two columns: "question" (the text) and "intent" (the label).
df = pd.DataFrame(data)

# **Split the Data**
# Hold out 30% of the rows for evaluation with a fixed seed for repeatability.
# The split is stratified on the label: with only 60 rows across three intents,
# an unstratified draw can leave an intent under-represented in the test set
# and make the reported metrics unreliable.
X_train, X_test, y_train, y_test = train_test_split(
    df["question"],
    df["intent"],
    test_size=0.3,
    random_state=42,
    stratify=df["intent"],
)

# **Vectorize Questions with Stopwords and N-grams**
# `stop_words` must be passed as a list: scikit-learn >= 1.2 validates this
# parameter and accepts only "english", a list, or None, so handing it the
# NLTK set raises InvalidParameterError when fitting.  Sorting the set gives
# a list with a deterministic order; older releases accept it as well.
vectorizer = CountVectorizer(
    ngram_range=(1, 2),  # use both unigrams and bigrams as features
    stop_words=sorted(stop_words),  # drop the NLTK English stop words
)
# Learn the vocabulary from the training questions only, then encode the test
# questions with that same vocabulary.
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# **Train the Model**
# Build the logistic-regression classifier (capped at 200 solver iterations)
# and fit it on the vectorized training questions; `fit` returns the estimator.
classifier = LogisticRegression(max_iter=200).fit(X_train_vec, y_train)

# **Evaluate the Model**
# Predict an intent for every held-out question, then print the overall
# accuracy followed by the per-class report.
y_pred = classifier.predict(X_test_vec)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(
    "Classification Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred),
)

# **Test the Model with New Questions**
# NOTE(review): the first two questions also appear verbatim in the `data`
# question list, so they may have been seen during training; only the third
# is guaranteed to be unseen text.
new_questions = [
    "How much luggage can I bring?",
    "Can I cancel my flight online?",
    "How much does it cost to upgrade to business class?",
]
# Encode with the already-fitted vocabulary and predict one intent per question.
new_questions_vec = vectorizer.transform(new_questions)
predicted_intents = classifier.predict(new_questions_vec)

# **Provide Responses for New Questions**
# Print each question with its predicted intent and canned reply, followed by
# a 50-dash separator line; an intent with no entry in `responses` gets the
# apology text instead.
for question, intent in zip(new_questions, predicted_intents):
    response = responses.get(intent, "Sorry, I don't have an answer for that.")
    print(
        f"Question: {question}",
        f"Intent: {intent}",
        f"Response: {response}",
        "-" * 50,
        sep="\n",
    )
