## Step 1: Install Required Libraries

In [None]:
!pip install numpy pandas nltk tensorflow scikit-learn matplotlib seaborn

## Step 2: Import Libraries

In [None]:
import numpy as np
import pandas as pd
import re
import json
import pickle
import zipfile
import os

# NLP Libraries
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Deep Learning
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.utils import to_categorical

# Sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Download the NLTK resources that the tokenizer, stopword list and
# lemmatizer imported above rely on.
for nltk_resource in ('punkt', 'stopwords', 'wordnet', 'punkt_tab'):
    nltk.download(nltk_resource)

print("‚úÖ All libraries imported successfully!")
print(f"TensorFlow version: {tf.__version__}")

## Step 3: Data Collection and Preparation

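This step builds a small, hand-written hotel review dataset in which every review is labelled with a sentiment (positive, negative, or neutral) and a category (cleanliness, food, staff, amenities, or overall).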

In [None]:
# Hotel Reviews Dataset
# Hand-written seed reviews. Each entry is a dict with three string keys:
#   "review"    - the raw review text
#   "sentiment" - "positive", "negative" or "neutral"
#   "category"  - "cleanliness", "food", "staff", "amenities" or "overall"

reviews_data = [
    # POSITIVE REVIEWS - Cleanliness
    {"review": "The room was spotlessly clean and fresh. Housekeeping did an amazing job!", "sentiment": "positive", "category": "cleanliness"},
    {"review": "Immaculate bathroom and bedroom. The sheets smelled like fresh laundry.", "sentiment": "positive", "category": "cleanliness"},
    {"review": "I was impressed by how clean the entire hotel was. Not a speck of dust anywhere.", "sentiment": "positive", "category": "cleanliness"},
    {"review": "The cleaning staff maintains excellent hygiene standards throughout the property.", "sentiment": "positive", "category": "cleanliness"},
    {"review": "Room was pristine and well-maintained. Very satisfied with cleanliness.", "sentiment": "positive", "category": "cleanliness"},
    {"review": "Housekeeping was thorough and left the room sparkling clean every day.", "sentiment": "positive", "category": "cleanliness"},
    {"review": "The hotel maintains impeccable cleanliness standards. Truly impressive!", "sentiment": "positive", "category": "cleanliness"},
    {"review": "Every corner of my room was spotless. Great attention to hygiene.", "sentiment": "positive", "category": "cleanliness"},

    # POSITIVE REVIEWS - Food
    {"review": "The breakfast buffet was extraordinary! So many delicious options to choose from.", "sentiment": "positive", "category": "food"},
    {"review": "Restaurant food was absolutely delicious. The chef is incredibly talented.", "sentiment": "positive", "category": "food"},
    {"review": "Best hotel dining experience ever! The menu variety was impressive.", "sentiment": "positive", "category": "food"},
    {"review": "Room service was prompt and the food quality exceeded expectations.", "sentiment": "positive", "category": "food"},
    {"review": "Amazing culinary experience at the hotel restaurant. Highly recommend the local cuisine.", "sentiment": "positive", "category": "food"},
    {"review": "The food quality was outstanding. Fresh ingredients and excellent presentation.", "sentiment": "positive", "category": "food"},
    {"review": "Breakfast was fantastic with fresh fruits, pastries, and made-to-order eggs.", "sentiment": "positive", "category": "food"},
    {"review": "The restaurant offers amazing cuisine. Every meal was a delight!", "sentiment": "positive", "category": "food"},

    # POSITIVE REVIEWS - Staff
    {"review": "Staff was incredibly friendly and went above and beyond to help us.", "sentiment": "positive", "category": "staff"},
    {"review": "The reception team was welcoming and efficient. Check-in was a breeze.", "sentiment": "positive", "category": "staff"},
    {"review": "Concierge service was exceptional. They arranged everything perfectly.", "sentiment": "positive", "category": "staff"},
    {"review": "All staff members were polite, professional, and always smiling.", "sentiment": "positive", "category": "staff"},
    {"review": "The hotel employees made our stay memorable with their hospitality.", "sentiment": "positive", "category": "staff"},
    {"review": "Front desk staff was extremely helpful and accommodating.", "sentiment": "positive", "category": "staff"},
    {"review": "The bellboy was courteous and helpful. Staff training is excellent here.", "sentiment": "positive", "category": "staff"},
    {"review": "Service was top-notch. The staff anticipated our needs before we asked.", "sentiment": "positive", "category": "staff"},

    # POSITIVE REVIEWS - Amenities
    {"review": "The swimming pool was beautiful and the gym had all modern equipment.", "sentiment": "positive", "category": "amenities"},
    {"review": "Loved the spa facilities! The massage was absolutely relaxing.", "sentiment": "positive", "category": "amenities"},
    {"review": "Free WiFi was fast and reliable throughout my stay.", "sentiment": "positive", "category": "amenities"},
    {"review": "The rooftop bar offered stunning views. Great amenities overall.", "sentiment": "positive", "category": "amenities"},
    {"review": "Room had all modern amenities including smart TV and coffee maker.", "sentiment": "positive", "category": "amenities"},
    {"review": "The fitness center was well-equipped and open 24 hours.", "sentiment": "positive", "category": "amenities"},
    {"review": "Excellent pool area with comfortable loungers and great service.", "sentiment": "positive", "category": "amenities"},
    {"review": "The hotel gym was fantastic with all the latest equipment.", "sentiment": "positive", "category": "amenities"},

    # NEGATIVE REVIEWS - Cleanliness
    {"review": "The room was dirty and had stains on the carpet. Very disappointing.", "sentiment": "negative", "category": "cleanliness"},
    {"review": "Found hair in the bathroom and dust under the bed. Unacceptable!", "sentiment": "negative", "category": "cleanliness"},
    {"review": "Housekeeping never came to clean our room during our 3-day stay.", "sentiment": "negative", "category": "cleanliness"},
    {"review": "The bathroom was disgusting with mold in the shower. Terrible hygiene.", "sentiment": "negative", "category": "cleanliness"},
    {"review": "Sheets looked used and there were bugs in the room. Horrible experience.", "sentiment": "negative", "category": "cleanliness"},
    {"review": "The room smelled musty and the towels were not fresh.", "sentiment": "negative", "category": "cleanliness"},
    {"review": "Dirty room with stained sheets. Cleanliness was below standard.", "sentiment": "negative", "category": "cleanliness"},
    {"review": "Found cockroaches in the bathroom. Absolutely disgusting!", "sentiment": "negative", "category": "cleanliness"},

    # NEGATIVE REVIEWS - Food
    {"review": "The breakfast was cold and tasteless. Very limited options available.", "sentiment": "negative", "category": "food"},
    {"review": "Restaurant food was overpriced and quality was poor.", "sentiment": "negative", "category": "food"},
    {"review": "Got food poisoning from the hotel restaurant. Terrible experience.", "sentiment": "negative", "category": "food"},
    {"review": "Room service took 2 hours and the food arrived cold.", "sentiment": "negative", "category": "food"},
    {"review": "The buffet selection was disappointing and food was stale.", "sentiment": "negative", "category": "food"},
    {"review": "Worst hotel food I've ever had. Tasteless and overcooked.", "sentiment": "negative", "category": "food"},
    {"review": "The restaurant was dirty and food was not fresh.", "sentiment": "negative", "category": "food"},
    {"review": "Breakfast was terrible with limited choices and cold items.", "sentiment": "negative", "category": "food"},

    # NEGATIVE REVIEWS - Staff
    {"review": "Staff was rude and unhelpful. They ignored our requests.", "sentiment": "negative", "category": "staff"},
    {"review": "Reception desk kept us waiting for 30 minutes with no apology.", "sentiment": "negative", "category": "staff"},
    {"review": "The manager was dismissive when we complained about issues.", "sentiment": "negative", "category": "staff"},
    {"review": "Staff seemed untrained and couldn't answer basic questions.", "sentiment": "negative", "category": "staff"},
    {"review": "Terrible customer service. No one seemed to care about guests.", "sentiment": "negative", "category": "staff"},
    {"review": "Front desk was incredibly rude and unprofessional.", "sentiment": "negative", "category": "staff"},
    {"review": "Staff attitude was horrible. They made us feel unwelcome.", "sentiment": "negative", "category": "staff"},
    {"review": "The employees were lazy and unresponsive to our needs.", "sentiment": "negative", "category": "staff"},

    # NEGATIVE REVIEWS - Amenities
    {"review": "The pool was closed for maintenance during our entire stay.", "sentiment": "negative", "category": "amenities"},
    {"review": "WiFi was extremely slow and kept disconnecting constantly.", "sentiment": "negative", "category": "amenities"},
    {"review": "The gym equipment was broken and outdated. Very disappointing.", "sentiment": "negative", "category": "amenities"},
    {"review": "Air conditioning didn't work and the room was unbearably hot.", "sentiment": "negative", "category": "amenities"},
    {"review": "No hot water in the bathroom. Basic amenities were lacking.", "sentiment": "negative", "category": "amenities"},
    {"review": "The elevator was broken and we had to climb 10 floors.", "sentiment": "negative", "category": "amenities"},
    {"review": "TV didn't work and the minibar was empty despite being advertised.", "sentiment": "negative", "category": "amenities"},
    {"review": "Pool was dirty and the gym was closed without notice.", "sentiment": "negative", "category": "amenities"},

    # NEUTRAL REVIEWS - Overall (every neutral seed review uses the "overall" category)
    {"review": "The hotel was okay. Nothing special but nothing terrible either.", "sentiment": "neutral", "category": "overall"},
    {"review": "Average experience. Room was decent but not luxurious.", "sentiment": "neutral", "category": "overall"},
    {"review": "Standard hotel stay. Met expectations but didn't exceed them.", "sentiment": "neutral", "category": "overall"},
    {"review": "It was fine for the price. Basic accommodation nothing fancy.", "sentiment": "neutral", "category": "overall"},
    {"review": "The room was adequate. Some good points and some areas to improve.", "sentiment": "neutral", "category": "overall"},
    {"review": "Regular hotel experience. Not bad but not memorable.", "sentiment": "neutral", "category": "overall"},
    {"review": "The stay was acceptable. Room was clean but small.", "sentiment": "neutral", "category": "overall"},
    {"review": "Mediocre experience overall. Some things were good, others not.", "sentiment": "neutral", "category": "overall"},
    {"review": "The hotel is decent for a business trip. Nothing extraordinary.", "sentiment": "neutral", "category": "overall"},
    {"review": "It serves its purpose. Basic amenities available.", "sentiment": "neutral", "category": "overall"},

    # ADDITIONAL REVIEWS - extra positive/negative examples for the
    # "overall" and "amenities" categories (none of these are neutral)
    {"review": "Wonderful location and beautiful decor. Loved every moment of our stay!", "sentiment": "positive", "category": "overall"},
    {"review": "Perfect honeymoon destination. Everything was absolutely perfect.", "sentiment": "positive", "category": "overall"},
    {"review": "Will definitely come back! Best hotel experience in years.", "sentiment": "positive", "category": "overall"},
    {"review": "Never staying here again. Worst hotel experience of my life.", "sentiment": "negative", "category": "overall"},
    {"review": "Overpriced for what you get. Expected much better quality.", "sentiment": "negative", "category": "overall"},
    {"review": "Complete waste of money. I want a full refund.", "sentiment": "negative", "category": "overall"},
    {"review": "The view from the room was breathtaking. Excellent value for money.", "sentiment": "positive", "category": "amenities"},
    {"review": "Noisy neighbors and thin walls. Couldn't sleep properly.", "sentiment": "negative", "category": "amenities"},
    {"review": "The bed was incredibly comfortable. Slept like a baby!", "sentiment": "positive", "category": "amenities"},
    {"review": "Mattress was old and lumpy. Back pain after sleeping.", "sentiment": "negative", "category": "amenities"},
]

# Load the seed reviews into a DataFrame (one row per review dict) and
# report how the labels are distributed.
df = pd.DataFrame(reviews_data)
sentiment_counts = df['sentiment'].value_counts()
category_counts = df['category'].value_counts()

print(f"üìä Total reviews collected: {len(df)}")
print(f"\nüìà Sentiment Distribution:")
print(sentiment_counts)
print(f"\nüìÅ Category Distribution:")
print(category_counts)

In [None]:
# Data Augmentation - expand phrase templates into extra labelled samples.
# Every keyword below is substituted into the "{0}" slot of every template.
categories_keywords = {
    "cleanliness": ["cleanliness", "housekeeping", "room hygiene", "cleaning service"],
    "food": ["food quality", "restaurant", "breakfast buffet", "dining experience"],
    "staff": ["staff service", "customer service", "hospitality", "front desk"],
    "amenities": ["pool", "gym facilities", "WiFi connection", "room amenities"]
}

# Positive augmentation templates
positive_phrases = [
    "Absolutely loved the {0}! Exceeded all my expectations.",
    "The {0} was fantastic and made our stay wonderful.",
    "Impressed by the excellent {0}. Will definitely return!",
    "Outstanding {0}! The hotel really knows how to treat guests.",
    "The {0} here is world-class. Highly recommend!",
    "Amazing {0}! Can't say enough good things about it.",
    "The {0} exceeded expectations. Top quality!",
    "Wonderful experience with the {0}. Simply perfect!"
]

# Negative augmentation templates
negative_phrases = [
    "Very disappointed with the {0}. Not worth the money.",
    "The {0} was terrible and ruined our vacation.",
    "Awful {0}! Never experienced something this bad.",
    "The {0} needs serious improvement. Unacceptable!",
    "Horrible {0}! I expected much better for the price.",
    "The {0} was a complete disaster. Very upset.",
    "Worst {0} I've ever seen. Absolutely terrible!",
    "The {0} was unacceptable. Major disappointment."
]

# Neutral augmentation templates
neutral_phrases = [
    "The {0} was okay. Neither good nor bad.",
    "Average {0}. Could be better but acceptable.",
    "The {0} was standard. Nothing remarkable.",
    "Moderate {0}. Met basic expectations."
]

augmented_reviews = []

# First pass: for each keyword, every positive template followed by every
# negative template.
for category, keywords in categories_keywords.items():
    for keyword in keywords:
        for label, templates in (("positive", positive_phrases),
                                 ("negative", negative_phrases)):
            augmented_reviews.extend(
                {"review": template.format(keyword), "sentiment": label, "category": category}
                for template in templates
            )

# Second pass: neutral samples are appended after all positive/negative ones.
for category, keywords in categories_keywords.items():
    for keyword in keywords:
        augmented_reviews.extend(
            {"review": template.format(keyword), "sentiment": "neutral", "category": category}
            for template in neutral_phrases
        )

# Append the synthetic rows below the seed reviews; ignore_index renumbers
# the combined frame from 0 so row labels stay unique.
df_augmented = pd.DataFrame(augmented_reviews)
df = pd.concat((df, df_augmented), ignore_index=True)
sentiment_counts_after = df['sentiment'].value_counts()

print(f"üìä Total reviews after augmentation: {len(df)}")
print(f"\nüìà Sentiment Distribution:")
print(sentiment_counts_after)

## Step 4: Text Preprocessing

In [None]:
# Initialize NLP tools
lemmatizer = WordNetLemmatizer()

# Negations and intensifiers that must survive stopword removal because they
# carry sentiment.
important_words = {'not', 'no', 'never', 'neither', 'nobody', 'nothing',
                   'nowhere', 'hardly', 'barely', 'very', 'really', 'absolutely'}

# English stopword list minus the sentiment-bearing words above
stop_words = set(stopwords.words('english')).difference(important_words)

def preprocess_text(text):
    """Normalise a raw review into a space-joined string of lemmatized tokens.

    Steps: lowercase the text; delete every character that is not an ASCII
    letter or whitespace; tokenize with ``word_tokenize``; drop tokens that
    are in the module-level ``stop_words`` set or shorter than three
    characters; lemmatize the remaining tokens.

    NOTE(review): punctuation is deleted rather than replaced by a space, so
    "well-maintained" becomes "wellmaintained". The length filter also
    discards two-letter words such as "no", even though "no" is listed in
    ``important_words``.
    """
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())

    kept_tokens = []
    for token in word_tokenize(letters_only):
        # Skip stopwords and very short tokens (one or two characters)
        if len(token) <= 2 or token in stop_words:
            continue
        kept_tokens.append(lemmatizer.lemmatize(token))

    return ' '.join(kept_tokens)

# Clean every review and store the result next to the raw text
cleaned_reviews = df['review'].apply(preprocess_text)
df['cleaned_review'] = cleaned_reviews

print("‚úÖ Text preprocessing completed!")
print("\nüìù Sample preprocessed reviews:")
# Show the first three rows before and after cleaning
for i in range(3):
    print(f"\nOriginal: {df['review'].iloc[i]}")
    print(f"Cleaned: {df['cleaned_review'].iloc[i]}")

## Step 5: Prepare Data for LSTM Model

In [None]:
# Label encodings: the integer ids double as the one-hot column indices
sentiment_mapping = dict(negative=0, neutral=1, positive=2)
category_mapping = dict(cleanliness=0, food=1, staff=2, amenities=3, overall=4)

df['sentiment_encoded'] = df['sentiment'].map(sentiment_mapping)
df['category_encoded'] = df['category'].map(category_mapping)

# Tokenization limits
MAX_WORDS = 5000   # vocabulary cap passed to the Tokenizer
MAX_LEN = 100      # every sequence is padded/truncated to this length

# Fit the vocabulary on the cleaned text; unseen words map to '<OOV>'
tokenizer = Tokenizer(num_words=MAX_WORDS, oov_token='<OOV>')
tokenizer.fit_on_texts(df['cleaned_review'])

# Text -> integer id sequences -> fixed-length matrix (padded and truncated
# at the end of each sequence)
sequences = tokenizer.texts_to_sequences(df['cleaned_review'])
X = pad_sequences(
    sequences,
    maxlen=MAX_LEN,
    padding='post',
    truncating='post',
)

# One-hot label matrices for the two classifiers
y_sentiment = to_categorical(df['sentiment_encoded'], num_classes=len(sentiment_mapping))
y_category = to_categorical(df['category_encoded'], num_classes=len(category_mapping))

print(f"‚úÖ Data prepared for LSTM!")
print(f"üìä Vocabulary size: {len(tokenizer.word_index)}")
print(f"üìê Input shape: {X.shape}")
print(f"üéØ Sentiment labels shape: {y_sentiment.shape}")
print(f"üìÅ Category labels shape: {y_category.shape}")

In [None]:
# Split features and BOTH label sets in a single call so that X_train/X_test
# are guaranteed to be row-aligned with the sentiment and the category labels.
# (Two separate calls only line up if seed and stratify are repeated exactly.)
# Stratification is on the sentiment label, as before.
(
    X_train, X_test,
    y_train_sent, y_test_sent,
    y_train_cat, y_test_cat,
) = train_test_split(
    X, y_sentiment, y_category,
    test_size=0.2, random_state=42, stratify=df['sentiment_encoded']
)

print(f"‚úÖ Data split completed!")
print(f"üìà Training samples: {X_train.shape[0]}")
print(f"üß™ Testing samples: {X_test.shape[0]}")

## Step 6: Build LSTM Model for Sentiment Analysis

In [None]:
# Build Bidirectional LSTM Model for Sentiment
EMBEDDING_DIM = 128  # size of each learned word vector

def build_sentiment_model():
    """Build and compile the 3-class sentiment classifier.

    Architecture: integer token ids of length ``MAX_LEN`` -> embedding over a
    ``MAX_WORDS`` vocabulary -> two stacked bidirectional LSTMs (128 then 64
    units) with dropout -> two dense ReLU layers -> 3-way softmax.
    Compiled with Adam and categorical cross-entropy, so labels must be
    one-hot encoded.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential([
        # An explicit Input layer replaces Embedding(input_length=...), which
        # current Keras releases deprecate. It also builds the model up front
        # so summary() can report output shapes and parameter counts.
        Input(shape=(MAX_LEN,), dtype='int32'),
        Embedding(MAX_WORDS, EMBEDDING_DIM),
        Bidirectional(LSTM(128, return_sequences=True)),
        Dropout(0.3),
        Bidirectional(LSTM(64)),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dense(3, activation='softmax')  # 3 classes: negative, neutral, positive
    ])

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

sentiment_model = build_sentiment_model()
sentiment_model.summary()

In [None]:
# Build Category Detection Model
def build_category_model():
    """Build and compile the 5-class review-category classifier.

    Architecture: integer token ids of length ``MAX_LEN`` -> embedding over a
    ``MAX_WORDS`` vocabulary -> two stacked bidirectional LSTMs (64 then 32
    units) with dropout -> one dense ReLU layer -> 5-way softmax.
    Compiled with Adam and categorical cross-entropy, so labels must be
    one-hot encoded.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential([
        # An explicit Input layer replaces Embedding(input_length=...), which
        # current Keras releases deprecate. It also builds the model up front
        # so summary() can report output shapes and parameter counts.
        Input(shape=(MAX_LEN,), dtype='int32'),
        Embedding(MAX_WORDS, EMBEDDING_DIM),
        Bidirectional(LSTM(64, return_sequences=True)),
        Dropout(0.3),
        Bidirectional(LSTM(32)),
        Dropout(0.3),
        Dense(32, activation='relu'),
        Dense(5, activation='softmax')  # 5 categories
    ])

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

category_model = build_category_model()
category_model.summary()

## Step 7: Train the Models

In [None]:
# Callbacks: stop when validation loss has not improved for 5 epochs and
# roll the model back to its best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train Sentiment Model
print("üöÄ Training Sentiment Model...")
sentiment_fit_options = dict(
    epochs=30,
    batch_size=32,
    validation_split=0.2,   # hold out 20% of the training rows for validation
    callbacks=[early_stopping],
    verbose=1,
)
sentiment_history = sentiment_model.fit(X_train, y_train_sent, **sentiment_fit_options)

print("\n‚úÖ Sentiment Model Training Completed!")

In [None]:
# Train Category Model (same schedule and early-stopping callback as the
# sentiment model)
print("üöÄ Training Category Model...")
category_fit_options = dict(
    epochs=30,
    batch_size=32,
    validation_split=0.2,   # hold out 20% of the training rows for validation
    callbacks=[early_stopping],
    verbose=1,
)
category_history = category_model.fit(X_train, y_train_cat, **category_fit_options)

print("\n‚úÖ Category Model Training Completed!")

## Step 8: Visualize Training Results

In [None]:
# Plot training history: one row per model, one column per metric
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

history_panels = (
    (sentiment_history, 'Sentiment Model'),
    (category_history, 'Category Model'),
)
metric_columns = (
    ('accuracy', 'Accuracy'),
    ('loss', 'Loss'),
)

for row, (fit_history, model_title) in enumerate(history_panels):
    for col, (metric_key, metric_name) in enumerate(metric_columns):
        ax = axes[row, col]
        # Training curve first, then the matching 'val_' curve
        ax.plot(fit_history.history[metric_key], label=f'Training {metric_name}')
        ax.plot(fit_history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
        ax.set_title(f'{model_title} - {metric_name}')
        ax.set_xlabel('Epoch')
        ax.set_ylabel(metric_name)
        ax.legend()
        ax.grid(True)

plt.tight_layout()
plt.savefig('training_history.png', dpi=300, bbox_inches='tight')
plt.show()

print("üìä Training visualization saved!")

## Step 9: Evaluate Models

In [None]:
# Evaluate Sentiment Model on the held-out test split
print("üìä Sentiment Model Evaluation:")
print("="*50)

sentiment_loss, sentiment_acc = sentiment_model.evaluate(X_test, y_test_sent, verbose=0)
print(f"Test Loss: {sentiment_loss:.4f}")
print(f"Test Accuracy: {sentiment_acc:.4f}")

# Probability rows -> predicted class ids; one-hot rows -> true class ids
y_pred_sent = sentiment_model.predict(X_test)
y_pred_sent_classes = y_pred_sent.argmax(axis=1)
y_true_sent_classes = y_test_sent.argmax(axis=1)

# Per-class precision / recall / F1
print("\nüìã Classification Report:")
sentiment_report = classification_report(
    y_true_sent_classes,
    y_pred_sent_classes,
    target_names=['Negative', 'Neutral', 'Positive'],
)
print(sentiment_report)

In [None]:
# Confusion Matrix for Sentiment (rows = actual class, columns = predicted)
cm = confusion_matrix(y_true_sent_classes, y_pred_sent_classes)
sentiment_class_names = ['Negative', 'Neutral', 'Positive']

plt.figure(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=sentiment_class_names,
    yticklabels=sentiment_class_names,
)
plt.title('Sentiment Analysis - Confusion Matrix', fontsize=14)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.savefig('confusion_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Visualize Sentiment Distribution of the test-set predictions
sentiment_distribution = {'positive': 0, 'negative': 0, 'neutral': 0}
reverse_sentiment_mapping = {0: 'negative', 1: 'neutral', 2: 'positive'}

# Colour is looked up by label rather than by position. A positional colour
# list would not match the bar order (positive, negative, neutral) and would
# paint "positive" red and "neutral" green.
sentiment_colors = {
    'negative': '#ff6b6b',  # red
    'neutral': '#ffd93d',   # yellow
    'positive': '#6bcb77',  # green
}

# Count how many test reviews were assigned to each sentiment
for pred in y_pred_sent_classes:
    sentiment = reverse_sentiment_mapping[int(pred)]
    sentiment_distribution[sentiment] += 1

labels = list(sentiment_distribution.keys())
values = list(sentiment_distribution.values())
colors = [sentiment_colors[label] for label in labels]

plt.figure(figsize=(10, 6))
bars = plt.bar(labels, values, color=colors, edgecolor='black', linewidth=1.5)

# Add value labels on bars
for bar, val in zip(bars, values):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5,
             str(val), ha='center', va='bottom', fontsize=14, fontweight='bold')

plt.title('Sentiment Distribution of Predictions', fontsize=16, fontweight='bold')
plt.xlabel('Sentiment', fontsize=12)
plt.ylabel('Count', fontsize=12)
plt.savefig('sentiment_distribution.png', dpi=300, bbox_inches='tight')
plt.show()

## Step 10: Test the Model with Custom Reviews

In [None]:
def predict_review(review_text):
    """Classify one raw review with both trained models.

    The text is cleaned with ``preprocess_text``, converted to token ids by
    the fitted ``tokenizer``, padded to ``MAX_LEN`` and passed to
    ``sentiment_model`` and ``category_model``.

    Returns:
        A dict with the original ``review``, the winning ``sentiment`` and
        ``category`` labels, their confidences formatted as percentage
        strings, and ``sentiment_scores`` holding the percentage for each of
        the three sentiment classes.
    """
    # Display labels keyed by class id
    sentiment_labels = {0: 'Negative üòû', 1: 'Neutral üòê', 2: 'Positive üòä'}
    category_labels = {0: 'Cleanliness üßπ', 1: 'Food üçΩÔ∏è', 2: 'Staff üë®‚Äçüíº', 3: 'Amenities üèä', 4: 'Overall üè®'}

    def as_percent(probability):
        # Probability in [0, 1] -> string such as "87.5%"
        return f"{probability * 100:.1f}%"

    # Same cleaning / tokenizing / padding pipeline as used for training
    token_ids = tokenizer.texts_to_sequences([preprocess_text(review_text)])
    model_input = pad_sequences(token_ids, maxlen=MAX_LEN, padding='post', truncating='post')

    # Each model returns one probability row for the single input review
    sentiment_probs = sentiment_model.predict(model_input, verbose=0)[0]
    sentiment_class = np.argmax(sentiment_probs)

    category_probs = category_model.predict(model_input, verbose=0)[0]
    category_class = np.argmax(category_probs)

    return {
        'review': review_text,
        'sentiment': sentiment_labels[sentiment_class],
        'sentiment_confidence': as_percent(sentiment_probs[sentiment_class]),
        'category': category_labels[category_class],
        'category_confidence': as_percent(category_probs[category_class]),
        'sentiment_scores': {
            'negative': as_percent(sentiment_probs[0]),
            'neutral': as_percent(sentiment_probs[1]),
            'positive': as_percent(sentiment_probs[2])
        }
    }

# Hand-picked reviews used as a quick sanity check of both models
test_reviews = [
    "The room was absolutely amazing and the staff were incredibly helpful!",
    "Terrible experience. The bathroom was dirty and the food was cold.",
    "It was an okay stay. Nothing special but met basic expectations.",
    "The swimming pool was fantastic and the gym had great equipment!",
    "Staff was rude and the WiFi didn't work at all."
]

print("üß™ Testing Model with Sample Reviews:")
print("="*70)

# map() is lazy, so each review is still predicted right before it is printed
for result in map(predict_review, test_reviews):
    print(f"\nüìù Review: {result['review']}")
    print(f"   üé≠ Sentiment: {result['sentiment']} ({result['sentiment_confidence']})")
    print(f"   üìÅ Category: {result['category']} ({result['category_confidence']})")
    print(f"   üìä Scores: {result['sentiment_scores']}")
    print("-"*70)

## Step 11: Save Models and Create Download Package

In [None]:
# Create output directory (no error if it already exists)
os.makedirs('hotel_sentiment_model', exist_ok=True)

# Save both Keras models in HDF5 format, reporting each one as it is written
for trained_model, h5_path, saved_msg in (
    (sentiment_model, 'hotel_sentiment_model/sentiment_model.h5', "‚úÖ Sentiment model saved!"),
    (category_model, 'hotel_sentiment_model/category_model.h5', "‚úÖ Category model saved!"),
):
    trained_model.save(h5_path)
    print(saved_msg)

# Save the fitted tokenizer so inference uses the same word -> id mapping
with open('hotel_sentiment_model/tokenizer.pickle', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file, protocol=pickle.HIGHEST_PROTOCOL)
print("‚úÖ Tokenizer saved!")

# Save model configuration.
# NOTE: JSON object keys are always strings, so the integer class ids in the
# label maps are written as "0", "1", ... in config.json.
config = {
    'max_words': MAX_WORDS,
    'max_len': MAX_LEN,
    'embedding_dim': EMBEDDING_DIM,
    'sentiment_labels': {0: 'negative', 1: 'neutral', 2: 'positive'},
    'category_labels': {0: 'cleanliness', 1: 'food', 2: 'staff', 3: 'amenities', 4: 'overall'},
    'model_version': '1.0',
    'training_date': '2024'
}

with open('hotel_sentiment_model/config.json', 'w') as config_file:
    config_file.write(json.dumps(config, indent=4))
print("‚úÖ Configuration saved!")

# Source code of a standalone preprocessing module shipped with the models.
# It repeats the notebook's preprocess_text logic so inference code can clean
# text the same way. The '\\s' below is written to the file as '\s' because
# this is a regular (non-raw) string literal.
preprocess_code = '''
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Download required NLTK data
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
nltk.download('wordnet', quiet=True)
nltk.download('punkt_tab', quiet=True)

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))
important_words = {'not', 'no', 'never', 'neither', 'nobody', 'nothing', 
                   'nowhere', 'hardly', 'barely', 'very', 'really', 'absolutely'}
stop_words = stop_words - important_words

def preprocess_text(text):
    """Clean and preprocess text for model prediction"""
    text = text.lower()
    text = re.sub(r'[^a-zA-Z\\s]', '', text)
    tokens = word_tokenize(text)
    tokens = [lemmatizer.lemmatize(token) for token in tokens 
              if token not in stop_words and len(token) > 2]
    return ' '.join(tokens)
'''

# Write the module next to the saved models
with open('hotel_sentiment_model/preprocessing.py', 'w') as script_file:
    script_file.write(preprocess_code)
print("‚úÖ Preprocessing script saved!")

In [None]:
# Bundle everything under hotel_sentiment_model/ into one ZIP for download
zip_filename = 'hotel_sentiment_model.zip'

with zipfile.ZipFile(zip_filename, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
    for folder, _subdirs, filenames in os.walk('hotel_sentiment_model'):
        for filename in filenames:
            member_path = os.path.join(folder, filename)
            # Store each file under its path relative to the working directory
            archive.write(member_path, os.path.relpath(member_path, '.'))

# Archive size in megabytes
file_size = os.path.getsize(zip_filename) / (1024 * 1024)  # MB

print(f"\nüéâ Model package created successfully!")
print(f"üì¶ File: {zip_filename}")
print(f"üìè Size: {file_size:.2f} MB")
print("\nüìÅ Package contents:")
for contents_line in (
    "   - sentiment_model.h5 (LSTM sentiment classifier)",
    "   - category_model.h5 (Category classifier)",
    "   - tokenizer.pickle (Text tokenizer)",
    "   - config.json (Model configuration)",
    "   - preprocessing.py (Text preprocessing functions)",
):
    print(contents_line)

In [None]:
# Download the ZIP file (for Google Colab)
# Only the import decides whether this is running on Colab. A bare ``except``
# around the whole block would also hide real download failures (and Ctrl-C)
# behind the "running locally" message.
try:
    from google.colab import files
except ImportError:
    # google.colab is not installed, so this is not Colab: report the path
    print(f"\nüì• Running locally - find the file at: {os.path.abspath(zip_filename)}")
else:
    files.download(zip_filename)
    print("\n‚¨áÔ∏è Download started! Check your browser downloads.")

## 🎉 Training Complete!

### What's in the downloaded package:

1. **sentiment_model.h5** - Trained LSTM model for sentiment classification (Positive/Negative/Neutral)
2. **category_model.h5** - Trained LSTM model for category detection (Cleanliness/Food/Staff/Amenities/Overall)
3. **tokenizer.pickle** - Fitted tokenizer for text preprocessing
4. **config.json** - Model configuration and label mappings
5. **preprocessing.py** - Python script with text preprocessing functions

### Next Steps:
1. Download the `hotel_sentiment_model.zip` file
2. Extract it to your FastAPI backend's `model/` directory
3. Use the models with the provided FastAPI backend