<a href="https://colab.research.google.com/github/sabbir179/test2/blob/main/HospitalityGuestExperience.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
from pathlib import Path

import numpy as np
import pandas as pd
from faker import Faker

fake = Faker()

# Seed every random source so that "Restart & Run All" regenerates the same
# dataset. Seeding NumPy alone does not cover Faker, which keeps its own
# random generator (used below for GuestID, Name and LastStayDate).
# Note: date_this_year() is also relative to the day the cell is run, so
# LastStayDate is only reproducible within the same calendar day.
SEED = 42
np.random.seed(SEED)
Faker.seed(SEED)

# Generate the dataset
num_records = 100

data = {
    "GuestID": [fake.unique.uuid4() for _ in range(num_records)],
    "Name": [fake.name() for _ in range(num_records)],
    "Age": np.random.randint(18, 70, num_records),  # upper bound is exclusive: 18..69
    "Gender": np.random.choice(["Male", "Female"], num_records),
    "MembershipLevel": np.random.choice(["Silver", "Gold", "Platinum"], num_records),
    "LastStayDate": [fake.date_this_year() for _ in range(num_records)],
    "RoomTypePreferred": np.random.choice(["Single", "Double", "Suite"], num_records),
    "AverageStayLength": np.random.randint(1, 15, num_records),  # 1..14
    "LastFeedbackRating": np.random.randint(1, 6, num_records),  # 1..5
    # pandas.read_csv treats the literal string "None" as a missing value by
    # default, so anything reading this CSV back must opt out
    # (e.g. keep_default_na=False) to keep "None" as a real category.
    "SpecialRequests": np.random.choice(
        ["None", "Extra Pillows", "Vegan Meals", "Late Checkout"], num_records
    ),
    "SpaUsage": np.random.choice([True, False], num_records),
    "DiningPreference": np.random.choice(
        ["Room Service", "Restaurant", "Buffet"], num_records
    ),
}

df = pd.DataFrame(data)

# Save the dataset to a CSV file. The target directory is created first
# because to_csv raises if the parent directory does not exist.
output_path = Path("/mnt/data/fake_hilton_guest_data.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)
df.head()


Unnamed: 0,GuestID,Name,Age,Gender,MembershipLevel,LastStayDate,RoomTypePreferred,AverageStayLength,LastFeedbackRating,SpecialRequests,SpaUsage,DiningPreference
0,2bc63fac-492d-4f1e-a137-aeb2afe3d59d,John George,56,Male,Gold,2024-04-09,Single,2,1,,True,Restaurant
1,e1ddae8b-1fae-4b7d-b236-9e840307ba0a,Kristine Stevens,69,Male,Gold,2024-06-21,Double,11,5,Vegan Meals,True,Restaurant
2,25217a3b-1ff7-4c15-89c4-e78e01cb9f98,Stacy Johnson,46,Male,Platinum,2024-01-05,Single,10,5,Late Checkout,False,Restaurant
3,d1d1afb5-3d10-4bae-88d0-c0cd619115bd,Mitchell Sutton,32,Male,Gold,2024-06-13,Double,2,1,Late Checkout,False,Restaurant
4,7852e431-887a-44ac-a3b2-95e602651270,Wendy Marquez,60,Male,Platinum,2024-03-05,Single,10,5,Vegan Meals,True,Room Service


In [2]:
pip install Faker

Collecting Faker
  Downloading Faker-26.0.0-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Faker
Successfully installed Faker-26.0.0


In [6]:
# Total number of cells in the frame (rows x columns).
df.size


1200

Step-2: Load and Explore the Dataset

In [7]:
import pandas as pd

# Load the dataset.
# By default read_csv converts the literal string "None" to NaN, which would
# silently blank out the "None" category of SpecialRequests. Restrict the
# missing-value markers to the empty string so that label survives the
# CSV round trip.
df = pd.read_csv(
    '/mnt/data/fake_hilton_guest_data.csv',
    keep_default_na=False,
    na_values=[''],
)

# Display the first few rows of the dataset
df.head()


Unnamed: 0,GuestID,Name,Age,Gender,MembershipLevel,LastStayDate,RoomTypePreferred,AverageStayLength,LastFeedbackRating,SpecialRequests,SpaUsage,DiningPreference
0,2bc63fac-492d-4f1e-a137-aeb2afe3d59d,John George,56,Male,Gold,2024-04-09,Single,2,1,,True,Restaurant
1,e1ddae8b-1fae-4b7d-b236-9e840307ba0a,Kristine Stevens,69,Male,Gold,2024-06-21,Double,11,5,Vegan Meals,True,Restaurant
2,25217a3b-1ff7-4c15-89c4-e78e01cb9f98,Stacy Johnson,46,Male,Platinum,2024-01-05,Single,10,5,Late Checkout,False,Restaurant
3,d1d1afb5-3d10-4bae-88d0-c0cd619115bd,Mitchell Sutton,32,Male,Gold,2024-06-13,Double,2,1,Late Checkout,False,Restaurant
4,7852e431-887a-44ac-a3b2-95e602651270,Wendy Marquez,60,Male,Platinum,2024-03-05,Single,10,5,Vegan Meals,True,Room Service


# Step-3: Data Analysis

**Customer Segmentation**

Let's segment the customers by membership level and compare the average stay length, spa usage rate, and feedback rating of each segment.

In [8]:
# One row per MembershipLevel, holding the segment's mean stay length,
# mean SpaUsage (i.e. the share of guests with SpaUsage == True) and
# mean feedback rating.
segmentation = df.groupby('MembershipLevel', as_index=False).agg(
    AverageStayLength=('AverageStayLength', 'mean'),
    SpaUsage=('SpaUsage', 'mean'),
    LastFeedbackRating=('LastFeedbackRating', 'mean'),
)

segmentation


Unnamed: 0,MembershipLevel,AverageStayLength,SpaUsage,LastFeedbackRating
0,Gold,6.741935,0.483871,3.064516
1,Platinum,7.75,0.527778,3.222222
2,Silver,8.151515,0.575758,3.121212


**Preference Analysis**

Let's analyze the preferences for room types and dining options.

In [9]:
# Share of guests per preferred room type (proportions sum to 1).
room_type_preference = (
    df['RoomTypePreferred']
    .value_counts(normalize=True)
    .rename_axis('RoomTypePreferred')
    .reset_index(name='Proportion')
)

# Share of guests per dining preference (proportions sum to 1).
dining_preference = (
    df['DiningPreference']
    .value_counts(normalize=True)
    .rename_axis('DiningPreference')
    .reset_index(name='Proportion')
)

room_type_preference, dining_preference


(  RoomTypePreferred  Proportion
 0            Single        0.38
 1            Double        0.32
 2             Suite        0.30,
   DiningPreference  Proportion
 0     Room Service        0.42
 1       Restaurant        0.34
 2           Buffet        0.24)

# Step-4: Predictive Modeling

We'll create a simple rule-based recommendation from each guest's past behavior: guests who used the spa are offered a spa discount, and everyone else a dining discount.

In [10]:
# Rule-based recommendation: guests with a truthy SpaUsage get the spa
# discount, all others get the dining discount.
df['RecommendedService'] = [
    'Spa Discount' if uses_spa else 'Dining Discount'
    for uses_spa in df['SpaUsage']
]

df.loc[:, ['GuestID', 'Name', 'SpaUsage', 'RecommendedService']]


Unnamed: 0,GuestID,Name,SpaUsage,RecommendedService
0,2bc63fac-492d-4f1e-a137-aeb2afe3d59d,John George,True,Spa Discount
1,e1ddae8b-1fae-4b7d-b236-9e840307ba0a,Kristine Stevens,True,Spa Discount
2,25217a3b-1ff7-4c15-89c4-e78e01cb9f98,Stacy Johnson,False,Dining Discount
3,d1d1afb5-3d10-4bae-88d0-c0cd619115bd,Mitchell Sutton,False,Dining Discount
4,7852e431-887a-44ac-a3b2-95e602651270,Wendy Marquez,True,Spa Discount
...,...,...,...,...
95,1d3371d1-3527-4443-9bd2-2582129840b7,Mary Shelton,False,Dining Discount
96,e3465e7e-cb2b-49be-8159-e3277f2af0f7,Charles Morales,False,Dining Discount
97,e0e4d4fb-2871-43ea-aa0f-8c0ee0d3ccf4,Brian Wright,False,Dining Discount
98,919f9fd2-77d0-4145-a193-84a03c46191e,Tommy Reynolds,True,Spa Discount


# Step-5: Implement Personalization

For the purpose of this example, let's assume we send personalized offers to guests.

In [11]:
# Create personalized offers
def create_offer(row):
    """Return the offer text for one guest record.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['SpaUsage']``; ``row['DiningPreference']`` is
        only read when ``SpaUsage`` is falsy.

    Returns
    -------
    str
        The spa offer for spa users; otherwise the room-service offer when
        the dining preference is 'Room Service', else the breakfast offer.
    """
    # Spa usage takes priority over any dining preference.
    if row['SpaUsage']:
        return "Offer: 20% off on Spa services"

    prefers_room_service = row['DiningPreference'] == 'Room Service'
    return (
        "Offer: Complimentary room service"
        if prefers_room_service
        else "Offer: Free breakfast buffet"
    )

# Evaluate create_offer once per guest row and store the result as a new column.
df['PersonalizedOffer'] = df.apply(create_offer, axis='columns')
df.loc[:, ['GuestID', 'Name', 'PersonalizedOffer']]


Unnamed: 0,GuestID,Name,PersonalizedOffer
0,2bc63fac-492d-4f1e-a137-aeb2afe3d59d,John George,Offer: 20% off on Spa services
1,e1ddae8b-1fae-4b7d-b236-9e840307ba0a,Kristine Stevens,Offer: 20% off on Spa services
2,25217a3b-1ff7-4c15-89c4-e78e01cb9f98,Stacy Johnson,Offer: Free breakfast buffet
3,d1d1afb5-3d10-4bae-88d0-c0cd619115bd,Mitchell Sutton,Offer: Free breakfast buffet
4,7852e431-887a-44ac-a3b2-95e602651270,Wendy Marquez,Offer: 20% off on Spa services
...,...,...,...
95,1d3371d1-3527-4443-9bd2-2582129840b7,Mary Shelton,Offer: Complimentary room service
96,e3465e7e-cb2b-49be-8159-e3277f2af0f7,Charles Morales,Offer: Free breakfast buffet
97,e0e4d4fb-2871-43ea-aa0f-8c0ee0d3ccf4,Brian Wright,Offer: Complimentary room service
98,919f9fd2-77d0-4145-a193-84a03c46191e,Tommy Reynolds,Offer: 20% off on Spa services


**Offer Distribution**

Let's count how many guests receive each personalized offer.

In [12]:
# Number of guests assigned to each personalized offer, most common first.
offer_distribution = (
    df['PersonalizedOffer']
    .value_counts()
    .rename_axis('PersonalizedOffer')
    .reset_index(name='Count')
)
offer_distribution


Unnamed: 0,PersonalizedOffer,Count
0,Offer: 20% off on Spa services,53
1,Offer: Free breakfast buffet,26
2,Offer: Complimentary room service,21
